Fix bug pre-processing magicdata

pull/141/head
babysor00 3 years ago
parent a824b54122
commit 8b43ec9a64

@ -7,7 +7,7 @@ from tqdm import tqdm
import numpy as np import numpy as np
from encoder import inference as encoder from encoder import inference as encoder
from synthesizer.preprocess_speaker import preprocess_speaker_general from synthesizer.preprocess_speaker import preprocess_speaker_general
from synthesizer.preprocess_transcript import preprocess_transcript_aishell3 from synthesizer.preprocess_transcript import preprocess_transcript_aishell3, preprocess_transcript_magicdata
data_info = { data_info = {
"aidatatang_200zh": { "aidatatang_200zh": {
@ -18,7 +18,8 @@ data_info = {
"magicdata": { "magicdata": {
"subfolders": ["train"], "subfolders": ["train"],
"trans_filepath": "train/TRANS.txt", "trans_filepath": "train/TRANS.txt",
"speak_func": preprocess_speaker_general "speak_func": preprocess_speaker_general,
"transcript_func": preprocess_transcript_magicdata,
}, },
"aishell3":{ "aishell3":{
"subfolders": ["train/wav"], "subfolders": ["train/wav"],

@ -6,4 +6,13 @@ def preprocess_transcript_aishell3(dict_info, dict_transcript):
transList = [] transList = []
for i in range(2, len(v), 2): for i in range(2, len(v), 2):
transList.append(v[i]) transList.append(v[i])
dict_info[v[0]] = " ".join(transList) dict_info[v[0]] = " ".join(transList)
def preprocess_transcript_magicdata(dict_info, dict_transcript):
    """Populate ``dict_info`` with one entry per non-blank transcript line.

    Each line is stripped, tabs are converted to spaces, and the result is
    split on single spaces. The first field becomes the key; the second
    field is dropped; the remaining fields are re-joined with single spaces
    and stored as the value.

    NOTE(review): the fields are presumably utterance id, speaker id and
    transcription text -- confirm against the transcript file being parsed.

    Args:
        dict_info: Mutable mapping that is updated in place.
        dict_transcript: Iterable of transcript lines (strings).

    Returns:
        None. Results are written into ``dict_info``.
    """
    for raw_line in dict_transcript:
        # Strip before testing for emptiness: a line such as "\n" is truthy,
        # and would otherwise be split into [""] and stored under the
        # empty-string key.
        line = raw_line.strip() if raw_line else ""
        if not line:
            continue
        fields = line.replace("\n", "").replace("\t", " ").split(" ")
        # fields[1] is skipped on purpose; only the text after it is kept.
        # A line with fewer than three fields yields an empty value.
        dict_info[fields[0]] = " ".join(fields[2:])
Loading…
Cancel
Save