From 8b43ec9a640563719940dccd8211724373416049 Mon Sep 17 00:00:00 2001
From: babysor00
Date: Tue, 12 Oct 2021 20:01:37 +0800
Subject: [PATCH] Fix bug pre-processing magicdata

---
 synthesizer/preprocess.py            |  5 +++--
 synthesizer/preprocess_transcript.py | 11 ++++++++++-
 2 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/synthesizer/preprocess.py b/synthesizer/preprocess.py
index 430b037..c4779ed 100644
--- a/synthesizer/preprocess.py
+++ b/synthesizer/preprocess.py
@@ -7,7 +7,7 @@ from tqdm import tqdm
 import numpy as np
 from encoder import inference as encoder
 from synthesizer.preprocess_speaker import preprocess_speaker_general
-from synthesizer.preprocess_transcript import preprocess_transcript_aishell3
+from synthesizer.preprocess_transcript import preprocess_transcript_aishell3, preprocess_transcript_magicdata
 
 data_info = {
     "aidatatang_200zh": {
@@ -18,7 +18,8 @@ data_info = {
     "magicdata": {
         "subfolders": ["train"],
         "trans_filepath": "train/TRANS.txt",
-        "speak_func": preprocess_speaker_general
+        "speak_func": preprocess_speaker_general,
+        "transcript_func": preprocess_transcript_magicdata,
     },
     "aishell3":{
         "subfolders": ["train/wav"],
diff --git a/synthesizer/preprocess_transcript.py b/synthesizer/preprocess_transcript.py
index 8810a92..7a26672 100644
--- a/synthesizer/preprocess_transcript.py
+++ b/synthesizer/preprocess_transcript.py
@@ -6,4 +6,13 @@ def preprocess_transcript_aishell3(dict_info, dict_transcript):
         transList = []
         for i in range(2, len(v), 2):
             transList.append(v[i])
-        dict_info[v[0]] = " ".join(transList)
\ No newline at end of file
+        dict_info[v[0]] = " ".join(transList)
+
+
+def preprocess_transcript_magicdata(dict_info, dict_transcript):
+    for v in dict_transcript:
+        if not v:
+            continue
+        v = v.strip().replace("\n","").replace("\t"," ").split(" ")
+        dict_info[v[0]] = " ".join(v[2:])
+    
\ No newline at end of file