From 3c86cd5bca03393da14a7c94b1ef3d700962b880 Mon Sep 17 00:00:00 2001 From: babysor00 Date: Sun, 22 Aug 2021 23:44:25 +0800 Subject: [PATCH] =?UTF-8?q?=E3=80=90bugfix=E3=80=91=20fix=20bug=20causing?= =?UTF-8?q?=20non-sense=20output=20for=20long=20texts=20=E4=BF=AE=E5=A4=8D?= =?UTF-8?q?=E5=A4=9A=E6=AE=B5=E6=96=87=E5=AD=97=E5=8F=91=E9=9F=B3=E9=94=99?= =?UTF-8?q?=E8=AF=AF?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- synthesizer/inference.py | 7 ++----- synthesizer/utils/symbols.py | 2 +- toolbox/__init__.py | 8 ++++++++ 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/synthesizer/inference.py b/synthesizer/inference.py index 07cf881..694134f 100644 --- a/synthesizer/inference.py +++ b/synthesizer/inference.py @@ -90,13 +90,10 @@ class Synthesizer: simple_table([("Tacotron", str(tts_k) + "k"), ("r", self._model.r)]) - - #convert chinese char to pinyin - list_of_pinyin = lazy_pinyin(texts, style=Style.TONE3) - texts = [" ".join([v for v in list_of_pinyin if v.strip()])] + texts = [" ".join(lazy_pinyin(v, style=Style.TONE3)) for v in texts] # Preprocess text inputs - inputs = [text_to_sequence(text.strip(), hparams.tts_cleaner_names) for text in texts] + inputs = [text_to_sequence(text, hparams.tts_cleaner_names) for text in texts] if not isinstance(embeddings, list): embeddings = [embeddings] diff --git a/synthesizer/utils/symbols.py b/synthesizer/utils/symbols.py index 2dbec9a..d9c3967 100644 --- a/synthesizer/utils/symbols.py +++ b/synthesizer/utils/symbols.py @@ -8,7 +8,7 @@ through Unidecode. For other data, you can modify _characters. See TRAINING_DATA _pad = "_" _eos = "~" -_characters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz12340!\'(),-.:;? ' +_characters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz1234567890!\'(),-.:;? ' # Prepend "@" to ARPAbet symbols to ensure uniqueness (some are the same as uppercase letters): #_arpabet = ["@' + s for s in cmudict.valid_symbols] diff --git a/toolbox/__init__.py b/toolbox/__init__.py index 74c6b3c..22eda81 100644 --- a/toolbox/__init__.py +++ b/toolbox/__init__.py @@ -10,6 +10,7 @@ import traceback import sys import torch import librosa +import re from audioread.exceptions import NoBackendError # Use this directory structure for your datasets, or modify it to fit your needs @@ -224,6 +225,13 @@ class Toolbox: self.init_synthesizer() texts = self.ui.text_prompt.toPlainText().split("\n") + punctuation = '!,。、,' # punctuate and split/clean text + processed_texts = [] + for text in texts: + for processed_text in re.sub(r'[{}]+'.format(punctuation), '\n', text).split('\n'): + if processed_text: + processed_texts.append(processed_text.strip()) + texts = processed_texts embed = self.ui.selected_utterance.embed embeds = [embed] * len(texts) specs = self.synthesizer.synthesize_spectrograms(texts, embeds)