From 5950eea895611cd8a1e172d7fd2bcb3b467a4917 Mon Sep 17 00:00:00 2001 From: babysor00 Date: Sun, 29 Aug 2021 15:43:54 +0800 Subject: [PATCH] Support training your own vocoder --- .vscode/launch.json | 28 +++++++++++++++++++ synthesizer/hparams.py | 3 -- synthesizer/synthesize.py | 16 +++++------ ...der_preprocess.py => vocoder_preprocess.py | 2 +- .../vocoder_train.py => vocoder_train.py | 0 5 files changed, 37 insertions(+), 12 deletions(-) create mode 100644 .vscode/launch.json rename archived_untest_files/vocoder_preprocess.py => vocoder_preprocess.py (97%) rename archived_untest_files/vocoder_train.py => vocoder_train.py (100%) diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 0000000..6d33413 --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,28 @@ +{ + // 使用 IntelliSense 了解相关属性。 + // 悬停以查看现有属性的描述。 + // 欲了解更多信息,请访问: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "name": "Python: Vocoder Preprocess", + "type": "python", + "request": "launch", + "program": "vocoder_preprocess.py", + "console": "integratedTerminal", + "args": [ + "..\\..\\chs1" + ], + }, + { + "name": "Python: Vocoder Train", + "type": "python", + "request": "launch", + "program": "vocoder_train.py", + "console": "integratedTerminal", + "args": [ + "dev", "..\\..\\chs1" + ], + } + ] +} \ No newline at end of file diff --git a/synthesizer/hparams.py b/synthesizer/hparams.py index 544aeb5..77db2ce 100644 --- a/synthesizer/hparams.py +++ b/synthesizer/hparams.py @@ -87,6 +87,3 @@ hparams = HParams( silence_min_duration_split = 0.4, # Duration in seconds of a silence for an utterance to be split utterance_min_duration = 1.6, # Duration in seconds below which utterances are discarded ) - -def hparams_debug_string(): - return str(hparams) \ No newline at end of file diff --git a/synthesizer/synthesize.py b/synthesizer/synthesize.py index ff05d0e..e2dd02c 100644 --- a/synthesizer/synthesize.py +++ b/synthesizer/synthesize.py @@ -1,6 +1,5 @@ import torch from torch.utils.data import DataLoader -from synthesizer.hparams import hparams_debug_string from synthesizer.synthesizer_dataset import SynthesizerDataset, collate_synthesizer from synthesizer.models.tacotron import Tacotron from synthesizer.utils.text import text_to_sequence @@ -8,13 +7,14 @@ from synthesizer.utils.symbols import symbols import numpy as np from pathlib import Path from tqdm import tqdm +import sys def run_synthesis(in_dir, out_dir, model_dir, hparams): # This generates ground truth-aligned mels for vocoder training synth_dir = Path(out_dir).joinpath("mels_gta") - synth_dir.mkdir(exist_ok=True) - print(hparams_debug_string(hparams)) + synth_dir.mkdir(parents=True, exist_ok=True) + print(str(hparams)) # Check for GPU if torch.cuda.is_available(): @@ -59,12 +59,12 @@ def run_synthesis(in_dir, out_dir, model_dir, hparams): metadata_fpath = in_dir.joinpath("train.txt") mel_dir = in_dir.joinpath("mels") embed_dir = in_dir.joinpath("embeds") - + num_workers = 0 if sys.platform.startswith("win") else 2; dataset = SynthesizerDataset(metadata_fpath, mel_dir, embed_dir, hparams) data_loader = DataLoader(dataset, - collate_fn=lambda batch: collate_synthesizer(batch, r), + collate_fn=lambda batch: collate_synthesizer(batch), batch_size=hparams.synthesis_batch_size, - num_workers=2, + num_workers=num_workers, shuffle=False, pin_memory=True) @@ -78,9 +78,9 @@ def run_synthesis(in_dir, out_dir, model_dir, hparams): # Parallelize model onto GPUS using workaround due to python bug if device.type == "cuda" and torch.cuda.device_count() > 1: - _, mels_out, _ = data_parallel_workaround(model, texts, mels, embeds) + _, mels_out, _ , _ = data_parallel_workaround(model, texts, mels, embeds) else: - _, mels_out, _ = model(texts, mels, embeds) + _, mels_out, _, _ = model(texts, mels, embeds) for j, k in enumerate(idx): # Note: outputs mel-spectrogram files and target ones have same names, just different folders diff --git a/archived_untest_files/vocoder_preprocess.py b/vocoder_preprocess.py similarity index 97% rename from archived_untest_files/vocoder_preprocess.py rename to vocoder_preprocess.py index 0828d72..b8951a3 100644 --- a/archived_untest_files/vocoder_preprocess.py +++ b/vocoder_preprocess.py @@ -17,7 +17,7 @@ if __name__ == "__main__": "Path to the directory containing your SV2TTS directory. If you specify both --in_dir and " "--out_dir, this argument won't be used.") parser.add_argument("--model_dir", type=str, - default="synthesizer/saved_models/pretrained/", help=\ + default="synthesizer/saved_models/train3/", help=\ "Path to the pretrained model directory.") parser.add_argument("-i", "--in_dir", type=str, default=argparse.SUPPRESS, help= \ "Path to the synthesizer directory that contains the mel spectrograms, the wavs and the " diff --git a/archived_untest_files/vocoder_train.py b/vocoder_train.py similarity index 100% rename from archived_untest_files/vocoder_train.py rename to vocoder_train.py