From 5950eea895611cd8a1e172d7fd2bcb3b467a4917 Mon Sep 17 00:00:00 2001
From: babysor00 <babysor00@gmail.com>
Date: Sun, 29 Aug 2021 15:43:54 +0800
Subject: [PATCH] Support training your own vocoder

---
 .vscode/launch.json                           | 28 +++++++++++++++++++
 synthesizer/hparams.py                        |  3 --
 synthesizer/synthesize.py                     | 16 +++++------
 ...der_preprocess.py => vocoder_preprocess.py |  2 +-
 .../vocoder_train.py => vocoder_train.py      |  0
 5 files changed, 37 insertions(+), 12 deletions(-)
 create mode 100644 .vscode/launch.json
 rename archived_untest_files/vocoder_preprocess.py => vocoder_preprocess.py (97%)
 rename archived_untest_files/vocoder_train.py => vocoder_train.py (100%)

diff --git a/.vscode/launch.json b/.vscode/launch.json
new file mode 100644
index 0000000..6d33413
--- /dev/null
+++ b/.vscode/launch.json
@@ -0,0 +1,28 @@
+{
+    // Use IntelliSense to learn about possible attributes.
+    // Hover to view descriptions of existing attributes.
+    // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
+    "version": "0.2.0",
+    "configurations": [
+        {
+            "name": "Python: Vocoder Preprocess",
+            "type": "python",
+            "request": "launch",
+            "program": "vocoder_preprocess.py",
+            "console": "integratedTerminal",
+            "args": [
+                "..\\..\\chs1"
+            ],
+        },
+        {
+            "name": "Python: Vocoder Train",
+            "type": "python",
+            "request": "launch",
+            "program": "vocoder_train.py",
+            "console": "integratedTerminal",
+            "args": [
+                "dev", "..\\..\\chs1"
+            ],
+        }
+    ]
+}
\ No newline at end of file
diff --git a/synthesizer/hparams.py b/synthesizer/hparams.py
index 544aeb5..77db2ce 100644
--- a/synthesizer/hparams.py
+++ b/synthesizer/hparams.py
@@ -87,6 +87,3 @@ hparams = HParams(
         silence_min_duration_split = 0.4,           # Duration in seconds of a silence for an utterance to be split
         utterance_min_duration = 1.6,               # Duration in seconds below which utterances are discarded
         )
-
-def hparams_debug_string():
-    return str(hparams)
\ No newline at end of file
diff --git a/synthesizer/synthesize.py b/synthesizer/synthesize.py
index ff05d0e..e2dd02c 100644
--- a/synthesizer/synthesize.py
+++ b/synthesizer/synthesize.py
@@ -1,6 +1,5 @@
 import torch
 from torch.utils.data import DataLoader
-from synthesizer.hparams import hparams_debug_string
 from synthesizer.synthesizer_dataset import SynthesizerDataset, collate_synthesizer
 from synthesizer.models.tacotron import Tacotron
 from synthesizer.utils.text import text_to_sequence
@@ -8,13 +7,14 @@ from synthesizer.utils.symbols import symbols
 import numpy as np
 from pathlib import Path
 from tqdm import tqdm
+import sys
 
 
 def run_synthesis(in_dir, out_dir, model_dir, hparams):
     # This generates ground truth-aligned mels for vocoder training
     synth_dir = Path(out_dir).joinpath("mels_gta")
-    synth_dir.mkdir(exist_ok=True)
-    print(hparams_debug_string(hparams))
+    synth_dir.mkdir(parents=True, exist_ok=True)
+    print(str(hparams))
 
     # Check for GPU
     if torch.cuda.is_available():
@@ -59,12 +59,12 @@ def run_synthesis(in_dir, out_dir, model_dir, hparams):
     metadata_fpath = in_dir.joinpath("train.txt")
     mel_dir = in_dir.joinpath("mels")
     embed_dir = in_dir.joinpath("embeds")
-
+    num_workers = 0 if sys.platform.startswith("win") else 2;
     dataset = SynthesizerDataset(metadata_fpath, mel_dir, embed_dir, hparams)
     data_loader = DataLoader(dataset,
-                             collate_fn=lambda batch: collate_synthesizer(batch, r),
+                             collate_fn=lambda batch: collate_synthesizer(batch),
                              batch_size=hparams.synthesis_batch_size,
-                             num_workers=2,
+                             num_workers=num_workers,
                              shuffle=False,
                              pin_memory=True)
 
@@ -78,9 +78,9 @@ def run_synthesis(in_dir, out_dir, model_dir, hparams):
 
             # Parallelize model onto GPUS using workaround due to python bug
             if device.type == "cuda" and torch.cuda.device_count() > 1:
-                _, mels_out, _ = data_parallel_workaround(model, texts, mels, embeds)
+                _, mels_out, _ , _ = data_parallel_workaround(model, texts, mels, embeds)
             else:
-                _, mels_out, _ = model(texts, mels, embeds)
+                _, mels_out, _, _  = model(texts, mels, embeds)
 
             for j, k in enumerate(idx):
                 # Note: outputs mel-spectrogram files and target ones have same names, just different folders
diff --git a/archived_untest_files/vocoder_preprocess.py b/vocoder_preprocess.py
similarity index 97%
rename from archived_untest_files/vocoder_preprocess.py
rename to vocoder_preprocess.py
index 0828d72..b8951a3 100644
--- a/archived_untest_files/vocoder_preprocess.py
+++ b/vocoder_preprocess.py
@@ -17,7 +17,7 @@ if __name__ == "__main__":
         "Path to the directory containing your SV2TTS directory. If you specify both --in_dir and "
         "--out_dir, this argument won't be used.")
     parser.add_argument("--model_dir", type=str, 
-                        default="synthesizer/saved_models/pretrained/", help=\
+                        default="synthesizer/saved_models/train3/", help=\
         "Path to the pretrained model directory.")
     parser.add_argument("-i", "--in_dir", type=str, default=argparse.SUPPRESS, help= \
         "Path to the synthesizer directory that contains the mel spectrograms, the wavs and the "
diff --git a/archived_untest_files/vocoder_train.py b/vocoder_train.py
similarity index 100%
rename from archived_untest_files/vocoder_train.py
rename to vocoder_train.py