Added a fixed reference for the TensorBoard audio section

IAHispano · Nov 12, 2024 · 4c317e0 · 4c317e0
1 parent ea7c629
commit 4c317e0
Show file tree

Hide file tree

Showing 13 changed files with 26 additions and 10 deletions.
diff --git a/logs/reference/ref32000.wav b/logs/reference/ref32000.wav
diff --git a/logs/reference/ref32000_f0c.npy b/logs/reference/ref32000_f0c.npy
diff --git a/logs/reference/ref32000_f0f.npy b/logs/reference/ref32000_f0f.npy
diff --git a/logs/reference/ref32000_feats.npy b/logs/reference/ref32000_feats.npy
diff --git a/logs/reference/ref40000.wav b/logs/reference/ref40000.wav
diff --git a/logs/reference/ref40000_f0c.npy b/logs/reference/ref40000_f0c.npy
diff --git a/logs/reference/ref40000_f0f.npy b/logs/reference/ref40000_f0f.npy
diff --git a/logs/reference/ref40000_feats.npy b/logs/reference/ref40000_feats.npy
diff --git a/logs/reference/ref48000.wav b/logs/reference/ref48000.wav
diff --git a/logs/reference/ref48000_f0c.npy b/logs/reference/ref48000_f0c.npy
diff --git a/logs/reference/ref48000_f0f.npy b/logs/reference/ref48000_f0f.npy
diff --git a/logs/reference/ref48000_feats.npy b/logs/reference/ref48000_feats.npy
diff --git a/rvc/train/train.py b/rvc/train/train.py
@@ -439,16 +439,32 @@ def run(
 
     cache = []
     # get the first sample as reference for tensorboard evaluation
-    for info in train_loader:
-        phone, phone_lengths, pitch, pitchf, _, _, _, _, sid = info
-        reference = (
-            phone.to(device),
-            phone_lengths.to(device),
-            pitch.to(device) if pitch_guidance else None,
-            pitchf.to(device) if pitch_guidance else None,
-            sid.to(device),
-        )
-        break
+    if os.path.isfile(os.path.join("logs", "reference", f"ref{sample_rate}.wav")):
+        import numpy as np
+        phone = np.load(os.path.join("logs", "reference", f"ref{sample_rate}_feats.npy"))
+        # repeat each feature frame twice along axis 0 to match the pitch size
+        phone = np.repeat(phone, 2, axis=0)
+        phone = torch.FloatTensor(phone).unsqueeze(0).to(device)
+        phone_lengths = torch.LongTensor(phone.size(0)).to(device)
+        pitch = np.load(os.path.join("logs", "reference", f"ref{sample_rate}_f0c.npy"))
+        # removed last frame to match features
+        pitch = torch.LongTensor(pitch[:-1]).unsqueeze(0).to(device)
+        pitchf = np.load(os.path.join("logs", "reference", f"ref{sample_rate}_f0f.npy"))
+        # removed last frame to match features
+        pitchf = torch.FloatTensor(pitchf[:-1]).unsqueeze(0).to(device)
+        sid = torch.LongTensor([0]).to(device)
+        reference = (phone, phone_lengths, pitch, pitchf, sid)
+    else:
+        for info in train_loader:
+            phone, phone_lengths, pitch, pitchf, _, _, _, _, sid = info
+            reference = (
+                phone.to(device),
+                phone_lengths.to(device),
+                pitch.to(device) if pitch_guidance else None,
+                pitchf.to(device) if pitch_guidance else None,
+                sid.to(device),
+            )
+            break
 
     for epoch in range(epoch_str, total_epoch + 1):
         train_and_evaluate(