Skip to content

Commit

Permalink
fix(matcher): perform unicode NFKC before matching (#76)
Browse files Browse the repository at this point in the history
  • Loading branch information
qwqcode authored Jan 10, 2025
1 parent 224b389 commit 38f293a
Show file tree
Hide file tree
Showing 5 changed files with 146 additions and 2 deletions.
33 changes: 33 additions & 0 deletions SubRenamer.Tests/MatcherTests/FilenameNfkcTests.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
using SubRenamer.Core;
using SubRenamer.Helper;

namespace SubRenamer.Tests.MatcherTests;

/// <summary>
/// Tests for filename normalization performed by <see cref="MatcherFilenameNormalizer"/>.
///
/// NFKC stands for Unicode Normalization Form KC (Compatibility Decomposition followed by Canonical Composition).
/// https://unicode.org/reports/tr15/
/// </summary>
[TestFixture]
public class FilenameNfkcTests
{
    /// <summary>
    /// A base kana followed by a combining (semi-)voiced sound mark must be composed into the
    /// single precomposed code point by Normalize, and Denormalize must restore the original text.
    /// </summary>
    [Test]
    public void Basic()
    {
        // Decomposed input: U+30CF + U+309A (video) and U+30D5 + U+3099 (subtitle).
        List<MatchItem> rawItems = [new MatchItem("", "\u30CF\u309A", "\u30D5\u3099")];
        // Expected composed output: U+30D1 (video) and U+30D6 (subtitle).
        List<MatchItem> expectedNormalized = [new MatchItem("", "\u30D1", "\u30D6")];
        var sut = new MatcherFilenameNormalizer();

        var normalized = sut.Normalize(rawItems);
        Assert.That(normalized, Is.EqualTo(expectedNormalized), "Normalize");

        var restored = sut.Denormalize(normalized);
        Assert.That(restored, Is.EqualTo(rawItems), "Denormalize");

        sut.Clear();
    }
}
7 changes: 5 additions & 2 deletions SubRenamer.Tests/MatcherTests/TopLevelTests.cs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
using System.Text.Json;
using SubRenamer.Core;
using SubRenamer.Helper;

namespace SubRenamer.Tests.MatcherTests;

Expand All @@ -26,7 +27,8 @@ private static IEnumerable<TestCaseData> TestData
[Test, TestCaseSource(nameof(TestData))]
public void TestCasesFromJson(string name, List<MatchItem> input, List<MatchItem> expected)
{
var actual = Matcher.Execute(input);
var normalizer = new MatcherFilenameNormalizer();
var actual = Matcher.Execute(normalizer.Normalize(input));

var jsonOpts = new JsonSerializerOptions { WriteIndented = true, Encoder = System.Text.Encodings.Web.JavaScriptEncoder.UnsafeRelaxedJsonEscaping };
TestContext.Progress.WriteLine("{1}\n\n \ud83c\udf1f Matcher Test Case: {0}\n\n{1}", name, new string('=', 50));
Expand All @@ -37,6 +39,7 @@ public void TestCasesFromJson(string name, List<MatchItem> input, List<MatchItem
TestContext.Progress.WriteLine("{2}\n {0}\n{2}\n{1}", "Actual", JsonSerializer.Serialize(actual, jsonOpts),
new string('-', 50));

Assert.That(actual, Is.EqualTo(expected));
Assert.That(normalizer.Denormalize(actual), Is.EqualTo(expected));
normalizer.Clear();
}
}
32 changes: 32 additions & 0 deletions SubRenamer.Tests/MatcherTests/TopLevelTests.json
Original file line number Diff line number Diff line change
Expand Up @@ -145,5 +145,37 @@
{"Key": "1", "Video": "视频 1 xyz.mov", "Subtitle": "字幕 1xyz.srt"},
{"Key": "77", "Video": "视频 77 test xyz.mov", "Subtitle": "字幕 77test xyz.srt"}
]
},
{
"Name": "Nのために (Japanese, Unicode NFKD Test#1)",
"Input": [
{"Key": "", "Video": "Nのために EP01 720p HDTV x264 AAC-DoA.mkv", "Subtitle": ""},
{"Key": "", "Video": "Nのために EP02 720p HDTV x264 AAC-DoA.mkv", "Subtitle": ""},
{"Key": "", "Video": "Nのために EP10 End 720p HDTV x264 AAC-DoA.mkv", "Subtitle": ""},
{"Key": "", "Video": "", "Subtitle": "[ドラマ][Nのために 第01話]「榮倉奈々、窪田正孝、賀来賢人、小出恵介、徳井義実、小西真奈美、三浦友和」[720p x264 AAC].tc.srt"},
{"Key": "", "Video": "", "Subtitle": "[ドラマ][Nのために 第02話]「榮倉奈々、窪田正孝、賀来賢人、小出恵介、徳井義実、小西真奈美、三浦友和」[720p x264 AAC].tc.srt"},
{"Key": "", "Video": "", "Subtitle": "[ドラマ][Nのために 第10話]「榮倉奈々、窪田正孝、賀来賢人、小出恵介、徳井義実、小西真奈美、三浦友和」[720p x264 AAC][最終話].tc.srt"}
],
"Output": [
{"Key": "1", "Video": "Nのために EP01 720p HDTV x264 AAC-DoA.mkv", "Subtitle": "[ドラマ][Nのために 第01話]「榮倉奈々、窪田正孝、賀来賢人、小出恵介、徳井義実、小西真奈美、三浦友和」[720p x264 AAC].tc.srt"},
{"Key": "2", "Video": "Nのために EP02 720p HDTV x264 AAC-DoA.mkv", "Subtitle": "[ドラマ][Nのために 第02話]「榮倉奈々、窪田正孝、賀来賢人、小出恵介、徳井義実、小西真奈美、三浦友和」[720p x264 AAC].tc.srt"},
{"Key": "10", "Video": "Nのために EP10 End 720p HDTV x264 AAC-DoA.mkv", "Subtitle": "[ドラマ][Nのために 第10話]「榮倉奈々、窪田正孝、賀来賢人、小出恵介、徳井義実、小西真奈美、三浦友和」[720p x264 AAC][最終話].tc.srt"}
]
},
{
"Name": "機動警察 (Japanese, Unicode NFKD Test#2)",
"Input": [
{"Key": "", "Video": "[AI-Raws] 机动警察パトレイバー #1 (BD HEVC 1440x1080 FLAC)[9F318F24].mkv", "Subtitle": ""},
{"Key": "", "Video": "[AI-Raws] 机动警察パトレイバー #2 (BD HEVC 1440x1080 FLAC)[7F3281F4].mkv", "Subtitle": ""},
{"Key": "", "Video": "[AI-Raws] 机动警察パトレイバー #10 (BD HEVC 1440x1080 FLAC)[8F01DF54].mkv", "Subtitle": ""},
{"Key": "", "Video": "", "Subtitle": "[炎之川字幕] 机动警察 機動警察パトレイバー 01.ass"},
{"Key": "", "Video": "", "Subtitle": "[炎之川字幕] 机动警察 機動警察パトレイバー 02.ass"},
{"Key": "", "Video": "", "Subtitle": "[炎之川字幕] 机动警察 機動警察パトレイバー 10.ass"}
],
"Output": [
{"Key": "1", "Video": "[AI-Raws] 机动警察パトレイバー #1 (BD HEVC 1440x1080 FLAC)[9F318F24].mkv", "Subtitle": "[炎之川字幕] 机动警察 機動警察パトレイバー 01.ass"},
{"Key": "2", "Video": "[AI-Raws] 机动警察パトレイバー #2 (BD HEVC 1440x1080 FLAC)[7F3281F4].mkv", "Subtitle": "[炎之川字幕] 机动警察 機動警察パトレイバー 02.ass"},
{"Key": "10", "Video": "[AI-Raws] 机动警察パトレイバー #10 (BD HEVC 1440x1080 FLAC)[8F01DF54].mkv", "Subtitle": "[炎之川字幕] 机动警察 機動警察パトレイバー 10.ass"}
]
}
]
72 changes: 72 additions & 0 deletions SubRenamer/Helper/MatcherFilenameNormalizer.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
using System;
using System.Collections.Generic;
using System.Text;

namespace SubRenamer.Helper;

/// <summary>
/// Handles normalization and denormalization of filenames in MatchItems to ensure consistent Unicode handling.
/// Uses <see cref="NormalizationForm.FormKC"/> (compatibility decomposition followed by canonical composition).
/// </summary>
/// <remarks>
/// <see cref="Normalize"/> records a "normalized name -> original name" entry for every non-empty filename,
/// in separate maps for videos and subtitles; <see cref="Denormalize"/> reads those maps back.
/// The maps are keyed by the normalized name, so if two different original names normalize to the same
/// string, the one normalized last replaces the earlier entry.
/// </remarks>
public class MatcherFilenameNormalizer
{
    private readonly Dictionary<string, string> _normalizedToRawVideos = new();
    private readonly Dictionary<string, string> _normalizedToRawSubtitles = new();

    /// <summary>
    /// Normalizes the filenames in a list of MatchItems using NormalizationForm.FormKC
    /// and remembers how to map each normalized name back to its original.
    /// </summary>
    /// <param name="matchItems">The list of MatchItems to normalize.</param>
    /// <returns>A new list of MatchItems with normalized filenames; keys are copied unchanged.</returns>
    /// <exception cref="ArgumentNullException">Thrown when <paramref name="matchItems"/> is null.</exception>
    public List<Core.MatchItem> Normalize(IReadOnlyList<Core.MatchItem> matchItems)
    {
        ArgumentNullException.ThrowIfNull(matchItems);
        if (matchItems.Count == 0) return [];

        var result = new List<Core.MatchItem>(matchItems.Count);
        foreach (var item in matchItems)
        {
            var normalizedVideo = NormalizeAndRemember(item.Video, _normalizedToRawVideos);
            var normalizedSubtitle = NormalizeAndRemember(item.Subtitle, _normalizedToRawSubtitles);
            result.Add(new Core.MatchItem(item.Key, normalizedVideo, normalizedSubtitle));
        }

        return result;
    }

    /// <summary>
    /// Denormalizes the filenames in a list of MatchItems back to their original form.
    /// </summary>
    /// <param name="matchItems">The list of MatchItems to denormalize.</param>
    /// <returns>A new list of MatchItems with original filenames; empty filenames stay empty.</returns>
    /// <exception cref="ArgumentNullException">Thrown when <paramref name="matchItems"/> is null.</exception>
    /// <exception cref="KeyNotFoundException">Thrown when a normalized filename cannot be mapped back to its original form.</exception>
    public List<Core.MatchItem> Denormalize(IReadOnlyList<Core.MatchItem> matchItems)
    {
        ArgumentNullException.ThrowIfNull(matchItems);
        if (matchItems.Count == 0) return [];

        var result = new List<Core.MatchItem>(matchItems.Count);
        foreach (var item in matchItems)
        {
            var originalVideo = Restore(item.Video, _normalizedToRawVideos, "video");
            var originalSubtitle = Restore(item.Subtitle, _normalizedToRawSubtitles, "subtitle");
            result.Add(new Core.MatchItem(item.Key, originalVideo, originalSubtitle));
        }

        return result;
    }

    /// <summary>
    /// Clears the internal mapping dictionaries.
    /// </summary>
    public void Clear()
    {
        _normalizedToRawVideos.Clear();
        _normalizedToRawSubtitles.Clear();
    }

    /// <summary>
    /// Normalizes a single filename to NFKC and records the normalized -> original mapping.
    /// </summary>
    private static string NormalizeAndRemember(string raw, Dictionary<string, string> normalizedToRaw)
    {
        // An empty (or missing) name means there is no file in this slot: nothing to normalize or record.
        if (string.IsNullOrEmpty(raw)) return raw;

        string normalized;
        try
        {
            normalized = raw.Normalize(NormalizationForm.FormKC);
        }
        catch (ArgumentException)
        {
            // string.Normalize rejects text containing invalid Unicode (e.g. an unpaired surrogate).
            // Fall back to the raw name so one odd filename does not abort the whole match.
            normalized = raw;
        }

        // Last writer wins when two originals share the same normalized form.
        normalizedToRaw[normalized] = raw;
        return normalized;
    }

    /// <summary>
    /// Looks up the original filename recorded for a normalized one; an empty name maps to an empty string.
    /// </summary>
    private static string Restore(string normalized, Dictionary<string, string> normalizedToRaw, string kind)
    {
        if (string.IsNullOrEmpty(normalized)) return string.Empty;
        if (normalizedToRaw.TryGetValue(normalized, out var raw)) return raw;

        throw new KeyNotFoundException(
            $"No original {kind} filename is recorded for normalized name '{normalized}'.");
    }
}
4 changes: 4 additions & 0 deletions SubRenamer/ViewModels/MainViewModel.cs
Original file line number Diff line number Diff line change
Expand Up @@ -174,15 +174,19 @@ partial void OnSubSyncEnabledChanged(bool value)
[RelayCommand]
private void PerformMatch()
{
var filenameNormalizer = new MatcherFilenameNormalizer();
ShowRenameTasks = false;
var inputItems = MatcherDataConverter.ConvertMatchItems(MatchList);
inputItems = filenameNormalizer.Normalize(inputItems);
var m = Config.Get().MatchMode;
var resultRaw = Matcher.Execute(inputItems, new MatcherOptions()
{
// Convert Config to MatcherOptions
VideoRegex = (m != MatchMode.Diff) ? (m == MatchMode.Manual ? Config.Get().ManualVideoRegex : Config.Get().VideoRegex) : null,
SubtitleRegex = (m != MatchMode.Diff) ? (m == MatchMode.Manual ? Config.Get().ManualSubtitle : Config.Get().SubtitleRegex) : null,
});
resultRaw = filenameNormalizer.Denormalize(resultRaw);
filenameNormalizer.Clear();
var result = MatcherDataConverter.ConvertMatchItems(resultRaw);
result.ForEach(UpdateMatchItemStatus);
MatchList = new ObservableCollection<MatchItem>(result);
Expand Down

0 comments on commit 38f293a

Please sign in to comment.