Implement the track distance calculation for cue sheet tracks.

Implement diacritics and punctuation sanitization for the cue sheet track mapping.

# Conflicts:
#	src/NzbDrone.Core/Lidarr.Core.csproj

(cherry picked from commit 27622cebe9433b27bb0e7dfaf3720e2bd284e513)
This commit is contained in:
zhangdoa 2023-11-19 16:51:10 +01:00
commit 26678e79fa
7 changed files with 181 additions and 25 deletions

View file

@ -4,6 +4,7 @@
</PropertyGroup>
<ItemGroup>
<PackageReference Include="Dapper" Version="2.0.123" />
<PackageReference Include="Diacritics" Version="3.3.18" />
<PackageReference Include="Diacritical.Net" Version="1.0.4" />
<PackageReference Include="Polly" Version="8.2.1" />
<PackageReference Include="System.Text.Json" Version="6.0.9" />

View file

@ -3,7 +3,9 @@ using System.Collections.Generic;
using System.IO;
using System.IO.Abstractions;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using Diacritics.Extensions;
using NLog;
using NzbDrone.Core.MediaFiles.TrackImport;
using NzbDrone.Core.Music;
@ -19,6 +21,15 @@ namespace NzbDrone.Core.MediaFiles
public IdentificationOverrides IdOverrides { get; set; }
public CueSheet CueSheet { get; set; }
/// <summary>
/// Tells whether the cue sheet held by this object lists a file whose name equals
/// the file-name component of <paramref name="path"/>.
/// </summary>
/// <param name="path">Path of the media file; only its file name is compared.</param>
/// <returns>
/// <c>true</c> when a cue sheet is present, has at least one file entry, and one of
/// those entries carries the same name; otherwise <c>false</c>.
/// </returns>
public bool IsForMediaFile(string path)
{
    if (CueSheet == null || CueSheet.Files.Count == 0)
    {
        return false;
    }

    return CueSheet.Files.Any(entry => Path.GetFileName(path) == entry.Name);
}
/// <summary>
/// Looks up the cue sheet file entry whose name equals the file-name component
/// of <paramref name="path"/>.
/// </summary>
/// <param name="path">Path of the media file; only its file name is compared.</param>
/// <returns>
/// The first matching entry, or <c>null</c> when there is no cue sheet, the cue
/// sheet has no file entries, or none of them matches.
/// </returns>
public CueSheet.FileEntry TryToGetFileEntryForMediaFile(string path)
{
    var hasEntries = CueSheet != null && CueSheet.Files.Count > 0;

    return hasEntries
        ? CueSheet.Files.Find(entry => Path.GetFileName(path) == entry.Name)
        : null;
}
}
public interface ICueSheetService
@ -50,6 +61,39 @@ namespace NzbDrone.Core.MediaFiles
_logger = logger;
}
/// <summary>
/// Replaces typographic punctuation with plain ASCII equivalents, one character at a time.
/// Characters that are not in the table are copied through unchanged.
/// </summary>
private class PunctuationReplacer
{
    // Keys are written as \u escapes so the table cannot be damaged by tools or
    // encodings that mangle non-ASCII source characters (an empty char literal
    // does not compile, and two damaged keys would collide in the initializer).
    // The table never changes, so a single shared instance is enough.
    private static readonly Dictionary<char, char> Replacements = new Dictionary<char, char>
    {
        { '\u2018', '\'' }, { '\u2019', '\'' }, // Single quotes
        { '\u201C', '"' }, { '\u201D', '"' },   // Double quotes
        { '\u2039', '<' }, { '\u203A', '>' },   // Angle quotes
        { '\u00AB', '<' }, { '\u00BB', '>' },   // Guillemets
        { '\u2013', '-' }, { '\u2014', '-' },   // Dashes (en, em)
        { '\u2026', '.' },                       // Ellipsis
        { '\u00A1', '!' }, { '\u00BF', '?' },   // Inverted punctuation (Spanish)
    };

    /// <summary>
    /// Returns a copy of <paramref name="input"/> with every mapped punctuation
    /// character swapped for its ASCII replacement.
    /// </summary>
    /// <param name="input">Text to sanitize; may be <c>null</c> or empty.</param>
    /// <returns>The sanitized text, or <paramref name="input"/> itself when it is <c>null</c> or empty.</returns>
    public string ReplacePunctuation(string input)
    {
        if (string.IsNullOrEmpty(input))
        {
            return input;
        }

        var output = new StringBuilder(input.Length);

        foreach (var c in input)
        {
            output.Append(Replacements.TryGetValue(c, out var replacement) ? replacement : c);
        }

        return output.ToString();
    }
}
public List<ImportDecision<LocalTrack>> GetImportDecisions(ref List<IFileInfo> mediaFileList, IdentificationOverrides idOverrides, ImportDecisionMakerInfo itemInfo, ImportDecisionMakerConfig config)
{
var decisions = new List<ImportDecision<LocalTrack>>();
@ -119,7 +163,6 @@ namespace NzbDrone.Core.MediaFiles
}
}
var addedTracks = new List<Track>();
decisions.ForEach(decision =>
{
if (!decision.Item.IsSingleFileRelease)
@ -156,14 +199,53 @@ namespace NzbDrone.Core.MediaFiles
return;
}
// TODO diacritics could cause false positives here
decision.Item.Tracks = tracksFromRelease.Where(trackFromRelease => !addedTracks.Contains(trackFromRelease) && tracksFromCueSheet.Any(trackFromCueSheet => string.Equals(trackFromCueSheet.Title, trackFromRelease.Title, StringComparison.OrdinalIgnoreCase))).ToList();
addedTracks.AddRange(decision.Item.Tracks);
var replacer = new PunctuationReplacer();
var i = 0;
while (i < tracksFromRelease.Count)
{
var trackFromRelease = tracksFromRelease[i];
var trackFromReleaseTitle = NormalizeTitle(replacer, trackFromRelease.Title);
var j = 0;
var anyMatch = false;
while (j < tracksFromCueSheet.Count)
{
var trackFromCueSheet = tracksFromCueSheet[j];
var trackFromCueSheetTitle = NormalizeTitle(replacer, trackFromCueSheet.Title);
anyMatch = string.Equals(trackFromReleaseTitle, trackFromCueSheetTitle, StringComparison.InvariantCultureIgnoreCase);
if (anyMatch)
{
decision.Item.Tracks.Add(trackFromRelease);
tracksFromRelease.RemoveAt(i);
tracksFromCueSheet.RemoveAt(j);
break;
}
else
{
j++;
}
}
if (!anyMatch)
{
i++;
}
}
});
return decisions;
}
/// <summary>
/// Reduces a track title to a comparison-friendly form: Unicode compatibility
/// normalization, then diacritics removal, then typographic punctuation replacement.
/// </summary>
/// <param name="replacer">Punctuation replacer applied as the last step.</param>
/// <param name="title">Raw title; may be <c>null</c> or empty.</param>
/// <returns>The normalized title, or an empty string when <paramref name="title"/> is <c>null</c> or empty.</returns>
private static string NormalizeTitle(PunctuationReplacer replacer, string title)
{
    // A track without a title must not crash the matching loop.
    if (string.IsNullOrEmpty(title))
    {
        return string.Empty;
    }

    // Strings are immutable: Normalize returns a new string, so the result has to be
    // assigned (calling it as a bare statement is a no-op).
    // FormKC is used rather than FormKD: both fold compatibility characters (ligatures,
    // full-width forms, ...), but KC leaves accented letters as single precomposed
    // characters, whereas KD would split them into a base letter plus a combining mark.
    // NOTE(review): RemoveDiacritics presumably works from a per-character table and so
    // needs the precomposed form - confirm against the Diacritics package.
    title = title.Normalize(NormalizationForm.FormKC);
    title = title.RemoveDiacritics();
    title = replacer.ReplacePunctuation(title);

    return title;
}
private CueSheet LoadCueSheet(IFileInfo fileInfo)
{
using (var fs = fileInfo.OpenRead())
@ -337,7 +419,7 @@ namespace NzbDrone.Core.MediaFiles
}
else if (performers.Count > 1)
{
return _parsingService.GetArtist("Various Artist");
return _parsingService.GetArtist("various artists");
}
return null;

View file

@ -30,6 +30,11 @@ namespace NzbDrone.Core.MediaFiles.TrackImport.Identification
localTrack.FileTrackInfo.TrackNumbers[0] != totalTrackNumber;
}
/// <summary>
/// Reports whether a cue sheet track's number differs from the release track's
/// <c>AbsoluteTrackNumber</c>.
/// </summary>
/// <param name="cuesheetTrack">Track entry read from the cue sheet.</param>
/// <param name="mbTrack">Candidate release track.</param>
/// <param name="totalTrackNumber">Not used by this overload.</param>
/// <returns><c>true</c> when the two numbers are different.</returns>
private static bool TrackIndexIncorrect(CueSheet.TrackEntry cuesheetTrack, Track mbTrack, int totalTrackNumber)
    => cuesheetTrack.Number != mbTrack.AbsoluteTrackNumber;
public static int GetTotalTrackNumber(Track track, List<Track> allTracks)
{
return track.AbsoluteTrackNumber + allTracks.Count(t => t.MediumNumber < track.MediumNumber);
@ -79,6 +84,28 @@ namespace NzbDrone.Core.MediaFiles.TrackImport.Identification
return dist;
}
/// <summary>
/// Builds the distance between a cue sheet track entry and a release track from
/// the title, optionally the artist, and the track index.
/// </summary>
/// <param name="cuesheetTrack">Track entry read from the cue sheet.</param>
/// <param name="mbTrack">Candidate release track.</param>
/// <param name="totalTrackNumber">Forwarded to the track index check.</param>
/// <param name="includeArtist">When <c>true</c>, a single non-"various artists" performer is compared with the release track's artist name.</param>
/// <returns>The accumulated <see cref="Distance"/>.</returns>
public static Distance TrackDistance(CueSheet.TrackEntry cuesheetTrack, Track mbTrack, int totalTrackNumber, bool includeArtist = false)
{
    var distance = new Distance();

    // musicbrainz never has 'featuring' in the track title
    // (see https://musicbrainz.org/doc/Style/Artist_Credits).
    // The cue sheet title is compared as-is; a missing title counts as empty.
    distance.AddString("track_title", cuesheetTrack.Title ?? "", mbTrack.Title);

    // The artist only contributes when explicitly requested and the cue sheet
    // names exactly one performer that is not a "various artists" placeholder.
    if (includeArtist && cuesheetTrack.Performers.Count == 1)
    {
        var isVariousArtists = VariousArtistNames.Any(name => name.Equals(cuesheetTrack.Performers[0], StringComparison.InvariantCultureIgnoreCase));

        if (!isVariousArtists)
        {
            distance.AddString("track_artist", cuesheetTrack.Performers[0], mbTrack.ArtistMetadata.Value.Name);
        }
    }

    // The index is only scored when the release track has a positive absolute number.
    if (mbTrack.AbsoluteTrackNumber > 0)
    {
        var indexIsWrong = TrackIndexIncorrect(cuesheetTrack, mbTrack, totalTrackNumber);
        distance.AddBool("track_index", indexIsWrong);
    }

    return distance;
}
public static Distance AlbumReleaseDistance(List<LocalTrack> localTracks, AlbumRelease release, TrackMapping mapping)
{
var dist = new Distance();
@ -179,9 +206,14 @@ namespace NzbDrone.Core.MediaFiles.TrackImport.Identification
}
// tracks
if (localTracks.All(x => x.IsSingleFileRelease == true))
if (mapping.CuesheetTrackMapping.Count != 0)
{
dist.Add("tracks", 0);
foreach (var pair in mapping.CuesheetTrackMapping)
{
dist.Add("tracks", pair.Value.Item2.NormalizedDistance());
}
Logger.Trace("after trackMapping: {0}", dist.NormalizedDistance());
}
else
{
@ -192,14 +224,6 @@ namespace NzbDrone.Core.MediaFiles.TrackImport.Identification
Logger.Trace("after trackMapping: {0}", dist.NormalizedDistance());
// missing tracks
foreach (var track in mapping.MBExtra.Take(localTracks.Count))
{
dist.Add("missing_tracks", 1.0);
}
Logger.Trace("after missing tracks: {0}", dist.NormalizedDistance());
// unmatched tracks
foreach (var track in mapping.LocalExtra.Take(localTracks.Count))
{
@ -209,6 +233,14 @@ namespace NzbDrone.Core.MediaFiles.TrackImport.Identification
Logger.Trace("after unmatched tracks: {0}", dist.NormalizedDistance());
}
// missing tracks
foreach (var track in mapping.MBExtra.Take(localTracks.Count))
{
dist.Add("missing_tracks", 1.0);
}
Logger.Trace("after missing tracks: {0}", dist.NormalizedDistance());
return dist;
}
}

View file

@ -323,7 +323,8 @@ namespace NzbDrone.Core.MediaFiles.TrackImport.Identification
var extraTracks = extraTracksOnDisk.Where(x => extraTrackPaths.Contains(x.Path)).ToList();
var allLocalTracks = localAlbumRelease.LocalTracks.Concat(extraTracks).DistinctBy(x => x.Path).ToList();
var mapping = MapReleaseTracks(allLocalTracks, release.Tracks.Value);
var isSingleFileRelease = allLocalTracks.All(x => x.IsSingleFileRelease == true);
var mapping = isSingleFileRelease ? MapSingleFileReleaseTracks(allLocalTracks, release.Tracks.Value) : MapReleaseTracks(allLocalTracks, release.Tracks.Value);
var distance = DistanceCalculator.AlbumReleaseDistance(allLocalTracks, release, mapping);
var currDistance = distance.NormalizedDistance();
@ -356,12 +357,6 @@ namespace NzbDrone.Core.MediaFiles.TrackImport.Identification
public TrackMapping MapReleaseTracks(List<LocalTrack> localTracks, List<Track> mbTracks)
{
var result = new TrackMapping();
result.IsSingleFileRelease = localTracks.All(x => x.IsSingleFileRelease == true);
if (result.IsSingleFileRelease)
{
return result;
}
var distances = new Distance[localTracks.Count, mbTracks.Count];
var costs = new double[localTracks.Count, mbTracks.Count];
@ -392,5 +387,46 @@ namespace NzbDrone.Core.MediaFiles.TrackImport.Identification
return result;
}
/// <summary>
/// Maps the track entries of the cue sheets attached to <paramref name="localTracks"/>
/// onto <paramref name="mbTracks"/> by minimizing the total pairwise track distance.
/// </summary>
/// <param name="localTracks">Local files; only those with a <c>CueSheetFileEntry</c> contribute track entries.</param>
/// <param name="mbTracks">Tracks of the candidate release.</param>
/// <returns>
/// A mapping whose <c>CuesheetTrackMapping</c> holds the chosen pairs with their distance,
/// whose <c>MBExtra</c> holds the release tracks left unpaired, and whose <c>LocalExtra</c>
/// is an empty list.
/// </returns>
public TrackMapping MapSingleFileReleaseTracks(List<LocalTrack> localTracks, List<Track> mbTracks)
{
    var result = new TrackMapping();

    // This method pairs cue sheet entries rather than files, so it never produces
    // unmatched local files. Expose an empty list instead of leaving the property null:
    // consumers enumerate LocalExtra, which would throw on null when no cue sheet
    // entry could be paired.
    result.LocalExtra = new List<LocalTrack>();

    // Collect every track entry from the cue sheet file entries of the local files.
    var cuesheetTracks = new List<CueSheet.TrackEntry>();
    foreach (var localTrack in localTracks)
    {
        if (localTrack.CueSheetFileEntry != null)
        {
            cuesheetTracks.AddRange(localTrack.CueSheetFileEntry.Tracks);
        }
    }

    // Rows are cue sheet entries, columns are release tracks.
    var distances = new Distance[cuesheetTracks.Count, mbTracks.Count];
    var costs = new double[cuesheetTracks.Count, mbTracks.Count];

    for (var col = 0; col < mbTracks.Count; col++)
    {
        var totalTrackNumber = DistanceCalculator.GetTotalTrackNumber(mbTracks[col], mbTracks);
        for (var row = 0; row < cuesheetTracks.Count; row++)
        {
            distances[row, col] = DistanceCalculator.TrackDistance(cuesheetTracks[row], mbTracks[col], totalTrackNumber, false);
            costs[row, col] = distances[row, col].NormalizedDistance();
        }
    }

    // Solve the assignment over the cost matrix with the Munkres helper.
    var m = new Munkres(costs);
    m.Run();

    foreach (var pair in m.Solution)
    {
        result.CuesheetTrackMapping.Add(cuesheetTracks[pair.Item1], Tuple.Create(mbTracks[pair.Item2], distances[pair.Item1, pair.Item2]));
        _logger.Trace("Mapped {0} to {1}, dist: {2}", cuesheetTracks[pair.Item1], mbTracks[pair.Item2], costs[pair.Item1, pair.Item2]);
    }

    // Release tracks that were not paired with any cue sheet entry are "missing".
    result.MBExtra = mbTracks.Except(result.CuesheetTrackMapping.Values.Select(x => x.Item1)).ToList();
    _logger.Trace($"Missing tracks:\n{string.Join("\n", result.MBExtra)}");

    return result;
}
}
}

View file

@ -159,11 +159,12 @@ namespace NzbDrone.Core.MediaFiles.TrackImport
{
localTracks.ForEach(localTrack =>
{
var cueSheetFindResult = itemInfo.CueSheetInfos.Find(x => x.IsForMediaFile(localTrack.Path));
var cueSheet = cueSheetFindResult?.CueSheet;
var cueSheetInfo = itemInfo.CueSheetInfos.Find(x => x.IsForMediaFile(localTrack.Path));
var cueSheet = cueSheetInfo?.CueSheet;
if (cueSheet != null)
{
localTrack.IsSingleFileRelease = cueSheet.IsSingleFileRelease;
localTrack.CueSheetFileEntry = cueSheetInfo.TryToGetFileEntryForMediaFile(localTrack.Path);
localTrack.Artist = idOverrides.Artist;
localTrack.Album = idOverrides.Album;
}

View file

@ -2,6 +2,7 @@ using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using NzbDrone.Core.MediaFiles;
using NzbDrone.Core.MediaFiles.TrackImport.Identification;
using NzbDrone.Core.Music;
@ -70,11 +71,12 @@ namespace NzbDrone.Core.Parser.Model
/// <summary>
/// Creates an empty mapping: no file pairs, no cue sheet pairs, and no extra
/// local or release tracks.
/// </summary>
public TrackMapping()
{
    Mapping = new Dictionary<LocalTrack, Tuple<Track, Distance>>();
    CuesheetTrackMapping = new Dictionary<CueSheet.TrackEntry, Tuple<Track, Distance>>();

    // Start the "extra" lists empty rather than null so that code enumerating them
    // works even when the mapper that produced this object never assigned them.
    LocalExtra = new List<LocalTrack>();
    MBExtra = new List<Track>();
}
public Dictionary<LocalTrack, Tuple<Track, Distance>> Mapping { get; set; }
public List<LocalTrack> LocalExtra { get; set; }
public List<Track> MBExtra { get; set; }
public bool IsSingleFileRelease { get; set; }
public Dictionary<CueSheet.TrackEntry, Tuple<Track, Distance>> CuesheetTrackMapping { get; set; }
}
}

View file

@ -1,5 +1,6 @@
using System;
using System.Collections.Generic;
using NzbDrone.Core.MediaFiles;
using NzbDrone.Core.MediaFiles.TrackImport.Identification;
using NzbDrone.Core.Music;
using NzbDrone.Core.Qualities;
@ -32,6 +33,7 @@ namespace NzbDrone.Core.Parser.Model
public string ReleaseGroup { get; set; }
public string SceneName { get; set; }
public bool IsSingleFileRelease { get; set; }
public CueSheet.FileEntry CueSheetFileEntry { get; set; }
public string CueSheetPath { get; set; }
public override string ToString()
{