Improve the fuzzy matching (#522)

* Fixed: improve track matching

* Deal with tracks sequentially numbered across discs
This commit is contained in:
ta264 2018-11-16 17:46:46 +00:00 committed by Qstick
parent 8320508688
commit e260a29b57
19 changed files with 767 additions and 164 deletions

View file

@ -5,6 +5,7 @@ using NzbDrone.Core.MediaFiles;
using NzbDrone.Core.MediaFiles.Events;
using NzbDrone.Core.Messaging.Events;
using NzbDrone.Core.Music.Events;
using NzbDrone.Core.Parser;
using NzbDrone.Common.Extensions;
using System;
using System.Collections.Generic;
@ -81,60 +82,73 @@ namespace NzbDrone.Core.Music
public Track FindTrackByTitle(int artistId, int albumId, int mediumNumber, int trackNumber, string releaseTitle)
{
// TODO: can replace this search mechanism with something smarter/faster/better
var normalizedReleaseTitle = Parser.Parser.NormalizeTrackTitle(releaseTitle).Replace(".", " ");
var normalizedReleaseTitle = releaseTitle.NormalizeTrackTitle().Replace(".", " ");
var tracks = _trackRepository.GetTracksByMedium(albumId, mediumNumber);
var matches = from track in tracks
//if we have a trackNumber use it
let trackNumCheck = (trackNumber == 0 || track.AbsoluteTrackNumber == trackNumber)
//if release title is longer than track title
let posReleaseTitle = normalizedReleaseTitle.IndexOf(Parser.Parser.NormalizeTrackTitle(track.Title), StringComparison.CurrentCultureIgnoreCase)
//if track title is longer than release title
let posTrackTitle = Parser.Parser.NormalizeTrackTitle(track.Title).IndexOf(normalizedReleaseTitle, StringComparison.CurrentCultureIgnoreCase)
where track.Title.Length > 0 && trackNumCheck && (posReleaseTitle >= 0 || posTrackTitle >= 0)
orderby posReleaseTitle, posTrackTitle
select new
{
NormalizedLength = Parser.Parser.NormalizeTrackTitle(track.Title).Length,
Track = track
};
var matches = tracks.Where(t => (trackNumber == 0 || t.AbsoluteTrackNumber == trackNumber)
&& t.Title.Length > 0
&& (normalizedReleaseTitle.Contains(t.Title.NormalizeTrackTitle())
|| t.Title.NormalizeTrackTitle().Contains(normalizedReleaseTitle)));
return matches.OrderByDescending(e => e.NormalizedLength).FirstOrDefault()?.Track;
return matches.Count() > 1 ? null : matches.SingleOrDefault();
}
public Track FindTrackByTitleInexact(int artistId, int albumId, int mediumNumber, int trackNumber, string releaseTitle)
public Track FindTrackByTitleInexact(int artistId, int albumId, int mediumNumber, int trackNumber, string title)
{
double fuzzThreshold = 0.6;
double fuzzGap = 0.2;
var normalizedReleaseTitle = Parser.Parser.NormalizeTrackTitle(releaseTitle).Replace(".", " ");
var normalizedTitle = title.NormalizeTrackTitle().Replace(".", " ");
var tracks = _trackRepository.GetTracksByMedium(albumId, mediumNumber);
var matches = from track in tracks
let normalizedTitle = Parser.Parser.NormalizeTrackTitle(track.Title).Replace(".", " ")
let matchProb = normalizedTitle.FuzzyMatch(normalizedReleaseTitle)
where track.Title.Length > 0
orderby matchProb descending
select new
Func< Func<Track, string, double>, string, Tuple<Func<Track, string, double>, string>> tc = Tuple.Create;
var scoringFunctions = new List<Tuple<Func<Track, string, double>, string>> {
tc((a, t) => a.Title.NormalizeTrackTitle().FuzzyMatch(t), normalizedTitle),
tc((a, t) => a.Title.NormalizeTrackTitle().FuzzyContains(t), normalizedTitle),
tc((a, t) => t.FuzzyContains(a.Title.NormalizeTrackTitle()), normalizedTitle)
};
foreach (var func in scoringFunctions)
{
var track = FindByStringInexact(tracks, func.Item1, func.Item2, trackNumber);
if (track != null)
{
MatchProb = matchProb,
NormalizedTitle = normalizedTitle,
Track = track
};
return track;
}
}
var matchList = matches.ToList();
return null;
}
if (!matchList.Any())
private Track FindByStringInexact(List<Track> tracks, Func<Track, string, double> scoreFunction, string title, int trackNumber)
{
const double fuzzThreshold = 0.7;
const double fuzzGap = 0.2;
var sortedTracks = tracks.Select(s => new
{
MatchProb = scoreFunction(s, title),
Track = s
})
.ToList()
.OrderByDescending(s => s.MatchProb)
.ToList();
if (!sortedTracks.Any())
{
return null;
}
_logger.Trace("\nFuzzy track match on '{0}':\n{1}",
normalizedReleaseTitle,
string.Join("\n", matchList.Select(x => $"{x.NormalizedTitle}: {x.MatchProb}")));
_logger.Trace("\nFuzzy track match on '{0:D2} - {1}':\n{2}",
trackNumber,
title,
string.Join("\n", sortedTracks.Select(x => $"{x.Track.AbsoluteTrackNumber:D2} - {x.Track.Title}: {x.MatchProb}")));
if (matchList[0].MatchProb > fuzzThreshold
&& (matchList.Count == 1 || matchList[0].MatchProb - matchList[1].MatchProb > fuzzGap)
&& (trackNumber == 0 || matchList[0].Track.AbsoluteTrackNumber == trackNumber))
return matchList[0].Track;
if (sortedTracks[0].MatchProb > fuzzThreshold
&& (sortedTracks.Count == 1 || sortedTracks[0].MatchProb - sortedTracks[1].MatchProb > fuzzGap)
&& (trackNumber == 0
|| sortedTracks[0].Track.AbsoluteTrackNumber == trackNumber
|| sortedTracks[0].Track.AbsoluteTrackNumber + tracks.Count(t => t.MediumNumber < sortedTracks[0].Track.MediumNumber) == trackNumber))
{
return sortedTracks[0].Track;
}
return null;
}