mirror of
https://github.com/lidarr/lidarr.git
synced 2025-07-16 10:03:51 -07:00
Fuzzy matching (#508)
* Fixed: correctly match albums whose title is all special characters * New: fuzzy matching on album and track names
This commit is contained in:
parent
3ae079a541
commit
eadd6996ef
7 changed files with 292 additions and 35 deletions
|
@ -141,5 +141,42 @@ namespace NzbDrone.Common.Extensions
|
|||
return CamelCaseRegex.Replace(input, match => " " + match.Value);
|
||||
}
|
||||
|
||||
/// <summary>
/// Scores how closely <paramref name="a"/> resembles <paramref name="b"/>; higher means more similar.
/// </summary>
/// <remarks>
/// When both strings contain a space, each is split on ' ' and compared word by word: every word
/// of <paramref name="a"/> is paired with the word of <paramref name="b"/> that yields the highest
/// <see cref="LevenshteinCoefficient"/>, that coefficient is scaled down according to how far apart
/// the two word positions are, and the scaled values are averaged over the words of
/// <paramref name="a"/>. Otherwise the result is <c>LevenshteinCoefficient(a, b)</c>.
/// The word-wise score is normalised by the word count of <paramref name="a"/> only, so
/// <c>a.FuzzyMatch(b)</c> and <c>b.FuzzyMatch(a)</c> may differ.
/// </remarks>
/// <param name="a">The reference string; its word count drives the normalisation.</param>
/// <param name="b">The string to compare against.</param>
/// <returns>The similarity score described above.</returns>
public static double FuzzyMatch(this string a, string b)
{
    // Without a space on both sides there is nothing to align word by word.
    if (!a.Contains(" ") || !b.Contains(" "))
    {
        return LevenshteinCoefficient(a, b);
    }

    var partsA = a.Split(' ');
    var partsB = b.Split(' ');
    var weightedHighCoefficients = new double[partsA.Length];

    for (int i = 0; i < partsA.Length; i++)
    {
        double high = 0.0;
        int indexDistance = 0;

        // Find the best-matching word in b. The strict '>' keeps the earliest word on ties.
        for (int x = 0; x < partsB.Length; x++)
        {
            var coef = LevenshteinCoefficient(partsA[i], partsB[x]);

            if (coef > high)
            {
                high = coef;
                indexDistance = Math.Abs(i - x);
            }
        }

        // Penalise matches found at a different word position.
        // NOTE(review): when b has more words than a, indexDistance can exceed partsA.Length and
        // this weight becomes negative - confirm whether that is intended.
        double distanceWeight = 1.0 - (double)indexDistance / (double)partsA.Length;
        weightedHighCoefficients[i] = high * distanceWeight;
    }

    return weightedHighCoefficients.Sum() / (double)partsA.Length;
}
|
||||
|
||||
/// <summary>
/// Converts the Levenshtein distance between two strings into a similarity coefficient:
/// 1.0 minus the distance divided by the length of the longer string.
/// </summary>
/// <param name="a">First string.</param>
/// <param name="b">Second string.</param>
/// <returns>
/// 1.0 when both strings are empty; otherwise <c>1.0 - distance / max(a.Length, b.Length)</c>.
/// </returns>
public static double LevenshteinCoefficient(this string a, string b)
{
    var longest = Math.Max(a.Length, b.Length);

    // Two empty strings are identical. Without this guard the division below is 0.0 / 0,
    // which evaluates to NaN and then fails every threshold comparison made by callers.
    if (longest == 0)
    {
        return 1.0;
    }

    return 1.0 - (double)a.LevenshteinDistance(b) / longest;
}
|
||||
|
||||
}
|
||||
}
|
||||
|
|
|
@ -8,6 +8,7 @@ using System.Collections.Generic;
|
|||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Threading.Tasks;
|
||||
using NLog;
|
||||
|
||||
namespace NzbDrone.Core.Test.MusicTests.AlbumRepositoryTests
|
||||
{
|
||||
|
@ -16,6 +17,8 @@ namespace NzbDrone.Core.Test.MusicTests.AlbumRepositoryTests
|
|||
{
|
||||
private Artist _artist;
|
||||
private Album _album;
|
||||
private Album _albumSpecial;
|
||||
private Album _albumSimilar;
|
||||
private AlbumRepository _albumRepo;
|
||||
|
||||
[SetUp]
|
||||
|
@ -29,12 +32,15 @@ namespace NzbDrone.Core.Test.MusicTests.AlbumRepositoryTests
|
|||
Id = 1
|
||||
};
|
||||
|
||||
_albumRepo = Mocker.Resolve<AlbumRepository>();
|
||||
|
||||
_album = new Album
|
||||
{
|
||||
Title = "ANThology",
|
||||
ForeignAlbumId = "1",
|
||||
CleanTitle = "anthology",
|
||||
Artist = _artist,
|
||||
ArtistId = _artist.Id,
|
||||
AlbumType = "",
|
||||
Releases = new List<AlbumRelease>
|
||||
{
|
||||
|
@ -46,9 +52,46 @@ namespace NzbDrone.Core.Test.MusicTests.AlbumRepositoryTests
|
|||
|
||||
};
|
||||
|
||||
_albumRepo = Mocker.Resolve<AlbumRepository>();
|
||||
|
||||
_albumRepo.Insert(_album);
|
||||
|
||||
_albumSpecial = new Album
|
||||
{
|
||||
Title = "+",
|
||||
ForeignAlbumId = "2",
|
||||
CleanTitle = "",
|
||||
Artist = _artist,
|
||||
ArtistId = _artist.Id,
|
||||
AlbumType = "",
|
||||
Releases = new List<AlbumRelease>
|
||||
{
|
||||
new AlbumRelease
|
||||
{
|
||||
Id = "fake id"
|
||||
}
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
_albumRepo.Insert(_albumSpecial);
|
||||
|
||||
_albumSimilar = new Album
|
||||
{
|
||||
Title = "ANThology2",
|
||||
ForeignAlbumId = "3",
|
||||
CleanTitle = "anthology2",
|
||||
Artist = _artist,
|
||||
ArtistId = _artist.Id,
|
||||
AlbumType = "",
|
||||
Releases = new List<AlbumRelease>
|
||||
{
|
||||
new AlbumRelease
|
||||
{
|
||||
Id = "fake id 2"
|
||||
}
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
@ -59,9 +102,62 @@ namespace NzbDrone.Core.Test.MusicTests.AlbumRepositoryTests
|
|||
|
||||
var album = _albumRepo.FindAlbumByRelease(id);
|
||||
|
||||
album.Should().NotBeNull();
|
||||
album.Title.Should().Be(_album.Title);
|
||||
}
|
||||
|
||||
// FindByTitle must resolve each of these spellings to the album stored in _album.
[TestCase("ANThology")]
[TestCase("anthology")]
[TestCase("anthology!")]
public void should_find_album_in_db_by_title(string title)
{
    var expectedTitle = _album.Title;

    var found = _albumRepo.FindByTitle(_artist.Id, title);

    found.Should().NotBeNull();
    found.Title.Should().Be(expectedTitle);
}
|
||||
|
||||
// The album stored in _albumSpecial is titled "+"; looking up "+" must return it.
[Test]
public void should_find_album_in_db_by_title_all_special_characters()
{
    var expectedTitle = _albumSpecial.Title;

    var found = _albumRepo.FindByTitle(_artist.Id, "+");

    found.Should().NotBeNull();
    found.Title.Should().Be(expectedTitle);
}
|
||||
|
||||
// Misspelled or truncated titles must not be returned by the exact lookup.
[TestCase("ANTholog")]
[TestCase("nthology")]
[TestCase("antholoyg")]
public void should_not_find_album_in_db_by_incorrect_title(string title)
{
    var found = _albumRepo.FindByTitle(_artist.Id, title);

    found.Should().BeNull();
}
|
||||
|
||||
// FindByTitleInexact must resolve each of these near-miss titles to the album stored in _album.
[TestCase("ANTholog")]
[TestCase("antholoyg")]
[TestCase("ANThology CD")]
public void should_find_album_in_db_by_inexact_title(string title)
{
    var expectedTitle = _album.Title;

    var found = _albumRepo.FindByTitleInexact(_artist.Id, title);

    found.Should().NotBeNull();
    found.Title.Should().Be(expectedTitle);
}
|
||||
|
||||
// After _albumSimilar is also inserted, the inexact lookup is expected to return
// nothing for these titles.
[TestCase("ANTholog")]
[TestCase("antholoyg")]
[TestCase("ANThology CD")]
public void should_not_find_album_in_db_by_inexact_title_when_two_similar_matches(string title)
{
    _albumRepo.Insert(_albumSimilar);

    var found = _albumRepo.FindByTitleInexact(_artist.Id, title);

    found.Should().BeNull();
}
|
||||
|
||||
[Test]
|
||||
public void should_not_find_album_in_db_by_partial_releaseid()
|
||||
{
|
||||
|
|
|
@ -5,6 +5,7 @@ using NUnit.Framework;
|
|||
using NzbDrone.Core.Configuration;
|
||||
using NzbDrone.Core.Music;
|
||||
using NzbDrone.Core.Test.Framework;
|
||||
using System.Collections.Generic;
|
||||
|
||||
namespace NzbDrone.Core.Test.MusicTests.TitleMatchingTests
|
||||
{
|
||||
|
@ -21,49 +22,82 @@ namespace NzbDrone.Core.Test.MusicTests.TitleMatchingTests
|
|||
_trackService =
|
||||
new TrackService(_trackRepository, Mocker.Resolve<ConfigService>(), Mocker.Resolve<Logger>());
|
||||
|
||||
_trackRepository.Insert(new Track
|
||||
{
|
||||
Title = "This is the short test title",
|
||||
ForeignTrackId = "this is a fake id2",
|
||||
AlbumId = 4321,
|
||||
AbsoluteTrackNumber = 1,
|
||||
MediumNumber = 1,
|
||||
TrackFileId = 1
|
||||
});
|
||||
var trackNames = new List<string> {
|
||||
"Courage",
|
||||
"Movies",
|
||||
"Flesh and Bone",
|
||||
"Whisper",
|
||||
"Summer",
|
||||
"Sticks and Stones",
|
||||
"Attitude",
|
||||
"Stranded",
|
||||
"Wish",
|
||||
"Calico",
|
||||
"(Happy) Death Day",
|
||||
"Smooth Criminal",
|
||||
"Universe / Orange Appeal"
|
||||
};
|
||||
|
||||
for (int i = 0; i < trackNames.Count; i++) {
|
||||
_trackRepository.Insert(new Track
|
||||
{
|
||||
Title = "This is the long test title",
|
||||
ForeignTrackId = "this is a fake id",
|
||||
Title = trackNames[i],
|
||||
ForeignTrackId = (i+1).ToString(),
|
||||
AlbumId = 4321,
|
||||
AbsoluteTrackNumber = 2,
|
||||
AbsoluteTrackNumber = i+1,
|
||||
MediumNumber = 1,
|
||||
TrackFileId = 2
|
||||
TrackFileId = i+1
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
[Test]
|
||||
public void should_find_track_in_db_by_tracktitle_longer_then_relaeasetitle()
|
||||
public void should_find_track_in_db_by_tracktitle_longer_then_releasetitle()
|
||||
{
|
||||
var track = _trackService.FindTrackByTitle(1234, 4321, 1, 1, "This is the short test title with some bla");
|
||||
var track = _trackService.FindTrackByTitle(1234, 4321, 1, 1, "Courage with some bla");
|
||||
|
||||
track.Should().NotBeNull();
|
||||
track.Title.Should().Be(_trackRepository.GetTracksByFileId(1).First().Title);
|
||||
}
|
||||
|
||||
[Test]
|
||||
public void should_find_track_in_db_by_tracktitle_shorter_then_relaeasetitle()
|
||||
public void should_find_track_in_db_by_tracktitle_shorter_then_releasetitle()
|
||||
{
|
||||
var track = _trackService.FindTrackByTitle(1234, 4321, 1, 2, "test title");
|
||||
var track = _trackService.FindTrackByTitle(1234, 4321, 1, 3, "and Bone");
|
||||
|
||||
track.Title.Should().Be(_trackRepository.GetTracksByFileId(2).First().Title);
|
||||
track.Should().NotBeNull();
|
||||
track.Title.Should().Be(_trackRepository.GetTracksByFileId(3).First().Title);
|
||||
}
|
||||
|
||||
[Test]
|
||||
public void should_not_find_track_in_db_by_wrong_title()
|
||||
{
|
||||
var track = _trackService.FindTrackByTitle(1234, 4321, 1, 1, "the short title");
|
||||
var track = _trackService.FindTrackByTitle(1234, 4321, 1, 1, "Not a track");
|
||||
|
||||
track.Should().BeNull();
|
||||
}
|
||||
|
||||
// Each misspelled or decorated title must resolve to the track at the given track number.
// The expected title is read back from the repository via the track file id, which this
// test takes to be the same number as trackNumber.
[TestCase("Fesh and Bone", 3)]
[TestCase("Atitude", 7)]
[TestCase("Smoth cRimnal", 12)]
[TestCase("Sticks and Stones (live)", 6)]
public void should_find_track_in_db_by_inexact_title(string title, int trackNumber)
{
    var found = _trackService.FindTrackByTitleInexact(1234, 4321, 1, trackNumber, title);

    found.Should().NotBeNull();

    var expectedTitle = _trackRepository.GetTracksByFileId(trackNumber).First().Title;
    found.Title.Should().Be(expectedTitle);
}
|
||||
|
||||
// An unrelated title, or the right words in the wrong order, must not produce a match.
[TestCase("A random title", 1)]
[TestCase("Stones and Sticks", 6)]
public void should_not_find_track_in_db_by_different_inexact_title(string title, int trackId)
{
    var found = _trackService.FindTrackByTitleInexact(1234, 4321, 1, trackId, title);

    found.Should().BeNull();
}
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
using System;
|
||||
using System.Linq;
|
||||
using NLog;
|
||||
using Marr.Data.QGen;
|
||||
using NzbDrone.Core.Datastore;
|
||||
using NzbDrone.Core.Datastore.Extensions;
|
||||
|
@ -16,6 +17,7 @@ namespace NzbDrone.Core.Music
|
|||
List<Album> GetAlbums(int artistId);
|
||||
Album FindByName(string cleanTitle);
|
||||
Album FindByTitle(int artistId, string title);
|
||||
Album FindByTitleInexact(int artistId, string title);
|
||||
Album FindByArtistAndName(string artistName, string cleanTitle);
|
||||
Album FindById(string spotifyId);
|
||||
PagingSpec<Album> AlbumsWithoutFiles(PagingSpec<Album> pagingSpec);
|
||||
|
@ -31,14 +33,15 @@ namespace NzbDrone.Core.Music
|
|||
public class AlbumRepository : BasicRepository<Album>, IAlbumRepository
|
||||
{
|
||||
private readonly IMainDatabase _database;
|
||||
private readonly Logger _logger;
|
||||
|
||||
public AlbumRepository(IMainDatabase database, IEventAggregator eventAggregator)
|
||||
public AlbumRepository(IMainDatabase database, IEventAggregator eventAggregator, Logger logger)
|
||||
: base(database, eventAggregator)
|
||||
{
|
||||
_database = database;
|
||||
_logger = logger;
|
||||
}
|
||||
|
||||
|
||||
public List<Album> GetAlbums(int artistId)
|
||||
{
|
||||
return Query.Where(s => s.ArtistId == artistId).ToList();
|
||||
|
@ -287,13 +290,49 @@ namespace NzbDrone.Core.Music
|
|||
|
||||
public Album FindByTitle(int artistId, string title)
|
||||
{
|
||||
title = Parser.Parser.CleanArtistName(title);
|
||||
var cleanTitle = Parser.Parser.CleanArtistName(title);
|
||||
|
||||
return Query.Where(s => s.CleanTitle == title)
|
||||
if (string.IsNullOrEmpty(cleanTitle))
|
||||
cleanTitle = title;
|
||||
|
||||
return Query.Where(s => s.CleanTitle == cleanTitle || s.Title == title)
|
||||
.AndWhere(s => s.ArtistId == artistId)
|
||||
.FirstOrDefault();
|
||||
}
|
||||
|
||||
/// <summary>
/// Finds the album of the given artist whose clean title most closely resembles
/// <paramref name="title"/>, using fuzzy matching.
/// </summary>
/// <param name="artistId">Id of the artist whose albums are searched.</param>
/// <param name="title">
/// Title to look for. It is cleaned with <c>Parser.Parser.CleanArtistName</c>; if cleaning
/// leaves nothing, the raw title is used instead.
/// </param>
/// <returns>
/// The best-scoring album, provided its score exceeds 0.7 and it is either the only candidate
/// or beats the runner-up by more than 0.4; otherwise <c>null</c>.
/// </returns>
public Album FindByTitleInexact(int artistId, string title)
{
    // Minimum score for the best candidate to be accepted at all.
    const double fuzzThreshold = 0.7;

    // Minimum lead over the second-best candidate, so ambiguous matches are rejected.
    const double fuzzGap = 0.4;

    var cleanTitle = Parser.Parser.CleanArtistName(title);

    if (string.IsNullOrEmpty(cleanTitle))
    {
        cleanTitle = title;
    }

    var sortedAlbums = Query.Where(s => s.ArtistId == artistId)
        .Select(s => new
        {
            MatchProb = s.CleanTitle.FuzzyMatch(cleanTitle),
            Album = s
        })
        .OrderByDescending(s => s.MatchProb)
        .ToList();

    if (!sortedAlbums.Any())
    {
        return null;
    }

    // Only build the per-album report when it will actually be written.
    if (_logger.IsTraceEnabled)
    {
        _logger.Trace("\nFuzzy album match on '{0}':\n{1}",
            cleanTitle,
            string.Join("\n", sortedAlbums.Select(x => $"{x.Album.CleanTitle}: {x.MatchProb}")));
    }

    var best = sortedAlbums[0];
    var isUnambiguous = sortedAlbums.Count == 1
                        || best.MatchProb - sortedAlbums[1].MatchProb > fuzzGap;

    if (best.MatchProb > fuzzThreshold && isUnambiguous)
    {
        return best.Album;
    }

    return null;
}
|
||||
|
||||
public Album FindByArtistAndName(string artistName, string cleanTitle)
|
||||
{
|
||||
var cleanArtistName = Parser.Parser.CleanArtistName(artistName);
|
||||
|
|
|
@ -17,7 +17,7 @@ namespace NzbDrone.Core.Music
|
|||
List<Album> AddAlbums(List<Album> newAlbums);
|
||||
Album FindById(string spotifyId);
|
||||
Album FindByTitle(int artistId, string title);
|
||||
Album FindByTitleInexact(string title);
|
||||
Album FindByTitleInexact(int artistId, string title);
|
||||
void DeleteAlbum(int albumId, bool deleteFiles);
|
||||
List<Album> GetAllAlbums();
|
||||
Album UpdateAlbum(Album album);
|
||||
|
@ -87,9 +87,9 @@ namespace NzbDrone.Core.Music
|
|||
return _albumRepository.FindByTitle(artistId, title);
|
||||
}
|
||||
|
||||
public Album FindByTitleInexact(string title)
|
||||
public Album FindByTitleInexact(int artistId, string title)
|
||||
{
|
||||
throw new NotImplementedException();
|
||||
return _albumRepository.FindByTitleInexact(artistId, title);
|
||||
}
|
||||
|
||||
public List<Album> GetAllAlbums()
|
||||
|
|
|
@ -5,6 +5,7 @@ using NzbDrone.Core.MediaFiles;
|
|||
using NzbDrone.Core.MediaFiles.Events;
|
||||
using NzbDrone.Core.Messaging.Events;
|
||||
using NzbDrone.Core.Music.Events;
|
||||
using NzbDrone.Common.Extensions;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
|
@ -18,6 +19,7 @@ namespace NzbDrone.Core.Music
|
|||
List<Track> GetTracks(IEnumerable<int> ids);
|
||||
Track FindTrack(int artistId, int albumId, int mediumNumber, int trackNumber);
|
||||
Track FindTrackByTitle(int artistId, int albumId, int mediumNumber, int trackNumber, string releaseTitle);
|
||||
Track FindTrackByTitleInexact(int artistId, int albumId, int mediumNumber, int trackNumber, string releaseTitle);
|
||||
List<Track> GetTracksByArtist(int artistId);
|
||||
List<Track> GetTracksByAlbum(int albumId);
|
||||
//List<Track> GetTracksByAlbumTitle(string artistId, string albumTitle);
|
||||
|
@ -100,6 +102,43 @@ namespace NzbDrone.Core.Music
|
|||
return matches.OrderByDescending(e => e.NormalizedLength).FirstOrDefault()?.Track;
|
||||
}
|
||||
|
||||
/// <summary>
/// Finds the track on the given medium of an album whose title most closely resembles
/// <paramref name="releaseTitle"/>, using fuzzy matching.
/// </summary>
/// <param name="artistId">Not used by this method.</param>
/// <param name="albumId">Id of the album whose tracks are searched.</param>
/// <param name="mediumNumber">Medium (disc) number whose tracks are searched.</param>
/// <param name="trackNumber">
/// Expected absolute track number of the match, or 0 to skip the track-number check.
/// </param>
/// <param name="releaseTitle">Title to look for; normalised before comparison.</param>
/// <returns>
/// The best-scoring track, provided its score exceeds 0.6, it is either the only candidate or
/// beats the runner-up by more than 0.2, and its absolute track number agrees with
/// <paramref name="trackNumber"/> when one is given; otherwise <c>null</c>.
/// </returns>
public Track FindTrackByTitleInexact(int artistId, int albumId, int mediumNumber, int trackNumber, string releaseTitle)
{
    // Minimum score for the best candidate to be accepted at all.
    const double fuzzThreshold = 0.6;

    // Minimum lead over the second-best candidate, so ambiguous matches are rejected.
    const double fuzzGap = 0.2;

    var normalizedReleaseTitle = Parser.Parser.NormalizeTrackTitle(releaseTitle).Replace(".", " ");
    var tracks = _trackRepository.GetTracksByMedium(albumId, mediumNumber);

    // Filter out untitled tracks first so they are never normalised or scored.
    var matches = from track in tracks
                  where track.Title.Length > 0
                  let normalizedTitle = Parser.Parser.NormalizeTrackTitle(track.Title).Replace(".", " ")
                  let matchProb = normalizedTitle.FuzzyMatch(normalizedReleaseTitle)
                  orderby matchProb descending
                  select new
                  {
                      MatchProb = matchProb,
                      NormalizedTitle = normalizedTitle,
                      Track = track
                  };

    var matchList = matches.ToList();

    if (!matchList.Any())
    {
        return null;
    }

    // Only build the per-track report when it will actually be written.
    if (_logger.IsTraceEnabled)
    {
        _logger.Trace("\nFuzzy track match on '{0}':\n{1}",
            normalizedReleaseTitle,
            string.Join("\n", matchList.Select(x => $"{x.NormalizedTitle}: {x.MatchProb}")));
    }

    var best = matchList[0];
    var isUnambiguous = matchList.Count == 1
                        || best.MatchProb - matchList[1].MatchProb > fuzzGap;
    var trackNumberAgrees = trackNumber == 0
                            || best.Track.AbsoluteTrackNumber == trackNumber;

    if (best.MatchProb > fuzzThreshold && isUnambiguous && trackNumberAgrees)
    {
        return best.Track;
    }

    return null;
}
|
||||
|
||||
public List<Track> TracksWithFiles(int artistId)
|
||||
{
|
||||
return _trackRepository.TracksWithFiles(artistId);
|
||||
|
|
|
@ -290,6 +290,12 @@ namespace NzbDrone.Core.Parser
|
|||
album = _albumService.FindByTitle(artist.Id, cleanAlbumTitle);
|
||||
}
|
||||
|
||||
if (album == null)
|
||||
{
|
||||
_logger.Debug("Trying inexact album match for {0}", parsedTrackInfo);
|
||||
album = _albumService.FindByTitleInexact(artist.Id, cleanAlbumTitle);
|
||||
}
|
||||
|
||||
if (album == null)
|
||||
{
|
||||
_logger.Debug("Parsed album title not found in Db for {0}", parsedTrackInfo);
|
||||
|
@ -318,6 +324,12 @@ namespace NzbDrone.Core.Parser
|
|||
trackInfo = _trackService.FindTrackByTitle(artist.Id, album.Id, parsedTrackInfo.DiscNumber, parsedTrackInfo.TrackNumbers.FirstOrDefault(), parsedTrackInfo.Title);
|
||||
}
|
||||
|
||||
if (trackInfo == null)
|
||||
{
|
||||
_logger.Debug("Trying inexact track match for {0}", parsedTrackInfo);
|
||||
trackInfo = _trackService.FindTrackByTitleInexact(artist.Id, album.Id, parsedTrackInfo.DiscNumber, parsedTrackInfo.TrackNumbers.FirstOrDefault(), cleanTrackTitle);
|
||||
}
|
||||
|
||||
if (trackInfo != null)
|
||||
{
|
||||
_logger.Debug("Track {0} selected for {1}", trackInfo, parsedTrackInfo);
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue