Sanitize contributor names (#1518)

This commit is contained in:
Michael Bucari-Tovo
2025-12-31 13:06:58 -07:00
parent e50d8c74de
commit 09e610fe08
4 changed files with 74 additions and 32 deletions

View File

@@ -1,16 +1,17 @@
using System; using AudibleApi;
using System.Collections.Generic;
using System.Linq;
using System.Threading.Channels;
using System.Threading.Tasks;
using System.Diagnostics;
using AudibleApi;
using AudibleApi.Common; using AudibleApi.Common;
using Dinah.Core; using Dinah.Core;
using LibationFileManager;
using Newtonsoft.Json.Linq;
using Polly; using Polly;
using Polly.Retry; using Polly.Retry;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using System.Threading; using System.Threading;
using LibationFileManager; using System.Threading.Channels;
using System.Threading.Tasks;
#nullable enable #nullable enable
namespace AudibleUtilities namespace AudibleUtilities
@@ -82,6 +83,23 @@ namespace AudibleUtilities
return policy.ExecuteAsync(() => getItemsAsync(libraryOptions)); return policy.ExecuteAsync(() => getItemsAsync(libraryOptions));
} }
/// <summary>
/// A debugging method used to simulate a library scan from a LibraryScans.zip json file.
/// Simply replace the Api call to GetLibraryItemsPagesAsync() with a call to this method.
/// </summary>
/// <returns>
/// The entries of the file's "Items" array, parsed into items and grouped into arrays of at most
/// <c>BatchSize</c> elements. Yields nothing when the root token has no "Items" array.
/// </returns>
private static async IAsyncEnumerable<Item[]> GetItemsFromJsonFile()
{
    // Placeholder path: point it at the extracted library scan json before debugging.
    var libraryScanJsonPath = @"Path/to/libraryscan.json";
    using var jsonFile = System.IO.File.OpenText(libraryScanJsonPath);
    // The json reader is disposable as well; release it together with the file.
    using var jsonReader = new Newtonsoft.Json.JsonTextReader(jsonFile);
    var json = await JToken.ReadFromAsync(jsonReader);
    if (json?["Items"] is not JArray items)
        yield break;

    // Only JSON objects are parsed. OfType skips any other token instead of
    // passing the null produced by a failed 'as' cast to Item.FromJson.
    foreach (var batch in items.OfType<JObject>().Select(entry => Item.FromJson(entry)).Chunk(BatchSize))
        yield return batch;
}
private async Task<List<Item>> getItemsAsync(LibraryOptions libraryOptions) private async Task<List<Item>> getItemsAsync(LibraryOptions libraryOptions)
{ {
Serilog.Log.Logger.Debug("Beginning library scan."); Serilog.Log.Logger.Debug("Beginning library scan.");
@@ -162,6 +180,7 @@ namespace AudibleUtilities
Serilog.Log.Logger.Information("Completed indexing series episodes after {elappsed_ms} ms.", sw.ElapsedMilliseconds); Serilog.Log.Logger.Information("Completed indexing series episodes after {elappsed_ms} ms.", sw.ElapsedMilliseconds);
Serilog.Log.Logger.Information($"Completed library scan in {totalTime.TotalMilliseconds:F0} ms."); Serilog.Log.Logger.Information($"Completed library scan in {totalTime.TotalMilliseconds:F0} ms.");
Array.ForEach(ISanitizer.GetAllSanitizers(), s => s.Sanitize(items));
var allExceptions = IValidator.GetAllValidators().SelectMany(v => v.Validate(items)).ToList(); var allExceptions = IValidator.GetAllValidators().SelectMany(v => v.Validate(items)).ToList();
if (allExceptions?.Count > 0) if (allExceptions?.Count > 0)
throw new ImportValidationException(items, allExceptions); throw new ImportValidationException(items, allExceptions);

View File

@@ -0,0 +1,33 @@
using AudibleApi.Common;
using System.Collections.Generic;
using System.Linq;
#nullable enable
namespace AudibleUtilities;
/// <summary>
/// A sanitizing pass that is run over a collection of library items.
/// </summary>
public interface ISanitizer
{
    /// <summary>
    /// Applies this sanitizer to <paramref name="items"/>. Nothing is returned, so any
    /// clean-up an implementation performs is made on the items it is given.
    /// </summary>
    /// <param name="items">The items to sanitize.</param>
    void Sanitize(IEnumerable<Item> items);

    /// <summary>
    /// Builds a new array containing one instance of every sanitizer listed here.
    /// </summary>
    public static ISanitizer[] GetAllSanitizers()
    {
        return new ISanitizer[]
        {
            new ContributorSanitizer(),
        };
    }
}
/// <summary>
/// Strips unusable entries from the <c>Authors</c> and <c>Narrators</c> arrays of each item.
/// </summary>
public class ContributorSanitizer : ISanitizer
{
    /// <summary>
    /// Replaces every item's <c>Authors</c> and <c>Narrators</c> with a filtered copy that
    /// contains only non-null contributors whose name is not blank.
    /// </summary>
    /// <param name="items">The items whose contributor arrays are rewritten.</param>
    public void Sanitize(IEnumerable<Item> items)
    {
        foreach (var item in items)
        {
            item.Authors = SanitizePersonArray(item.Authors);
            item.Narrators = SanitizePersonArray(item.Narrators);
        }
    }

    /// <summary>
    /// Filters out null entries and entries whose <c>Name</c> is null, empty or whitespace.
    /// Only the name is checked: a named contributor without an <c>Asin</c> is kept.
    /// </summary>
    /// <param name="contributors">The array to filter; may be null.</param>
    /// <returns>A new filtered array, or null when <paramref name="contributors"/> is null.</returns>
    private static Person[]? SanitizePersonArray(Person?[]? contributors)
        => contributors
            ?.OfType<Person>()
            .Where(c => !string.IsNullOrWhiteSpace(c.Name))
            .ToArray();
}

View File

@@ -9,17 +9,21 @@ namespace AudibleUtilities
{ {
IEnumerable<Exception> Validate(IEnumerable<Item> items); IEnumerable<Exception> Validate(IEnumerable<Item> items);
public static IValidator[] GetAllValidators() public static IValidator[] GetAllValidators() => [
=> new IValidator[]
{
new LibraryValidator(), new LibraryValidator(),
new BookValidator(), new BookValidator(),
new CategoryValidator(), new CategoryValidator(),
new ContributorValidator(),
new SeriesValidator(), new SeriesValidator(),
}; ];
} }
/// <summary>
/// To be used when no validation is desired
/// </summary>
public class ClearValidator : IValidator
{
    /// <summary>
    /// Reports no problems; <paramref name="items"/> is never inspected.
    /// </summary>
    /// <returns>An empty sequence.</returns>
    public IEnumerable<Exception> Validate(IEnumerable<Item> items)
    {
        return Array.Empty<Exception>();
    }
}
public class LibraryValidator : IValidator public class LibraryValidator : IValidator
{ {
public IEnumerable<Exception> Validate(IEnumerable<Item> items) public IEnumerable<Exception> Validate(IEnumerable<Item> items)
@@ -68,20 +72,6 @@ namespace AudibleUtilities
return exceptions; return exceptions;
} }
} }
/// <summary>
/// Checks that no author or narrator in a collection of items has a blank name.
/// </summary>
public class ContributorValidator : IValidator
{
    /// <summary>
    /// Looks at the distinct authors and the distinct narrators of <paramref name="items"/>
    /// and reports one exception per group that contains a null, empty or whitespace name.
    /// </summary>
    /// <returns>Zero, one or two <see cref="ArgumentException"/> instances; authors are reported first.</returns>
    public IEnumerable<Exception> Validate(IEnumerable<Item> items)
    {
        List<Exception> problems = new();

        bool hasUnnamedAuthor = items
            .GetAuthorsDistinct()
            .Any(author => string.IsNullOrWhiteSpace(author.Name));
        if (hasUnnamedAuthor)
        {
            problems.Add(new ArgumentException($"Collection contains {nameof(Item.Authors)} with null {nameof(Person.Name)}", nameof(items)));
        }

        bool hasUnnamedNarrator = items
            .GetNarratorsDistinct()
            .Any(narrator => string.IsNullOrWhiteSpace(narrator.Name));
        if (hasUnnamedNarrator)
        {
            problems.Add(new ArgumentException($"Collection contains {nameof(Item.Narrators)} with null {nameof(Person.Name)}", nameof(items)));
        }

        return problems;
    }
}
public class SeriesValidator : IValidator public class SeriesValidator : IValidator
{ {
public IEnumerable<Exception> Validate(IEnumerable<Item> items) public IEnumerable<Exception> Validate(IEnumerable<Item> items)

View File

@@ -10,7 +10,7 @@ namespace DtoImporterService
{ {
public class ContributorImporter : ItemsImporterBase public class ContributorImporter : ItemsImporterBase
{ {
protected override IValidator Validator => new ContributorValidator(); protected override IValidator Validator => new ClearValidator();
public Dictionary<string, Contributor> Cache { get; private set; } = new(); public Dictionary<string, Contributor> Cache { get; private set; } = new();