Created
December 1, 2023 15:56
-
-
Save joelverhagen/b50c44dcb6d1cbd5b6031773449630cd to your computer and use it in GitHub Desktop.
Validate consistency of NuGet.org catalog page items and pages
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System.Collections.Concurrent;
using System.Diagnostics;
using System.Globalization;
using System.Text.Json;
using System.Text.Json.Serialization;
using Xunit;
// Validates that every page entry in the NuGet.org catalog index agrees with the page
// document it points to, and that each page document is internally consistent.
//
// Page documents are cached in the current directory under their own file name
// (page{N}.json). A page is downloaded when it is not cached yet, or when it is the
// highest-numbered cached page (that one is always re-downloaded).

// Upper bound on how many pages (lowest index first) are validated.
const int maxPageCount = 100_000;

// Number of concurrent download/parse loops.
const int workerCount = 16;

Console.WriteLine("Starting...");

var indexUrl = "https://api.nuget.org/v3/catalog0/index.json";
using var httpClient = new HttpClient();

Console.WriteLine($"Downloading {indexUrl}...");
CatalogIndex index;
using (var indexStream = await httpClient.GetStreamAsync(indexUrl))
{
    // The deserializer returns null for a literal JSON "null"; fail loudly in that case.
    index = await JsonSerializer.DeserializeAsync<CatalogIndex>(indexStream)
        ?? throw new InvalidOperationException($"The catalog index at {indexUrl} deserialized to null.");
}

// Work list, ordered by page index so that pages are processed in catalog order.
var orderedPageItems = index.Items
    .Select(item => (
        Index: GetPageIndex(item.Url),
        FileName: new Uri(item.Url).AbsolutePath.Split('/').Last(),
        Item: item))
    .OrderBy(x => x.Index)
    .Take(maxPageCount)
    .ToList();

// Materialize once; the list is inspected more than once below.
var cachedFiles = Directory
    .GetFiles(Directory.GetCurrentDirectory(), "page*.json")
    .Select(p => Path.GetFileName(p))
    .ToList();

string? latestFileName = null;
if (cachedFiles.Count > 0)
{
    latestFileName = cachedFiles.MaxBy(GetPageIndex);
    Console.WriteLine("Latest cached file name: " + latestFileName);
}

// A queue (rather than a bag) keeps the ordering established above.
var pageItems = new ConcurrentQueue<(int Index, string FileName, CatalogPageItem Item)>(orderedPageItems);
Console.WriteLine($"There are {pageItems.Count} pages.");

var pages = new ConcurrentBag<(int Index, string FileName, CatalogPageItem Item, CatalogPage Page)>();

Console.WriteLine();
await Task.WhenAll(Enumerable
    .Range(0, workerCount)
    .Select(async _ =>
    {
        while (pageItems.TryDequeue(out var pageItem))
        {
            var cached = true;
            if (!File.Exists(pageItem.FileName) || pageItem.FileName == latestFileName)
            {
                // Download to a temporary file first so that an interrupted download
                // never leaves a truncated page under the final cache name.
                var tempFileName = pageItem.FileName + ".temp";
                using (var pageStream = await httpClient.GetStreamAsync(pageItem.Item.Url))
                using (var tempStream = new FileStream(tempFileName, FileMode.Create))
                {
                    await pageStream.CopyToAsync(tempStream);
                }

                File.Move(tempFileName, pageItem.FileName, overwrite: true);
                cached = false;
            }

            CatalogPage page;
            using (var fileStream = File.OpenRead(pageItem.FileName))
            {
                page = await JsonSerializer.DeserializeAsync<CatalogPage>(fileStream)
                    ?? throw new InvalidOperationException($"The catalog page in {pageItem.FileName} deserialized to null.");
            }

            pages.Add((pageItem.Index, pageItem.FileName, pageItem.Item, page));

            // Progress marker: "." = served from the cache, "o" = downloaded.
            Console.Write(cached ? "." : "o");
        }
    }));
Console.WriteLine();

// Returns the leaf item with the greatest commit timestamp on the page.
// Throws when the page has no items, so the calling rule reports a clear message.
CatalogLeafItem GetLatestLeaf(CatalogPage page)
{
    if (page.Items is not { Count: > 0 })
    {
        throw new InvalidOperationException("The page has no items.");
    }

    return page.Items.MaxBy(x => DateTimeOffset.Parse(x.CommitTimestamp, CultureInfo.InvariantCulture))!;
}

// Each rule throws (via xUnit's Assert) when the page item or the page is inconsistent.
var rules = new Dictionary<string, Action<(int Index, string FileName, CatalogPageItem Item, CatalogPage Page)>>
{
    { "Item has unexpected @type", page => Assert.Equal("CatalogPage", page.Item.Type) },
    { "Item has unexpected count", page => Assert.Equal(page.Page.Items.Count, page.Item.Count) },
    { "Item has unexpected commitId", page => Assert.Equal(GetLatestLeaf(page.Page).CommitId, page.Item.CommitId) },
    { "Item has unexpected commitTimeStamp", page => Assert.Equal(GetLatestLeaf(page.Page).CommitTimestamp, page.Item.CommitTimestamp) },
    { "Page has unexpected @id", page => Assert.Equal(page.Item.Url, page.Page.Url) },
    { "Page has unexpected parent", page => Assert.Equal(indexUrl, page.Page.Parent) },
    { "Page has unexpected @type", page => Assert.Equal("CatalogPage", page.Page.Type) },
    { "Page has unexpected count", page => Assert.Equal(page.Page.Items.Count, page.Page.Count) },
    { "Page has unexpected commitId", page => Assert.Equal(GetLatestLeaf(page.Page).CommitId, page.Page.CommitId) },
    { "Page has unexpected commitTimestamp", page => Assert.Equal(GetLatestLeaf(page.Page).CommitTimestamp, page.Page.CommitTimestamp) },
};

// The rule order does not depend on the page, so compute it once.
var orderedRules = rules.OrderBy(x => x.Key).ToList();
var separator = new string('-', 40);

Console.WriteLine(separator);
foreach (var page in pages.OrderBy(x => x.Index))
{
    var anyFailures = false;
    foreach (var (ruleName, rule) in orderedRules)
    {
        try
        {
            rule.Invoke(page);
        }
        catch (Exception ex)
        {
            // Deliberately broad: any failure of a rule is reported and the
            // remaining rules and pages are still evaluated.
            if (!anyFailures)
            {
                // Print the file name once, before the first failure of this page.
                Console.WriteLine(page.FileName);
                anyFailures = true;
            }

            Console.WriteLine($"{ruleName}: {ex.Message}");
        }
    }

    if (anyFailures)
    {
        Console.WriteLine(separator);
    }
}
// Extracts the numeric page index from a path or URL whose last segment has the
// form "page{N}.json" (for example ".../page123.json" yields 123).
// Both '/' and '\' are treated as segment separators.
int GetPageIndex(string path)
{
    var normalized = path.Replace('\\', '/');

    // Everything after the last separator (or the whole string when there is none).
    var lastSegment = normalized[(normalized.LastIndexOf('/') + 1)..];

    // Strip the "page" prefix and the ".json" suffix, leaving only the digits.
    var digits = lastSegment["page".Length..^".json".Length];
    return int.Parse(digits);
}
/// <summary>
/// JSON model of the catalog index document, which lists the catalog pages.
/// String properties default to an empty string and <see cref="Items"/> to an empty list
/// when the corresponding JSON property is absent, so consumers never observe an
/// uninitialized non-nullable member.
/// </summary>
public class CatalogIndex
{
    /// <summary>Value of the <c>@id</c> JSON property.</summary>
    [JsonPropertyName("@id")]
    public string Url { get; set; } = string.Empty;

    /// <summary>Value of the <c>commitId</c> JSON property.</summary>
    [JsonPropertyName("commitId")]
    public string CommitId { get; set; } = string.Empty;

    /// <summary>Value of the <c>commitTimeStamp</c> JSON property, kept as the raw string.</summary>
    [JsonPropertyName("commitTimeStamp")]
    public string CommitTimestamp { get; set; } = string.Empty;

    /// <summary>Value of the <c>count</c> JSON property.</summary>
    [JsonPropertyName("count")]
    public int Count { get; set; }

    /// <summary>Entries of the <c>items</c> JSON array, one per catalog page.</summary>
    [JsonPropertyName("items")]
    public List<CatalogPageItem> Items { get; set; } = new();
}
/// <summary>
/// JSON model of one entry of the catalog index's <c>items</c> array, describing a catalog page.
/// String properties default to an empty string when the corresponding JSON property is
/// absent, so consumers never observe an uninitialized non-nullable member.
/// </summary>
public class CatalogPageItem
{
    /// <summary>Value of the <c>@id</c> JSON property; used by the script as the page's download URL.</summary>
    [JsonPropertyName("@id")]
    public string Url { get; set; } = string.Empty;

    /// <summary>Value of the <c>@type</c> JSON property.</summary>
    [JsonPropertyName("@type")]
    public string Type { get; set; } = string.Empty;

    /// <summary>Value of the <c>commitId</c> JSON property.</summary>
    [JsonPropertyName("commitId")]
    public string CommitId { get; set; } = string.Empty;

    /// <summary>Value of the <c>commitTimeStamp</c> JSON property, kept as the raw string.</summary>
    [JsonPropertyName("commitTimeStamp")]
    public string CommitTimestamp { get; set; } = string.Empty;

    /// <summary>Value of the <c>count</c> JSON property.</summary>
    [JsonPropertyName("count")]
    public int Count { get; set; }
}
/// <summary>
/// JSON model of a catalog page document, which lists catalog leaf items.
/// String properties default to an empty string and <see cref="Items"/> to an empty list
/// when the corresponding JSON property is absent, so consumers never observe an
/// uninitialized non-nullable member.
/// </summary>
public class CatalogPage
{
    /// <summary>Value of the <c>@id</c> JSON property.</summary>
    [JsonPropertyName("@id")]
    public string Url { get; set; } = string.Empty;

    /// <summary>Value of the <c>@type</c> JSON property.</summary>
    [JsonPropertyName("@type")]
    public string Type { get; set; } = string.Empty;

    /// <summary>Value of the <c>commitId</c> JSON property.</summary>
    [JsonPropertyName("commitId")]
    public string CommitId { get; set; } = string.Empty;

    /// <summary>Value of the <c>commitTimeStamp</c> JSON property, kept as the raw string.</summary>
    [JsonPropertyName("commitTimeStamp")]
    public string CommitTimestamp { get; set; } = string.Empty;

    /// <summary>Value of the <c>count</c> JSON property.</summary>
    [JsonPropertyName("count")]
    public int Count { get; set; }

    /// <summary>Entries of the <c>items</c> JSON array, one per catalog leaf.</summary>
    [JsonPropertyName("items")]
    public List<CatalogLeafItem> Items { get; set; } = new();

    /// <summary>Value of the <c>parent</c> JSON property.</summary>
    [JsonPropertyName("parent")]
    public string Parent { get; set; } = string.Empty;
}
/// <summary>
/// JSON model of one entry of a catalog page's <c>items</c> array.
/// String properties default to an empty string when the corresponding JSON property is
/// absent, so consumers never observe an uninitialized non-nullable member.
/// </summary>
public class CatalogLeafItem
{
    /// <summary>Value of the <c>@id</c> JSON property.</summary>
    [JsonPropertyName("@id")]
    public string Url { get; set; } = string.Empty;

    /// <summary>Value of the <c>@type</c> JSON property.</summary>
    [JsonPropertyName("@type")]
    public string Type { get; set; } = string.Empty;

    /// <summary>Value of the <c>commitTimeStamp</c> JSON property, kept as the raw string.</summary>
    [JsonPropertyName("commitTimeStamp")]
    public string CommitTimestamp { get; set; } = string.Empty;

    /// <summary>Value of the <c>nuget:id</c> JSON property.</summary>
    [JsonPropertyName("nuget:id")]
    public string PackageId { get; set; } = string.Empty;

    /// <summary>Value of the <c>nuget:version</c> JSON property.</summary>
    [JsonPropertyName("nuget:version")]
    public string PackageVersion { get; set; } = string.Empty;

    /// <summary>Value of the <c>commitId</c> JSON property.</summary>
    [JsonPropertyName("commitId")]
    public string CommitId { get; set; } = string.Empty;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment