From 145f7b0ee1c5d569d72afcd62a3b7567fe59e402 Mon Sep 17 00:00:00 2001 From: Dan Cojocaru Date: Wed, 9 Nov 2022 05:17:39 +0100 Subject: [PATCH] Transition from fs+JSON to MongoDB --- server/Controllers/V2/StationsController.cs | 3 +- server/Controllers/V2/TrainsController.cs | 3 +- server/Controllers/V3/StationsController.cs | 3 +- server/Controllers/V3/TrainsController.cs | 3 +- server/Models/Database/MongoSettings.cs | 5 + server/Models/Database/StationListing.cs | 17 ++ server/Models/Database/TrainListing.cs | 17 ++ server/Services/Implementations/Database.cs | 212 ++++++++++---------- server/Services/Interfaces/IDatabase.cs | 16 +- server/Startup.cs | 6 + server/appsettings.Development.json | 6 +- server/appsettings.json | 4 + server/server.csproj | 1 + 13 files changed, 167 insertions(+), 129 deletions(-) create mode 100644 server/Models/Database/MongoSettings.cs create mode 100644 server/Models/Database/StationListing.cs create mode 100644 server/Models/Database/TrainListing.cs diff --git a/server/Controllers/V2/StationsController.cs b/server/Controllers/V2/StationsController.cs index 24a7750..57fea61 100644 --- a/server/Controllers/V2/StationsController.cs +++ b/server/Controllers/V2/StationsController.cs @@ -1,5 +1,6 @@ using System.Collections.Generic; using Microsoft.AspNetCore.Mvc; +using Server.Models.Database; using Server.Services.Interfaces; namespace Server.Controllers.V2; @@ -15,7 +16,7 @@ public class StationsController : Controller { } [HttpGet("")] - public ActionResult> ListStations() { + public ActionResult> ListStations() { return Ok(Database.Stations); } } diff --git a/server/Controllers/V2/TrainsController.cs b/server/Controllers/V2/TrainsController.cs index a3e32b2..136403d 100644 --- a/server/Controllers/V2/TrainsController.cs +++ b/server/Controllers/V2/TrainsController.cs @@ -1,5 +1,6 @@ using System.Collections.Generic; using Microsoft.AspNetCore.Mvc; +using Server.Models.Database; using Server.Services.Interfaces; namespace Server.Controllers.V2; @@ -15,7 +16,7 @@ public class TrainsController : Controller { } [HttpGet("")] - public ActionResult> ListTrains() { + public ActionResult> ListTrains() { return Ok(Database.Trains); } } diff --git a/server/Controllers/V3/StationsController.cs b/server/Controllers/V3/StationsController.cs index 9e30fe5..e64e542 100644 --- a/server/Controllers/V3/StationsController.cs +++ b/server/Controllers/V3/StationsController.cs @@ -4,6 +4,7 @@ using System.Threading.Tasks; using InfoferScraper.Models.Station; using Microsoft.AspNetCore.Http; using Microsoft.AspNetCore.Mvc; +using Server.Models.Database; using Server.Services.Interfaces; namespace Server.Controllers.V3; @@ -21,7 +22,7 @@ public class StationsController : Controller { } [HttpGet("")] - public ActionResult> ListStations() { + public ActionResult> ListStations() { return Ok(Database.Stations); } diff --git a/server/Controllers/V3/TrainsController.cs b/server/Controllers/V3/TrainsController.cs index 55bfd34..8d3ebe3 100644 --- a/server/Controllers/V3/TrainsController.cs +++ b/server/Controllers/V3/TrainsController.cs @@ -5,6 +5,7 @@ using InfoferScraper.Models.Train; using Microsoft.AspNetCore.Http; using Microsoft.AspNetCore.Mvc; using scraper.Exceptions; +using Server.Models.Database; using Server.Services.Interfaces; namespace Server.Controllers.V3; @@ -22,7 +23,7 @@ public class TrainsController : Controller { } [HttpGet("")] - public ActionResult> ListTrains() { + public ActionResult> ListTrains() { return Ok(Database.Trains); } diff --git a/server/Models/Database/MongoSettings.cs b/server/Models/Database/MongoSettings.cs new file mode 100644 index 0000000..e4a79f4 --- /dev/null +++ b/server/Models/Database/MongoSettings.cs @@ -0,0 +1,5 @@ +namespace Server.Models.Database; + +public record MongoSettings(string ConnectionString, string DatabaseName) { + public MongoSettings() : this("", "") { } +} diff --git a/server/Models/Database/StationListing.cs b/server/Models/Database/StationListing.cs new file mode 100644 index 0000000..715b629 --- /dev/null +++ b/server/Models/Database/StationListing.cs @@ -0,0 +1,17 @@ +using System.Collections.Generic; +using System.Text.Json.Serialization; +using MongoDB.Bson; +using MongoDB.Bson.Serialization.Attributes; + +namespace Server.Models.Database; + +public record StationListing( + [property: BsonId] + [property: BsonRepresentation(BsonType.ObjectId)] + [property: JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] + string? Id, + string Name, + List StoppedAtBy +) { + public StationListing(string name, List stoppedAtBy) : this(null, name, stoppedAtBy) { } +} diff --git a/server/Models/Database/TrainListing.cs b/server/Models/Database/TrainListing.cs new file mode 100644 index 0000000..66d012d --- /dev/null +++ b/server/Models/Database/TrainListing.cs @@ -0,0 +1,17 @@ +using System.Text.Json.Serialization; +using MongoDB.Bson; +using MongoDB.Bson.Serialization.Attributes; + +namespace Server.Models.Database; + +public record TrainListing( + [property: BsonId] + [property: BsonRepresentation(BsonType.ObjectId)] + [property: JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] + string? Id, + string Rank, + string Number, + string Company +) { + public TrainListing(string rank, string number, string company) : this(null, rank, number, company) { } +} \ No newline at end of file diff --git a/server/Services/Implementations/Database.cs b/server/Services/Implementations/Database.cs index 238d8c2..f6dd497 100644 --- a/server/Services/Implementations/Database.cs +++ b/server/Services/Implementations/Database.cs @@ -7,6 +7,11 @@ using System.Text.Json.Nodes; using System.Text.Json.Serialization; using System.Threading.Tasks; using Microsoft.Extensions.Logging; +using Microsoft.Extensions.Options; +using MongoDB.Bson; +using MongoDB.Bson.Serialization.Attributes; +using MongoDB.Driver; +using Server.Models.Database; namespace Server.Services.Implementations; @@ -17,77 +22,47 @@ public class Database : Server.Services.Interfaces.IDatabase { private ILogger Logger { get; } - private bool shouldCommitOnEveryChange = true; - private bool dbDataDirty = false; - private bool stationsDirty = false; - private bool trainsDirty = false; + public DbRecord DbData { get; private set; } = new(3); - public DbRecord DbData { get; private set; } = new(2); - private List stations = new(); - private List trains = new(); - - public IReadOnlyList Stations => stations; - public IReadOnlyList Trains => trains; + public IReadOnlyList Stations => stationListingsCollection.FindSync(_ => true).ToList(); + public IReadOnlyList Trains => trainListingsCollection.FindSync(_ => true).ToList(); private static readonly string DbDir = Environment.GetEnvironmentVariable("DB_DIR") ?? Path.Join(Environment.CurrentDirectory, "db"); private static readonly string DbFile = Path.Join(DbDir, "db.json"); private static readonly string StationsFile = Path.Join(DbDir, "stations.json"); private static readonly string TrainsFile = Path.Join(DbDir, "trains.json"); - public IDisposable MakeDbTransaction() { - shouldCommitOnEveryChange = false; - return new Server.Utils.ActionDisposable(() => { - if (dbDataDirty) File.WriteAllText(DbFile, JsonSerializer.Serialize(DbData, serializerOptions)); - if (stationsDirty) { - stations.Sort((s1, s2) => s2.StoppedAtBy.Count.CompareTo(s1.StoppedAtBy.Count)); - File.WriteAllText(StationsFile, JsonSerializer.Serialize(stations, serializerOptions)); - } - if (trainsDirty) File.WriteAllText(TrainsFile, JsonSerializer.Serialize(trains, serializerOptions)); - dbDataDirty = stationsDirty = trainsDirty = false; - shouldCommitOnEveryChange = true; - }); - } + private readonly IMongoDatabase db; + private readonly IMongoCollection dbRecordCollection; + private readonly IMongoCollection trainListingsCollection; + private readonly IMongoCollection stationListingsCollection; - public Database(ILogger logger) { + public Database(ILogger logger, IOptions mongoSettings) { Logger = logger; - if (!Directory.Exists(DbDir)) { - Logger.LogDebug("Creating directory: {DbDir}", DbDir); - Directory.CreateDirectory(DbDir); - } + MongoClient mongoClient = new(mongoSettings.Value.ConnectionString); + db = mongoClient.GetDatabase(mongoSettings.Value.DatabaseName) ?? throw new NullReferenceException("Unable to get Mongo database"); + dbRecordCollection = db.GetCollection("db"); + trainListingsCollection = db.GetCollection("trainListings"); + stationListingsCollection = db.GetCollection("stationListings"); Migration(); - - if (File.Exists(DbFile)) { - DbData = JsonSerializer.Deserialize(File.ReadAllText(DbFile), serializerOptions)!; - } - else { - File.WriteAllText(DbFile, JsonSerializer.Serialize(DbData, serializerOptions)); - } - - if (File.Exists(StationsFile)) { - stations = JsonSerializer.Deserialize>(File.ReadAllText(StationsFile), serializerOptions)!; - } - - if (File.Exists(TrainsFile)) { - trains = JsonSerializer.Deserialize>(File.ReadAllText(TrainsFile), serializerOptions)!; - } } private void Migration() { - if (!File.Exists(DbFile)) { -// using var _ = Logger.BeginScope("Migrating DB version 1 -> 2"); + if (!File.Exists(DbFile) && File.Exists(TrainsFile)) { Logger.LogInformation("Migrating DB version 1 -> 2"); if (File.Exists(StationsFile)) { Logger.LogDebug("Converting StationsFile"); var oldStations = JsonNode.Parse(File.ReadAllText(StationsFile)); + List stations = new(); if (oldStations != null) { Logger.LogDebug("Found {StationsCount} stations", oldStations.AsArray().Count); foreach (var station in oldStations.AsArray()) { if (station == null) continue; station["stoppedAtBy"] = new JsonArray(station["stoppedAtBy"]!.AsArray().Select(num => (JsonNode)(num!).ToString()!).ToArray()); } - stations = JsonSerializer.Deserialize>(oldStations, serializerOptions)!; + stations = oldStations.Deserialize>(serializerOptions)!; } Logger.LogDebug("Rewriting StationsFile"); File.WriteAllText(StationsFile, JsonSerializer.Serialize(stations, serializerOptions)); @@ -95,6 +70,7 @@ public class Database : Server.Services.Interfaces.IDatabase { if (File.Exists(TrainsFile)) { Logger.LogDebug("Converting TrainsFile"); var oldTrains = JsonNode.Parse(File.ReadAllText(TrainsFile)); + List trains = new(); if (oldTrains != null) { Logger.LogDebug("Found {TrainsCount} trains", oldTrains.AsArray().Count); foreach (var train in oldTrains.AsArray()) { @@ -102,7 +78,7 @@ public class Database : Server.Services.Interfaces.IDatabase { train["number"] = train["numberString"]; train.AsObject().Remove("numberString"); } - trains = JsonSerializer.Deserialize>(oldTrains, serializerOptions)!; + trains = oldTrains.Deserialize>(serializerOptions)!; } Logger.LogDebug("Rewriting TrainsFile"); File.WriteAllText(TrainsFile, JsonSerializer.Serialize(trains, serializerOptions)); @@ -111,72 +87,101 @@ public class Database : Server.Services.Interfaces.IDatabase { File.WriteAllText(DbFile, JsonSerializer.Serialize(DbData, serializerOptions)); Migration(); } - else { + else if (File.Exists(DbFile)) { var oldDbData = JsonNode.Parse(File.ReadAllText(DbFile)); if (((int?)oldDbData?["version"]) == 2) { - Logger.LogInformation("DB Version: 2; noop"); + Logger.LogInformation("Migrating DB version 2 -> 3 (transition from fs+JSON to MongoDB)"); + + if (File.Exists(StationsFile)) { + Logger.LogDebug("Converting StationsFile"); + var stations = JsonSerializer.Deserialize>(File.ReadAllText(StationsFile)); + stationListingsCollection.InsertMany(stations); + File.Delete(StationsFile); + } + + if (File.Exists(TrainsFile)) { + Logger.LogDebug("Converting TrainsFile"); + var trains = JsonSerializer.Deserialize>(File.ReadAllText(TrainsFile)); + trainListingsCollection.InsertMany(trains); + File.Delete(TrainsFile); + } + + File.Delete(DbFile); + try { + Directory.Delete(DbDir); + } + catch (Exception) { + // Deleting of the directory is optional; may not be allowed in Docker or similar + } + + var x = dbRecordCollection.FindSync(_ => true).ToList()!; + if (x.Count != 0) { + Logger.LogWarning("db collection contained data when migrating to V3"); + using (var _ = Logger.BeginScope("Already existing data:")) { + foreach (var dbRecord in x) { + Logger.LogInformation("Id: {Id}, Version: {Version}", dbRecord.Id, dbRecord.Version); + } + } + Logger.LogInformation("Backing up existing data"); + var backupDbRecordCollection = db.GetCollection("db-backup"); + backupDbRecordCollection.InsertMany(x); + Logger.LogDebug("Removing existing data"); + dbRecordCollection.DeleteMany(_ => true); + } + dbRecordCollection.InsertOne(new(3)); + Migration(); + } + else { + throw new("Unexpected Database version, only DB Version 2 uses DbFile"); + } + } + else { + var datas = dbRecordCollection.FindSync(_ => true).ToList(); + if (datas.Count == 0) { + Logger.LogInformation("No db record found, new database"); + dbRecordCollection.InsertOne(DbData); } else { - throw new Exception("Unexpected Database version"); + DbData = datas[0]; + } + if (DbData.Version == 3) { + Logger.LogInformation("Using MongoDB Database Version 3; noop"); + } + else { + throw new($"Unexpected Database version: {DbData.Version}"); } } } public async Task FoundTrain(string rank, string number, string company) { - number = string.Join("", number.TakeWhile(c => '0' <= c && c <= '9')); - if (!trains.Where(train => train.Number == number).Any()) { + number = string.Join("", number.TakeWhile(c => c is >= '0' and <= '9')); + if (!await (await trainListingsCollection.FindAsync(Builders.Filter.Eq("number", number))).AnyAsync()) { Logger.LogDebug("Found train {Rank} {Number} from {Company}", rank, number, company); - trains.Add(new(number, rank, company)); - if (shouldCommitOnEveryChange) { - await File.WriteAllTextAsync(TrainsFile, JsonSerializer.Serialize(trains, serializerOptions)); - } - else { - trainsDirty = true; - } + await trainListingsCollection.InsertOneAsync(new(number: number, rank: rank, company: company)); } return number; } public async Task FoundStation(string name) { - if (!stations.Where(station => station.Name == name).Any()) { + if (!await (stationListingsCollection.Find(Builders.Filter.Eq("name", name))).AnyAsync()) { Logger.LogDebug("Found station {StationName}", name); - stations.Add(new(name, new())); - if (shouldCommitOnEveryChange) { - await File.WriteAllTextAsync(StationsFile, JsonSerializer.Serialize(stations, serializerOptions)); - } - else { - stationsDirty = true; - } + await stationListingsCollection.InsertOneAsync(new(name, new())); } } public async Task FoundTrainAtStation(string stationName, string trainNumber) { - trainNumber = string.Join("", trainNumber.TakeWhile(c => '0' <= c && c <= '9')); + trainNumber = string.Join("", trainNumber.TakeWhile(c => c is >= '0' and <= '9')); await FoundStation(stationName); - var dirty = false; - for (var i = 0; i < stations.Count; i++) { - if (stations[i].Name == stationName) { - if (!stations[i].StoppedAtBy.Contains(trainNumber)) { - Logger.LogDebug("Found train {TrainNumber} at station {StationName}", trainNumber, stationName); - stations[i].ActualStoppedAtBy.Add(trainNumber); - dirty = true; - } - break; - } - } - if (dirty) { - if (shouldCommitOnEveryChange) { - stations.Sort((s1, s2) => s2.StoppedAtBy.Count.CompareTo(s1.StoppedAtBy.Count)); - await File.WriteAllTextAsync(StationsFile, JsonSerializer.Serialize(stations, serializerOptions)); - } - else { - stationsDirty = true; - } + var updateResult = await stationListingsCollection.UpdateOneAsync( + Builders.Filter.Eq("name", stationName), + Builders.Update.AddToSet("stoppedAtBy", trainNumber) + ); + if (updateResult.IsAcknowledged && updateResult.ModifiedCount > 0) { + Logger.LogDebug("Found train {TrainNumber} at station {StationName}", trainNumber, stationName); } } public async Task OnTrainData(InfoferScraper.Models.Train.ITrainScrapeResult trainData) { - using var _ = MakeDbTransaction(); var trainNumber = await FoundTrain(trainData.Rank, trainData.Number, trainData.Operator); foreach (var group in trainData.Groups) { foreach (var station in group.Stations) { @@ -199,8 +204,6 @@ public class Database : Server.Services.Interfaces.IDatabase { } } - using var _ = MakeDbTransaction(); - if (stationData.Arrivals != null) { foreach (var train in stationData.Arrivals) { await ProcessTrain(train); @@ -214,25 +217,12 @@ public class Database : Server.Services.Interfaces.IDatabase { } } -public record DbRecord(int Version); - -public record StationRecord : Server.Services.Interfaces.IStationRecord { - [JsonPropertyName("stoppedAtBy")] - public List ActualStoppedAtBy { get; init; } - - public string Name { get; init; } - [JsonIgnore] - public IReadOnlyList StoppedAtBy => ActualStoppedAtBy; - - public StationRecord() { - Name = ""; - ActualStoppedAtBy = new(); - } - - public StationRecord(string name, List stoppedAtBy) { - Name = name; - ActualStoppedAtBy = stoppedAtBy; - } +public record DbRecord( + [property: BsonId] + [property: BsonRepresentation(BsonType.ObjectId)] + [property: JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] + string? Id, + int Version +) { + public DbRecord(int version) : this(null, version) { } } - -public record TrainRecord(string Number, string Rank, string Company) : Server.Services.Interfaces.ITrainRecord; diff --git a/server/Services/Interfaces/IDatabase.cs b/server/Services/Interfaces/IDatabase.cs index 1df6ad4..c6a4901 100644 --- a/server/Services/Interfaces/IDatabase.cs +++ b/server/Services/Interfaces/IDatabase.cs @@ -2,12 +2,13 @@ using System.Collections.Generic; using System.Threading.Tasks; using InfoferScraper.Models.Train; using InfoferScraper.Models.Station; +using Server.Models.Database; namespace Server.Services.Interfaces; public interface IDatabase { - public IReadOnlyList Stations { get; } - public IReadOnlyList Trains { get; } + public IReadOnlyList Stations { get; } + public IReadOnlyList Trains { get; } public Task FoundTrain(string rank, string number, string company); public Task FoundStation(string name); @@ -15,14 +16,3 @@ public interface IDatabase { public Task OnTrainData(ITrainScrapeResult trainData); public Task OnStationData(IStationScrapeResult stationData); } - -public interface IStationRecord { - public string Name { get; } - public IReadOnlyList StoppedAtBy { get; } -} - -public interface ITrainRecord { - public string Rank { get; } - public string Number { get; } - public string Company { get; } -} diff --git a/server/Startup.cs b/server/Startup.cs index 4b4fe9c..fa2c664 100644 --- a/server/Startup.cs +++ b/server/Startup.cs @@ -8,6 +8,8 @@ using Microsoft.Extensions.Configuration; using Microsoft.Extensions.DependencyInjection; using Microsoft.Extensions.Hosting; using Microsoft.OpenApi.Models; +using MongoDB.Bson.Serialization.Conventions; +using Server.Models.Database; using Server.Services.Implementations; using Server.Services.Interfaces; @@ -26,6 +28,10 @@ namespace Server { options.KnownProxies.Add(Dns.GetHostAddresses("host.docker.internal")[0]); }); } + + services.Configure(Configuration.GetSection("TrainDataMongo")); + var conventionPack = new ConventionPack { new CamelCaseElementNameConvention() }; + ConventionRegistry.Register("camelCase", conventionPack, _ => true); services.AddSingleton(); services.AddSingleton(); services.AddSingleton(NodaTime.DateTimeZoneProviders.Tzdb); diff --git a/server/appsettings.Development.json b/server/appsettings.Development.json index ab5b812..246dc14 100644 --- a/server/appsettings.Development.json +++ b/server/appsettings.Development.json @@ -5,5 +5,9 @@ "Microsoft": "Warning", "Microsoft.Hosting.Lifetime": "Information" } - } + }, + "TrainDataMongo": { + "ConnectionString": "mongodb://localhost:27017", + "DatabaseName": "NewInfoferScraper" + }, } diff --git a/server/appsettings.json b/server/appsettings.json index e409635..9750930 100644 --- a/server/appsettings.json +++ b/server/appsettings.json @@ -9,5 +9,9 @@ "Microsoft.Hosting.Lifetime": "Information" } }, + "TrainDataMongo": { + "ConnectionString": "mongodb://mongo:27017", + "DatabaseName": "NewInfoferScraper" + }, "AllowedHosts": "*" } diff --git a/server/server.csproj b/server/server.csproj index ea47826..f1dc258 100644 --- a/server/server.csproj +++ b/server/server.csproj @@ -10,6 +10,7 @@ +