Compare commits

..

No commits in common. 'master' and 'python3' have entirely different histories.

  1. 24
      .dockerignore
  2. 1
      .github/workflows/build-image.yml
  3. 453
      .gitignore
  4. 46
      .vscode/launch.json
  5. 53
      .vscode/tasks.json
  6. 26
      ConsoleTest/.vscode/launch.json
  7. 42
      ConsoleTest/.vscode/tasks.json
  8. 12
      ConsoleTest/ConsoleTest.csproj
  9. 95
      ConsoleTest/Program.cs
  10. 41
      Dockerfile
  11. 11
      Pipfile
  12. 20
      Pipfile.lock
  13. 10
      docker-compose.yml
  14. 62
      new-infofer-scraper.sln
  15. 24
      omnisharp.json
  16. 14
      scraper/Pipfile
  17. 85
      scraper/Pipfile.lock
  18. 1
      scraper/__init__.py
  19. 44
      scraper/main.py
  20. 1
      scraper/omnisharp.json
  21. 20
      scraper/schemas.py
  22. 91
      scraper/scrape_station.py
  23. 138
      scraper/scrape_station_schema_v2.json
  24. 146
      scraper/scrape_train.py
  25. 134
      scraper/scrape_train_schema.json
  26. 134
      scraper/scrape_train_schema_v2.json
  27. 16
      scraper/scraper.csproj
  28. 12
      scraper/scraper.py
  29. 8
      scraper/setup.py
  30. 16
      scraper/src/Exceptions/TrainNotThisDayException.cs
  31. 62
      scraper/src/Models/Itinerary.cs
  32. 110
      scraper/src/Models/Station.cs
  33. 18
      scraper/src/Models/Status.cs
  34. 246
      scraper/src/Models/Train.cs
  35. 220
      scraper/src/Scrapers/Route.cs
  36. 228
      scraper/src/Scrapers/Station.cs
  37. 261
      scraper/src/Scrapers/Train.cs
  38. 25
      scraper/src/Utils/DateTimeSequencer.cs
  39. 18
      scraper/src/Utils/DeconstructIEnumerable.cs
  40. 5
      scraper/src/Utils/RoLetters.cs
  41. 23
      scraper/src/Utils/RoLettersToEn.cs
  42. 12
      scraper/src/Utils/WithCollapsedSpaces.cs
  43. 79
      scraper/utils.py
  44. 35
      server/.vscode/launch.json
  45. 42
      server/.vscode/tasks.json
  46. 73
      server/Controllers/V1/TrainController.cs
  47. 22
      server/Controllers/V1/TrainsController.cs
  48. 50
      server/Controllers/V2/StationController.cs
  49. 22
      server/Controllers/V2/StationsController.cs
  50. 77
      server/Controllers/V2/TrainController.cs
  51. 23
      server/Controllers/V2/TrainsController.cs
  52. 40
      server/Controllers/V3/ItinerariesController.cs
  53. 45
      server/Controllers/V3/StationsController.cs
  54. 67
      server/Controllers/V3/TrainsController.cs
  55. 5
      server/Models/Database/MongoSettings.cs
  56. 17
      server/Models/Database/StationAlias.cs
  57. 18
      server/Models/Database/StationListing.cs
  58. 20
      server/Models/Database/TrainListing.cs
  59. 9
      server/Models/ProxySettings.cs
  60. 57
      server/Models/V1/TrainScrapeResult.cs
  61. 39
      server/Models/V2/StationScrapeResult.cs
  62. 57
      server/Models/V2/TrainScrapeResult.cs
  63. 15
      server/Pipfile
  64. 394
      server/Pipfile.lock
  65. 17
      server/Program.cs
  66. 31
      server/Properties/launchSettings.json
  67. 116
      server/Services/Implementations/DataManager.cs
  68. 390
      server/Services/Implementations/Database.cs
  69. 14
      server/Services/Interfaces/IDataManager.cs
  70. 20
      server/Services/Interfaces/IDatabase.cs
  71. 105
      server/Startup.cs
  72. 15
      server/Utils/ActionDisposable.cs
  73. 38
      server/Utils/AsyncThrottle.cs
  74. 69
      server/Utils/Cache.cs
  75. 7
      server/Utils/Constants.cs
  76. 33
      server/Utils/IAsyncCusorAsyncAdapter.cs
  77. 13
      server/appsettings.Development.json
  78. 17
      server/appsettings.json
  79. 18
      server/main.py
  80. 1
      server/omnisharp.json
  81. 25
      server/server.csproj
  82. 1
      server/server/__init__.py
  83. 18
      server/server/cache.py
  84. 173
      server/server/db.py
  85. 29
      server/server/flask_utils.py
  86. 1
      server/server/scraper
  87. 65
      server/server/server.py
  88. 41
      server/server/utils.py
  89. 1
      server/server/v2/__init__.py
  90. 94
      server/server/v2/v2.py

24
.dockerignore

@ -1,8 +1,16 @@
**/bin # Docker
**/obj .dockerignore
**/out Dockerfile
**/.vscode
**/.vs # CPython compiler output
**/omnisharp.json __pycache__
.dotnet *.pyc
.Microsoft.DotNet.ImageBuilder
# Python package stuff
reqlib-metadata
pyproject.toml
*.egg-info
# VS Code
.vscode

1
.github/workflows/build-image.yml

@ -19,6 +19,5 @@ jobs:
- name: Publish - name: Publish
uses: docker/build-push-action@v2 uses: docker/build-push-action@v2
with: with:
context: .
tags: ${{ format('ghcr.io/{0}/new_infofer_scraper:latest', github.actor) }} tags: ${{ format('ghcr.io/{0}/new_infofer_scraper:latest', github.actor) }}
push: true push: true

453
.gitignore vendored

@ -1,448 +1,11 @@
## Ignore Visual Studio temporary files, build results, and # CPython compiler output
## files generated by popular Visual Studio add-ons. __pycache__
##
## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore
# User-specific files
*.rsuser
*.suo
*.user
*.userosscache
*.sln.docstates
# User-specific files (MonoDevelop/Xamarin Studio)
*.userprefs
# Mono auto generated files
mono_crash.*
# Build results
[Dd]ebug/
[Dd]ebugPublic/
[Rr]elease/
[Rr]eleases/
x64/
x86/
[Aa][Rr][Mm]/
[Aa][Rr][Mm]64/
bld/
[Bb]in/
[Oo]bj/
[Ll]og/
[Ll]ogs/
# Visual Studio 2015/2017 cache/options directory
.vs/
# Uncomment if you have tasks that create the project's static files in wwwroot
#wwwroot/
# Visual Studio 2017 auto generated files
Generated\ Files/
# MSTest test Results
[Tt]est[Rr]esult*/
[Bb]uild[Ll]og.*
# NUnit
*.VisualState.xml
TestResult.xml
nunit-*.xml
# Build Results of an ATL Project
[Dd]ebugPS/
[Rr]eleasePS/
dlldata.c
# Benchmark Results
BenchmarkDotNet.Artifacts/
# .NET Core
project.lock.json
project.fragment.lock.json
artifacts/
# Tye
.tye/
# StyleCop
StyleCopReport.xml
# Files built by Visual Studio
*_i.c
*_p.c
*_h.h
*.ilk
*.meta
*.obj
*.iobj
*.pch
*.pdb
*.ipdb
*.pgc
*.pgd
*.rsp
*.sbr
*.tlb
*.tli
*.tlh
*.tmp
*.tmp_proj
*_wpftmp.csproj
*.log
*.vspscc
*.vssscc
.builds
*.pidb
*.svclog
*.scc
# Chutzpah Test files
_Chutzpah*
# Visual C++ cache files
ipch/
*.aps
*.ncb
*.opendb
*.opensdf
*.sdf
*.cachefile
*.VC.db
*.VC.VC.opendb
# Visual Studio profiler
*.psess
*.vsp
*.vspx
*.sap
# Visual Studio Trace Files
*.e2e
# TFS 2012 Local Workspace
$tf/
# Guidance Automation Toolkit
*.gpState
# ReSharper is a .NET coding add-in
_ReSharper*/
*.[Rr]e[Ss]harper
*.DotSettings.user
# TeamCity is a build add-in
_TeamCity*
# DotCover is a Code Coverage Tool
*.dotCover
# AxoCover is a Code Coverage Tool
.axoCover/*
!.axoCover/settings.json
# Coverlet is a free, cross platform Code Coverage Tool
coverage*[.json, .xml, .info]
# Visual Studio code coverage results
*.coverage
*.coveragexml
# NCrunch
_NCrunch_*
.*crunch*.local.xml
nCrunchTemp_*
# MightyMoose
*.mm.*
AutoTest.Net/
# Web workbench (sass)
.sass-cache/
# Installshield output folder
[Ee]xpress/
# DocProject is a documentation generator add-in
DocProject/buildhelp/
DocProject/Help/*.HxT
DocProject/Help/*.HxC
DocProject/Help/*.hhc
DocProject/Help/*.hhk
DocProject/Help/*.hhp
DocProject/Help/Html2
DocProject/Help/html
# Click-Once directory
publish/
# Publish Web Output
*.[Pp]ublish.xml
*.azurePubxml
# Note: Comment the next line if you want to checkin your web deploy settings,
# but database connection strings (with potential passwords) will be unencrypted
*.pubxml
*.publishproj
# Microsoft Azure Web App publish settings. Comment the next line if you want to
# checkin your Azure Web App publish settings, but sensitive information contained
# in these scripts will be unencrypted
PublishScripts/
# NuGet Packages
*.nupkg
# NuGet Symbol Packages
*.snupkg
# The packages folder can be ignored because of Package Restore
**/[Pp]ackages/*
# except build/, which is used as an MSBuild target.
!**/[Pp]ackages/build/
# Uncomment if necessary however generally it will be regenerated when needed
#!**/[Pp]ackages/repositories.config
# NuGet v3's project.json files produces more ignorable files
*.nuget.props
*.nuget.targets
# Microsoft Azure Build Output
csx/
*.build.csdef
# Microsoft Azure Emulator
ecf/
rcf/
# Windows Store app package directories and files
AppPackages/
BundleArtifacts/
Package.StoreAssociation.xml
_pkginfo.txt
*.appx
*.appxbundle
*.appxupload
# Visual Studio cache files
# files ending in .cache can be ignored
*.[Cc]ache
# but keep track of directories ending in .cache
!?*.[Cc]ache/
# Others
ClientBin/
~$*
*~
*.dbmdl
*.dbproj.schemaview
*.jfm
*.pfx
*.publishsettings
orleans.codegen.cs
# Including strong name files can present a security risk
# (https://github.com/github/gitignore/pull/2483#issue-259490424)
#*.snk
# Since there are multiple workflows, uncomment next line to ignore bower_components
# (https://github.com/github/gitignore/pull/1529#issuecomment-104372622)
#bower_components/
# RIA/Silverlight projects
Generated_Code/
# Backup & report files from converting an old project file
# to a newer Visual Studio version. Backup files are not needed,
# because we have git ;-)
_UpgradeReport_Files/
Backup*/
UpgradeLog*.XML
UpgradeLog*.htm
ServiceFabricBackup/
*.rptproj.bak
# SQL Server files
*.mdf
*.ldf
*.ndf
# Business Intelligence projects
*.rdl.data
*.bim.layout
*.bim_*.settings
*.rptproj.rsuser
*- [Bb]ackup.rdl
*- [Bb]ackup ([0-9]).rdl
*- [Bb]ackup ([0-9][0-9]).rdl
# Microsoft Fakes
FakesAssemblies/
# GhostDoc plugin setting file
*.GhostDoc.xml
# Node.js Tools for Visual Studio
.ntvs_analysis.dat
node_modules/
# Visual Studio 6 build log
*.plg
# Visual Studio 6 workspace options file
*.opt
# Visual Studio 6 auto-generated workspace file (contains which files were open etc.)
*.vbw
# Visual Studio LightSwitch build output
**/*.HTMLClient/GeneratedArtifacts
**/*.DesktopClient/GeneratedArtifacts
**/*.DesktopClient/ModelManifest.xml
**/*.Server/GeneratedArtifacts
**/*.Server/ModelManifest.xml
_Pvt_Extensions
# Paket dependency manager
.paket/paket.exe
paket-files/
# FAKE - F# Make
.fake/
# Ionide - VsCode extension for F# Support
.ionide/
# CodeRush personal settings
.cr/personal
# Python Tools for Visual Studio (PTVS)
__pycache__/
*.pyc *.pyc
# Cake - Uncomment if you are using it # Python package stuff
# tools/** reqlib-metadata
# !tools/packages.config pyproject.toml
*.egg-info
# Tabs Studio
*.tss
# Telerik's JustMock configuration file
*.jmconfig
# BizTalk build output
*.btp.cs
*.btm.cs
*.odx.cs
*.xsd.cs
# OpenCover UI analysis results
OpenCover/
# Azure Stream Analytics local run output
ASALocalRun/
# MSBuild Binary and Structured Log
*.binlog
# NVidia Nsight GPU debugger configuration file
*.nvuser
# MFractors (Xamarin productivity tool) working folder
.mfractor/
# Local History for Visual Studio
.localhistory/
# BeatPulse healthcheck temp database
healthchecksdb
# Backup folder for Package Reference Convert tool in Visual Studio 2017
MigrationBackup/
# Ionide (cross platform F# VS Code tools) working folder
.ionide/
##
## Visual studio for Mac
##
# globs
Makefile.in
*.userprefs
*.usertasks
config.make
config.status
aclocal.m4
install-sh
autom4te.cache/
*.tar.gz
tarballs/
test-results/
# Mac bundle stuff
*.dmg
*.app
# content below from: https://github.com/github/gitignore/blob/master/Global/macOS.gitignore
# General
.DS_Store
.AppleDouble
.LSOverride
# Icon must end with two \r
Icon
# Thumbnails
._*
# Files that might appear in the root of a volume
.DocumentRevisions-V100
.fseventsd
.Spotlight-V100
.TemporaryItems
.Trashes
.VolumeIcon.icns
.com.apple.timemachine.donotpresent
# Directories potentially created on remote AFP share
.AppleDB
.AppleDesktop
Network Trash Folder
Temporary Items
.apdisk
# content below from: https://github.com/github/gitignore/blob/master/Global/Windows.gitignore
# Windows thumbnail cache files
Thumbs.db
ehthumbs.db
ehthumbs_vista.db
# Dump file
*.stackdump
# Folder config file
[Dd]esktop.ini
# Recycle Bin used on file shares
$RECYCLE.BIN/
# Windows Installer files
*.cab
*.msi
*.msix
*.msm
*.msp
# Windows shortcuts
*.lnk
# JetBrains Rider
.idea/
*.sln.iml
## # VS Code
## Visual Studio Code .vscode
##
.vscode/*
!.vscode/settings.json
!.vscode/tasks.json
!.vscode/launch.json
!.vscode/extensions.json

46
.vscode/launch.json vendored

@ -1,46 +0,0 @@
{
"version": "0.2.0",
"configurations": [
{
"name": "ConsoleTest",
"type": "coreclr",
"request": "launch",
"preLaunchTask": "buildConsoleTest",
"program": "${workspaceFolder}/ConsoleTest/bin/Debug/net7.0/ConsoleTest.dll",
"args": [],
"cwd": "${workspaceFolder}",
"stopAtEntry": false,
"console": "integratedTerminal"
},
{
// Use IntelliSense to find out which attributes exist for C# debugging
// Use hover for the description of the existing attributes
// For further information visit https://github.com/OmniSharp/omnisharp-vscode/blob/master/debugger-launchjson.md
"name": ".NET Core Launch (web)",
"type": "coreclr",
"request": "launch",
"preLaunchTask": "build",
// If you have changed target frameworks, make sure to update the program path.
"program": "${workspaceFolder}/server/bin/Debug/net6.0/Server.dll",
"args": [],
"cwd": "${workspaceFolder}/server",
"stopAtEntry": false,
// Enable launching a web browser when ASP.NET Core starts. For more information: https://aka.ms/VSCode-CS-LaunchJson-WebBrowser
// "serverReadyAction": {
// "action": "openExternally",
// "pattern": "\\bNow listening on:\\s+(https?://\\S+)"
// },
"env": {
"ASPNETCORE_ENVIRONMENT": "Development"
},
"sourceFileMap": {
"/Views": "${workspaceFolder}/Views"
}
},
{
"name": ".NET Core Attach",
"type": "coreclr",
"request": "attach"
}
]
}

53
.vscode/tasks.json vendored

@ -1,53 +0,0 @@
{
"version": "2.0.0",
"tasks": [
{
"label": "build",
"command": "dotnet",
"type": "process",
"args": [
"build",
"${workspaceFolder}/server/server.csproj",
"/property:GenerateFullPaths=true",
"/consoleloggerparameters:NoSummary"
],
"problemMatcher": "$msCompile"
},
{
"label": "buildConsoleTest",
"command": "dotnet",
"type": "process",
"args": [
"build",
"${workspaceFolder}/ConsoleTest/ConsoleTest.csproj",
"/property:GenerateFullPaths=true",
"/consoleloggerparameters:NoSummary"
],
"problemMatcher": "$msCompile"
},
{
"label": "publish",
"command": "dotnet",
"type": "process",
"args": [
"publish",
"${workspaceFolder}/server/server.csproj",
"/property:GenerateFullPaths=true",
"/consoleloggerparameters:NoSummary"
],
"problemMatcher": "$msCompile"
},
{
"label": "watch",
"command": "dotnet",
"type": "process",
"args": [
"watch",
"run",
"--project",
"${workspaceFolder}/server/server.csproj"
],
"problemMatcher": "$msCompile"
}
]
}

26
ConsoleTest/.vscode/launch.json vendored

@ -1,26 +0,0 @@
{
"version": "0.2.0",
"configurations": [
{
// Use IntelliSense to find out which attributes exist for C# debugging
// Use hover for the description of the existing attributes
// For further information visit https://github.com/OmniSharp/omnisharp-vscode/blob/master/debugger-launchjson.md
"name": ".NET Core Launch (console)",
"type": "coreclr",
"request": "launch",
"preLaunchTask": "build",
// If you have changed target frameworks, make sure to update the program path.
"program": "${workspaceFolder}/bin/Debug/net5.0/ConsoleTest.dll",
"args": [],
"cwd": "${workspaceFolder}",
// For more information about the 'console' field, see https://aka.ms/VSCode-CS-LaunchJson-Console
"console": "internalConsole",
"stopAtEntry": false
},
{
"name": ".NET Core Attach",
"type": "coreclr",
"request": "attach"
}
]
}

42
ConsoleTest/.vscode/tasks.json vendored

@ -1,42 +0,0 @@
{
"version": "2.0.0",
"tasks": [
{
"label": "build",
"command": "dotnet",
"type": "process",
"args": [
"build",
"${workspaceFolder}/ConsoleTest.csproj",
"/property:GenerateFullPaths=true",
"/consoleloggerparameters:NoSummary"
],
"problemMatcher": "$msCompile"
},
{
"label": "publish",
"command": "dotnet",
"type": "process",
"args": [
"publish",
"${workspaceFolder}/ConsoleTest.csproj",
"/property:GenerateFullPaths=true",
"/consoleloggerparameters:NoSummary"
],
"problemMatcher": "$msCompile"
},
{
"label": "watch",
"command": "dotnet",
"type": "process",
"args": [
"watch",
"run",
"${workspaceFolder}/ConsoleTest.csproj",
"/property:GenerateFullPaths=true",
"/consoleloggerparameters:NoSummary"
],
"problemMatcher": "$msCompile"
}
]
}

12
ConsoleTest/ConsoleTest.csproj

@ -1,12 +0,0 @@
<Project Sdk="Microsoft.NET.Sdk">
<ItemGroup>
<ProjectReference Include="..\scraper\scraper.csproj" />
</ItemGroup>
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFrameworks>net6.0;net7.0;net8.0</TargetFrameworks>
</PropertyGroup>
</Project>

95
ConsoleTest/Program.cs

@ -1,95 +0,0 @@
using System;
using System.Linq;
using System.Text.Json;
using System.Threading.Tasks;
using InfoferScraper;
using InfoferScraper.Scrapers;
// Interactive menu: keeps offering the scrape actions until the user enters
// "0" or standard input is exhausted (ReadLine returns null).
var keepRunning = true;
while (keepRunning) {
    Console.WriteLine("1. Scrape Train");
    Console.WriteLine("2. Scrape Station");
    Console.WriteLine("3. Scrape Itineraries");
    Console.WriteLine("0. Exit");

    var choice = Console.ReadLine()?.Trim();
    switch (choice) {
        case "1":
            await PrintTrain();
            break;
        case "2":
            await PrintStation();
            break;
        case "3":
            await ScrapeItineraries();
            break;
        case null:
        case "0":
            // Leave immediately, skipping the blank separator line below.
            keepRunning = false;
            continue;
    }

    // Blank line between two menu rounds; unrecognised input also ends up here.
    Console.WriteLine();
}
// Asks for a train number, scrapes that train and prints the result as
// indented, camelCase JSON. Returns without output when input has ended.
async Task PrintTrain() {
    Console.Write("Train number: ");
    if (Console.ReadLine()?.Trim() is not string number) {
        return;
    }

    var scraped = await new TrainScraper().Scrape(number);
    var jsonOptions = new JsonSerializerOptions {
        PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
        WriteIndented = true,
    };
    Console.WriteLine(JsonSerializer.Serialize(scraped, jsonOptions));
}
// Asks for a station name, scrapes that station and prints the result as
// indented, camelCase JSON. Returns without output when input has ended.
async Task PrintStation() {
    Console.Write("Station name: ");
    var typedName = Console.ReadLine()?.Trim();
    if (typedName is null) {
        return;
    }

    var scraped = await new StationScraper().Scrape(typedName);
    var json = JsonSerializer.Serialize(
        scraped,
        new JsonSerializerOptions {
            PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
            WriteIndented = true,
        }
    );
    Console.WriteLine(json);
}
// Asks for an origin and a destination station, scrapes the itineraries
// between them and prints every train of every itinerary as three lines:
// departure, train details, arrival. Itineraries are separated by a blank line.
async Task ScrapeItineraries() {
    Console.Write("From station: ");
    var from = Console.ReadLine();
    Console.Write("To station: ");
    var to = Console.ReadLine();
    if (from is null || to is null) {
        // Input ended before both stations were entered.
        return;
    }

    var itineraries = await new RouteScraper().Scrape(from, to);
    Console.WriteLine($"{itineraries.Count} itineraries:");
    Console.WriteLine();

    // One "HH:mm station" line, used for both ends of a train leg.
    void WriteStopLine(DateTimeOffset moment, string station) =>
        Console.WriteLine($"{moment:HH:mm} {station}");

    foreach (var itinerary in itineraries) {
        foreach (var train in itinerary.Trains) {
            WriteStopLine(train.DepartureDate, train.From);
            Console.WriteLine($" {train.TrainRank,-4} {train.TrainNumber,-5} ({train.Operator}), {train.Km,3} km via {string.Join(", ", train.IntermediateStops)}");
            WriteStopLine(train.ArrivalDate, train.To);
        }
        Console.WriteLine();
    }
}

41
Dockerfile

@ -1,24 +1,23 @@
# https://hub.docker.com/_/microsoft-dotnet FROM python:slim
FROM mcr.microsoft.com/dotnet/sdk:8.0 AS build
WORKDIR /source
# copy csproj and restore as distinct layers RUN pip install pipenv
COPY *.sln .
COPY server/*.csproj ./server/
COPY scraper/*.csproj ./scraper/
COPY ConsoleTest/*.csproj ./ConsoleTest/
RUN dotnet restore
# copy everything else and build app WORKDIR /var/app/scraper
COPY server/. ./server/ COPY scraper/Pipfil* ./
COPY scraper/. ./scraper/ COPY scraper/setup.py ./
COPY ConsoleTest/. ./ConsoleTest/ WORKDIR /var/app/server
WORKDIR /source/server COPY server/Pipfil* ./
RUN dotnet publish -f net8.0 -c release -o /app --no-restore RUN pipenv install
RUN pipenv graph
# final stage/image WORKDIR /var/app/scraper
FROM mcr.microsoft.com/dotnet/aspnet:8.0 COPY scraper .
WORKDIR /app WORKDIR /var/app/server
COPY --from=build /app ./ COPY server .
ENV INSIDE_DOCKER=true RUN rm server/scraper
ENTRYPOINT ["dotnet", "Server.dll"] RUN ln -s /var/app/scraper ./server/scraper
ENV PORT 5000
EXPOSE ${PORT}
CMD ["pipenv", "run", "python3", "-m", "main"]

11
Pipfile

@ -0,0 +1,11 @@
[[source]]
url = "https://pypi.org/simple"
verify_ssl = true
name = "pypi"
[packages]
[dev-packages]
[requires]
python_version = "3.9"

20
Pipfile.lock generated

@ -0,0 +1,20 @@
{
"_meta": {
"hash": {
"sha256": "a36a5392bb1e8bbc06bfaa0761e52593cf2d83b486696bf54667ba8da616c839"
},
"pipfile-spec": 6,
"requires": {
"python_version": "3.9"
},
"sources": [
{
"name": "pypi",
"url": "https://pypi.org/simple",
"verify_ssl": true
}
]
},
"default": {},
"develop": {}
}

10
docker-compose.yml

@ -1,10 +0,0 @@
version: '3'
services:
infofer_scraper:
image: new_infofer_scraper
build: .
ports:
- ${PORT:-5001}:80
environment:
DB_DIR: /data

62
new-infofer-scraper.sln

@ -1,62 +0,0 @@

Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio Version 16
VisualStudioVersion = 16.6.30114.105
MinimumVisualStudioVersion = 10.0.40219.1
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "scraper", "scraper\scraper.csproj", "{E08BC25C-B39B-40F9-8114-A8D6545EE1C1}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "server", "server\server.csproj", "{C2D22A33-5317-47A3-B28A-E151224D3E46}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ConsoleTest", "ConsoleTest\ConsoleTest.csproj", "{0D8E3B5F-2511-4174-8129-275500753585}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Debug|x64 = Debug|x64
Debug|x86 = Debug|x86
Release|Any CPU = Release|Any CPU
Release|x64 = Release|x64
Release|x86 = Release|x86
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{E08BC25C-B39B-40F9-8114-A8D6545EE1C1}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{E08BC25C-B39B-40F9-8114-A8D6545EE1C1}.Debug|Any CPU.Build.0 = Debug|Any CPU
{E08BC25C-B39B-40F9-8114-A8D6545EE1C1}.Debug|x64.ActiveCfg = Debug|Any CPU
{E08BC25C-B39B-40F9-8114-A8D6545EE1C1}.Debug|x64.Build.0 = Debug|Any CPU
{E08BC25C-B39B-40F9-8114-A8D6545EE1C1}.Debug|x86.ActiveCfg = Debug|Any CPU
{E08BC25C-B39B-40F9-8114-A8D6545EE1C1}.Debug|x86.Build.0 = Debug|Any CPU
{E08BC25C-B39B-40F9-8114-A8D6545EE1C1}.Release|Any CPU.ActiveCfg = Release|Any CPU
{E08BC25C-B39B-40F9-8114-A8D6545EE1C1}.Release|Any CPU.Build.0 = Release|Any CPU
{E08BC25C-B39B-40F9-8114-A8D6545EE1C1}.Release|x64.ActiveCfg = Release|Any CPU
{E08BC25C-B39B-40F9-8114-A8D6545EE1C1}.Release|x64.Build.0 = Release|Any CPU
{E08BC25C-B39B-40F9-8114-A8D6545EE1C1}.Release|x86.ActiveCfg = Release|Any CPU
{E08BC25C-B39B-40F9-8114-A8D6545EE1C1}.Release|x86.Build.0 = Release|Any CPU
{C2D22A33-5317-47A3-B28A-E151224D3E46}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{C2D22A33-5317-47A3-B28A-E151224D3E46}.Debug|Any CPU.Build.0 = Debug|Any CPU
{C2D22A33-5317-47A3-B28A-E151224D3E46}.Debug|x64.ActiveCfg = Debug|Any CPU
{C2D22A33-5317-47A3-B28A-E151224D3E46}.Debug|x64.Build.0 = Debug|Any CPU
{C2D22A33-5317-47A3-B28A-E151224D3E46}.Debug|x86.ActiveCfg = Debug|Any CPU
{C2D22A33-5317-47A3-B28A-E151224D3E46}.Debug|x86.Build.0 = Debug|Any CPU
{C2D22A33-5317-47A3-B28A-E151224D3E46}.Release|Any CPU.ActiveCfg = Release|Any CPU
{C2D22A33-5317-47A3-B28A-E151224D3E46}.Release|Any CPU.Build.0 = Release|Any CPU
{C2D22A33-5317-47A3-B28A-E151224D3E46}.Release|x64.ActiveCfg = Release|Any CPU
{C2D22A33-5317-47A3-B28A-E151224D3E46}.Release|x64.Build.0 = Release|Any CPU
{C2D22A33-5317-47A3-B28A-E151224D3E46}.Release|x86.ActiveCfg = Release|Any CPU
{C2D22A33-5317-47A3-B28A-E151224D3E46}.Release|x86.Build.0 = Release|Any CPU
{0D8E3B5F-2511-4174-8129-275500753585}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{0D8E3B5F-2511-4174-8129-275500753585}.Debug|Any CPU.Build.0 = Debug|Any CPU
{0D8E3B5F-2511-4174-8129-275500753585}.Debug|x64.ActiveCfg = Debug|Any CPU
{0D8E3B5F-2511-4174-8129-275500753585}.Debug|x64.Build.0 = Debug|Any CPU
{0D8E3B5F-2511-4174-8129-275500753585}.Debug|x86.ActiveCfg = Debug|Any CPU
{0D8E3B5F-2511-4174-8129-275500753585}.Debug|x86.Build.0 = Debug|Any CPU
{0D8E3B5F-2511-4174-8129-275500753585}.Release|Any CPU.ActiveCfg = Release|Any CPU
{0D8E3B5F-2511-4174-8129-275500753585}.Release|Any CPU.Build.0 = Release|Any CPU
{0D8E3B5F-2511-4174-8129-275500753585}.Release|x64.ActiveCfg = Release|Any CPU
{0D8E3B5F-2511-4174-8129-275500753585}.Release|x64.Build.0 = Release|Any CPU
{0D8E3B5F-2511-4174-8129-275500753585}.Release|x86.ActiveCfg = Release|Any CPU
{0D8E3B5F-2511-4174-8129-275500753585}.Release|x86.Build.0 = Release|Any CPU
EndGlobalSection
EndGlobal

24
omnisharp.json

@ -1,24 +0,0 @@
{
"$schema": "https://json.schemastore.org/omnisharp",
"FormattingOptions": {
"OrganizeImports": true,
"UseTabs": true,
"TabSize": 4,
"IndentationSize": 4,
"NewLinesForBracesInTypes": false,
"NewLinesForBracesInMethods": false,
"NewLinesForBracesInProperties": false,
"NewLinesForBracesInAccessors": false,
"NewLinesForBracesInAnonymousMethods": false,
"NewLinesForBracesInControlBlocks": false,
"NewLinesForBracesInAnonymousTypes": false,
"NewLinesForBracesInObjectCollectionArrayInitializers": false,
"NewLinesForBracesInLambdaExpressionBody": false,
"NewLineForElse": true,
"NewLineForCatch": true,
"NewLineForFinally": true,
"NewLineForMembersInObjectInit": false,
"NewLineForMembersInAnonymousTypes": false,
"NewLineForClausesInQuery": false
}
}

14
scraper/Pipfile

@ -0,0 +1,14 @@
[[source]]
url = "https://pypi.org/simple"
verify_ssl = true
name = "pypi"
[packages]
beautifulsoup4 = "*"
requests = "*"
pytz = "*"
[dev-packages]
[requires]
python_version = "3.9"

85
scraper/Pipfile.lock generated

@ -0,0 +1,85 @@
{
"_meta": {
"hash": {
"sha256": "d7e3ebca9807b4f0c9dcac014554e9d1c9cb3a0c30b5c71b0b7cd4ccdc4934e1"
},
"pipfile-spec": 6,
"requires": {
"python_version": "3.9"
},
"sources": [
{
"name": "pypi",
"url": "https://pypi.org/simple",
"verify_ssl": true
}
]
},
"default": {
"beautifulsoup4": {
"hashes": [
"sha256:4c98143716ef1cb40bf7f39a8e3eec8f8b009509e74904ba3a7b315431577e35",
"sha256:84729e322ad1d5b4d25f805bfa05b902dd96450f43842c4e99067d5e1369eb25",
"sha256:fff47e031e34ec82bf17e00da8f592fe7de69aeea38be00523c04623c04fb666"
],
"index": "pypi",
"version": "==4.9.3"
},
"certifi": {
"hashes": [
"sha256:2bbf76fd432960138b3ef6dda3dde0544f27cbf8546c458e60baf371917ba9ee",
"sha256:50b1e4f8446b06f41be7dd6338db18e0990601dce795c2b1686458aa7e8fa7d8"
],
"version": "==2021.5.30"
},
"charset-normalizer": {
"hashes": [
"sha256:0c8911edd15d19223366a194a513099a302055a962bca2cec0f54b8b63175d8b",
"sha256:f23667ebe1084be45f6ae0538e4a5a865206544097e4e8bbcacf42cd02a348f3"
],
"markers": "python_version >= '3'",
"version": "==2.0.4"
},
"idna": {
"hashes": [
"sha256:14475042e284991034cb48e06f6851428fb14c4dc953acd9be9a5e95c7b6dd7a",
"sha256:467fbad99067910785144ce333826c71fb0e63a425657295239737f7ecd125f3"
],
"markers": "python_version >= '3'",
"version": "==3.2"
},
"pytz": {
"hashes": [
"sha256:83a4a90894bf38e243cf052c8b58f381bfe9a7a483f6a9cab140bc7f702ac4da",
"sha256:eb10ce3e7736052ed3623d49975ce333bcd712c7bb19a58b9e2089d4057d0798"
],
"index": "pypi",
"version": "==2021.1"
},
"requests": {
"hashes": [
"sha256:6c1246513ecd5ecd4528a0906f910e8f0f9c6b8ec72030dc9fd154dc1a6efd24",
"sha256:b8aa58f8cf793ffd8782d3d8cb19e66ef36f7aba4353eec859e74678b01b07a7"
],
"index": "pypi",
"version": "==2.26.0"
},
"soupsieve": {
"hashes": [
"sha256:052774848f448cf19c7e959adf5566904d525f33a3f8b6ba6f6f8f26ec7de0cc",
"sha256:c2c1c2d44f158cdbddab7824a9af8c4f83c76b1e23e049479aa432feb6c4c23b"
],
"markers": "python_version >= '3'",
"version": "==2.2.1"
},
"urllib3": {
"hashes": [
"sha256:39fb8672126159acb139a7718dd10806104dec1e2f0f6c88aab05d17df10c8d4",
"sha256:f57b4c16c62fa2760b7e3d97c35b255512fb6b59a259730f36ba32ce9f8e342f"
],
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' and python_version < '4'",
"version": "==1.26.6"
}
},
"develop": {}
}

1
scraper/__init__.py

@ -0,0 +1 @@
__all__ = ['scraper']

44
scraper/main.py

@ -0,0 +1,44 @@
from scraper import scrape
# Unique sentinel meaning "the caller supplied no default"; None cannot play
# this role because None is itself a value callers may pass as the default.
_NO_DEFAULT = object()


def check_yes_no(input: str, default=_NO_DEFAULT, considered_yes=None) -> bool:
    """Interpret a free-form answer as a yes/no boolean.

    Args:
        input: The raw answer. It is converted with ``str()``, stripped of
            surrounding whitespace and lower-cased before being examined.
        default: Value returned as-is when the normalised answer is empty.
            When omitted, an empty answer is an error.
        considered_yes: Collection of (lower-case) answers that count as
            "yes". A falsy value selects ``y``, ``yes``, ``t``, ``true``
            and ``1``.

    Returns:
        ``True`` when the normalised answer is in ``considered_yes``,
        ``False`` otherwise, or ``default`` for an empty answer.

    Raises:
        ValueError: The answer is empty and no default was given.
    """
    answer = str(input).strip().lower()
    if not answer:
        # Identity, not equality: a default whose __eq__ is permissive must
        # never be mistaken for the "no default" sentinel.
        if default is _NO_DEFAULT:
            raise ValueError('Empty input with no default')
        return default
    if not considered_yes:
        considered_yes = ['y', 'yes', 't', 'true', '1']
    return answer in considered_yes
def main():
    """Ask for a train number, scrape it and print a tab-separated summary.

    Output consists of a header line with the route endpoints, an optional
    status line (only when the scraped data carries a truthy ``status``),
    and one line per station: scheduled arrival, station name and scheduled
    departure, each followed by a tab.
    """
    def print_schedule_cell(stop, key):
        # Emit the scheduled time stored under `key`, or an empty cell when
        # the key is absent or falsy; either way the cell ends with a tab.
        moment = stop[key] if key in stop else None
        if moment:
            print(moment['scheduleTime'], end='\t')
        else:
            print(end='\t')

    train_no = int(input('Train number: '))
    yesterday_answer = input('Train departed yesterday? [y/N] ')
    departed_yesterday = check_yes_no(yesterday_answer, default=False)
    data = scrape(train_no, use_yesterday=departed_yesterday)

    print(f'Train {train_no}\t{data["route"]["from"]}\t{data["route"]["to"]}')
    print()

    status = data['status'] if 'status' in data else None
    if status:
        minutes_late = status['delay']
        delay = 'on time' if minutes_late == 0 else f'{minutes_late} min'
        state = status['state']
        station = status['station']
        print(f'Status: {delay}\t{state}\t{station}')
        print()

    for stop in data['stations']:
        print_schedule_cell(stop, 'arrival')
        print(stop['name'], end='\t')
        print_schedule_cell(stop, 'departure')
        print()
if __name__ == '__main__':
main()

1
scraper/omnisharp.json

@ -1 +0,0 @@
../omnisharp.json

20
scraper/schemas.py

@ -0,0 +1,20 @@
from contextlib import ExitStack as _ExitStack
_es = _ExitStack()
def _load_file(name: str):
    """Parse the JSON document called *name* that sits next to this module.

    The file object is registered on the module-level ``_es`` exit stack, so
    it is closed when that stack is closed rather than when this function
    returns.

    Args:
        name: File name, resolved relative to this module's directory.

    Returns:
        The decoded JSON content.
    """
    import json
    from os.path import join, dirname
    module_dir = dirname(__file__)
    # JSON text is UTF-8; without an explicit encoding open() uses the
    # locale's codec, which can fail or mis-decode the non-ASCII schema text.
    return json.load(_es.enter_context(open(join(module_dir, name), encoding='utf-8')))
# Train schemas keyed by version label; each value is the parsed content of
# the named JSON file, loaded once at import time.
TRAIN_INFO_SCHEMA = {
'v1': _load_file('scrape_train_schema.json'),
'v2': _load_file('scrape_train_schema_v2.json'),
}
# Station schemas keyed by version label (only 'v2' is defined here).
STATION_SCHEMA = {
'v2': _load_file('scrape_station_schema_v2.json'),
}
# Every schema has been parsed by now, so close the exit stack used while loading.
_es.close()

91
scraper/scrape_station.py

@ -0,0 +1,91 @@
import re
from datetime import datetime, timedelta
import pytz
import requests
from bs4 import BeautifulSoup
from .utils import *
# region regex definitions
# Character-class fragment (it is spliced inside [...] below): ASCII letters
# plus the Romanian diacritics listed here.
RO_LETTERS = r'A-Za-zăâîșțĂÂÎȚȘ'
# Matches "<name> în <date>": group 1 is the name (letters, dots, digits and
# spaces), group 2 is a run of digits and dots.
STATION_INFO_REGEX = re.compile(rf'^([{RO_LETTERS}.0-9 ]+) în ([0-9.]+)$')
# Stopping-time text. Groups: (1) the literal "necunoscută (stație terminus)",
# (2) a number, (3) its unit "min" or "sec", (4) an H:MM / HH:MM clock time.
# NOTE(review): `|` has the lowest precedence, so `^` belongs only to the
# first alternative and `$` only to the second - confirm this is intended.
STOPPING_TIME_REGEX = re.compile(r'^(necunoscută \(stație terminus\))|(?:([0-9]+) (min|sec) \((?:începând cu|până la) ([0-9]{1,2}:[0-9]{2})\))$')
# endregion
def scrape(station_name: str):
    """Scrape the InfoFer arrivals/departures page of one station.

    The station page is fetched first; the inputs of its ``form-search``
    form are then posted back to obtain the actual result markup.

    Args:
        station_name: Station name; it is passed through
            ``ro_letters_to_en`` and spaces become dashes in the URL.

    Returns:
        A dict with ``stationName`` and ``date`` (both taken from the page
        header), plus ``departures`` and ``arrivals``. Each of the latter is
        a list of entries, or ``None`` when the section has no ``<ul>``.
        An entry holds ``time`` (ISO formatted, localized to
        Europe/Bucharest), ``stoppingTime`` (seconds, or ``None`` for the
        terminus text) and a ``train`` dict.
    """
    def second_div(node):
        # Value cells are read from the second direct <div> child.
        return node('div', recursive=False)[1]

    def cell_text(node):
        return collapse_space(second_div(node).text)

    station_name = ro_letters_to_en(station_name)

    # Start scraping session
    session = requests.Session()
    landing = session.get(build_url(
        'https://mersultrenurilor.infofer.ro/ro-RO/Statie/{station}',
        station=station_name.replace(' ', '-'),
    ))
    search_form = BeautifulSoup(landing.text, features='html.parser').find(id='form-search')
    form_fields = {field['name']: field['value'] for field in search_form('input')}

    results = session.post(
        'https://mersultrenurilor.infofer.ro/ro-RO/Stations/StationsResult',
        data=form_fields,
    )
    soup = BeautifulSoup(results.text, features='html.parser')

    header_div, _, departures_div, arrivals_div, *_ = soup('div', recursive=False)
    shown_name, shown_date = STATION_INFO_REGEX.match(collapse_space(header_div.h2.text)).groups()
    scraped = {'stationName': shown_name, 'date': shown_date}

    day, month, year = (int(part) for part in shown_date.split('.'))
    date = datetime(year, month, day)
    # NOTE(review): one sequencer instance serves the departures and then the
    # arrivals - confirm that sharing its state across both lists is intended.
    dt_seq = DateTimeSequencer(date.year, date.month, date.day)
    tz = pytz.timezone('Europe/Bucharest')

    def parse_board(section, endpoint_key):
        listing = section.div.ul
        if listing is None:
            return None

        def parse_entry(item):
            # Only the first direct <div> is used; any further ones are ignored.
            data_div, *_ = item('div', recursive=False)
            main_div, details_div = data_div('div', recursive=False)
            time_div, dest_div, train_div, *_ = main_div('div', recursive=False)
            operator_div, route_div, stopping_div = details_div.div('div', recursive=False)

            hour, minute = (int(part) for part in cell_text(time_div.div.div).split(':'))
            entry = {'time': tz.localize(dt_seq(hour, minute)).isoformat()}

            terminus, amount, unit, _opposite_time = STOPPING_TIME_REGEX.match(
                cell_text(stopping_div.div)
            ).groups()
            if terminus:
                entry['stoppingTime'] = None
            elif amount:
                # Normalise to seconds.
                entry['stoppingTime'] = int(amount) * 60 if unit == 'min' else int(amount)

            train_cell = second_div(train_div.div.div)
            entry['train'] = {
                'rank': collapse_space(train_cell.span.text),
                'number': collapse_space(train_cell.a.text),
                endpoint_key: cell_text(dest_div.div.div),
                'operator': cell_text(operator_div.div),
                'route': cell_text(route_div.div).split(' - '),
            }
            return entry

        return [parse_entry(li) for li in listing('li', recursive=False)]

    scraped['departures'] = parse_board(departures_div, 'destination')
    scraped['arrivals'] = parse_board(arrivals_div, 'origin')
    return scraped

138
scraper/scrape_station_schema_v2.json

@ -0,0 +1,138 @@
{
"$schema": "http://json-schema.org/schema",
"title": "Train Info InfoFer Scrap Station Schema",
"description": "Results of scraping the InfoFer website for station arrival/departure info",
"definitions": {
"arrDepItem": {
"type": "object",
"properties": {
"time": {
"description": "Time of arrival/departure",
"type": "string",
"format": "date-time"
},
"train": {
"type": "object",
"properties": {
"rank": {
"type": "string",
"examples": [
"R",
"R-E",
"IR",
"IRN"
]
},
"number": {
"type": "string",
"examples": [
"74",
"15934"
]
},
"operator": {
"type": "string",
"examples": [
"CFR Călători",
"Softrans",
"Regio Călători"
]
},
"route": {
"description": "All the stations the train stops at",
"type": "array",
"items": {
"type": "string"
}
}
},
"required": [
"rank",
"number",
"operator"
]
},
"stoppingTime": {
"description": "The number of seconds the train stops in the station",
"type": [
"integer",
"null"
],
"minimum": 0
}
},
"required": [
"time",
"train",
"stoppingTime"
]
}
},
"type": "object",
"properties": {
"arrivals": {
"type": ["array", "null"],
"items": {
"allOf": [
{
"$ref": "#/definitions/arrDepItem"
},
{
"type": "object",
"properties": {
"train": {
"type": "object",
"properties": {
"origin": {
"type": "string"
}
},
"required": ["origin"]
}
},
"required": ["train"]
}
]
}
},
"departures": {
"type": ["array", "null"],
"items": {
"allOf": [
{
"$ref": "#/definitions/arrDepItem"
},
{
"type": "object",
"properties": {
"train": {
"type": "object",
"properties": {
"destination": {
"type": "string"
}
},
"required": ["destination"]
}
},
"required": ["train"]
}
]
}
},
"stationName": {
"type": "string"
},
"date": {
"description": "Date for which the data is provided (likely today)",
"type": "string",
"pattern": "^[0-9]{1,2}\\.[0-9]{2}\\.[0-9]{4}$"
}
},
"required": [
"arrivals",
"departures",
"stationName",
"date"
]
}

146
scraper/scrape_train.py

@ -0,0 +1,146 @@
import re
from datetime import datetime, timedelta
import pytz
import requests
from bs4 import BeautifulSoup
from .utils import *
# region regex definitions
# Page heading: "<rank> <number> în <dd.mm.yyyy>" -> (rank, number, date).
TRAIN_INFO_REGEX = re.compile(r'^([A-Z-]+) ([0-9]+) în ([0-9.]+)$')
# Operator line: "Operat de <operator>" -> (operator,).
OPERATOR_REGEX = re.compile(r'^Operat de (.+)$')
# Status line -> (delay minutes or None when the text starts with "Fără",
# late/early word, event phrase, station name).
SL_REGEX = re.compile(r'^(?:Fără|([0-9]+) min) (întârziere|mai devreme) la (trecerea fără oprire prin|sosirea în|plecarea din) (.+)\.$')
# Maps the FIRST character of the SL_REGEX event phrase to the state name
# emitted in the scraped result.
SL_STATE_MAP = {
't': 'passing',
's': 'arrival',
'p': 'departure',
}
# Body of a regex character class (interpolated into ROUTE_REGEX below).
RO_LETTERS = r'A-Za-zăâîșțĂÂÎȚȘ'
# Route heading: "Parcurs tren <from>-<to>" -> (from, to).
# NOTE(review): both character classes include the space, so any spaces
# around the dash end up inside the captured names.
ROUTE_REGEX = re.compile(rf'^Parcurs tren ([{RO_LETTERS} ]+)[-–]([{RO_LETTERS} ]+)$')
# "km <n>" -> (n,).
KM_REGEX = re.compile(r'^km ([0-9]+)$')
# "linia <platform>" -> (platform,).
PLATFORM_REGEX = re.compile(r'^linia (.+)$')
# "<n> min oprire" / "<n> sec oprire" -> (n, unit).
STOPPING_TIME_REGEX = re.compile(r'^([0-9]+) (min|sec) oprire$')
# Per-station status -> ("la timp" or None, signed delay in minutes or None,
# "*" marker or empty string).
STATION_DEPARR_STATUS_REGEX = re.compile(r'^(?:(la timp)|(?:((?:\+|-)[0-9]+) min \((?:(?:întârziere)|(?:mai devreme))\)))(\*?)$')
# endregion
def _scrape_time(elem, dt_seq, tz):
    """Parse one arrival/departure cell of a station row.

    Args:
        elem: Cell element (left or right column of the station row).
        dt_seq: Callable turning ``(hour, minute)`` into a datetime; it is
            called once per non-empty cell, in page order.
        tz: pytz timezone used to localize the scheduled time.

    Returns:
        ``None`` when the cell has no content; otherwise a dict with
        ``scheduleTime`` (ISO 8601 string) and ``status`` (``None`` or a
        dict with ``delay`` in minutes and ``real``, which is ``False`` when
        the status text carries the ``*`` marker).
    """
    parts = elem.div.div('div', recursive=False)
    if not parts:
        return None

    result = {}
    schedule_text = collapse_space(parts[0].text)
    st_hr, st_min = (int(comp) for comp in schedule_text.split(':'))
    result['scheduleTime'] = tz.localize(dt_seq(st_hr, st_min)).isoformat()

    if len(parts) >= 2:
        on_time, delay, approx = STATION_DEPARR_STATUS_REGEX.match(
            collapse_space(parts[1].text)
        ).groups()
        result['status'] = {
            'delay': 0 if on_time else int(delay),
            'real': not approx,
        }
    else:
        result['status'] = None
    return result


def scrape(train_no: str, use_yesterday=False, date_override=None):
    """Scrape the InfoFer page of a train and return its data as a dict.

    Args:
        train_no: Train number, interpolated into the page URL.
        use_yesterday: When true, query yesterday's date instead of today's.
        date_override: Optional date object (must provide ``strftime``);
            when truthy it takes precedence over ``use_yesterday``.

    Returns:
        A dict with the keys ``rank``, ``number``, ``date``, ``operator``,
        ``route`` (``from``/``to``), ``status`` (``None`` when the status
        line cannot be parsed) and ``stations`` (one dict per station with
        ``name``, ``km``, ``stoppingTime`` in seconds or ``None``,
        ``platform``, ``arrival`` and ``departure``).

    Raises:
        AttributeError: When the page does not have the expected structure
            (a regex does not match or an element is missing).
    """
    # Start scraping session
    s = requests.Session()

    date = datetime.today()
    if use_yesterday:
        date -= timedelta(days=1)
    if date_override:
        date = date_override

    r = s.get(build_url(
        'https://mersultrenurilor.infofer.ro/ro-RO/Tren/{train_no}',
        train_no=train_no,
        query=[
            ('Date', date.strftime('%d.%m.%Y')),
        ],
    ))

    soup = BeautifulSoup(r.text, features='html.parser')
    sform = soup.find(id='form-search')
    # Only named inputs are submitted with a form; inputs without a
    # ``name`` are skipped and a missing ``value`` is sent as empty instead
    # of raising KeyError.
    result_data = {
        elem['name']: elem.get('value', '')
        for elem in sform('input')
        if elem.has_attr('name')
    }

    r = s.post('https://mersultrenurilor.infofer.ro/ro-RO/Trains/TrainsResult', data=result_data)
    soup = BeautifulSoup(r.text, features='html.parser')

    scraped = {}

    train_info_div, _, _, results_div, *_ = soup('div', recursive=False)

    train_info_div = train_info_div.div('div', recursive=False)[0]

    scraped['rank'], scraped['number'], scraped['date'] = TRAIN_INFO_REGEX.match(collapse_space(train_info_div.h2.text)).groups()
    # The date reported by the page (not the requested one) anchors all
    # scheduled times below.
    date_d, date_m, date_y = (int(comp) for comp in scraped['date'].split('.'))
    date = datetime(date_y, date_m, date_d)

    scraped['operator'] = OPERATOR_REGEX.match(collapse_space(train_info_div.p.text)).groups()[0]

    results_div = results_div.div
    status_div = results_div('div', recursive=False)[0]
    route_text = collapse_space(status_div.h4.text)
    route_from, route_to = ROUTE_REGEX.match(route_text).groups()
    # ROUTE_REGEX's character classes include the space, so the captures
    # keep any spaces around the dash; strip them from the station names.
    scraped['route'] = {
        'from': route_from.strip(),
        'to': route_to.strip(),
    }
    try:
        status_line_match = SL_REGEX.match(collapse_space(status_div.div.text))
        slm_delay, slm_late, slm_arrival, slm_station = status_line_match.groups()
        scraped['status'] = {
            'delay': (int(slm_delay) if slm_late == 'întârziere' else -int(slm_delay)) if slm_delay else 0,
            'station': slm_station,
            'state': SL_STATE_MAP[slm_arrival[0]],
        }
    except Exception:
        # Deliberate best-effort: a missing or unparseable status line is
        # reported as "no status" rather than failing the whole scrape.
        scraped['status'] = None

    stations = status_div.ul('li', recursive=False)
    scraped['stations'] = []
    dt_seq = DateTimeSequencer(date.year, date.month, date.day)
    tz = pytz.timezone('Europe/Bucharest')
    for station in stations:
        station_scraped = {}

        left, middle, right = station.div('div', recursive=False)
        # name / km / stopping time / platform cells, queried once per row.
        details = middle.div.div('div', recursive=False)[0]('div', recursive=False)

        station_scraped['name'] = collapse_space(details[0].text)
        station_scraped['km'] = int(KM_REGEX.match(collapse_space(details[1].text)).groups()[0])

        stopping_text = collapse_space(details[2].text)
        if not stopping_text:
            station_scraped['stoppingTime'] = None
        else:
            st_value, st_minsec = STOPPING_TIME_REGEX.match(stopping_text).groups()
            # Normalize to seconds.
            station_scraped['stoppingTime'] = int(st_value) * 60 if st_minsec == 'min' else int(st_value)

        platform_text = collapse_space(details[3].text)
        if not platform_text:
            station_scraped['platform'] = None
        else:
            station_scraped['platform'] = PLATFORM_REGEX.match(platform_text).groups()[0]

        # Arrival must be parsed before departure: dt_seq is called in
        # page order.
        station_scraped['arrival'] = _scrape_time(left, dt_seq, tz)
        station_scraped['departure'] = _scrape_time(right, dt_seq, tz)

        scraped['stations'].append(station_scraped)

    return scraped

134
scraper/scrape_train_schema.json

@ -0,0 +1,134 @@
{
"$schema": "http://json-schema.org/schema",
"title": "Train Info InfoFer Scrap Train Schema",
"description": "Results of scraping the InfoFer website for train info",
"definitions": {
"delayType": {
"description": "Delay of the train (negative for being early)",
"type": "integer"
},
"stationArrDepTime": {
"description": "Time of arrival at/departure from station",
"type": ["object", "null"],
"properties": {
"scheduleTime": {
"description": "The time the train is scheduled to arrive/depart",
"type": "string",
"pattern": "^[0-9]{1,2}:[0-9]{2}$"
},
"status": {
"type": ["object", "null"],
"properties": {
"delay": {
"$ref": "#/definitions/delayType"
},
"real": {
"description": "Determines whether delay was actually reported or is an approximation",
"type": "boolean"
}
},
"required": ["delay", "real"]
}
},
"required": ["scheduleTime"]
}
},
"type": "object",
"properties": {
"rank": {
"description": "The rank of the train",
"type": "string",
"examples": [
"R",
"R-E",
"IR",
"IRN"
]
},
"number": {
"description": "The number of the train",
"type": "string",
"examples": [
"74",
"15934"
]
},
"date": {
"description": "Date of departure from the first station (dd.mm.yyyy)",
"type": "string",
"pattern": "^[0-9]{1,2}\\.[0-9]{2}\\.[0-9]{4}$"
},
"operator": {
"description": "Operator of the train",
"type": "string",
"examples": [
"CFR Călători",
"Softrans",
"Regio Călători"
]
},
"route": {
"description": "Route of the train",
"type": "object",
"properties": {
"from": {
"type": "string"
},
"to": {
"type": "string"
}
},
"required": ["from", "to"]
},
"status": {
"description": "Current status of the train",
"type": ["object", "null"],
"properties": {
"delay": {
"$ref": "#/definitions/delayType"
},
"station": {
"type": "string"
},
"state": {
"type": "string",
"enum": ["passing", "arrival", "departure"]
}
},
"required": ["delay", "station", "state"]
},
"stations": {
"description": "List of stations the train stops at",
"type": "array",
"items": {
"type": "object",
"properties": {
"name": {
"type": "string"
},
"km": {
"description": "The distance the train travelled until reaching this station",
"type": "integer"
},
"stoppingTime": {
"description": "The number of minutes the train is scheduled to stop in this station",
"type": ["integer", "null"],
"minimum": 0
},
"platform": {
"description": "The platform the train stopped at",
"type": ["string", "null"]
},
"arrival": {
"$ref": "#/definitions/stationArrDepTime"
},
"departure": {
"$ref": "#/definitions/stationArrDepTime"
}
},
"required": ["name", "km"]
}
}
},
"required": ["route", "stations", "rank", "number", "date", "operator"]
}

134
scraper/scrape_train_schema_v2.json

@ -0,0 +1,134 @@
{
"$schema": "http://json-schema.org/schema",
"title": "Train Info InfoFer Scrap Train Schema",
"description": "Results of scraping the InfoFer website for train info",
"definitions": {
"delayType": {
"description": "Delay of the train (negative for being early)",
"type": "integer"
},
"stationArrDepTime": {
"description": "Time of arrival at/departure from station",
"type": ["object", "null"],
"properties": {
"scheduleTime": {
"description": "The time the train is scheduled to arrive/depart",
"type": "string",
"format": "date-time"
},
"status": {
"type": ["object", "null"],
"properties": {
"delay": {
"$ref": "#/definitions/delayType"
},
"real": {
"description": "Determines whether delay was actually reported or is an approximation",
"type": "boolean"
}
},
"required": ["delay", "real"]
}
},
"required": ["scheduleTime"]
}
},
"type": "object",
"properties": {
"rank": {
"description": "The rank of the train",
"type": "string",
"examples": [
"R",
"R-E",
"IR",
"IRN"
]
},
"number": {
"description": "The number of the train",
"type": "string",
"examples": [
"74",
"15934"
]
},
"date": {
"description": "Date of departure from the first station (dd.mm.yyyy)",
"type": "string",
"pattern": "^[0-9]{1,2}\\.[0-9]{2}\\.[0-9]{4}$"
},
"operator": {
"description": "Operator of the train",
"type": "string",
"examples": [
"CFR Călători",
"Softrans",
"Regio Călători"
]
},
"route": {
"description": "Route of the train",
"type": "object",
"properties": {
"from": {
"type": "string"
},
"to": {
"type": "string"
}
},
"required": ["from", "to"]
},
"status": {
"description": "Current status of the train",
"type": ["object", "null"],
"properties": {
"delay": {
"$ref": "#/definitions/delayType"
},
"station": {
"type": "string"
},
"state": {
"type": "string",
"enum": ["passing", "arrival", "departure"]
}
},
"required": ["delay", "station", "state"]
},
"stations": {
"description": "List of stations the train stops at",
"type": "array",
"items": {
"type": "object",
"properties": {
"name": {
"type": "string"
},
"km": {
"description": "The distance the train travelled until reaching this station",
"type": "integer"
},
"stoppingTime": {
"description": "The number of seconds the train is scheduled to stop in this station",
"type": ["integer", "null"],
"minimum": 1
},
"platform": {
"description": "The platform the train stopped at",
"type": ["string", "null"]
},
"arrival": {
"$ref": "#/definitions/stationArrDepTime"
},
"departure": {
"$ref": "#/definitions/stationArrDepTime"
}
},
"required": ["name", "km"]
}
}
},
"required": ["route", "stations", "rank", "number", "date", "operator"]
}

16
scraper/scraper.csproj

@ -1,16 +0,0 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<Nullable>enable</Nullable>
<TargetFrameworks>net6.0;net7.0;net8.0</TargetFrameworks>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="AngleSharp" Version="0.16.0" />
<PackageReference Include="Flurl" Version="3.0.2" />
<PackageReference Include="Jetbrains.Annotations" Version="2021.2.0" />
<PackageReference Include="Newtonsoft.Json" Version="13.0.3" />
<PackageReference Include="NodaTime" Version="3.0.5" />
</ItemGroup>
</Project>

12
scraper/scraper.py

@ -0,0 +1,12 @@
#! /usr/bin/env python3
from .scrape_train import scrape as scrape_train
from .scrape_station import scrape as scrape_station
def main(train_no='1538'):
    """Smoke-test the package by scraping one train and pretty-printing it.

    Args:
        train_no: Train number to look up. Passed as a string because
            ``scrape_train`` annotates its ``train_no`` parameter as ``str``.
    """
    from pprint import pprint

    print(f'Testing package with train number {train_no}')
    pprint(scrape_train(train_no))
if __name__ == '__main__':
main()

8
scraper/setup.py

@ -0,0 +1,8 @@
# Packaging script for the InfoFer scraper.
# NOTE(review): `install_requires` is a setuptools keyword; distutils'
# `setup` presumably ignores it (with a warning), and distutils itself was
# removed from the standard library in Python 3.12 — confirm and consider
# importing `setup` from setuptools instead.
from distutils.core import setup
setup(
name='InfoFer_Scraper',
version='0.1',
author='Dan Cojocaru',
# Runtime dependencies of the scraper package.
install_requires=['beautifulsoup4', 'requests', 'pytz']
)

16
scraper/src/Exceptions/TrainNotThisDayException.cs

@ -1,16 +0,0 @@
using System;
using System.Runtime.Serialization;
using JetBrains.Annotations;
namespace scraper.Exceptions {
/// <summary>
///     Signals that the requested train might be running in general,
///     but does not run on the day that was requested.
/// </summary>
public class TrainNotThisDayException : Exception {
    /// <summary>Creates the exception without a message.</summary>
    public TrainNotThisDayException() { }

    /// <summary>Creates the exception with the given message.</summary>
    public TrainNotThisDayException([CanBeNull] string? message)
        : base(message) { }

    /// <summary>Creates the exception with a message and the exception that caused it.</summary>
    public TrainNotThisDayException([CanBeNull] string? message, [CanBeNull] Exception? innerException)
        : base(message, innerException) { }

    /// <summary>Deserialization constructor.</summary>
    protected TrainNotThisDayException([NotNull] SerializationInfo info, StreamingContext context)
        : base(info, context) { }
}
}

62
scraper/src/Models/Itinerary.cs

@ -1,62 +0,0 @@
using System;
using System.Collections.Generic;
namespace scraper.Models.Itinerary;
#region Interfaces
/// <summary>
/// A route between two stations, made up of one or more trains.
/// </summary>
public interface IItinerary {
/// <summary>
/// The trains of this itinerary, in the order they were added.
/// </summary>
public IReadOnlyList<IItineraryTrain> Trains { get; }
}
/// <summary>
/// One train (leg) of an itinerary.
/// </summary>
public interface IItineraryTrain {
/// <summary>Name of the station this leg starts at.</summary>
public string From { get; }
/// <summary>Name of the station this leg ends at.</summary>
public string To { get; }
/// <summary>Names of the stops between <see cref="From"/> and <see cref="To"/>; empty when there are none.</summary>
public IReadOnlyList<string> IntermediateStops { get; }
/// <summary>Departure date and time of this leg.</summary>
public DateTimeOffset DepartureDate { get; }
/// <summary>Arrival date and time of this leg.</summary>
public DateTimeOffset ArrivalDate { get; }
/// <summary>Distance in kilometres, as reported for this leg.</summary>
public int Km { get; }
/// <summary>Operator of the train.</summary>
public string Operator { get; }
/// <summary>Rank of the train.</summary>
public string TrainRank { get; }
/// <summary>Number of the train.</summary>
public string TrainNumber { get; }
}
#endregion
#region Implementations
/// <summary>
///     Mutable implementation of <see cref="IItinerary" />; trains are appended
///     through the <c>AddTrain</c> overloads.
/// </summary>
internal record Itinerary : IItinerary {
    private List<IItineraryTrain> ModifyableTrains { get; set; } = new();

    public IReadOnlyList<IItineraryTrain> Trains => ModifyableTrains;

    /// <summary>Appends an already built train to the itinerary.</summary>
    internal void AddTrain(IItineraryTrain train) => ModifyableTrains.Add(train);

    /// <summary>
    ///     Creates a new <see cref="ItineraryTrain" />, lets <paramref name="configurator" />
    ///     fill it in, then appends it to the itinerary.
    /// </summary>
    internal void AddTrain(Action<ItineraryTrain> configurator) {
        var configuredTrain = new ItineraryTrain();
        configurator(configuredTrain);
        AddTrain(configuredTrain);
    }
}
/// <summary>
///     Mutable implementation of <see cref="IItineraryTrain" />; string members
///     default to the empty string and the stop list starts empty.
/// </summary>
internal record ItineraryTrain : IItineraryTrain {
    private List<string> ModifyableIntermediateStops { get; set; } = new();

    public string From { get; internal set; } = string.Empty;
    public string To { get; internal set; } = string.Empty;
    public IReadOnlyList<string> IntermediateStops => ModifyableIntermediateStops;
    public DateTimeOffset DepartureDate { get; internal set; } = new();
    public DateTimeOffset ArrivalDate { get; internal set; } = new();
    public int Km { get; internal set; }
    public string Operator { get; internal set; } = string.Empty;
    public string TrainRank { get; internal set; } = string.Empty;
    public string TrainNumber { get; internal set; } = string.Empty;

    /// <summary>Appends the name of an intermediate stop.</summary>
    internal void AddIntermediateStop(string stop) => ModifyableIntermediateStops.Add(stop);
}
#endregion

110
scraper/src/Models/Station.cs

@ -1,110 +0,0 @@
using System;
using System.Collections.Generic;
using InfoferScraper.Models.Status;
namespace InfoferScraper.Models.Station {
#region Interfaces
/// <summary>
/// Result of scraping a station: its name, the date, and the arrival and departure lists.
/// </summary>
public interface IStationScrapeResult {
/// <summary>Name of the station.</summary>
public string StationName { get; }
/// <summary>
/// Date in the DD.MM.YYYY format
/// This date is taken as-is from the result.
/// </summary>
public string Date { get; }
/// <summary>Trains arriving at the station; may be null.</summary>
public IReadOnlyList<IStationArrDep>? Arrivals { get; }
/// <summary>Trains departing from the station; may be null.</summary>
public IReadOnlyList<IStationArrDep>? Departures { get; }
}
/// <summary>
/// One arrival or departure entry of a station.
/// </summary>
public interface IStationArrDep {
/// <summary>
/// How long the train stops in the station, or null when not known.
/// NOTE(review): presumably in seconds, like ITrainStopDescription.StoppingTime — confirm.
/// </summary>
public int? StoppingTime { get; }
/// <summary>Time of the arrival/departure.</summary>
public DateTimeOffset Time { get; }
/// <summary>The train this entry refers to.</summary>
public IStationTrain Train { get; }
/// <summary>Status of the train for this entry.</summary>
public IStationStatus Status { get; }
}
/// <summary>
/// Description of a train as listed in a station's arrivals/departures.
/// </summary>
public interface IStationTrain {
/// <summary>Number of the train.</summary>
public string Number { get; }
/// <summary>Operator of the train.</summary>
public string Operator { get; }
/// <summary>Rank of the train.</summary>
public string Rank { get; }
/// <summary>Names of the stations on the train's route.</summary>
public IReadOnlyList<string> Route { get; }
/// <summary>
/// Arrivals -> Departure station; Departures -> Destination station
/// </summary>
public string Terminus { get; }
/// <summary>
/// Departure date of the train.
/// NOTE(review): presumably the date the train leaves its first station — confirm.
/// </summary>
public DateTimeOffset DepartureDate { get; }
}
/// <summary>
/// An <see cref="IStatus"/> extended with the platform of the train in the station.
/// </summary>
public interface IStationStatus : IStatus {
/// <summary>Platform of the train, or null when not available.</summary>
public string? Platform { get; }
}
#endregion
#region Implementations
/// <summary>
///     Mutable implementation of <see cref="IStationScrapeResult" />; entries are
///     appended through the <c>AddNewStation…</c> methods.
/// </summary>
internal record StationScrapeResult : IStationScrapeResult {
    private List<StationArrDep>? _modifyableArrivals = new();
    private List<StationArrDep>? _modifyableDepartures = new();

    public string StationName { get; internal set; } = "";
    public string Date { get; internal set; } = "";
    public IReadOnlyList<IStationArrDep>? Arrivals => _modifyableArrivals?.AsReadOnly();
    public IReadOnlyList<IStationArrDep>? Departures => _modifyableDepartures?.AsReadOnly();

    // The backing lists are created on demand if they are null.
    private void AddStationArrival(StationArrDep arrival) =>
        (_modifyableArrivals ??= new List<StationArrDep>()).Add(arrival);

    private void AddStationDeparture(StationArrDep departure) =>
        (_modifyableDepartures ??= new List<StationArrDep>()).Add(departure);

    /// <summary>
    ///     Creates a new entry, lets <paramref name="configurator" /> fill it in,
    ///     then appends it to the arrivals.
    /// </summary>
    internal void AddNewStationArrival(Action<StationArrDep> configurator) {
        var arrival = new StationArrDep();
        configurator(arrival);
        AddStationArrival(arrival);
    }

    /// <summary>
    ///     Creates a new entry, lets <paramref name="configurator" /> fill it in,
    ///     then appends it to the departures.
    /// </summary>
    internal void AddNewStationDeparture(Action<StationArrDep> configurator) {
        var departure = new StationArrDep();
        configurator(departure);
        AddStationDeparture(departure);
    }
}
/// <summary>
/// Mutable implementation of <see cref="IStationArrDep"/>.
/// </summary>
internal record StationArrDep : IStationArrDep {
public int? StoppingTime { get; internal set; }
public DateTimeOffset Time { get; internal set; }
// Read-only interface views over the mutable objects below.
public IStationTrain Train => ModifyableTrain;
public IStationStatus Status => ModifyableStatus;
// Concrete objects, exposed internally so they can be filled in.
internal readonly StationTrain ModifyableTrain = new();
internal readonly StationStatus ModifyableStatus = new();
}
/// <summary>
/// Mutable implementation of <see cref="IStationTrain"/>; string members default to the empty string.
/// </summary>
internal record StationTrain : IStationTrain {
private readonly List<string> _modifyableRoute = new();
public string Number { get; internal set; } = "";
public string Operator { get; internal set; } = "";
public string Rank { get; internal set; } = "";
// A new read-only wrapper over the route list is created on every access.
public IReadOnlyList<string> Route => _modifyableRoute.AsReadOnly();
public string Terminus { get; internal set; } = "";
public DateTimeOffset DepartureDate { get; internal set; }
/// <summary>Appends a station name to the route.</summary>
internal void AddRouteStation(string station) => _modifyableRoute.Add(station);
}
/// <summary>
/// Mutable implementation of <see cref="IStationStatus"/>.
/// </summary>
internal record StationStatus : IStationStatus {
public int Delay { get; internal set; }
public bool Real { get; internal set; }
public bool Cancelled { get; internal set; }
public string? Platform { get; internal set; }
}
#endregion
}

18
scraper/src/Models/Status.cs

@ -1,18 +0,0 @@
namespace InfoferScraper.Models.Status {
/// <summary>
/// Delay status of a train.
/// </summary>
public interface IStatus {
/// <summary>
/// Reported delay.
/// NOTE(review): presumably in minutes, negative when early — confirm against the scrapers.
/// </summary>
public int Delay { get; }
/// <summary>
/// Determines whether delay was actually reported or is an approximation
/// </summary>
public bool Real { get; }
/// <summary>Whether the train is marked as cancelled.</summary>
public bool Cancelled { get; }
}
/// <summary>
/// Mutable implementation of <see cref="IStatus"/>.
/// </summary>
internal record Status : IStatus {
public int Delay { get; set; }
public bool Real { get; set; }
public bool Cancelled { get; set; }
}
}

246
scraper/src/Models/Train.cs

@ -1,246 +0,0 @@
using System;
using System.Collections.Generic;
using InfoferScraper.Models.Status;
using Newtonsoft.Json;
using Newtonsoft.Json.Converters;
using Newtonsoft.Json.Serialization;
namespace InfoferScraper.Models.Train {
#region Interfaces
/// <summary>
/// Result of scraping a train: identification data plus its groups.
/// </summary>
public interface ITrainScrapeResult {
/// <summary>Rank of the train.</summary>
public string Rank { get; }
/// <summary>Number of the train.</summary>
public string Number { get; }
/// <summary>
/// Date in the DD.MM.YYYY format
/// This date is taken as-is from the result.
/// </summary>
public string Date { get; }
/// <summary>Operator of the train.</summary>
public string Operator { get; }
/// <summary>Groups of the train, each with its own route, status and stations.</summary>
public IReadOnlyList<ITrainGroup> Groups { get; }
}
/// <summary>
/// One group of a train: a route, an optional current status and the list of stops.
/// </summary>
public interface ITrainGroup {
/// <summary>First and last station of the group.</summary>
public ITrainRoute Route { get; }
/// <summary>Current status, or null when none is available.</summary>
public ITrainStatus? Status { get; }
/// <summary>The stops of the group.</summary>
public IReadOnlyList<ITrainStopDescription> Stations { get; }
}
/// <summary>
/// Route of a train group, given by its two end stations.
/// </summary>
public interface ITrainRoute {
/// <summary>Name of the station the route starts at.</summary>
public string From { get; }
/// <summary>Name of the station the route ends at.</summary>
public string To { get; }
}
/// <summary>
/// Current status of a train group.
/// </summary>
public interface ITrainStatus {
/// <summary>
/// Reported delay.
/// NOTE(review): presumably in minutes, negative when early — confirm.
/// </summary>
public int Delay { get; }
/// <summary>Name of the station the status refers to.</summary>
public string Station { get; }
/// <summary>Whether the train passed through, arrived at or departed from the station.</summary>
public StatusKind State { get; }
}
/// <summary>
/// Description of one stop of a train.
/// </summary>
public interface ITrainStopDescription {
/// <summary>Name of the station.</summary>
public string Name { get; }
/// <summary>
/// NOTE(review): presumably the station name as used in links/URLs — confirm against the scraper.
/// </summary>
public string LinkName { get; }
/// <summary>Kilometre value reported for this stop.</summary>
public int Km { get; }
/// <summary>
/// The time the train waits in the station in seconds
/// </summary>
public int? StoppingTime { get; }
/// <summary>Platform of the train in this station, or null when not available.</summary>
public string? Platform { get; }
/// <summary>Arrival data, or null when there is none.</summary>
public ITrainStopArrDep? Arrival { get; }
/// <summary>Departure data, or null when there is none.</summary>
public ITrainStopArrDep? Departure { get; }
/// <summary>Notes attached to this stop; the elements are ITrainStopNote implementations.</summary>
public IReadOnlyList<object> Notes { get; }
}
/// <summary>
/// Base interface of all notes attached to a train stop.
/// </summary>
public interface ITrainStopNote {
/// <summary>Discriminator telling which kind of note this is.</summary>
public NoteKind Kind { get; }
}
/// <summary>
/// Note carrying a train rank and number (kind <see cref="NoteKind.TrainNumberChange"/>).
/// </summary>
public interface ITrainStopTrainNumberChangeNote : ITrainStopNote {
/// <summary>Rank of the train named by the note.</summary>
public string Rank { get; }
/// <summary>Number of the train named by the note.</summary>
public string Number { get; }
}
/// <summary>
/// Note carrying a train rank, number and departure date (kind <see cref="NoteKind.DepartsAs"/>).
/// </summary>
public interface ITrainStopDepartsAsNote : ITrainStopNote {
/// <summary>Rank of the train named by the note.</summary>
public string Rank { get; }
/// <summary>Number of the train named by the note.</summary>
public string Number { get; }
/// <summary>Departure date of the train named by the note.</summary>
public DateTimeOffset DepartureDate { get; }
}
/// <summary>
/// Note about detaching wagons (kind <see cref="NoteKind.DetachingWagons"/>).
/// </summary>
public interface ITrainStopDetachingWagonsNote : ITrainStopNote {
/// <summary>Name of the station the note refers to.</summary>
public string Station { get; }
}
/// <summary>
/// Note about receiving wagons (kind <see cref="NoteKind.ReceivingWagons"/>).
/// </summary>
public interface ITrainStopReceivingWagonsNote : ITrainStopNote {
/// <summary>Name of the station the note refers to.</summary>
public string Station { get; }
}
/// <summary>
/// Arrival or departure data of a train at one stop.
/// </summary>
public interface ITrainStopArrDep {
/// <summary>The scheduled time of the arrival/departure.</summary>
public DateTimeOffset ScheduleTime { get; }
/// <summary>Delay status, or null when none is available.</summary>
public IStatus? Status { get; }
}
#endregion
/// <summary>
/// Kind of event a train status refers to.
/// The converter attribute serializes the values as camelCase strings.
/// </summary>
[JsonConverter(typeof(StringEnumConverter), typeof(CamelCaseNamingStrategy))]
public enum StatusKind {
Passing,
Arrival,
Departure,
}
/// <summary>
/// Discriminator for the kinds of notes attached to a train stop.
/// The converter attribute serializes the values as camelCase strings.
/// </summary>
[JsonConverter(typeof(StringEnumConverter), typeof(CamelCaseNamingStrategy))]
public enum NoteKind {
TrainNumberChange,
DetachingWagons,
ReceivingWagons,
DepartsAs,
}
#region Implementations
/// <summary>
///     Mutable implementation of <see cref="ITrainScrapeResult" />; groups are
///     appended through <c>AddTrainGroup</c>.
/// </summary>
internal record TrainScrapeResult : ITrainScrapeResult {
    private List<ITrainGroup> ModifyableGroups { get; set; } = new();

    public string Rank { get; set; } = "";
    public string Number { get; set; } = "";
    public string Date { get; set; } = "";
    public string Operator { get; set; } = "";
    public IReadOnlyList<ITrainGroup> Groups => ModifyableGroups.AsReadOnly();

    private void AddTrainGroup(ITrainGroup trainGroup) => ModifyableGroups.Add(trainGroup);

    /// <summary>
    ///     Creates a new <see cref="TrainGroup" />, lets <paramref name="configurator" />
    ///     fill it in, then appends it to the groups.
    /// </summary>
    internal void AddTrainGroup(Action<TrainGroup> configurator) {
        var configuredGroup = new TrainGroup();
        configurator(configuredGroup);
        AddTrainGroup(configuredGroup);
    }
}
/// <summary>
///     Mutable implementation of <see cref="ITrainGroup" />; filled in through the
///     configurator-based methods below.
/// </summary>
internal record TrainGroup : ITrainGroup {
    private List<ITrainStopDescription> ModifyableStations { get; set; } = new();

    public ITrainRoute Route { get; init; } = new TrainRoute();
    public ITrainStatus? Status { get; private set; }
    public IReadOnlyList<ITrainStopDescription> Stations => ModifyableStations.AsReadOnly();

    private void AddStopDescription(ITrainStopDescription stopDescription) =>
        ModifyableStations.Add(stopDescription);

    /// <summary>
    ///     Creates a new stop, lets <paramref name="configurator" /> fill it in,
    ///     then appends it to the stations.
    /// </summary>
    internal void AddStopDescription(Action<TrainStopDescription> configurator) {
        var configuredStop = new TrainStopDescription();
        configurator(configuredStop);
        AddStopDescription(configuredStop);
    }

    /// <summary>
    ///     Lets <paramref name="configurator" /> modify the route in place;
    ///     <see cref="Route" /> is cast to <see cref="TrainRoute" /> for that.
    /// </summary>
    internal void ConfigureRoute(Action<TrainRoute> configurator) => configurator((TrainRoute)Route);

    /// <summary>
    ///     Creates a new status, lets <paramref name="configurator" /> fill it in,
    ///     then stores it as <see cref="Status" />.
    /// </summary>
    internal void MakeStatus(Action<TrainStatus> configurator) {
        var configuredStatus = new TrainStatus();
        configurator(configuredStatus);
        Status = configuredStatus;
    }
}
/// <summary>
///     Mutable implementation of <see cref="ITrainRoute" />; both ends default to
///     the empty string.
/// </summary>
internal record TrainRoute : ITrainRoute {
    public string From { get; set; } = "";
    public string To { get; set; } = "";
}
/// <summary>
/// Mutable implementation of <see cref="ITrainStatus"/>.
/// </summary>
internal record TrainStatus : ITrainStatus {
public int Delay { get; set; }
public string Station { get; set; } = "";
public StatusKind State { get; set; }
}
/// <summary>
///     Mutable implementation of <see cref="ITrainStopDescription" />.
///     Arrival/departure data and notes are attached through the
///     <c>Make…</c> and <c>Add…Note</c> methods.
/// </summary>
internal record TrainStopDescription : ITrainStopDescription {
    private List<ITrainStopNote> ModifyableNotes { get; } = new();

    public string Name { get; set; } = "";
    public string LinkName { get; set; } = "";
    public int Km { get; set; }
    public int? StoppingTime { get; set; }
    public string? Platform { get; set; }
    public ITrainStopArrDep? Arrival { get; private set; }
    public ITrainStopArrDep? Departure { get; private set; }
    public IReadOnlyList<object> Notes => ModifyableNotes.AsReadOnly();

    /// <summary>
    ///     Creates new arrival data, lets <paramref name="configurator" /> fill it in,
    ///     then stores it as <see cref="Arrival" />.
    /// </summary>
    internal void MakeArrival(Action<TrainStopArrDep> configurator) {
        TrainStopArrDep newArrival = new();
        configurator(newArrival);
        Arrival = newArrival;
    }

    /// <summary>
    ///     Creates new departure data, lets <paramref name="configurator" /> fill it in,
    ///     then stores it as <see cref="Departure" />.
    /// </summary>
    internal void MakeDeparture(Action<TrainStopArrDep> configurator) {
        TrainStopArrDep newDeparture = new();
        configurator(newDeparture);
        Departure = newDeparture;
    }

    class DepartsAsNote : ITrainStopDepartsAsNote {
        public NoteKind Kind => NoteKind.DepartsAs;
        public string Rank { get; set; } = "";
        public string Number { get; set; } = "";
        public DateTimeOffset DepartureDate { get; set; }
    }

    class TrainNumberChangeNote : ITrainStopTrainNumberChangeNote {
        public NoteKind Kind => NoteKind.TrainNumberChange;
        public string Rank { get; set; } = "";
        public string Number { get; set; } = "";
    }

    class ReceivingWagonsNote : ITrainStopReceivingWagonsNote {
        public NoteKind Kind => NoteKind.ReceivingWagons;
        public string Station { get; set; } = "";
    }

    // Implements the detaching-wagons interface; it previously declared
    // ITrainStopReceivingWagonsNote, so type checks on the note saw the
    // wrong interface even though Kind was DetachingWagons.
    class DetachingWagonsNote : ITrainStopDetachingWagonsNote {
        public NoteKind Kind => NoteKind.DetachingWagons;
        public string Station { get; set; } = "";
    }

    /// <summary>Appends a <see cref="NoteKind.DepartsAs" /> note.</summary>
    internal void AddDepartsAsNote(string rank, string number, DateTimeOffset departureDate) {
        ModifyableNotes.Add(new DepartsAsNote { Rank = rank, Number = number, DepartureDate = departureDate });
    }

    /// <summary>Appends a <see cref="NoteKind.TrainNumberChange" /> note.</summary>
    internal void AddTrainNumberChangeNote(string rank, string number) {
        ModifyableNotes.Add(new TrainNumberChangeNote { Rank = rank, Number = number });
    }

    /// <summary>Appends a <see cref="NoteKind.ReceivingWagons" /> note.</summary>
    internal void AddReceivingWagonsNote(string station) {
        ModifyableNotes.Add(new ReceivingWagonsNote { Station = station });
    }

    /// <summary>Appends a <see cref="NoteKind.DetachingWagons" /> note.</summary>
    internal void AddDetachingWagonsNote(string station) {
        ModifyableNotes.Add(new DetachingWagonsNote { Station = station });
    }
}
/// <summary>
/// Mutable implementation of <see cref="ITrainStopArrDep"/>.
/// </summary>
public record TrainStopArrDep : ITrainStopArrDep {
public DateTimeOffset ScheduleTime { get; set; }
public IStatus? Status { get; private set; }
/// <summary>
/// Creates a new status, lets the configurator fill it in, then stores it in the Status property.
/// </summary>
internal void MakeStatus(Action<Status.Status> configurator) {
// "Status.Status" names the Status record type; the plain "Status" assigned below is this record's property.
Status.Status newStatus = new();
configurator(newStatus);
Status = newStatus;
}
}
#endregion
}

220
scraper/src/Scrapers/Route.cs

@ -1,220 +0,0 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net;
using System.Net.Http;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using AngleSharp;
using AngleSharp.Dom;
using AngleSharp.Html.Dom;
using Flurl;
using InfoferScraper.Models.Train;
using NodaTime;
using NodaTime.Extensions;
using scraper.Models.Itinerary;
namespace InfoferScraper.Scrapers;
/// <summary>
///     Scrapes itineraries (routes between two stations) from the InfoFer website.
/// </summary>
public class RouteScraper {
    private const string BaseUrl = "https://mersultrenurilor.infofer.ro/ro-RO/";
    private static readonly DateTimeZone BucharestTz = DateTimeZoneProviders.Tzdb["Europe/Bucharest"];

    // "<km> km cu <rank> <number>"
    private static readonly Regex KmTrainRankNoRegex = new(@"^([0-9]+)\skm\scu\s([A-Z-]+)\s([0-9]+)$");
    // "Operat de <operator>"
    private static readonly Regex OperatorRegex = new(@$"^Operat\sde\s([{Utils.RoLetters}\s]+)$");
    // "Ple|Sos <day> <month abbreviation>[.] <hour>:<minute>"
    private static readonly Regex DepArrRegex = new(@"^(Ple|Sos)\s([0-9]+)\s([a-z]+)\.?\s([0-9]+):([0-9]+)$");

    // Month abbreviations as matched by DepArrRegex, mapped to month numbers.
    private static readonly Dictionary<string, int> Months = new Dictionary<string, int>() {
        ["ian"] = 1,
        ["feb"] = 2,
        ["mar"] = 3,
        ["apr"] = 4,
        ["mai"] = 5,
        ["iun"] = 6,
        ["iul"] = 7,
        ["aug"] = 8,
        ["sep"] = 9,
        ["oct"] = 10,
        ["noi"] = 11,
        ["dec"] = 12,
    };

    private readonly CookieContainer cookieContainer = new();

    private readonly HttpClient httpClient;

    /// <summary>
    ///     Creates the scraper. When a handler is supplied, its cookie settings are
    ///     overwritten so that this scraper's own cookie container is used.
    /// </summary>
    public RouteScraper(HttpClientHandler? httpClientHandler = null) {
        if (httpClientHandler == null) {
            httpClientHandler = new HttpClientHandler {
                CookieContainer = cookieContainer,
                UseCookies = true,
            };
        }
        else {
            httpClientHandler.CookieContainer = cookieContainer;
            httpClientHandler.UseCookies = true;
        }
        httpClient = new HttpClient(httpClientHandler) {
            BaseAddress = new Uri(BaseUrl),
            DefaultRequestVersion = new Version(2, 0),
        };
    }

    /// <summary>
    ///     Parses a "Ple/Sos day month hh:mm" text into a local date-time.
    ///     The text carries no year: the year of <paramref name="now" /> is assumed,
    ///     and one year is added when the result would lie more than a day in the past.
    /// </summary>
    private static LocalDateTime ParseDepArrDate(string text, LocalDateTime now) {
        var match = DepArrRegex.Match(text);
        var date = new LocalDateTime(
            now.Year,
            Months[match.Groups[3].Value],
            int.Parse(match.Groups[2].Value),
            int.Parse(match.Groups[4].Value),
            int.Parse(match.Groups[5].Value),
            0
        );
        return date < now.PlusDays(-1) ? date.PlusYears(1) : date;
    }

    /// <summary>
    ///     Scrapes the itineraries between two stations.
    /// </summary>
    /// <param name="from">Departure station, used as a URL path segment.</param>
    /// <param name="to">Arrival station, used as a URL path segment.</param>
    /// <param name="dateOverride">
    ///     Optional departure date; converted to the Europe/Bucharest zone before being sent.
    /// </param>
    /// <returns>
    ///     The itineraries found, or null when the result page has no top-level div.
    /// </returns>
    public async Task<List<IItinerary>?> Scrape(string from, string to, DateTimeOffset? dateOverride = null) {
        var dateOverrideInstant = dateOverride?.ToInstant().InZone(BucharestTz);
        dateOverride = dateOverrideInstant?.ToDateTimeOffset();

        var asConfig = Configuration.Default;
        var asContext = BrowsingContext.New(asConfig);

        var firstUrl = "Rute-trenuri"
            .AppendPathSegment(from)
            .AppendPathSegment(to);
        if (dateOverride != null) {
            firstUrl = firstUrl.SetQueryParam("DepartureDate", $"{dateOverride:d.MM.yyyy}");
        }
        firstUrl = firstUrl.SetQueryParam("OrderingTypeId", "0");
        firstUrl = firstUrl.SetQueryParam("TimeSelectionId", "0");
        firstUrl = firstUrl.SetQueryParam("MinutesInDay", "0");
        firstUrl = firstUrl.SetQueryParam("ConnectionsTypeId", "1");
        firstUrl = firstUrl.SetQueryParam("BetweenTrainsMinimumMinutes", "5");
        firstUrl = firstUrl.SetQueryParam("ChangeStationName", "");

        // The first page only provides the search form; its named inputs are
        // posted back to obtain the actual results.
        var firstResponse = await httpClient.GetStringAsync(firstUrl);
        var firstDocument = await asContext.OpenAsync(req => req.Content(firstResponse));
        var firstForm = firstDocument.GetElementById("form-search")!;

        var firstResult = firstForm
            .QuerySelectorAll<IHtmlInputElement>("input")
            .Where(elem => elem.Name != null)
            .ToDictionary(elem => elem.Name!, elem => elem.Value);

        var secondUrl = "".AppendPathSegments("Itineraries", "GetItineraries");
        var secondResponse = await httpClient.PostAsync(
            secondUrl,
#pragma warning disable CS8620
            new FormUrlEncodedContent(firstResult)
#pragma warning restore CS8620
        );
        var secondResponseContent = await secondResponse.Content.ReadAsStringAsync();
        var secondDocument = await asContext.OpenAsync(
            req => req.Content(secondResponseContent)
        );

        var (itineraryInfoDiv, _) = secondDocument
            .QuerySelectorAll("body > div");

        if (itineraryInfoDiv == null) {
            return null;
        }

        var itinerariesLi = secondDocument
            .QuerySelectorAll("body > ul > li");
        var itineraries = new List<IItinerary>();
        // One reference "now" for the whole result, used to guess the year of
        // the year-less dates on the page.
        var now = LocalDateTime.FromDateTime(DateTime.Now);
        foreach (var itineraryLi in itinerariesLi) {
            var itinerary = new Itinerary();

            var cardDivs = itineraryLi.QuerySelectorAll(":scope > div > div > div > div");
            var detailsDivs = cardDivs.Last()
                .QuerySelectorAll(":scope > div > div")[1]
                .QuerySelectorAll(":scope > div");
            var trainItineraryAndDetailsLis = detailsDivs[0]
                .QuerySelectorAll(":scope > ul > li");
            var stations = new List<string>();
            var details = new List<ItineraryTrain>();
            // The list alternates: station, train detail, station, train detail, ..., station.
            foreach (var (idx, li) in trainItineraryAndDetailsLis.Select((li, idx) => (idx, li))) {
                if (idx % 2 == 0) {
                    // Station
                    stations.Add(
                        li
                            .QuerySelectorAll(":scope > div > div > div > div")[1]
                            .Text()
                            .WithCollapsedSpaces()
                    );
                }
                else {
                    // Detail
                    var detailColumns = li.QuerySelectorAll(":scope > div > div");
                    var leftSideDivs = detailColumns[0].QuerySelectorAll(":scope > div");

                    var departureDateText = leftSideDivs[0]
                        .QuerySelectorAll(":scope > div")[1]
                        .Text()
                        .WithCollapsedSpaces();
                    var departureDate = ParseDepArrDate(departureDateText, now);

                    var arrivalDateText = leftSideDivs[3]
                        .QuerySelectorAll(":scope > div")[1]
                        .Text()
                        .WithCollapsedSpaces();
                    var arrivalDate = ParseDepArrDate(arrivalDateText, now);

                    var rightSideDivs = detailColumns[1].QuerySelectorAll(":scope > div > div");
                    var kmRankNumberText = rightSideDivs[0]
                        .QuerySelectorAll(":scope > div > div")[0]
                        .Text()
                        .WithCollapsedSpaces();
                    var kmRankNumberMatch = KmTrainRankNoRegex.Match(kmRankNumberText);

                    var operatorText = rightSideDivs[0]
                        .QuerySelectorAll(":scope > div > div")[1]
                        .Text()
                        .WithCollapsedSpaces();
                    var operatorMatch = OperatorRegex.Match(operatorText);

                    var train = new ItineraryTrain {
                        ArrivalDate = BucharestTz.AtLeniently(arrivalDate).ToDateTimeOffset(),
                        DepartureDate = BucharestTz.AtLeniently(departureDate).ToDateTimeOffset(),
                        Km = int.Parse(kmRankNumberMatch.Groups[1].Value),
                        TrainRank = kmRankNumberMatch.Groups[2].Value,
                        TrainNumber = kmRankNumberMatch.Groups[3].Value,
                        Operator = operatorMatch.Groups[1].Value,
                    };

                    // Every second div holds an intermediate stop name.
                    foreach (var div in leftSideDivs[2]
                                 .QuerySelectorAll(":scope > div")
                                 .Where((_, i) => i % 2 != 0)) {
                        var text = div.Text().WithCollapsedSpaces();
                        // Placeholder text shown when there are no intermediate stations.
                        if (text == "Nu sunt stații intermediare.") continue;
                        train.AddIntermediateStop(text);
                    }

                    details.Add(train);
                }
            }
            // Each train detail runs between two consecutive stations of the list.
            foreach (var ((iFrom, iTo), detail) in stations.Zip(stations.Skip(1)).Zip(details)) {
                detail.From = iFrom;
                detail.To = iTo;
                itinerary.AddTrain(detail);
            }

            itineraries.Add(itinerary);
        }

        return itineraries;
    }
}

228
scraper/src/Scrapers/Station.cs

@ -1,228 +0,0 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net;
using System.Net.Http;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using AngleSharp;
using AngleSharp.Dom;
using AngleSharp.Html.Dom;
using Flurl;
using InfoferScraper.Models.Station;
using NodaTime;
using NodaTime.Extensions;
namespace InfoferScraper.Scrapers {
/// <summary>
/// Scrapes the departures and arrivals boards of a station from the Infofer site
/// rooted at <see cref="BaseUrl"/> and parses the returned HTML.
/// </summary>
public class StationScraper {
// Page heading: group 1 is the station name, group 2 the dotted date that follows " în ".
private static readonly Regex StationInfoRegex = new($@"^([{Utils.RoLetters}.0-9 ]+)\sîn\s([0-9.]+)$");
// Stopping-time text: group 1 is the "necunoscută (stație terminus)" literal; otherwise
// group 2 = amount, group 3 = unit ("min" or "sec"), group 4 = a clock time (H:mm).
private static readonly Regex StoppingTimeRegex = new(
@"^(necunoscută \(stație terminus\))|(?:([0-9]+) (min|sec) \((?:începând cu|până la) ([0-9]{1,2}:[0-9]{2})\))$"
);
// Status text: either "la timp" or a signed minute count (group 1); group 2 is an optional trailing '*'.
private static readonly Regex StatusRegex = new(
@"^(?:la timp|([+-]?[0-9]+) min \((?:întârziere|mai devreme)\))(\*?)$"
);
// Platform text "linia X": group 1 is the platform identifier.
private static readonly Regex PlatformRegex = new(@"^linia\s([A-Za-z0-9]+)$");
// "Date=dd?MM?yyyy" inside a train link; groups 1-3 are day, month, year.
// NOTE(review): the separators are unescaped '.', so any character is accepted there.
private static readonly Regex TrainUrlDateRegex = new(@"Date=([0-9]{2}).([0-9]{2}).([0-9]{4})");
private static readonly DateTimeZone BucharestTz = DateTimeZoneProviders.Tzdb["Europe/Bucharest"];
private const string BaseUrl = "https://mersultrenurilor.infofer.ro/ro-RO/";
private readonly CookieContainer cookieContainer = new();
private readonly HttpClient httpClient;
/// <summary>
/// Creates a scraper whose HTTP client uses <paramref name="httpClientHandler"/>, or a new handler
/// when none is given. In both cases the handler is set to use this instance's cookie container.
/// </summary>
/// <param name="httpClientHandler">Optional handler; its cookie settings are overwritten.</param>
public StationScraper(HttpClientHandler? httpClientHandler = null) {
if (httpClientHandler == null) {
httpClientHandler = new HttpClientHandler {
CookieContainer = cookieContainer,
UseCookies = true,
};
}
else {
httpClientHandler.CookieContainer = cookieContainer;
httpClientHandler.UseCookies = true;
}
httpClient = new HttpClient(httpClientHandler) {
BaseAddress = new Uri(BaseUrl),
DefaultRequestVersion = new Version(2, 0),
};
}
/// <summary>
/// Fetches the station page, re-submits its search form and parses the resulting
/// departures and arrivals lists.
/// </summary>
/// <param name="stationName">Station name; Romanian letters are transliterated and whitespace becomes '-' in the URL.</param>
/// <param name="date">Optional date, converted to the Europe/Bucharest zone and sent as the "Date" query parameter.</param>
/// <returns>The populated station scrape result.</returns>
public async Task<IStationScrapeResult> Scrape(string stationName, DateTimeOffset? date = null) {
// Re-express the requested moment in Bucharest local time before formatting it.
var dateInstant = date?.ToInstant().InZone(BucharestTz);
date = dateInstant?.ToDateTimeOffset();
stationName = stationName.RoLettersToEn();
var result = new StationScrapeResult();
var asConfig = Configuration.Default;
var asContext = BrowsingContext.New(asConfig);
// Step 1: GET "Statie/<name>" (relative to BaseUrl), optionally with ?Date=d.MM.yyyy.
var firstUrl = "Statie"
.AppendPathSegment(Regex.Replace(stationName, @"\s", "-"));
if (date != null) {
firstUrl = firstUrl.SetQueryParam("Date", $"{date:d.MM.yyyy}");
}
var firstResponse = await httpClient.GetStringAsync(firstUrl);
var firstDocument = await asContext.OpenAsync(req => req.Content(firstResponse));
// Collect every named <input> of the "form-search" form as name -> value pairs.
var firstForm = firstDocument.GetElementById("form-search")!;
var firstResult = firstForm
.QuerySelectorAll<IHtmlInputElement>("input")
.Where(elem => elem.Name != null)
.ToDictionary(elem => elem.Name!, elem => elem.Value);
// Step 2: POST those form values to "Stations/StationsResult" and parse the response HTML.
var secondUrl = "".AppendPathSegments("Stations", "StationsResult");
var secondResponse = await httpClient.PostAsync(
secondUrl,
#pragma warning disable CS8620
new FormUrlEncodedContent(firstResult)
#pragma warning restore CS8620
);
var secondResponseContent = await secondResponse.Content.ReadAsStringAsync();
var secondDocument = await asContext.OpenAsync(
req => req.Content(secondResponseContent)
);
// Top-level <div>s of the body, by position: 1st = station info, 3rd = departures, 4th = arrivals.
var (stationInfoDiv, (_, (departuresDiv, (arrivalsDiv, _)))) = secondDocument
.QuerySelectorAll("body > div");
// The <h2> heading yields the station name and the date text (regex groups 1 and 2).
(result.StationName, (result.Date, _)) = (StationInfoRegex.Match(
stationInfoDiv
.QuerySelector(":scope > h2")!
.Text()
.WithCollapsedSpaces()
).Groups as IEnumerable<Group>).Skip(1).Select(group => group.Value);
var (dateDay, (dateMonth, (dateYear, _))) = result.Date.Split('.').Select(int.Parse);
// Parses every <li> of one list (departures or arrivals); `adder` is handed a callback
// that fills in a single StationArrDep entry.
void ParseArrDepList(IElement element, Action<Action<StationArrDep>> adder) {
// A fresh sequencer per list: times that go backwards roll over to the next day.
Utils.DateTimeSequencer dtSeq = new(dateYear, dateMonth, dateDay);
if (element.QuerySelector(":scope > div > ul") == null) return;
foreach (var trainElement in element.QuerySelectorAll(":scope > div > ul > li")) {
adder(arrDep => {
var divs = trainElement.QuerySelectorAll(":scope > div");
var dataDiv = divs[0];
// The second child <div>, when present, carries delay/platform status.
var statusDiv = divs.Length >= 2 ? divs[1] : null;
var (dataMainDiv, (dataDetailsDiv, _)) = dataDiv
.QuerySelectorAll(":scope > div");
var (timeDiv, (destDiv, (trainDiv, _))) = dataMainDiv
.QuerySelectorAll(":scope > div");
var (operatorDiv, (routeDiv, (stoppingTimeDiv, _))) = dataDetailsDiv
.QuerySelectorAll(":scope > div > div");
// Scheduled time "H:mm", resolved to a full date by the sequencer and interpreted in Bucharest time.
var timeResult = timeDiv
.QuerySelectorAll(":scope > div > div > div")[1]
.Text()
.WithCollapsedSpaces();
var (stHr, (stMin, _)) = timeResult.Split(':').Select(int.Parse);
arrDep.Time = BucharestTz.AtLeniently(
dtSeq.Next(stHr, stMin).ToLocalDateTime()
).ToDateTimeOffset();
// ReSharper disable once UnusedVariable // stOppositeTime: might be useful in the future
var (unknownSt, (st, (minsec, (stOppositeTime, _)))) = (StoppingTimeRegex.Match(
stoppingTimeDiv.QuerySelectorAll(":scope > div > div")[1]
.Text()
.WithCollapsedSpaces()
).Groups as IEnumerable<Group>).Skip(1).Select(group => group.Value);
// NOTE(review): if the regex does not match, the deconstruction presumably yields null here
// and `.Length` would throw - confirm whether unmatched text can occur.
if (unknownSt.Length == 0 && st.Length > 0) {
// Stopping time is stored in seconds; minute values are converted.
arrDep.StoppingTime = int.Parse(st);
if (minsec == "min") {
arrDep.StoppingTime *= 60;
}
}
arrDep.ModifyableTrain.Rank = trainDiv
.QuerySelectorAll(":scope > div > div > div")[1]
.QuerySelector(":scope > span")!
.Text()
.WithCollapsedSpaces();
arrDep.ModifyableTrain.Number = trainDiv
.QuerySelectorAll(":scope > div > div > div")[1]
.QuerySelector(":scope > a")!
.Text()
.WithCollapsedSpaces();
// The href is prefixed with a dummy host only so that Uri can parse it and expose its query string.
var trainUri = new Uri(
"http://localhost" + trainDiv
.QuerySelectorAll(":scope > div > div > div")[1]
.QuerySelector(":scope > a")!
.GetAttribute("href")!
);
var (trainDepDay, (trainDepMonth, (trainDepYear, _))) = TrainUrlDateRegex
.Match(trainUri.Query)
.Groups
.Values
.Skip(1)
.Select(g => int.Parse(g.Value));
// Departure date = start of that day in Bucharest, converted to UTC.
arrDep.ModifyableTrain.DepartureDate = BucharestTz
.AtStartOfDay(new(trainDepYear, trainDepMonth, trainDepDay))
.ToDateTimeOffset()
.ToUniversalTime();
arrDep.ModifyableTrain.Terminus = destDiv
.QuerySelectorAll(":scope > div > div > div")[1]
.Text()
.WithCollapsedSpaces();
arrDep.ModifyableTrain.Operator = operatorDiv
.QuerySelectorAll(":scope > div > div")[1]
.Text()
.WithCollapsedSpaces();
// The route text lists stations separated by " - ".
foreach (var station in routeDiv.QuerySelectorAll(":scope > div > div")[1]
.Text()
.WithCollapsedSpaces()
.Split(" - ")) {
arrDep.ModifyableTrain.AddRouteStation(station);
}
if (statusDiv == null) {
return;
}
var statusDivComponents = statusDiv
.QuerySelectorAll(":scope > div")[0]
.QuerySelectorAll(":scope > div");
var delayDiv = statusDivComponents[0];
var (delayMin, (approx, _)) = (StatusRegex.Match(
delayDiv
.Text()
.WithCollapsedSpaces()
).Groups as IEnumerable<Group>).Skip(1).Select(group => group.Value);
// delayMin is null only when the deconstruction found no capture groups at all -
// presumably the case when StatusRegex failed to match (e.g. the "anulat" text).
if (delayMin is null && delayDiv.Text().WithCollapsedSpaces() == "anulat") {
arrDep.ModifyableStatus.Cancelled = true;
}
else if (delayMin is null) {
throw new Exception($"Unexpected delayDiv value: {delayDiv.Text().WithCollapsedSpaces()}");
}
else {
// A trailing '*' (group 2) marks the value as not "real"; an empty minutes group ("la timp") means no delay.
arrDep.ModifyableStatus.Real = string.IsNullOrEmpty(approx);
arrDep.ModifyableStatus.Delay = delayMin.Length == 0 ? 0 : int.Parse(delayMin);
}
// The platform is optional: second component of the status block.
if (statusDivComponents.Length < 2) return;
var platformDiv = statusDivComponents[1];
arrDep.ModifyableStatus.Platform = PlatformRegex.Match(platformDiv.Text().WithCollapsedSpaces())
.Groups[1].Value;
});
}
}
ParseArrDepList(departuresDiv, result.AddNewStationDeparture);
ParseArrDepList(arrivalsDiv, result.AddNewStationArrival);
return result;
}
}
}

261
scraper/src/Scrapers/Train.cs

@ -1,261 +0,0 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net;
using System.Net.Http;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using AngleSharp;
using AngleSharp.Dom;
using AngleSharp.Html.Dom;
using Flurl;
using InfoferScraper.Models.Train;
using NodaTime;
using NodaTime.Extensions;
using scraper.Exceptions;
namespace InfoferScraper.Scrapers {
/// <summary>
/// Scrapes a train's itinerary (groups, stops, delays and notes) from the Infofer site
/// rooted at <see cref="BaseUrl"/> and parses the returned HTML.
/// </summary>
public class TrainScraper {
private const string BaseUrl = "https://mersultrenurilor.infofer.ro/ro-RO/";
// Page heading "<RANK> <number> în <date>": groups 1-3 are rank, number and dotted date.
private static readonly Regex TrainInfoRegex = new(@"^([A-Z-]+)\s([0-9]+)\sîn\s([0-9.]+)$");
// "Operat de <operator>": group 1 is the operator name.
private static readonly Regex OperatorRegex = new(@"^Operat\sde\s(.+)$");
// "Parcurs tren <from>-<to>" (hyphen or en dash): groups 1 and 2 are the route ends.
private static readonly Regex RouteRegex =
new(@$"^Parcurs\stren\s([{Utils.RoLetters} ]+)[-–]([{Utils.RoLetters}\s]+)$");
// Status line: group 1 = minutes (empty for "Fără"), group 2 = late/early wording,
// group 3 = passing/arrival/departure wording, group 4 = station name.
private static readonly Regex SlRegex =
new(
@"^(?:Fără|([0-9]+)\smin)\s(întârziere|mai\sdevreme)\sla\s(trecerea\sfără\soprire\sprin|sosirea\sîn|plecarea\sdin)\s(.+)\.$");
// Keyed by the first letter of SlRegex group 3 ("trecerea", "sosirea", "plecarea").
private static readonly Dictionary<char, StatusKind> SlStateMap = new() {
{ 't', StatusKind.Passing },
{ 's', StatusKind.Arrival },
{ 'p', StatusKind.Departure },
};
private static readonly Regex KmRegex = new(@"^km\s([0-9]+)$");
private static readonly Regex StoppingTimeRegex = new(@"^([0-9]+)\s(min|sec)\soprire$");
private static readonly Regex PlatformRegex = new(@"^linia\s(.+)$");
// Per-stop status: group 1 = "la timp", group 2 = signed minutes, group 3 = optional trailing '*'.
private static readonly Regex StationArrdepStatusRegex =
new(@"^(?:(la timp)|(?:((?:\+|-)[0-9]+) min \((?:(?:întârziere)|(?:mai devreme))\)))(\*?)$");
// Note patterns attached to a stop.
private static readonly Regex TrainNumberChangeNoteRegex =
new(@"^Trenul își schimbă numărul în\s([A-Z-]+)\s([0-9]+)$");
private static readonly Regex DepartsAsNoteRegex =
new(@"^Trenul pleacă cu numărul\s([A-Z-]+)\s([0-9]+)\sîn\s([0-9]{2}).([0-9]{2}).([0-9]{4})$");
private static readonly Regex ReceivingWagonsNoteRegex =
new(@"^Trenul primește vagoane de la\s(.+)\.$");
private static readonly Regex DetachingWagonsNoteRegex =
new(@"^Trenul detașează vagoane pentru stația\s(.+)\.$");
private static readonly DateTimeZone BucharestTz = DateTimeZoneProviders.Tzdb["Europe/Bucharest"];
private readonly CookieContainer cookieContainer = new();
private readonly HttpClient httpClient;
/// <summary>
/// Creates a scraper whose HTTP client uses <paramref name="httpClientHandler"/>, or a new handler
/// when none is given. In both cases the handler is set to use this instance's cookie container.
/// </summary>
/// <param name="httpClientHandler">Optional handler; its cookie settings are overwritten.</param>
public TrainScraper(HttpClientHandler? httpClientHandler = null)
{
if (httpClientHandler == null) {
httpClientHandler = new HttpClientHandler {
CookieContainer = cookieContainer,
UseCookies = true,
};
}
else {
httpClientHandler.CookieContainer = cookieContainer;
httpClientHandler.UseCookies = true;
}
httpClient = new HttpClient(httpClientHandler) {
BaseAddress = new Uri(BaseUrl),
DefaultRequestVersion = new Version(2, 0),
};
}
/// <summary>
/// Fetches the train page, re-submits its search form and parses the resulting itinerary.
/// </summary>
/// <param name="trainNumber">Train number, used as a URL path segment.</param>
/// <param name="dateOverride">Optional date, converted to the Europe/Bucharest zone and sent as the "Date" query parameter.</param>
/// <returns>The parsed result, or null when the response has no top-level body div.</returns>
/// <exception cref="TrainNotThisDayException">Thrown when the response has fewer than four top-level body divs.</exception>
public async Task<ITrainScrapeResult?> Scrape(string trainNumber, DateTimeOffset? dateOverride = null) {
// Re-express the requested moment in Bucharest local time before formatting it.
var dateOverrideInstant = dateOverride?.ToInstant().InZone(BucharestTz);
dateOverride = dateOverrideInstant?.ToDateTimeOffset();
TrainScrapeResult result = new();
var asConfig = Configuration.Default;
var asContext = BrowsingContext.New(asConfig);
// Step 1: GET "Tren/<number>" (relative to BaseUrl), optionally with ?Date=d.MM.yyyy.
var firstUrl = "Tren"
.AppendPathSegment(trainNumber);
if (dateOverride != null) {
firstUrl = firstUrl.SetQueryParam("Date", $"{dateOverride:d.MM.yyyy}");
}
var firstResponse = await httpClient.GetStringAsync(firstUrl);
var firstDocument = await asContext.OpenAsync(req => req.Content(firstResponse));
// Collect every named <input> of the "form-search" form as name -> value pairs.
var firstForm = firstDocument.GetElementById("form-search")!;
var firstResult = firstForm
.QuerySelectorAll<IHtmlInputElement>("input")
.Where(elem => elem.Name != null)
.ToDictionary(elem => elem.Name!, elem => elem.Value);
// Step 2: POST those form values to "Trains/TrainsResult" and parse the response HTML.
var secondUrl = "".AppendPathSegments("Trains", "TrainsResult");
var secondResponse = await httpClient.PostAsync(
secondUrl,
#pragma warning disable CS8620
new FormUrlEncodedContent(firstResult)
#pragma warning restore CS8620
);
var secondResponseContent = await secondResponse.Content.ReadAsStringAsync();
var secondDocument = await asContext.OpenAsync(
req => req.Content(secondResponseContent)
);
// Top-level <div>s of the body, by position: 1st = train info, 4th = results.
// Missing positions deconstruct to null.
var (trainInfoDiv, (_, (_, (resultsDiv, _)))) = secondDocument
.QuerySelectorAll("body > div");
if (trainInfoDiv == null) {
return null;
}
if (resultsDiv == null) {
throw new TrainNotThisDayException();
}
trainInfoDiv = trainInfoDiv.QuerySelectorAll(":scope > div > div").First();
(result.Rank, (result.Number, (result.Date, _))) = (TrainInfoRegex.Match(
trainInfoDiv.QuerySelector(":scope > h2")!.Text().WithCollapsedSpaces()
).Groups as IEnumerable<Group>).Select(group => group.Value).Skip(1);
// The scraped date text is "day.month.year".
var (scrapedDateD, (scrapedDateM, (scrapedDateY, _))) = result.Date
.Split('.')
.Select(int.Parse);
var date = new DateTime(scrapedDateY, scrapedDateM, scrapedDateD);
result.Operator = (OperatorRegex.Match(
trainInfoDiv.QuerySelector(":scope > p")!.Text().WithCollapsedSpaces()
).Groups as IEnumerable<Group>).Skip(1).First().Value;
// Each child <div> of the results block becomes one train group.
foreach (var groupDiv in resultsDiv.QuerySelectorAll(":scope > div")) {
result.AddTrainGroup(group => {
var statusDiv = groupDiv.QuerySelectorAll(":scope > div").First();
var routeText = statusDiv.QuerySelector(":scope > h4")!.Text().WithCollapsedSpaces();
group.ConfigureRoute(route => {
(route.From, (route.To, _)) = (RouteRegex.Match(routeText).Groups as IEnumerable<Group>).Skip(1)
.Select(group => group.Value);
});
// Best effort: any failure while parsing the status line leaves the group without a status.
try {
var statusLineMatch =
SlRegex.Match(statusDiv.QuerySelector(":scope > div")!.Text().WithCollapsedSpaces());
var (slmDelay, (slmLate, (slmArrival, (slmStation, _)))) =
(statusLineMatch.Groups as IEnumerable<Group>).Skip(1).Select(group => group.Value);
group.MakeStatus(status => {
// No minutes captured means no delay; "mai devreme" (early) is stored as a negative delay.
status.Delay = string.IsNullOrEmpty(slmDelay) ? 0 :
slmLate == "întârziere" ? int.Parse(slmDelay) : -int.Parse(slmDelay);
status.Station = slmStation;
status.State = SlStateMap[slmArrival[0]];
});
}
catch {
// ignored
}
// One sequencer per group: times that go backwards roll over to the next day.
Utils.DateTimeSequencer dtSeq = new(date.Year, date.Month, date.Day);
var stations = statusDiv.QuerySelectorAll(":scope > ul > li");
foreach (var station in stations) {
group.AddStopDescription(stopDescription => {
// Columns of a stop row: left = arrival, middle = details and notes, right = departure.
var (left, (middle, (right, _))) = station
.QuerySelectorAll(":scope > div > div");
var (stopDetails, (stopNotes, _)) = middle
.QuerySelectorAll(":scope > div > div > div");
stopDescription.Name = stopDetails
.QuerySelectorAll(":scope > div")[0]
.Text()
.WithCollapsedSpaces();
// LinkName is the last path segment of the station link's href.
stopDescription.LinkName = new Flurl.Url(stopDetails
.QuerySelectorAll(":scope > div")[0]
.QuerySelector(":scope a")
.Attributes["href"]
.Value).PathSegments.Last();
var scrapedKm = stopDetails
.QuerySelectorAll(":scope > div")[1]
.Text()
.WithCollapsedSpaces();
stopDescription.Km = int.Parse(
(KmRegex.Match(scrapedKm).Groups as IEnumerable<Group>).Skip(1).First().Value
);
var scrapedStoppingTime = stopDetails
.QuerySelectorAll(":scope > div")[2]
.Text()
.WithCollapsedSpaces();
if (!string.IsNullOrEmpty(scrapedStoppingTime)) {
var (stValue, (stMinsec, _)) =
(StoppingTimeRegex.Match(scrapedStoppingTime).Groups as IEnumerable<Group>)
.Skip(1)
.Select(group => group.Value);
// Stopping time is stored in seconds; minute values are converted.
stopDescription.StoppingTime = int.Parse(stValue);
if (stMinsec == "min") stopDescription.StoppingTime *= 60;
}
var scrapedPlatform = stopDetails
.QuerySelectorAll(":scope > div")[3]
.Text()
.WithCollapsedSpaces();
if (!string.IsNullOrEmpty(scrapedPlatform))
stopDescription.Platform = PlatformRegex.Match(scrapedPlatform).Groups[1].Value;
// Fills one arrival/departure entry from its column; throws OperationCanceledException
// when the column is empty so the caller can skip that entry.
void ScrapeTime(IElement element, ref TrainStopArrDep arrDep) {
var parts = element.QuerySelectorAll(":scope > div > div > div");
if (parts.Length == 0) throw new OperationCanceledException();
var time = parts[0];
var scrapedTime = time.Text().WithCollapsedSpaces();
var (stHour, (stMin, _)) = scrapedTime.Split(':').Select(int.Parse);
arrDep.ScheduleTime = BucharestTz.AtLeniently(dtSeq.Next(stHour, stMin).ToLocalDateTime())
.ToDateTimeOffset();
// The second part, when present, holds the delay status.
if (parts.Length < 2) return;
var statusElement = parts[1];
var (onTime, (delay, (approx, _))) = (StationArrdepStatusRegex.Match(
statusElement.Text().WithCollapsedSpaces(replaceWith: " ")
).Groups as IEnumerable<Group>).Skip(1).Select(group => group.Value);
arrDep.MakeStatus(status => {
// delay is null only when the deconstruction found no capture groups -
// presumably when the regex did not match; that case is treated as cancelled.
if (string.IsNullOrEmpty(onTime) && delay == null) {
status.Cancelled = true;
}
else {
status.Delay = string.IsNullOrEmpty(onTime) ? int.Parse(delay) : 0;
}
// A trailing '*' (group 3) marks the value as not "real".
status.Real = string.IsNullOrEmpty(approx);
});
}
try {
stopDescription.MakeArrival(arrival => { ScrapeTime(left, ref arrival); });
}
catch (OperationCanceledException) { }
try {
stopDescription.MakeDeparture(departure => { ScrapeTime(right, ref departure); });
}
catch (OperationCanceledException) { }
// Classify each note by the first pattern that matches; unmatched notes are dropped.
foreach (var noteDiv in stopNotes.QuerySelectorAll(":scope > div > div")) {
var noteText = noteDiv.Text().WithCollapsedSpaces();
Match trainNumberChangeMatch, departsAsMatch, detachingWagons, receivingWagons;
if ((trainNumberChangeMatch = TrainNumberChangeNoteRegex.Match(noteText)).Success) {
stopDescription.AddTrainNumberChangeNote(trainNumberChangeMatch.Groups[1].Value, trainNumberChangeMatch.Groups[2].Value);
}
else if ((departsAsMatch = DepartsAsNoteRegex.Match(noteText)).Success) {
var groups = departsAsMatch.Groups;
// Groups 3-5 are day, month, year; the date is taken at 00:00 Bucharest time.
var departureDate = BucharestTz.AtStrictly(new(int.Parse(groups[5].Value), int.Parse(groups[4].Value), int.Parse(groups[3].Value), 0, 0));
stopDescription.AddDepartsAsNote(groups[1].Value, groups[2].Value, departureDate.ToDateTimeOffset());
}
else if ((detachingWagons = DetachingWagonsNoteRegex.Match(noteText)).Success) {
stopDescription.AddDetachingWagonsNote(detachingWagons.Groups[1].Value);
}
else if ((receivingWagons = ReceivingWagonsNoteRegex.Match(noteText)).Success) {
stopDescription.AddReceivingWagonsNote(receivingWagons.Groups[1].Value);
}
}
});
}
});
}
return result;
}
}
} // namespace

25
scraper/src/Utils/DateTimeSequencer.cs

@ -1,25 +0,0 @@
using System;
namespace InfoferScraper {
    public static partial class Utils {
        /// <summary>
        /// Turns a sequence of wall-clock times into date-times that never go backwards:
        /// whenever the next time of day is earlier than the previous result, it is
        /// placed on the following day.
        /// </summary>
        public class DateTimeSequencer {
            private DateTime _current;

            /// <summary>Starts the sequence at midnight of the given date.</summary>
            public DateTimeSequencer(int year, int month, int day)
                : this(new DateTime(year, month, day)) { }

            /// <summary>Starts the sequence at <paramref name="startingDateTime"/>.</summary>
            public DateTimeSequencer(DateTime startingDateTime) {
                // Back off by one second so a first time equal to the start is kept on the same day.
                _current = startingDateTime.AddSeconds(-1);
            }

            /// <summary>
            /// Returns the given time of day on the current day, or on the next day when
            /// that would be earlier than the previously returned value.
            /// </summary>
            public DateTime Next(int hour, int minute = 0, int second = 0) {
                var candidate = new DateTime(_current.Year, _current.Month, _current.Day, hour, minute, second);
                if (candidate < _current) {
                    candidate = candidate.AddDays(1);
                }

                _current = candidate;
                return candidate;
            }
        }
    }
}

18
scraper/src/Utils/DeconstructIEnumerable.cs

@ -1,18 +0,0 @@
using System.Collections.Generic;
using System.Diagnostics;
namespace InfoferScraper {
    public static partial class Utils {
        /// <summary>
        /// Enables <c>var (head, tail) = sequence;</c> for any sequence.
        /// <paramref name="first"/> receives the first element, or the default value when the
        /// sequence is empty; <paramref name="rest"/> lazily yields the remaining elements
        /// from the same enumerator.
        /// </summary>
        [DebuggerStepThrough]
        public static void Deconstruct<T>(this IEnumerable<T> enumerable, out T? first, out IEnumerable<T> rest) {
            var cursor = enumerable.GetEnumerator();
            if (cursor.MoveNext()) {
                first = cursor.Current;
            }
            else {
                first = default;
            }

            // The tail continues from wherever the shared enumerator currently stands.
            rest = cursor.AsEnumerable();
        }

        /// <summary>Yields whatever elements are left in <paramref name="enumerator"/>.</summary>
        [DebuggerStepThrough]
        private static IEnumerable<T> AsEnumerable<T>(this IEnumerator<T> enumerator) {
            while (enumerator.MoveNext()) {
                yield return enumerator.Current;
            }
        }
    }
}

5
scraper/src/Utils/RoLetters.cs

@ -1,5 +0,0 @@
namespace InfoferScraper {
public static partial class Utils {
/// <summary>
/// Regex character-class fragment (without the surrounding brackets) covering ASCII
/// letters plus the Romanian letters with diacritics, in both cases.
/// </summary>
public const string RoLetters = @"A-Za-zăâîșțĂÂÎȚȘ";
}
}

23
scraper/src/Utils/RoLettersToEn.cs

@ -1,23 +0,0 @@
using System.Collections.Generic;
using System.Linq;
namespace InfoferScraper;
public static partial class Utils {
    /// <summary>
    /// Maps each Romanian letter with diacritics to its plain ASCII counterpart.
    /// </summary>
    private static readonly Dictionary<char, char> RoToEn = new() {
        { 'ă', 'a' },
        { 'Ă', 'A' },
        { 'â', 'a' },
        { 'Â', 'A' },
        { 'î', 'i' },
        { 'Î', 'I' },
        { 'ș', 's' },
        { 'Ș', 'S' },
        { 'ț', 't' },
        { 'Ț', 'T' },
        // Legacy cedilla forms (s/t with cedilla), still produced by older keyboard
        // layouts and fonts in place of the comma-below letters above.
        { '\u015F', 's' },
        { '\u015E', 'S' },
        { '\u0163', 't' },
        { '\u0162', 'T' },
    };

    /// <summary>
    /// Returns <paramref name="str"/> with every Romanian diacritic letter replaced by its
    /// ASCII base letter; all other characters are kept unchanged.
    /// </summary>
    public static string RoLettersToEn(this string str) {
        return string.Concat(str.Select(letter => RoToEn.GetValueOrDefault(letter, letter)));
    }
}

12
scraper/src/Utils/WithCollapsedSpaces.cs

@ -1,12 +0,0 @@
using System.Text.RegularExpressions;
namespace InfoferScraper {
    public static partial class Utils {
        // One whitespace character (captured) followed by any further whitespace.
        private static readonly Regex WhitespaceRegex = new(@"(\s)\s*");

        /// <summary>
        /// Collapses every run of whitespace in <paramref name="str"/> into
        /// <paramref name="replaceWith"/> (by default the first whitespace character of the run).
        /// </summary>
        /// <param name="str">Text to normalise.</param>
        /// <param name="trim">When true, the result is also trimmed at both ends.</param>
        /// <param name="replaceWith">Regex replacement pattern used for each run.</param>
        public static string WithCollapsedSpaces(this string str, bool trim = true, string replaceWith = "$1") {
            var result = WhitespaceRegex.Replace(str, replaceWith);
            if (trim) {
                result = result.Trim();
            }

            return result;
        }
    }
}

79
scraper/utils.py

@ -0,0 +1,79 @@
import re
from datetime import datetime, timedelta
from urllib.parse import urlencode, quote
# Whitespace code points, from:
# https://en.wikipedia.org/wiki/Whitespace_character#Unicode
ASCII_WHITESPACE = [
    '\u0009',  # HT; Character Tabulation
    '\u000a',  # LF
    '\u000b',  # VT; Line Tabulation
    '\u000c',  # FF; Form Feed
    '\u000d',  # CR
    '\u0020',  # Space
]
WHITESPACE = ASCII_WHITESPACE + [
    '\u0085',  # NEL; Next Line
    '\u00a0',  # No-break Space; &nbsp;
    '\u1680',  # Ogham Space Mark
    '\u2000',  # En Quad
    '\u2001',  # Em Quad
    '\u2002',  # En Space
    '\u2003',  # Em Space
    '\u2004',  # Three-per-em Space
    '\u2005',  # Four-per-em Space
    '\u2006',  # Six-per-em Space
    '\u2007',  # Figure Space
    '\u2008',  # Punctuation Space
    '\u2009',  # Thin Space
    '\u200A',  # Hair Space
    '\u2028',  # Line Separator
    '\u2029',  # Paragraph Separator
    '\u202f',  # Narrow No-break Space
    # Medium Mathematical Space is U+205F; U+205D is TRICOLON (punctuation),
    # which must not be treated as whitespace.
    '\u205f',  # Medium Mathematical Space
    '\u3000',  # Ideographic Space
]
# Matches one or more consecutive characters from WHITESPACE.
WHITESPACE_REGEX = re.compile(rf'[{"".join(WHITESPACE)}]+', flags=re.MULTILINE)
class DateTimeSequencer:
    """Turn successive wall-clock times into datetimes that never go backwards.

    Each call places the given time of day on the current day; when that would
    be earlier than the previously returned value, it is moved to the next day.
    """

    def __init__(self, year: int, month: int, day: int) -> None:
        # Start one second before midnight so that a first time of 00:00:00
        # still lands on the requested day.
        midnight = datetime(year, month, day, 0, 0, 0)
        self.current = midnight - timedelta(seconds=1)

    def __call__(self, hour: int, minute: int = 0, second: int = 0) -> datetime:
        previous = self.current
        candidate = datetime(
            previous.year, previous.month, previous.day, hour, minute, second
        )
        if candidate < previous:
            candidate = candidate + timedelta(days=1)
        self.current = candidate
        return candidate
def collapse_space(string: str) -> str:
    """Replace each whitespace run in *string* with one space and strip both ends."""
    single_spaced = WHITESPACE_REGEX.sub(' ', string)
    return single_spaced.strip()
def build_url(base: str, /, query: "dict | None" = None, **kwargs) -> str:
    """Fill the placeholders of *base* and append an optional query string.

    Args:
        base: URL template with ``str.format``-style named placeholders.
        query: Optional mapping encoded with ``urlencode`` and appended after
            a ``?``; nothing is appended when it is ``None`` or empty.
        **kwargs: Placeholder values; each is converted to ``str`` and
            percent-encoded with ``quote`` before substitution.

    Returns:
        The resulting URL string.
    """
    # ``query`` defaults to None rather than a shared mutable ``{}``.
    result = base.format(**{k: quote(str(v)) for k, v in kwargs.items()})
    if query:
        result += '?' + urlencode(query)
    return result
# Romanian letters with diacritics mapped to their plain ASCII base letters.
RO_TO_EN = {
    'ă': 'a',
    'Ă': 'A',
    'â': 'a',
    'Â': 'A',
    'î': 'i',
    'Î': 'I',
    'ș': 's',
    'Ș': 'S',
    'ț': 't',
    'Ț': 'T',
    # Legacy cedilla forms (s/t with cedilla), still produced by older
    # keyboard layouts and fonts in place of the comma-below letters above.
    '\u015f': 's',
    '\u015e': 'S',
    '\u0163': 't',
    '\u0162': 'T',
}
# Precomputed translation table so the replacement is a single C-level pass.
_RO_TO_EN_TABLE = str.maketrans(RO_TO_EN)


def ro_letters_to_en(string: str) -> str:
    """Return *string* with Romanian diacritic letters replaced by ASCII ones.

    Characters without an entry in ``RO_TO_EN`` are kept unchanged.
    """
    return string.translate(_RO_TO_EN_TABLE)

35
server/.vscode/launch.json vendored

@ -1,35 +0,0 @@
{
"version": "0.2.0",
"configurations": [
{
// Use IntelliSense to find out which attributes exist for C# debugging
// Use hover for the description of the existing attributes
// For further information visit https://github.com/OmniSharp/omnisharp-vscode/blob/master/debugger-launchjson.md
"name": ".NET Core Launch (web)",
"type": "coreclr",
"request": "launch",
"preLaunchTask": "build",
// If you have changed target frameworks, make sure to update the program path.
"program": "${workspaceFolder}/bin/Debug/net5.0/server.dll",
"args": [],
"cwd": "${workspaceFolder}",
"stopAtEntry": false,
// Enable launching a web browser when ASP.NET Core starts. For more information: https://aka.ms/VSCode-CS-LaunchJson-WebBrowser
"serverReadyAction": {
"action": "openExternally",
"pattern": "\\bNow listening on:\\s+(https?://\\S+)"
},
"env": {
"ASPNETCORE_ENVIRONMENT": "Development"
},
"sourceFileMap": {
"/Views": "${workspaceFolder}/Views"
}
},
{
"name": ".NET Core Attach",
"type": "coreclr",
"request": "attach"
}
]
}

42
server/.vscode/tasks.json vendored

@ -1,42 +0,0 @@
{
"version": "2.0.0",
"tasks": [
{
"label": "build",
"command": "dotnet",
"type": "process",
"args": [
"build",
"${workspaceFolder}/server.csproj",
"/property:GenerateFullPaths=true",
"/consoleloggerparameters:NoSummary"
],
"problemMatcher": "$msCompile"
},
{
"label": "publish",
"command": "dotnet",
"type": "process",
"args": [
"publish",
"${workspaceFolder}/server.csproj",
"/property:GenerateFullPaths=true",
"/consoleloggerparameters:NoSummary"
],
"problemMatcher": "$msCompile"
},
{
"label": "watch",
"command": "dotnet",
"type": "process",
"args": [
"watch",
"run",
"${workspaceFolder}/server.csproj",
"/property:GenerateFullPaths=true",
"/consoleloggerparameters:NoSummary"
],
"problemMatcher": "$msCompile"
}
]
}

73
server/Controllers/V1/TrainController.cs

@ -1,73 +0,0 @@
using System;
using System.Linq;
using System.Threading.Tasks;
using Microsoft.AspNetCore.Mvc;
using Server.Services.Interfaces;
namespace Server.Controllers.V1;
[ApiController]
[ApiExplorerSettings(GroupName = "v1")]
[Route("/[controller]")]
public class TrainController : Controller {
    private IDataManager DataManager { get; }

    public TrainController(IDataManager dataManager) => DataManager = dataManager;

    /// <summary>
    /// Returns today's data for a train in the V1 shape. Only the first train group
    /// is exposed, and schedule times are formatted as "HH:mm".
    /// </summary>
    [HttpGet("{trainNumber:int}")]
    public async Task<Models.V1.TrainScrapeResult> TrainInfo([FromRoute] int trainNumber) {
        var scraped = (await DataManager.FetchTrain(trainNumber.ToString(), DateTimeOffset.Now))!;
        var firstGroup = scraped.Groups[0];
        var groupStatus = firstGroup.Status;

        var stops = firstGroup.Stations
            .Select(stop => new Models.V1.TrainStopDescription {
                Arrival = stop.Arrival != null
                    ? new Models.V1.TrainStopArrDep {
                        ScheduleTime = stop.Arrival.ScheduleTime.ToString("HH:mm"),
                        Status = stop.Arrival.Status != null
                            ? new Models.V1.Status {
                                Delay = stop.Arrival.Status.Delay,
                                Real = stop.Arrival.Status.Real,
                            }
                            : null,
                    }
                    : null,
                Departure = stop.Departure != null
                    ? new Models.V1.TrainStopArrDep {
                        ScheduleTime = stop.Departure.ScheduleTime.ToString("HH:mm"),
                        Status = stop.Departure.Status != null
                            ? new Models.V1.Status {
                                Delay = stop.Departure.Status.Delay,
                                Real = stop.Departure.Status.Real,
                            }
                            : null,
                    }
                    : null,
                Km = stop.Km,
                Name = stop.Name,
                Platform = stop.Platform,
                StoppingTime = stop.StoppingTime,
            })
            .ToList();

        return new Models.V1.TrainScrapeResult {
            Date = scraped.Date,
            Number = scraped.Number,
            Operator = scraped.Operator,
            Rank = scraped.Rank,
            Route = {
                From = firstGroup.Route.From,
                To = firstGroup.Route.To,
            },
            Stations = stops,
            Status = groupStatus != null
                ? new Models.V1.TrainStatus {
                    Delay = groupStatus.Delay,
                    State = groupStatus.State,
                    Station = groupStatus.Station,
                }
                : null,
        };
    }
}

22
server/Controllers/V1/TrainsController.cs

@ -1,22 +0,0 @@
using System.Collections.Generic;
using System.Linq;
using Microsoft.AspNetCore.Mvc;
using Server.Services.Interfaces;
namespace Server.Controllers.V1;
[ApiController]
[ApiExplorerSettings(GroupName = "v1")]
[Route("/[controller]")]
public class TrainsController : Controller {
    private IDatabase Database { get; }

    public TrainsController(IDatabase database) => Database = database;

    /// <summary>Lists the numbers of all trains held in the database.</summary>
    [HttpGet("")]
    public ActionResult<IEnumerable<string>> ListTrains() {
        var trainNumbers = Database.Trains.Select(entry => entry.Number);
        return Ok(trainNumbers);
    }
}

50
server/Controllers/V2/StationController.cs

@ -1,50 +0,0 @@
using System;
using System.Linq;
using System.Threading.Tasks;
using Microsoft.AspNetCore.Mvc;
using Server.Services.Interfaces;
using Server.Models.V2;
namespace Server.Controllers.V2;
[ApiController]
[ApiExplorerSettings(GroupName = "v2")]
[Route("/v2/[controller]")]
public class StationController : Controller {
    private IDataManager DataManager { get; }

    public StationController(IDataManager dataManager) => DataManager = dataManager;

    /// <summary>
    /// Returns the current arrivals and departures of a station in the V2 shape.
    /// The scraped terminus is reported as Origin for arrivals and Destination for departures.
    /// </summary>
    [HttpGet("{stationName}")]
    public async Task<Models.V2.StationScrapeResult> StationInfo([FromRoute] string stationName) {
        var scraped = (await DataManager.FetchStation(stationName, DateTimeOffset.Now))!;

        var arrivals = scraped.Arrivals?.Select(entry => new StationArrival {
            Time = entry.Time,
            StoppingTime = entry.StoppingTime,
            Train = new StationArrivalTrain {
                Number = entry.Train.Number,
                Operator = entry.Train.Operator,
                Origin = entry.Train.Terminus,
                Rank = entry.Train.Rank,
                Route = entry.Train.Route.ToList(),
            },
        })?.ToList();

        var departures = scraped.Departures?.Select(entry => new StationDeparture {
            Time = entry.Time,
            StoppingTime = entry.StoppingTime,
            Train = new StationDepartureTrain {
                Number = entry.Train.Number,
                Operator = entry.Train.Operator,
                Destination = entry.Train.Terminus,
                Rank = entry.Train.Rank,
                Route = entry.Train.Route.ToList(),
            },
        })?.ToList();

        return new StationScrapeResult {
            Date = scraped.Date,
            StationName = scraped.StationName,
            Arrivals = arrivals,
            Departures = departures,
        };
    }
}

22
server/Controllers/V2/StationsController.cs

@ -1,22 +0,0 @@
using System.Collections.Generic;
using Microsoft.AspNetCore.Mvc;
using Server.Models.Database;
using Server.Services.Interfaces;
namespace Server.Controllers.V2;
[ApiController]
[ApiExplorerSettings(GroupName = "v2")]
[Route("/v2/[controller]")]
public class StationsController : Controller {
    private IDatabase Database { get; }

    public StationsController(IDatabase database) => Database = database;

    /// <summary>Lists all stations held in the database.</summary>
    [HttpGet("")]
    public ActionResult<IEnumerable<StationListing>> ListStations() {
        var stations = Database.Stations;
        return Ok(stations);
    }
}

77
server/Controllers/V2/TrainController.cs

@ -1,77 +0,0 @@
using System;
using System.Linq;
using System.Threading.Tasks;
using Microsoft.AspNetCore.Mvc;
using Server.Services.Interfaces;
namespace Server.Controllers.V2;
[ApiController]
[ApiExplorerSettings(GroupName = "v2")]
[Route("/v2/[controller]")]
public class TrainController : Controller {
    private IDataManager DataManager { get; }

    public TrainController(IDataManager dataManager) => DataManager = dataManager;

    /// <summary>
    /// Returns data for a train in the V2 shape. Only the first train group is exposed,
    /// and schedule times use the round-trip ("o") format.
    /// </summary>
    /// <param name="trainNumber">Train number taken from the route.</param>
    /// <param name="date">Optional date; defaults to now.</param>
    /// <param name="useYesterday">
    /// When one of "y", "yes", "t", "true", "1" (trimmed, case-insensitive) and no
    /// <paramref name="date"/> was given, yesterday is used instead of now.
    /// </param>
    [HttpGet("{trainNumber}")]
    public async Task<Models.V2.TrainScrapeResult> TrainInfo(
        [FromRoute] string trainNumber,
        [FromQuery] DateTimeOffset? date = null,
        [FromQuery] string? useYesterday = null
    ) {
        if (useYesterday != null) {
            var truthyValues = new string[] { "y", "yes", "t", "true", "1" };
            var normalized = useYesterday.Trim().ToLower();
            if (truthyValues.Contains(normalized)) {
                date ??= DateTimeOffset.Now.Subtract(TimeSpan.FromDays(1));
            }
        }

        var scraped = (await DataManager.FetchTrain(trainNumber, date ?? DateTimeOffset.Now))!;
        var firstGroup = scraped.Groups[0];
        var groupStatus = firstGroup.Status;

        var stops = firstGroup.Stations
            .Select(stop => new Models.V2.TrainStopDescription {
                Arrival = stop.Arrival != null
                    ? new Models.V2.TrainStopArrDep {
                        ScheduleTime = stop.Arrival.ScheduleTime.ToString("o"),
                        Status = stop.Arrival.Status != null
                            ? new Models.V2.Status {
                                Delay = stop.Arrival.Status.Delay,
                                Real = stop.Arrival.Status.Real,
                            }
                            : null,
                    }
                    : null,
                Departure = stop.Departure != null
                    ? new Models.V2.TrainStopArrDep {
                        ScheduleTime = stop.Departure.ScheduleTime.ToString("o"),
                        Status = stop.Departure.Status != null
                            ? new Models.V2.Status {
                                Delay = stop.Departure.Status.Delay,
                                Real = stop.Departure.Status.Real,
                            }
                            : null,
                    }
                    : null,
                Km = stop.Km,
                Name = stop.Name,
                Platform = stop.Platform,
                StoppingTime = stop.StoppingTime,
            })
            .ToList();

        return new Models.V2.TrainScrapeResult {
            Date = scraped.Date,
            Number = scraped.Number,
            Operator = scraped.Operator,
            Rank = scraped.Rank,
            Route = {
                From = firstGroup.Route.From,
                To = firstGroup.Route.To,
            },
            Stations = stops,
            Status = groupStatus != null
                ? new Models.V2.TrainStatus {
                    Delay = groupStatus.Delay,
                    State = groupStatus.State,
                    Station = groupStatus.Station,
                }
                : null,
        };
    }
}

23
server/Controllers/V2/TrainsController.cs

@ -1,23 +0,0 @@
using System.Collections.Generic;
using Microsoft.AspNetCore.Mvc;
using Server.Models.Database;
using Server.Services.Interfaces;
namespace Server.Controllers.V2;
[ApiController]
[ApiExplorerSettings(GroupName = "v2")]
[Route("/v2/[controller]")]
public class TrainsController : Controller {
    private IDatabase Database { get; }

    public TrainsController(IDatabase database) => Database = database;

    /// <summary>Lists all trains held in the database.</summary>
    [HttpGet("")]
    public ActionResult<IEnumerable<TrainListing>> ListTrains() {
        var trains = Database.Trains;
        return Ok(trains);
    }
}

40
server/Controllers/V3/ItinerariesController.cs

@ -1,40 +0,0 @@
using System;
using System.Collections.Generic;
using System.Threading.Tasks;
using Microsoft.AspNetCore.Http;
using Microsoft.AspNetCore.Mvc;
using scraper.Models.Itinerary;
using Server.Services.Interfaces;
namespace Server.Controllers.V3;
[ApiController]
[ApiExplorerSettings(GroupName = "v3")]
[Route("/v3/[controller]")]
public class ItinerariesController : Controller {
    private IDataManager DataManager { get; }
    private IDatabase Database { get; }

    public ItinerariesController(IDataManager dataManager, IDatabase database) {
        DataManager = dataManager;
        Database = database;
    }

    /// <summary>
    /// Looks up itineraries between two stations.
    /// Responds with 200 and the itineraries, or 404 when the data manager returns null.
    /// </summary>
    [HttpGet("")]
    [ProducesResponseType(typeof(IEnumerable<IItinerary>), StatusCodes.Status200OK)]
    [ProducesResponseType(StatusCodes.Status404NotFound)]
    public async Task<ActionResult<IEnumerable<IItinerary>>> FindItineraries(
        [FromQuery] string from,
        [FromQuery] string to,
        [FromQuery] DateTimeOffset? date
    ) {
        var found = await DataManager.FetchItineraries(from, to, date);
        if (found != null) {
            return Ok(found);
        }

        return NotFound();
    }
}

45
server/Controllers/V3/StationsController.cs

@ -1,45 +0,0 @@
using System;
using System.Collections.Generic;
using System.Threading.Tasks;
using InfoferScraper.Models.Station;
using Microsoft.AspNetCore.Http;
using Microsoft.AspNetCore.Mvc;
using Server.Models.Database;
using Server.Services.Interfaces;
namespace Server.Controllers.V3;
/// <summary>
/// Version 3 controller for station listings and per-station information.
/// </summary>
[Route("/v3/[controller]")]
[ApiExplorerSettings(GroupName = "v3")]
[ApiController]
public class StationsController : Controller {
    private IDataManager DataManager { get; }
    private IDatabase Database { get; }

    /// <summary>Keeps references to the injected data manager and database services.</summary>
    public StationsController(IDataManager dataManager, IDatabase database) {
        DataManager = dataManager;
        Database = database;
    }

    /// <summary>
    /// Responds with 200 OK carrying whatever <c>Database.Stations</c> currently contains.
    /// </summary>
    [HttpGet("")]
    public ActionResult<IEnumerable<StationListing>> ListStations() => Ok(Database.Stations);

    /// <summary>
    /// Fetches information about one station through the data manager.
    /// </summary>
    /// <param name="stationName">Station name taken from the route.</param>
    /// <param name="date">Optional date; <c>DateTimeOffset.Now</c> is used when it is omitted.</param>
    /// <param name="lastUpdateId">Accepted from the query string but not read by this action.</param>
    /// <returns>
    /// 200 OK with the result, or 404 Not Found with <c>Reason = "station_not_found"</c>
    /// when the data manager returns null.
    /// </returns>
    [HttpGet("{stationName}")]
    [ProducesResponseType(typeof(IStationScrapeResult), StatusCodes.Status200OK)]
    [ProducesResponseType(StatusCodes.Status404NotFound)]
    public async Task<ActionResult<IStationScrapeResult>> StationInfo(
        [FromRoute] string stationName,
        [FromQuery] DateTimeOffset? date = null,
        [FromQuery] string? lastUpdateId = null
    ) {
        var requestedDate = date ?? DateTimeOffset.Now;
        var station = await DataManager.FetchStation(stationName, requestedDate);
        if (station != null) {
            return Ok(station);
        }
        return NotFound(new {
            Reason = "station_not_found",
        });
    }
}

67
server/Controllers/V3/TrainsController.cs

@ -1,67 +0,0 @@
using System;
using System.Collections.Generic;
using System.Threading.Tasks;
using InfoferScraper.Models.Train;
using Microsoft.AspNetCore.Http;
using Microsoft.AspNetCore.Mvc;
using scraper.Exceptions;
using Server.Models.Database;
using Server.Services.Interfaces;
namespace Server.Controllers.V3;
/// <summary>
/// Version 3 controller for train listings and per-train information.
/// </summary>
[Route("/v3/[controller]")]
[ApiExplorerSettings(GroupName = "v3")]
[ApiController]
public class TrainsController : Controller {
    private IDataManager DataManager { get; }
    private IDatabase Database { get; }

    /// <summary>Keeps references to the injected data manager and database services.</summary>
    public TrainsController(IDataManager dataManager, IDatabase database) {
        DataManager = dataManager;
        Database = database;
    }

    /// <summary>
    /// Responds with 200 OK carrying whatever <c>Database.Trains</c> currently contains.
    /// </summary>
    [HttpGet("")]
    public ActionResult<IEnumerable<TrainListing>> ListTrains() => Ok(Database.Trains);

    /// <summary>
    /// Looks up information about a single train.
    /// </summary>
    /// <param name="trainNumber">The train's number only, without extras such as the rank</param>
    /// <param name="date">The date on which the train leaves its first station; defaults to the current moment when omitted</param>
    /// <returns>Information about the train</returns>
    /// <response code="404">
    /// When the requested train number cannot be found: <c>train_not_found</c> if the data manager
    /// returns null, <c>not_running_today</c> if it throws <c>TrainNotThisDayException</c>
    /// </response>
    [HttpGet("{trainNumber}")]
    [ProducesResponseType(typeof(ITrainScrapeResult), StatusCodes.Status200OK)]
    [ProducesResponseType(StatusCodes.Status404NotFound)]
    public async Task<ActionResult<ITrainScrapeResult>> TrainInfoV3(
        [FromRoute] string trainNumber,
        [FromQuery] DateTimeOffset? date = null
    ) {
        try {
            var train = await DataManager.FetchTrain(trainNumber, date ?? DateTimeOffset.Now);
            if (train != null) {
                return Ok(train);
            }
            return NotFound(new {
                Reason = "train_not_found",
            });
        }
        catch (TrainNotThisDayException) {
            return NotFound(new {
                Reason = "not_running_today",
            });
        }

        // Disabled update-token variant, kept for reference. It refers to a `lastUpdateId`
        // value that this action does not currently accept.
        // var (token, result) = await DataManager.GetNewTrainDataUpdate(
        //     trainNumber,
        //     date ?? DateTimeOffset.Now,
        //     lastUpdateId ?? ""
        // );
        // Response.Headers.Add("X-Update-Id", new StringValues(token));
        // return Ok(result);
    }
}

5
server/Models/Database/MongoSettings.cs

@ -1,5 +0,0 @@
namespace Server.Models.Database;
/// <summary>
/// Settings record holding a connection string and a database name.
/// </summary>
/// <param name="ConnectionString">The connection string value.</param>
/// <param name="DatabaseName">The database name value.</param>
public record MongoSettings(string ConnectionString, string DatabaseName) {
/// <summary>Creates an instance with both values set to the empty string.</summary>
// NOTE(review): presumably exists so a configuration/options binder can instantiate the record — confirm.
public MongoSettings() : this("", "") { }
}

17
server/Models/Database/StationAlias.cs

@ -1,17 +0,0 @@
using MongoDB.Bson;
using MongoDB.Bson.Serialization.Attributes;
using Newtonsoft.Json;
namespace Server.Models.Database;
/// <summary>
/// Database record pairing a <c>Name</c> with an optional <c>ListingId</c>.
/// </summary>
/// <param name="Id">Document id (<c>BsonId</c>), represented as an ObjectId in BSON; omitted from Newtonsoft JSON output when null.</param>
/// <param name="Name">The alias text. NOTE(review): presumably an alternative station name — confirm against callers.</param>
/// <param name="ListingId">Represented as an ObjectId in BSON. NOTE(review): presumably the id of the listing this alias points at — confirm.</param>
public record StationAlias(
[property: BsonId]
[property: BsonRepresentation(BsonType.ObjectId)]
[property: JsonProperty(NullValueHandling = NullValueHandling.Ignore)]
string? Id,
string Name,
[property: BsonRepresentation(BsonType.ObjectId)]
string? ListingId
) {
/// <summary>Creates an instance with a null id, an empty name and a null listing id.</summary>
public StationAlias() : this(null, "", null) { }
}

18
server/Models/Database/StationListing.cs

@ -1,18 +0,0 @@
using System.Collections.Generic;
using MongoDB.Bson;
using MongoDB.Bson.Serialization.Attributes;
using Newtonsoft.Json;
namespace Server.Models.Database;
/// <summary>
/// Database record describing one station entry.
/// </summary>
/// <param name="Id">Document id (<c>BsonId</c>), represented as an ObjectId in BSON; omitted from Newtonsoft JSON output when null.</param>
/// <param name="Name">The station's name.</param>
/// <param name="StoppedAtBy">List of strings. NOTE(review): presumably the numbers of trains that stop at this station — confirm against the code that fills it.</param>
public record StationListing(
[property: BsonId]
[property: BsonRepresentation(BsonType.ObjectId)]
[property: JsonProperty(NullValueHandling = NullValueHandling.Ignore)]
string? Id,
string Name,
List<string> StoppedAtBy
) {
/// <summary>Creates an instance with a null id, an empty name and an empty list.</summary>
public StationListing() : this(null, "", new()) { }
/// <summary>Creates an instance with a null id and the given name and list.</summary>
public StationListing(string name, List<string> stoppedAtBy) : this(null, name, stoppedAtBy) { }
}

20
server/Models/Database/TrainListing.cs

@ -1,20 +0,0 @@
using MongoDB.Bson;
using MongoDB.Bson.Serialization.Attributes;
using Newtonsoft.Json;
namespace Server.Models.Database;
/// <summary>
/// Database record describing one train entry.
/// </summary>
/// <param name="Id">Document id (<c>BsonId</c>), represented as an ObjectId in BSON; omitted from Newtonsoft JSON output when null.</param>
/// <param name="Rank">The train's rank.</param>
/// <param name="Number">The train's number, stored as a string.</param>
/// <param name="Company">The company value for the train.</param>
/// <param name="LatestDescription">Represented as an ObjectId in BSON. NOTE(review): presumably the id of the most recent stored description for this train — confirm.</param>
public record TrainListing(
[property: BsonId]
[property: BsonRepresentation(BsonType.ObjectId)]
[property: JsonProperty(NullValueHandling = NullValueHandling.Ignore)]
string? Id,
string Rank,
string Number,
string Company,
[property: BsonRepresentation(BsonType.ObjectId)]
string? LatestDescription
) {
/// <summary>Creates an instance with a null id, empty strings and a null latest description.</summary>
public TrainListing() : this(null, "", "", "", null) { }
/// <summary>Creates an instance with a null id and null latest description from the given rank, number and company.</summary>
public TrainListing(string rank, string number, string company) : this(null, rank, number, company, null) { }
}

9
server/Models/ProxySettings.cs

@ -1,9 +0,0 @@
namespace Server.Models;
/// <summary>
/// Proxy configuration: whether to use a proxy, its URL and optional credentials.
/// </summary>
/// <param name="UseProxy">Whether a proxy should be used.</param>
/// <param name="Url">The proxy URL.</param>
/// <param name="Credentials">Optional credentials; null when none are supplied.</param>
public record ProxySettings(bool UseProxy, string Url, ProxyCredentials? Credentials = null) {
/// <summary>Creates an instance with the proxy disabled, an empty URL and no credentials.</summary>
public ProxySettings() : this(false, "") { }
}
/// <summary>
/// Username/password pair used as the credentials part of <c>ProxySettings</c>.
/// </summary>
/// <param name="Username">The user name.</param>
/// <param name="Password">The password.</param>
public record ProxyCredentials(string Username, string Password) {
/// <summary>Creates an instance with an empty user name and an empty password.</summary>
public ProxyCredentials() : this("", "") { }
}

57
server/Models/V1/TrainScrapeResult.cs

@ -1,57 +0,0 @@
using System.Collections.Generic;
namespace Server.Models.V1 {
/// <summary>
/// Version 1 response model for a train. String members start out empty;
/// setters are internal to the declaring assembly.
/// </summary>
public record TrainScrapeResult {
/// <summary>The train's rank.</summary>
public string Rank { get; internal set; } = "";
/// <summary>The train's number, stored as a string.</summary>
public string Number { get; internal set; } = "";
/// <summary>
/// Date in the DD.MM.YYYY format.
/// This date is taken as-is from the result.
/// </summary>
public string Date { get; internal set; } = "";
/// <summary>The operator value for the train.</summary>
public string Operator { get; internal set; } = "";
/// <summary>Route endpoints; the instance is created once and has no setter.</summary>
public TrainRoute Route { get; } = new();
/// <summary>Status of the train; starts as a fresh instance and may be set to null.</summary>
public TrainStatus? Status { get; internal set; } = new();
/// <summary>Stops of the train; starts as an empty list.</summary>
public List<TrainStopDescription> Stations { get; internal set; } = new();
}
/// <summary>
/// The two endpoint names of a train's route; both start out as empty strings.
/// </summary>
public record TrainRoute {
    /// <summary>Name of the place the route starts from.</summary>
    public string From { get; set; } = "";

    /// <summary>Name of the place the route goes to.</summary>
    public string To { get; set; } = "";
}
/// <summary>
/// Status of a train: a delay, a station name and a state kind.
/// </summary>
public record TrainStatus {
// NOTE(review): unit of Delay is not visible here (presumably minutes) — confirm.
public int Delay { get; set; }
/// <summary>Station name associated with the status; empty by default.</summary>
public string Station { get; set; } = "";
/// <summary>State kind, using the scraper's <c>StatusKind</c> type.</summary>
public InfoferScraper.Models.Train.StatusKind State { get; set; }
}
/// <summary>
/// One stop of a train: name, kilometre value and optional stopping time,
/// platform, arrival and departure.
/// </summary>
public record TrainStopDescription {
/// <summary>Name of the stop; empty by default.</summary>
public string Name { get; set; } = "";
// NOTE(review): presumably the distance along the route in kilometres — confirm.
public int Km { get; set; }
/// <summary>Stopping time, or null when not available. Unit not visible here — confirm.</summary>
public int? StoppingTime { get; set; }
/// <summary>Platform, or null when not available.</summary>
public string? Platform { get; set; }
/// <summary>Arrival details, or null when there are none.</summary>
public TrainStopArrDep? Arrival { get; set; }
/// <summary>Departure details, or null when there are none.</summary>
public TrainStopArrDep? Departure { get; set; }
}
/// <summary>
/// Arrival or departure details at a stop: the scheduled time as text plus an optional status.
/// </summary>
public record TrainStopArrDep {
/// <summary>Scheduled time as a string; empty by default. Exact format is decided by the code that fills it.</summary>
public string ScheduleTime { get; set; } = "";
/// <summary>Delay status for this arrival/departure, or null when there is none.</summary>
public Status? Status { get; set; }
}
/// <summary>
/// Delay status attached to an arrival or departure.
/// </summary>
public record Status {
// NOTE(review): unit of Delay is not visible here (presumably minutes) — confirm.
public int Delay { get; set; }
// NOTE(review): presumably true when the delay is an observed value rather than an estimate — confirm.
public bool Real { get; set; }
}
}

39
server/Models/V2/StationScrapeResult.cs

@ -1,39 +0,0 @@
using System;
using System.Collections.Generic;
namespace Server.Models.V2 {
/// <summary>
/// Version 2 response model for a station. Setters are internal to the declaring assembly.
/// </summary>
public record StationScrapeResult {
/// <summary>Date as a string; empty by default. Exact format is decided by the code that fills it.</summary>
public string Date { get; internal set; } = "";
/// <summary>Name of the station; empty by default.</summary>
public string StationName { get; internal set; } = "";
/// <summary>Arrivals at the station; null until assigned.</summary>
public List<StationArrival>? Arrivals { get; internal set; }
/// <summary>Departures from the station; null until assigned.</summary>
public List<StationDeparture>? Departures { get; internal set; }
}
/// <summary>
/// One arrival at a station: optional stopping time, a time and the arriving train.
/// </summary>
public record StationArrival {
/// <summary>Stopping time, or null when not available. Unit not visible here — confirm.</summary>
public int? StoppingTime { get; internal set; }
/// <summary>Time of the arrival.</summary>
public DateTimeOffset Time { get; internal set; }
/// <summary>The arriving train; starts as a default-constructed instance.</summary>
public StationArrivalTrain Train { get; internal set; } = new();
}
/// <summary>
/// Train details attached to a <c>StationArrival</c>.
/// Every member has a non-null default (empty string / empty list), so a
/// default-constructed instance, such as the one <c>StationArrival.Train</c>
/// starts with, never exposes null through a non-nullable property.
/// </summary>
public record StationArrivalTrain {
    /// <summary>The train's number, stored as a string; empty by default.</summary>
    public string Number { get; internal set; } = "";

    /// <summary>The operator value for the train; empty by default.</summary>
    public string Operator { get; internal set; } = "";

    /// <summary>The train's rank; empty by default.</summary>
    public string Rank { get; internal set; } = "";

    /// <summary>Route of the train as a list of strings; empty by default.</summary>
    public List<string> Route { get; internal set; } = new();

    /// <summary>Origin of the train; empty by default.</summary>
    public string Origin { get; internal set; } = "";
}
/// <summary>
/// One departure from a station: optional stopping time, a time and the departing train.
/// </summary>
public record StationDeparture {
/// <summary>Stopping time, or null when not available. Unit not visible here — confirm.</summary>
public int? StoppingTime { get; internal set; }
/// <summary>Time of the departure.</summary>
public DateTimeOffset Time { get; internal set; }
/// <summary>The departing train; starts as a default-constructed instance.</summary>
public StationDepartureTrain Train { get; internal set; } = new();
}
/// <summary>
/// Train details attached to a <c>StationDeparture</c>.
/// Every member has a non-null default (empty string / empty list), so a
/// default-constructed instance, such as the one <c>StationDeparture.Train</c>
/// starts with, never exposes null through a non-nullable property.
/// </summary>
public record StationDepartureTrain {
    /// <summary>The train's number, stored as a string; empty by default.</summary>
    public string Number { get; internal set; } = "";

    /// <summary>The operator value for the train; empty by default.</summary>
    public string Operator { get; internal set; } = "";

    /// <summary>The train's rank; empty by default.</summary>
    public string Rank { get; internal set; } = "";

    /// <summary>Route of the train as a list of strings; empty by default.</summary>
    public List<string> Route { get; internal set; } = new();

    /// <summary>Destination of the train; empty by default.</summary>
    public string Destination { get; internal set; } = "";
}
}

57
server/Models/V2/TrainScrapeResult.cs

@ -1,57 +0,0 @@
using System.Collections.Generic;
namespace Server.Models.V2 {
/// <summary>
/// Version 2 response model for a train. String members start out empty;
/// setters are internal to the declaring assembly.
/// </summary>
public record TrainScrapeResult {
/// <summary>The train's rank.</summary>
public string Rank { get; internal set; } = "";
/// <summary>The train's number, stored as a string.</summary>
public string Number { get; internal set; } = "";
/// <summary>
/// Date in the DD.MM.YYYY format.
/// This date is taken as-is from the result.
/// </summary>
public string Date { get; internal set; } = "";
/// <summary>The operator value for the train.</summary>
public string Operator { get; internal set; } = "";
/// <summary>Route endpoints; the instance is created once and has no setter.</summary>
public TrainRoute Route { get; } = new();
/// <summary>Status of the train; starts as a fresh instance and may be set to null.</summary>
public TrainStatus? Status { get; internal set; } = new();
/// <summary>Stops of the train; starts as an empty list.</summary>
public List<TrainStopDescription> Stations { get; internal set; } = new();
}
/// <summary>
/// The two endpoint names of a train's route; both start out as empty strings.
/// </summary>
public record TrainRoute {
    /// <summary>Name of the place the route starts from.</summary>
    public string From { get; set; } = "";

    /// <summary>Name of the place the route goes to.</summary>
    public string To { get; set; } = "";
}
/// <summary>
/// Status of a train: a delay, a station name and a state kind.
/// </summary>
public record TrainStatus {
// NOTE(review): unit of Delay is not visible here (presumably minutes) — confirm.
public int Delay { get; set; }
/// <summary>Station name associated with the status; empty by default.</summary>
public string Station { get; set; } = "";
/// <summary>State kind, using the scraper's <c>StatusKind</c> type.</summary>
public InfoferScraper.Models.Train.StatusKind State { get; set; }
}
/// <summary>
/// One stop of a train: name, kilometre value and optional stopping time,
/// platform, arrival and departure.
/// </summary>
public record TrainStopDescription {
/// <summary>Name of the stop; empty by default.</summary>
public string Name { get; set; } = "";
// NOTE(review): presumably the distance along the route in kilometres — confirm.
public int Km { get; set; }
/// <summary>Stopping time, or null when not available. Unit not visible here — confirm.</summary>
public int? StoppingTime { get; set; }
/// <summary>Platform, or null when not available.</summary>
public string? Platform { get; set; }
/// <summary>Arrival details, or null when there are none.</summary>
public TrainStopArrDep? Arrival { get; set; }
/// <summary>Departure details, or null when there are none.</summary>
public TrainStopArrDep? Departure { get; set; }
}
/// <summary>
/// Arrival or departure details at a stop: the scheduled time as text plus an optional status.
/// </summary>
public record TrainStopArrDep {
/// <summary>Scheduled time as a string; empty by default. Exact format is decided by the code that fills it.</summary>
public string ScheduleTime { get; set; } = "";
/// <summary>Delay status for this arrival/departure, or null when there is none.</summary>
public Status? Status { get; set; }
}
/// <summary>
/// Delay status attached to an arrival or departure.
/// </summary>
public record Status {
// NOTE(review): unit of Delay is not visible here (presumably minutes) — confirm.
public int Delay { get; set; }
// NOTE(review): presumably true when the delay is an observed value rather than an estimate — confirm.
public bool Real { get; set; }
}
}

15
server/Pipfile

@ -0,0 +1,15 @@
[[source]]
url = "https://pypi.org/simple"
verify_ssl = true
name = "pypi"
[packages]
flask = "*"
gevent = "*"
scraper = { editable = true, path = '../scraper' }
jsonschema = "*"
[dev-packages]
[requires]
python_version = "3.9"

394
server/Pipfile.lock generated

@ -0,0 +1,394 @@
{
"_meta": {
"hash": {
"sha256": "3c7f09679bdd68674754a714ee39503cf1a3ae265400eea074fec83559246dff"
},
"pipfile-spec": 6,
"requires": {
"python_version": "3.9"
},
"sources": [
{
"name": "pypi",
"url": "https://pypi.org/simple",
"verify_ssl": true
}
]
},
"default": {
"attrs": {
"hashes": [
"sha256:149e90d6d8ac20db7a955ad60cf0e6881a3f20d37096140088356da6c716b0b1",
"sha256:ef6aaac3ca6cd92904cdd0d83f629a15f18053ec84e6432106f7a4d04ae4f5fb"
],
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'",
"version": "==21.2.0"
},
"beautifulsoup4": {
"hashes": [
"sha256:4c98143716ef1cb40bf7f39a8e3eec8f8b009509e74904ba3a7b315431577e35",
"sha256:84729e322ad1d5b4d25f805bfa05b902dd96450f43842c4e99067d5e1369eb25",
"sha256:fff47e031e34ec82bf17e00da8f592fe7de69aeea38be00523c04623c04fb666"
],
"version": "==4.9.3"
},
"certifi": {
"hashes": [
"sha256:2bbf76fd432960138b3ef6dda3dde0544f27cbf8546c458e60baf371917ba9ee",
"sha256:50b1e4f8446b06f41be7dd6338db18e0990601dce795c2b1686458aa7e8fa7d8"
],
"version": "==2021.5.30"
},
"charset-normalizer": {
"hashes": [
"sha256:0c8911edd15d19223366a194a513099a302055a962bca2cec0f54b8b63175d8b",
"sha256:f23667ebe1084be45f6ae0538e4a5a865206544097e4e8bbcacf42cd02a348f3"
],
"markers": "python_version >= '3'",
"version": "==2.0.4"
},
"click": {
"hashes": [
"sha256:8c04c11192119b1ef78ea049e0a6f0463e4c48ef00a30160c704337586f3ad7a",
"sha256:fba402a4a47334742d782209a7c79bc448911afe1149d07bdabdf480b3e2f4b6"
],
"markers": "python_version >= '3.6'",
"version": "==8.0.1"
},
"flask": {
"hashes": [
"sha256:1c4c257b1892aec1398784c63791cbaa43062f1f7aeb555c4da961b20ee68f55",
"sha256:a6209ca15eb63fc9385f38e452704113d679511d9574d09b2cf9183ae7d20dc9"
],
"index": "pypi",
"version": "==2.0.1"
},
"gevent": {
"hashes": [
"sha256:02d1e8ca227d0ab0b7917fd7e411f9a534475e0a41fb6f434e9264b20155201a",
"sha256:0c7b4763514fec74c9fe6ad10c3de62d8fe7b926d520b1e35eb6887181b954ff",
"sha256:1c9c87b15f792af80edc950a83ab8ef4f3ba3889712211c2c42740ddb57b5492",
"sha256:23077d87d1589ac141c22923fd76853d2cc5b7e3c5e1f1f9cdf6ff23bc9790fc",
"sha256:37a469a99e6000b42dd0b9bbd9d716dbd66cdc6e5738f136f6a266c29b90ee99",
"sha256:3b600145dc0c5b39c6f89c2e91ec6c55eb0dd52dc8148228479ca42cded358e4",
"sha256:3f5ba654bdd3c774079b553fef535ede5b52c7abd224cb235a15da90ae36251b",
"sha256:43e93e1a4738c922a2416baf33f0afb0a20b22d3dba886720bc037cd02a98575",
"sha256:473f918bdf7d2096e391f66bd8ce1e969639aa235e710aaf750a37774bb585bd",
"sha256:4c94d27be9f0439b28eb8bd0f879e6142918c62092fda7fb96b6d06f01886b94",
"sha256:55ede95f41b74e7506fab293ad04cc7fc2b6f662b42281e9f2d668ad3817b574",
"sha256:6cad37a55e904879beef2a7e7c57c57d62fde2331fef1bec7f2b2a7ef14da6a2",
"sha256:72d4c2a8e65bbc702db76456841c7ddd6de2d9ab544a24aa74ad9c2b6411a269",
"sha256:75c29ed5148c916021d39d2fac90ccc0e19adf854626a34eaee012aa6b1fcb67",
"sha256:84e1af2dfb4ea9495cb914b00b6303ca0d54bf0a92e688a17e60f6b033873df2",
"sha256:8d8655ce581368b7e1ab42c8a3a166c0b43ea04e59970efbade9448864585e99",
"sha256:90131877d3ce1a05da1b718631860815b89ff44e93c42d168c9c9e8893b26318",
"sha256:9d46bea8644048ceac5737950c08fc89c37a66c34a56a6c9e3648726e60cb767",
"sha256:a8656d6e02bf47d7fa47728cf7a7cbf408f77ef1fad12afd9e0e3246c5de1707",
"sha256:aaf1451cd0d9c32f65a50e461084a0540be52b8ea05c18669c95b42e1f71592a",
"sha256:afc877ff4f277d0e51a1206d748fdab8c1e0256f7a05e1b1067abbed71c64da9",
"sha256:b10c3326edb76ec3049646dc5131608d6d3733b5adfc75d34852028ecc67c52c",
"sha256:ceec7c5f15fb2f9b767b194daa55246830db6c7c3c2f0b1c7e9e90cb4d01f3f9",
"sha256:e00dc0450f79253b7a3a7f2a28e6ca959c8d0d47c0f9fa2c57894c7974d5965f",
"sha256:e91632fdcf1c9a33e97e35f96edcbdf0b10e36cf53b58caa946dca4836bb688c",
"sha256:f39d5defda9443b5fb99a185050e94782fe7ac38f34f751b491142216ad23bc7"
],
"index": "pypi",
"version": "==21.8.0"
},
"greenlet": {
"hashes": [
"sha256:04e1849c88aa56584d4a0a6e36af5ec7cc37993fdc1fda72b56aa1394a92ded3",
"sha256:05e72db813c28906cdc59bd0da7c325d9b82aa0b0543014059c34c8c4ad20e16",
"sha256:07e6d88242e09b399682b39f8dfa1e7e6eca66b305de1ff74ed9eb1a7d8e539c",
"sha256:090126004c8ab9cd0787e2acf63d79e80ab41a18f57d6448225bbfcba475034f",
"sha256:1796f2c283faab2b71c67e9b9aefb3f201fdfbee5cb55001f5ffce9125f63a45",
"sha256:2f89d74b4f423e756a018832cd7a0a571e0a31b9ca59323b77ce5f15a437629b",
"sha256:34e6675167a238bede724ee60fe0550709e95adaff6a36bcc97006c365290384",
"sha256:3e594015a2349ec6dcceda9aca29da8dc89e85b56825b7d1f138a3f6bb79dd4c",
"sha256:3f8fc59bc5d64fa41f58b0029794f474223693fd00016b29f4e176b3ee2cfd9f",
"sha256:3fc6a447735749d651d8919da49aab03c434a300e9f0af1c886d560405840fd1",
"sha256:40abb7fec4f6294225d2b5464bb6d9552050ded14a7516588d6f010e7e366dcc",
"sha256:44556302c0ab376e37939fd0058e1f0db2e769580d340fb03b01678d1ff25f68",
"sha256:476ba9435afaead4382fbab8f1882f75e3fb2285c35c9285abb3dd30237f9142",
"sha256:4870b018ca685ff573edd56b93f00a122f279640732bb52ce3a62b73ee5c4a92",
"sha256:4adaf53ace289ced90797d92d767d37e7cdc29f13bd3830c3f0a561277a4ae83",
"sha256:4eae94de9924bbb4d24960185363e614b1b62ff797c23dc3c8a7c75bbb8d187e",
"sha256:5317701c7ce167205c0569c10abc4bd01c7f4cf93f642c39f2ce975fa9b78a3c",
"sha256:5c3b735ccf8fc8048664ee415f8af5a3a018cc92010a0d7195395059b4b39b7d",
"sha256:5cde7ee190196cbdc078511f4df0be367af85636b84d8be32230f4871b960687",
"sha256:655ab836324a473d4cd8cf231a2d6f283ed71ed77037679da554e38e606a7117",
"sha256:6ce9d0784c3c79f3e5c5c9c9517bbb6c7e8aa12372a5ea95197b8a99402aa0e6",
"sha256:6e0696525500bc8aa12eae654095d2260db4dc95d5c35af2b486eae1bf914ccd",
"sha256:75ff270fd05125dce3303e9216ccddc541a9e072d4fc764a9276d44dee87242b",
"sha256:8039f5fe8030c43cd1732d9a234fdcbf4916fcc32e21745ca62e75023e4d4649",
"sha256:84488516639c3c5e5c0e52f311fff94ebc45b56788c2a3bfe9cf8e75670f4de3",
"sha256:84782c80a433d87530ae3f4b9ed58d4a57317d9918dfcc6a59115fa2d8731f2c",
"sha256:8ddb38fb6ad96c2ef7468ff73ba5c6876b63b664eebb2c919c224261ae5e8378",
"sha256:98b491976ed656be9445b79bc57ed21decf08a01aaaf5fdabf07c98c108111f6",
"sha256:990e0f5e64bcbc6bdbd03774ecb72496224d13b664aa03afd1f9b171a3269272",
"sha256:9b02e6039eafd75e029d8c58b7b1f3e450ca563ef1fe21c7e3e40b9936c8d03e",
"sha256:a11b6199a0b9dc868990456a2667167d0ba096c5224f6258e452bfbe5a9742c5",
"sha256:a414f8e14aa7bacfe1578f17c11d977e637d25383b6210587c29210af995ef04",
"sha256:a91ee268f059583176c2c8b012a9fce7e49ca6b333a12bbc2dd01fc1a9783885",
"sha256:ac991947ca6533ada4ce7095f0e28fe25d5b2f3266ad5b983ed4201e61596acf",
"sha256:b050dbb96216db273b56f0e5960959c2b4cb679fe1e58a0c3906fa0a60c00662",
"sha256:b97a807437b81f90f85022a9dcfd527deea38368a3979ccb49d93c9198b2c722",
"sha256:bad269e442f1b7ffa3fa8820b3c3aa66f02a9f9455b5ba2db5a6f9eea96f56de",
"sha256:bf3725d79b1ceb19e83fb1aed44095518c0fcff88fba06a76c0891cfd1f36837",
"sha256:c0f22774cd8294078bdf7392ac73cf00bfa1e5e0ed644bd064fdabc5f2a2f481",
"sha256:c1862f9f1031b1dee3ff00f1027fcd098ffc82120f43041fe67804b464bbd8a7",
"sha256:c8d4ed48eed7414ccb2aaaecbc733ed2a84c299714eae3f0f48db085342d5629",
"sha256:cf31e894dabb077a35bbe6963285d4515a387ff657bd25b0530c7168e48f167f",
"sha256:d15cb6f8706678dc47fb4e4f8b339937b04eda48a0af1cca95f180db552e7663",
"sha256:dfcb5a4056e161307d103bc013478892cfd919f1262c2bb8703220adcb986362",
"sha256:e02780da03f84a671bb4205c5968c120f18df081236d7b5462b380fd4f0b497b",
"sha256:e2002a59453858c7f3404690ae80f10c924a39f45f6095f18a985a1234c37334",
"sha256:e22a82d2b416d9227a500c6860cf13e74060cf10e7daf6695cbf4e6a94e0eee4",
"sha256:e41f72f225192d5d4df81dad2974a8943b0f2d664a2a5cfccdf5a01506f5523c",
"sha256:f253dad38605486a4590f9368ecbace95865fea0f2b66615d121ac91fd1a1563",
"sha256:fddfb31aa2ac550b938d952bca8a87f1db0f8dc930ffa14ce05b5c08d27e7fd1"
],
"markers": "platform_python_implementation == 'CPython'",
"version": "==1.1.1"
},
"idna": {
"hashes": [
"sha256:14475042e284991034cb48e06f6851428fb14c4dc953acd9be9a5e95c7b6dd7a",
"sha256:467fbad99067910785144ce333826c71fb0e63a425657295239737f7ecd125f3"
],
"markers": "python_version >= '3'",
"version": "==3.2"
},
"infofer-scraper": {
"editable": true,
"path": "./../scraper"
},
"itsdangerous": {
"hashes": [
"sha256:5174094b9637652bdb841a3029700391451bd092ba3db90600dea710ba28e97c",
"sha256:9e724d68fc22902a1435351f84c3fb8623f303fffcc566a4cb952df8c572cff0"
],
"markers": "python_version >= '3.6'",
"version": "==2.0.1"
},
"jinja2": {
"hashes": [
"sha256:1f06f2da51e7b56b8f238affdd6b4e2c61e39598a378cc49345bc1bd42a978a4",
"sha256:703f484b47a6af502e743c9122595cc812b0271f661722403114f71a79d0f5a4"
],
"markers": "python_version >= '3.6'",
"version": "==3.0.1"
},
"jsonschema": {
"hashes": [
"sha256:4e5b3cf8216f577bee9ce139cbe72eca3ea4f292ec60928ff24758ce626cd163",
"sha256:c8a85b28d377cc7737e46e2d9f2b4f44ee3c0e1deac6bf46ddefc7187d30797a"
],
"index": "pypi",
"version": "==3.2.0"
},
"markupsafe": {
"hashes": [
"sha256:01a9b8ea66f1658938f65b93a85ebe8bc016e6769611be228d797c9d998dd298",
"sha256:023cb26ec21ece8dc3907c0e8320058b2e0cb3c55cf9564da612bc325bed5e64",
"sha256:0446679737af14f45767963a1a9ef7620189912317d095f2d9ffa183a4d25d2b",
"sha256:0717a7390a68be14b8c793ba258e075c6f4ca819f15edfc2a3a027c823718567",
"sha256:0955295dd5eec6cb6cc2fe1698f4c6d84af2e92de33fbcac4111913cd100a6ff",
"sha256:0d4b31cc67ab36e3392bbf3862cfbadac3db12bdd8b02a2731f509ed5b829724",
"sha256:10f82115e21dc0dfec9ab5c0223652f7197feb168c940f3ef61563fc2d6beb74",
"sha256:168cd0a3642de83558a5153c8bd34f175a9a6e7f6dc6384b9655d2697312a646",
"sha256:1d609f577dc6e1aa17d746f8bd3c31aa4d258f4070d61b2aa5c4166c1539de35",
"sha256:1f2ade76b9903f39aa442b4aadd2177decb66525062db244b35d71d0ee8599b6",
"sha256:2a7d351cbd8cfeb19ca00de495e224dea7e7d919659c2841bbb7f420ad03e2d6",
"sha256:2d7d807855b419fc2ed3e631034685db6079889a1f01d5d9dac950f764da3dad",
"sha256:2ef54abee730b502252bcdf31b10dacb0a416229b72c18b19e24a4509f273d26",
"sha256:36bc903cbb393720fad60fc28c10de6acf10dc6cc883f3e24ee4012371399a38",
"sha256:37205cac2a79194e3750b0af2a5720d95f786a55ce7df90c3af697bfa100eaac",
"sha256:3c112550557578c26af18a1ccc9e090bfe03832ae994343cfdacd287db6a6ae7",
"sha256:3dd007d54ee88b46be476e293f48c85048603f5f516008bee124ddd891398ed6",
"sha256:47ab1e7b91c098ab893b828deafa1203de86d0bc6ab587b160f78fe6c4011f75",
"sha256:49e3ceeabbfb9d66c3aef5af3a60cc43b85c33df25ce03d0031a608b0a8b2e3f",
"sha256:4efca8f86c54b22348a5467704e3fec767b2db12fc39c6d963168ab1d3fc9135",
"sha256:53edb4da6925ad13c07b6d26c2a852bd81e364f95301c66e930ab2aef5b5ddd8",
"sha256:5855f8438a7d1d458206a2466bf82b0f104a3724bf96a1c781ab731e4201731a",
"sha256:594c67807fb16238b30c44bdf74f36c02cdf22d1c8cda91ef8a0ed8dabf5620a",
"sha256:5bb28c636d87e840583ee3adeb78172efc47c8b26127267f54a9c0ec251d41a9",
"sha256:60bf42e36abfaf9aff1f50f52644b336d4f0a3fd6d8a60ca0d054ac9f713a864",
"sha256:611d1ad9a4288cf3e3c16014564df047fe08410e628f89805e475368bd304914",
"sha256:6557b31b5e2c9ddf0de32a691f2312a32f77cd7681d8af66c2692efdbef84c18",
"sha256:693ce3f9e70a6cf7d2fb9e6c9d8b204b6b39897a2c4a1aa65728d5ac97dcc1d8",
"sha256:6a7fae0dd14cf60ad5ff42baa2e95727c3d81ded453457771d02b7d2b3f9c0c2",
"sha256:6c4ca60fa24e85fe25b912b01e62cb969d69a23a5d5867682dd3e80b5b02581d",
"sha256:6fcf051089389abe060c9cd7caa212c707e58153afa2c649f00346ce6d260f1b",
"sha256:7d91275b0245b1da4d4cfa07e0faedd5b0812efc15b702576d103293e252af1b",
"sha256:905fec760bd2fa1388bb5b489ee8ee5f7291d692638ea5f67982d968366bef9f",
"sha256:97383d78eb34da7e1fa37dd273c20ad4320929af65d156e35a5e2d89566d9dfb",
"sha256:984d76483eb32f1bcb536dc27e4ad56bba4baa70be32fa87152832cdd9db0833",
"sha256:99df47edb6bda1249d3e80fdabb1dab8c08ef3975f69aed437cb69d0a5de1e28",
"sha256:a30e67a65b53ea0a5e62fe23682cfe22712e01f453b95233b25502f7c61cb415",
"sha256:ab3ef638ace319fa26553db0624c4699e31a28bb2a835c5faca8f8acf6a5a902",
"sha256:add36cb2dbb8b736611303cd3bfcee00afd96471b09cda130da3581cbdc56a6d",
"sha256:b2f4bf27480f5e5e8ce285a8c8fd176c0b03e93dcc6646477d4630e83440c6a9",
"sha256:b7f2d075102dc8c794cbde1947378051c4e5180d52d276987b8d28a3bd58c17d",
"sha256:baa1a4e8f868845af802979fcdbf0bb11f94f1cb7ced4c4b8a351bb60d108145",
"sha256:be98f628055368795d818ebf93da628541e10b75b41c559fdf36d104c5787066",
"sha256:bf5d821ffabf0ef3533c39c518f3357b171a1651c1ff6827325e4489b0e46c3c",
"sha256:c47adbc92fc1bb2b3274c4b3a43ae0e4573d9fbff4f54cd484555edbf030baf1",
"sha256:d7f9850398e85aba693bb640262d3611788b1f29a79f0c93c565694658f4071f",
"sha256:d8446c54dc28c01e5a2dbac5a25f071f6653e6e40f3a8818e8b45d790fe6ef53",
"sha256:e0f138900af21926a02425cf736db95be9f4af72ba1bb21453432a07f6082134",
"sha256:e9936f0b261d4df76ad22f8fee3ae83b60d7c3e871292cd42f40b81b70afae85",
"sha256:f5653a225f31e113b152e56f154ccbe59eeb1c7487b39b9d9f9cdb58e6c79dc5",
"sha256:f826e31d18b516f653fe296d967d700fddad5901ae07c622bb3705955e1faa94",
"sha256:f8ba0e8349a38d3001fae7eadded3f6606f0da5d748ee53cc1dab1d6527b9509",
"sha256:f9081981fe268bd86831e5c75f7de206ef275defcb82bc70740ae6dc507aee51",
"sha256:fa130dd50c57d53368c9d59395cb5526eda596d3ffe36666cd81a44d56e48872"
],
"markers": "python_version >= '3.6'",
"version": "==2.0.1"
},
"pyrsistent": {
"hashes": [
"sha256:097b96f129dd36a8c9e33594e7ebb151b1515eb52cceb08474c10a5479e799f2",
"sha256:2aaf19dc8ce517a8653746d98e962ef480ff34b6bc563fc067be6401ffb457c7",
"sha256:404e1f1d254d314d55adb8d87f4f465c8693d6f902f67eb6ef5b4526dc58e6ea",
"sha256:48578680353f41dca1ca3dc48629fb77dfc745128b56fc01096b2530c13fd426",
"sha256:4916c10896721e472ee12c95cdc2891ce5890898d2f9907b1b4ae0f53588b710",
"sha256:527be2bfa8dc80f6f8ddd65242ba476a6c4fb4e3aedbf281dfbac1b1ed4165b1",
"sha256:58a70d93fb79dc585b21f9d72487b929a6fe58da0754fa4cb9f279bb92369396",
"sha256:5e4395bbf841693eaebaa5bb5c8f5cdbb1d139e07c975c682ec4e4f8126e03d2",
"sha256:6b5eed00e597b5b5773b4ca30bd48a5774ef1e96f2a45d105db5b4ebb4bca680",
"sha256:73ff61b1411e3fb0ba144b8f08d6749749775fe89688093e1efef9839d2dcc35",
"sha256:772e94c2c6864f2cd2ffbe58bb3bdefbe2a32afa0acb1a77e472aac831f83427",
"sha256:773c781216f8c2900b42a7b638d5b517bb134ae1acbebe4d1e8f1f41ea60eb4b",
"sha256:a0c772d791c38bbc77be659af29bb14c38ced151433592e326361610250c605b",
"sha256:b29b869cf58412ca5738d23691e96d8aff535e17390128a1a52717c9a109da4f",
"sha256:c1a9ff320fa699337e05edcaae79ef8c2880b52720bc031b219e5b5008ebbdef",
"sha256:cd3caef37a415fd0dae6148a1b6957a8c5f275a62cca02e18474608cb263640c",
"sha256:d5ec194c9c573aafaceebf05fc400656722793dac57f254cd4741f3c27ae57b4",
"sha256:da6e5e818d18459fa46fac0a4a4e543507fe1110e808101277c5a2b5bab0cd2d",
"sha256:e79d94ca58fcafef6395f6352383fa1a76922268fa02caa2272fff501c2fdc78",
"sha256:f3ef98d7b76da5eb19c37fda834d50262ff9167c65658d1d8f974d2e4d90676b",
"sha256:f4c8cabb46ff8e5d61f56a037974228e978f26bfefce4f61a4b1ac0ba7a2ab72"
],
"markers": "python_version >= '3.6'",
"version": "==0.18.0"
},
"pytz": {
"hashes": [
"sha256:83a4a90894bf38e243cf052c8b58f381bfe9a7a483f6a9cab140bc7f702ac4da",
"sha256:eb10ce3e7736052ed3623d49975ce333bcd712c7bb19a58b9e2089d4057d0798"
],
"version": "==2021.1"
},
"requests": {
"hashes": [
"sha256:6c1246513ecd5ecd4528a0906f910e8f0f9c6b8ec72030dc9fd154dc1a6efd24",
"sha256:b8aa58f8cf793ffd8782d3d8cb19e66ef36f7aba4353eec859e74678b01b07a7"
],
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'",
"version": "==2.26.0"
},
"scraper": {
"editable": true,
"path": "../scraper"
},
"six": {
"hashes": [
"sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926",
"sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"
],
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
"version": "==1.16.0"
},
"soupsieve": {
"hashes": [
"sha256:052774848f448cf19c7e959adf5566904d525f33a3f8b6ba6f6f8f26ec7de0cc",
"sha256:c2c1c2d44f158cdbddab7824a9af8c4f83c76b1e23e049479aa432feb6c4c23b"
],
"markers": "python_version >= '3'",
"version": "==2.2.1"
},
"urllib3": {
"hashes": [
"sha256:39fb8672126159acb139a7718dd10806104dec1e2f0f6c88aab05d17df10c8d4",
"sha256:f57b4c16c62fa2760b7e3d97c35b255512fb6b59a259730f36ba32ce9f8e342f"
],
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' and python_version < '4'",
"version": "==1.26.6"
},
"werkzeug": {
"hashes": [
"sha256:1de1db30d010ff1af14a009224ec49ab2329ad2cde454c8a708130642d579c42",
"sha256:6c1ec500dcdba0baa27600f6a22f6333d8b662d22027ff9f6202e3367413caa8"
],
"markers": "python_version >= '3.6'",
"version": "==2.0.1"
},
"zope.event": {
"hashes": [
"sha256:2666401939cdaa5f4e0c08cf7f20c9b21423b95e88f4675b1443973bdb080c42",
"sha256:5e76517f5b9b119acf37ca8819781db6c16ea433f7e2062c4afc2b6fbedb1330"
],
"version": "==4.5.0"
},
"zope.interface": {
"hashes": [
"sha256:08f9636e99a9d5410181ba0729e0408d3d8748026ea938f3b970a0249daa8192",
"sha256:0b465ae0962d49c68aa9733ba92a001b2a0933c317780435f00be7ecb959c702",
"sha256:0cba8477e300d64a11a9789ed40ee8932b59f9ee05f85276dbb4b59acee5dd09",
"sha256:0cee5187b60ed26d56eb2960136288ce91bcf61e2a9405660d271d1f122a69a4",
"sha256:0ea1d73b7c9dcbc5080bb8aaffb776f1c68e807767069b9ccdd06f27a161914a",
"sha256:0f91b5b948686659a8e28b728ff5e74b1be6bf40cb04704453617e5f1e945ef3",
"sha256:15e7d1f7a6ee16572e21e3576d2012b2778cbacf75eb4b7400be37455f5ca8bf",
"sha256:17776ecd3a1fdd2b2cd5373e5ef8b307162f581c693575ec62e7c5399d80794c",
"sha256:194d0bcb1374ac3e1e023961610dc8f2c78a0f5f634d0c737691e215569e640d",
"sha256:1c0e316c9add0db48a5b703833881351444398b04111188069a26a61cfb4df78",
"sha256:205e40ccde0f37496904572035deea747390a8b7dc65146d30b96e2dd1359a83",
"sha256:273f158fabc5ea33cbc936da0ab3d4ba80ede5351babc4f577d768e057651531",
"sha256:2876246527c91e101184f63ccd1d716ec9c46519cc5f3d5375a3351c46467c46",
"sha256:2c98384b254b37ce50eddd55db8d381a5c53b4c10ee66e1e7fe749824f894021",
"sha256:2e5a26f16503be6c826abca904e45f1a44ff275fdb7e9d1b75c10671c26f8b94",
"sha256:334701327f37c47fa628fc8b8d28c7d7730ce7daaf4bda1efb741679c2b087fc",
"sha256:3748fac0d0f6a304e674955ab1365d515993b3a0a865e16a11ec9d86fb307f63",
"sha256:3c02411a3b62668200910090a0dff17c0b25aaa36145082a5a6adf08fa281e54",
"sha256:3dd4952748521205697bc2802e4afac5ed4b02909bb799ba1fe239f77fd4e117",
"sha256:3f24df7124c323fceb53ff6168da70dbfbae1442b4f3da439cd441681f54fe25",
"sha256:469e2407e0fe9880ac690a3666f03eb4c3c444411a5a5fddfdabc5d184a79f05",
"sha256:4de4bc9b6d35c5af65b454d3e9bc98c50eb3960d5a3762c9438df57427134b8e",
"sha256:5208ebd5152e040640518a77827bdfcc73773a15a33d6644015b763b9c9febc1",
"sha256:52de7fc6c21b419078008f697fd4103dbc763288b1406b4562554bd47514c004",
"sha256:5bb3489b4558e49ad2c5118137cfeaf59434f9737fa9c5deefc72d22c23822e2",
"sha256:5dba5f530fec3f0988d83b78cc591b58c0b6eb8431a85edd1569a0539a8a5a0e",
"sha256:5dd9ca406499444f4c8299f803d4a14edf7890ecc595c8b1c7115c2342cadc5f",
"sha256:5f931a1c21dfa7a9c573ec1f50a31135ccce84e32507c54e1ea404894c5eb96f",
"sha256:63b82bb63de7c821428d513607e84c6d97d58afd1fe2eb645030bdc185440120",
"sha256:66c0061c91b3b9cf542131148ef7ecbecb2690d48d1612ec386de9d36766058f",
"sha256:6f0c02cbb9691b7c91d5009108f975f8ffeab5dff8f26d62e21c493060eff2a1",
"sha256:71aace0c42d53abe6fc7f726c5d3b60d90f3c5c055a447950ad6ea9cec2e37d9",
"sha256:7d97a4306898b05404a0dcdc32d9709b7d8832c0c542b861d9a826301719794e",
"sha256:7df1e1c05304f26faa49fa752a8c690126cf98b40b91d54e6e9cc3b7d6ffe8b7",
"sha256:8270252effc60b9642b423189a2fe90eb6b59e87cbee54549db3f5562ff8d1b8",
"sha256:867a5ad16892bf20e6c4ea2aab1971f45645ff3102ad29bd84c86027fa99997b",
"sha256:877473e675fdcc113c138813a5dd440da0769a2d81f4d86614e5d62b69497155",
"sha256:8892f89999ffd992208754851e5a052f6b5db70a1e3f7d54b17c5211e37a98c7",
"sha256:9a9845c4c6bb56e508651f005c4aeb0404e518c6f000d5a1123ab077ab769f5c",
"sha256:a1e6e96217a0f72e2b8629e271e1b280c6fa3fe6e59fa8f6701bec14e3354325",
"sha256:a8156e6a7f5e2a0ff0c5b21d6bcb45145efece1909efcbbbf48c56f8da68221d",
"sha256:a9506a7e80bcf6eacfff7f804c0ad5350c8c95b9010e4356a4b36f5322f09abb",
"sha256:af310ec8335016b5e52cae60cda4a4f2a60a788cbb949a4fbea13d441aa5a09e",
"sha256:b0297b1e05fd128d26cc2460c810d42e205d16d76799526dfa8c8ccd50e74959",
"sha256:bf68f4b2b6683e52bec69273562df15af352e5ed25d1b6641e7efddc5951d1a7",
"sha256:d0c1bc2fa9a7285719e5678584f6b92572a5b639d0e471bb8d4b650a1a910920",
"sha256:d4d9d6c1a455d4babd320203b918ccc7fcbefe308615c521062bc2ba1aa4d26e",
"sha256:db1fa631737dab9fa0b37f3979d8d2631e348c3b4e8325d6873c2541d0ae5a48",
"sha256:dd93ea5c0c7f3e25335ab7d22a507b1dc43976e1345508f845efc573d3d779d8",
"sha256:f44e517131a98f7a76696a7b21b164bcb85291cee106a23beccce454e1f433a4",
"sha256:f7ee479e96f7ee350db1cf24afa5685a5899e2b34992fb99e1f7c1b0b758d263"
],
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'",
"version": "==5.4.0"
}
},
"develop": {}
}

17
server/Program.cs

@ -1,17 +0,0 @@
using System;
using Microsoft.AspNetCore.Hosting;
using Microsoft.Extensions.Hosting;
namespace Server {
/// <summary>
/// Process entry point: prints the working directory, then builds and runs the web host.
/// </summary>
public class Program {
    /// <summary>
    /// Writes the current directory to the console and runs the host until it shuts down.
    /// </summary>
    /// <param name="args">Command-line arguments, forwarded to the host builder.</param>
    public static void Main(string[] args) {
        Console.WriteLine($"Current directory: {Environment.CurrentDirectory}");
        var host = CreateHostBuilder(args).Build();
        host.Run();
    }

    /// <summary>
    /// Creates the default host builder and registers <c>Startup</c> as the web host's startup class.
    /// </summary>
    /// <param name="args">Command-line arguments passed to <c>Host.CreateDefaultBuilder</c>.</param>
    /// <returns>The configured, not yet built, host builder.</returns>
    public static IHostBuilder CreateHostBuilder(string[] args) =>
        Host.CreateDefaultBuilder(args)
            .ConfigureWebHostDefaults(webBuilder => webBuilder.UseStartup<Startup>());
}
}

31
server/Properties/launchSettings.json

@ -1,31 +0,0 @@
{
"$schema": "https://json.schemastore.org/launchsettings.json",
"iisSettings": {
"windowsAuthentication": false,
"anonymousAuthentication": true,
"iisExpress": {
"applicationUrl": "http://localhost:8771",
"sslPort": 44319
}
},
"profiles": {
"IIS Express": {
"commandName": "IISExpress",
"launchBrowser": true,
"launchUrl": "swagger",
"environmentVariables": {
"ASPNETCORE_ENVIRONMENT": "Development"
}
},
"server": {
"commandName": "Project",
"dotnetRunMessages": "true",
"launchBrowser": true,
"launchUrl": "swagger",
"applicationUrl": "https://localhost:5001;http://localhost:5000",
"environmentVariables": {
"ASPNETCORE_ENVIRONMENT": "Development"
}
}
}
}

116
server/Services/Implementations/DataManager.cs

@ -1,116 +0,0 @@
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Net;
using System.Net.Http;
using System.Threading.Tasks;
using InfoferScraper;
using InfoferScraper.Models.Station;
using InfoferScraper.Models.Train;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using scraper.Models.Itinerary;
using Server.Models;
using Server.Services.Interfaces;
using Server.Utils;
namespace Server.Services.Implementations {
    /// <summary>
    /// Fetches station, train and itinerary data through the scrapers, caching results
    /// per (key, CFR-local date) and forwarding every fresh result to the database
    /// in the background.
    /// </summary>
    public class DataManager : IDataManager {
        private ILogger<DataManager> Logger { get; }
        private IDatabase Database { get; }

        private NodaTime.IDateTimeZoneProvider TzProvider { get; }
        private NodaTime.DateTimeZone CfrTimeZone => TzProvider["Europe/Bucharest"];

        private readonly AsyncCache<(string, DateOnly), IStationScrapeResult?> stationCache;
        private readonly AsyncCache<(string, DateOnly), ITrainScrapeResult?> trainCache;
        private readonly AsyncCache<(string, string, DateOnly), IReadOnlyList<IItinerary>?> itinerariesCache;

        public DataManager(NodaTime.IDateTimeZoneProvider tzProvider, IDatabase database, ILogger<DataManager> logger, IOptions<ProxySettings> proxySettings) {
            this.TzProvider = tzProvider;
            this.Database = database;
            this.Logger = logger;

            // One handler (and therefore one proxy configuration) is shared by all scrapers.
            HttpClientHandler httpClientHandler = new() {
                UseProxy = proxySettings.Value.UseProxy,
                Proxy = proxySettings.Value.UseProxy ? new WebProxy(proxySettings.Value.Url) {
                    Credentials = string.IsNullOrEmpty(proxySettings.Value.Credentials?.Username) ? null : new NetworkCredential(proxySettings.Value.Credentials.Username, proxySettings.Value.Credentials.Password),
                } : null,
            };
            InfoferScraper.Scrapers.StationScraper stationScraper = new(httpClientHandler);
            InfoferScraper.Scrapers.TrainScraper trainScraper = new(httpClientHandler);
            InfoferScraper.Scrapers.RouteScraper routeScraper = new(httpClientHandler);

            stationCache = new(async (t) => {
                var (stationName, date) = t;
                Logger.LogDebug("Fetching station {StationName} for date {Date}", stationName, date);
                var station = await stationScraper.Scrape(stationName, StartOfCfrDay(date));
                if (station != null) {
                    RunInBackground("OnStationData", () => Database.OnStationData(station));
                }
                return station;
            }, TimeSpan.FromMinutes(1));

            trainCache = new(async (t) => {
                var (trainNumber, date) = t;
                Logger.LogDebug("Fetching train {TrainNumber} for date {Date}", trainNumber, date);
                var train = await trainScraper.Scrape(trainNumber, StartOfCfrDay(date));
                if (train != null) {
                    RunInBackground("OnTrainData", () => Database.OnTrainData(train));
                }
                return train;
            }, TimeSpan.FromSeconds(30));

            itinerariesCache = new(async (t) => {
                var (from, to, date) = t;
                Logger.LogDebug("Fetching itinerary from {From} to {To} for date {Date}", from, to, date);
                var itineraries = await routeScraper.Scrape(from, to, StartOfCfrDay(date));
                if (itineraries != null) {
                    RunInBackground("OnItineraries", () => Database.OnItineraries(itineraries));
                }
                return itineraries;
            }, TimeSpan.FromMinutes(1));
        }

        /// <summary>Fetches (or returns the cached) station board for the CFR-local day containing <paramref name="date"/>.</summary>
        public Task<IStationScrapeResult?> FetchStation(string stationName, DateTimeOffset date) {
            // The station name is normalised so that spelling variants share one cache entry.
            return stationCache.GetItem((stationName.RoLettersToEn().ToLowerInvariant(), ToCfrDate(date)));
        }

        /// <summary>Fetches (or returns the cached) train data for the CFR-local day containing <paramref name="date"/>.</summary>
        public Task<ITrainScrapeResult?> FetchTrain(string trainNumber, DateTimeOffset date) {
            return trainCache.GetItem((trainNumber, ToCfrDate(date)));
        }

        /// <summary>Fetches (or returns the cached) itineraries; <paramref name="date"/> defaults to now.</summary>
        public async Task<IReadOnlyList<IItinerary>?> FetchItineraries(string from, string to, DateTimeOffset? date = null) {
            return await itinerariesCache.GetItem((from, to, ToCfrDate(date ?? DateTimeOffset.Now)));
        }

        // Calendar date of the given instant as seen in the CFR (Europe/Bucharest) time zone.
        private DateOnly ToCfrDate(DateTimeOffset instant) {
            var cfrDateTime = new NodaTime.ZonedDateTime(NodaTime.Instant.FromDateTimeOffset(instant), CfrTimeZone);
            return new DateOnly(cfrDateTime.Year, cfrDateTime.Month, cfrDateTime.Day);
        }

        // Start of the given calendar date in the CFR time zone, as the DateTimeOffset the scrapers expect.
        private DateTimeOffset StartOfCfrDay(DateOnly date) {
            return new NodaTime.LocalDate(date.Year, date.Month, date.Day)
                .AtStartOfDayInZone(CfrTimeZone)
                .ToDateTimeOffset();
        }

        // Runs a database update without blocking the request. Failures are logged here
        // because nobody awaits the task, so an exception would otherwise go unobserved.
        private void RunInBackground(string operation, Func<Task> work) {
            _ = Task.Run(async () => {
                var watch = Stopwatch.StartNew();
                try {
                    await work();
                    Logger.LogInformation("{Operation} timing: {ElapsedMs} ms", operation, watch.ElapsedMilliseconds);
                }
                catch (Exception e) {
                    Logger.LogError(e, "{Operation} failed after {ElapsedMs} ms", operation, watch.ElapsedMilliseconds);
                }
            });
        }
    }
}

390
server/Services/Implementations/Database.cs

@ -1,390 +0,0 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;
using InfoferScraper.Models.Station;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using MongoDB.Bson;
using MongoDB.Bson.Serialization.Attributes;
using MongoDB.Driver;
using Newtonsoft.Json;
using Newtonsoft.Json.Linq;
using Newtonsoft.Json.Serialization;
using scraper.Models.Itinerary;
using Server.Models.Database;
using Server.Utils;
namespace Server.Services.Implementations;
public class Database : Server.Services.Interfaces.IDatabase {
// Camel-case JSON settings used when reading/writing the legacy JSON files during migration.
private static readonly JsonSerializerSettings jsonSerializerSettings = new() {
ContractResolver = new DefaultContractResolver {
NamingStrategy = new CamelCaseNamingStrategy(),
},
};
private ILogger<Database> Logger { get; }
// Current database version record; defaults to version 3 and may be replaced by Migration().
public DbRecord DbData { get; private set; } = new(3);
// All station listings, sorted by the size of their stoppedAtBy array (largest first).
// NOTE: every read of this property runs the aggregation against MongoDB.
public IReadOnlyList<StationListing> Stations => stationListingsCollection
.Aggregate(PipelineDefinition<StationListing, StationListing>.Create(
"{ $addFields: { stoppedAtCount: { $size: \"$stoppedAtBy\" } } }",
"{ $sort: { stoppedAtCount: -1 } }",
"{ $unset: \"stoppedAtCount\" }"
))
.ToList();
// All train listings / station aliases; each read queries the whole collection.
public IReadOnlyList<TrainListing> Trains => trainListingsCollection.FindSync(_ => true).ToList();
public IReadOnlyList<StationAlias> StationAliases => stationAliasCollection.FindSync(_ => true).ToList();
// Legacy file-based storage locations; only read (and deleted) by Migration().
// DB_DIR overrides the default "<current directory>/db".
private static readonly string DbDir = Environment.GetEnvironmentVariable("DB_DIR") ?? Path.Join(Environment.CurrentDirectory, "db");
private static readonly string DbFile = Path.Join(DbDir, "db.json");
private static readonly string StationsFile = Path.Join(DbDir, "stations.json");
private static readonly string TrainsFile = Path.Join(DbDir, "trains.json");
private readonly IMongoDatabase db;
private readonly IMongoCollection<DbRecord> dbRecordCollection;
private readonly IMongoCollection<TrainListing> trainListingsCollection;
private readonly IMongoCollection<StationListing> stationListingsCollection;
private readonly IMongoCollection<StationAlias> stationAliasCollection;
// Limits the number of concurrently running MongoDB requests issued through MakeRequest.
private readonly AsyncThrottle throttle;
// In-memory caches: train number -> listing id, and station name -> listing id.
// NOTE(review): plain dictionaries, read outside the insert locks — confirm concurrent access is acceptable.
private readonly Dictionary<string, string> trainObjectIds = new();
private readonly Dictionary<string, string> stationObjectIds = new();
// Connects to MongoDB, resolves the collections, runs the (synchronous) migration
// and then starts warming the station id cache in the background.
public Database(ILogger<Database> logger, IOptions<MongoSettings> mongoSettings) {
Logger = logger;
var settings = MongoClientSettings.FromConnectionString(mongoSettings.Value.ConnectionString);
settings.ServerApi = new(ServerApiVersion.V1);
settings.MaxConnectionPoolSize = 10000;
MongoClient mongoClient = new(settings);
Logger.LogDebug("Created monogClient");
// Allow at most half of the connection pool to be used by throttled requests.
throttle = new(mongoClient.Settings.MaxConnectionPoolSize / 2);
db = mongoClient.GetDatabase(mongoSettings.Value.DatabaseName) ?? throw new NullReferenceException("Unable to get Mongo database");
Logger.LogDebug("Created db");
dbRecordCollection = db.GetCollection<DbRecord>("db");
trainListingsCollection = db.GetCollection<TrainListing>("trainListings");
stationListingsCollection = db.GetCollection<StationListing>("stationListings");
stationAliasCollection = db.GetCollection<StationAlias>("stationAliases");
// Migration must complete before the collections are used; it runs synchronously here.
Migration();
// Fire-and-forget: the task is not awaited, so an exception thrown by Initialize is not observed here.
Task.Run(async () => await Initialize());
}
// Brings the stored data up to DB version 3, one version step per call; after each
// completed step the method calls itself again to re-evaluate the state.
//   * no db.json but trains.json present -> migrate the JSON files from version 1 to 2
//   * db.json present with version 2      -> import the JSON files into MongoDB (version 3)
//   * no db.json                          -> read (or create) the version record in MongoDB
// Throws when an unexpected version is found.
private void Migration() {
if (!File.Exists(DbFile) && File.Exists(TrainsFile)) {
Logger.LogInformation("Migrating DB version 1 -> 2");
if (File.Exists(StationsFile)) {
Logger.LogDebug("Converting StationsFile");
var oldStations = JToken.Parse(File.ReadAllText(StationsFile));
List<StationListing> stations = new();
if (oldStations != null) {
Logger.LogDebug("Found {StationsCount} stations", oldStations.Children().Count());
foreach (var station in oldStations.Children()) {
if (station == null) continue;
// Version 2 stores the train numbers in stoppedAtBy as strings.
station["stoppedAtBy"] = new JArray(station["stoppedAtBy"]!.Children().Select(num => (JToken)(num!).ToString()!).ToArray());
}
stations = oldStations.ToObject<List<StationListing>>(JsonSerializer.Create(jsonSerializerSettings))!;
}
Logger.LogDebug("Rewriting StationsFile");
File.WriteAllText(StationsFile, JsonConvert.SerializeObject(stations, jsonSerializerSettings));
}
if (File.Exists(TrainsFile)) {
Logger.LogDebug("Converting TrainsFile");
var oldTrains = JToken.Parse(File.ReadAllText(TrainsFile));
List<TrainListing> trains = new();
if (oldTrains != null) {
Logger.LogDebug("Found {TrainsCount} trains", oldTrains.Children().Count());
foreach (var train in oldTrains.Children()) {
if (train == null) continue;
// Version 2 keeps only the string form of the number, under the "number" key.
train["number"] = train["numberString"];
train["numberString"]?.Remove();
}
trains = oldTrains.ToObject<List<TrainListing>>(JsonSerializer.Create(jsonSerializerSettings))!;
}
Logger.LogDebug("Rewriting TrainsFile");
File.WriteAllText(TrainsFile, JsonConvert.SerializeObject(trains, jsonSerializerSettings));
}
DbData = new(2);
File.WriteAllText(DbFile, JsonConvert.SerializeObject(DbData, jsonSerializerSettings));
// db.json now exists with version 2; re-run to continue with the 2 -> 3 step.
Migration();
}
else if (File.Exists(DbFile)) {
var oldDbData = JToken.Parse(File.ReadAllText(DbFile));
if (((int?)oldDbData?["version"]) == 2) {
Logger.LogInformation("Migrating DB version 2 -> 3 (transition from fs+JSON to MongoDB)");
if (File.Exists(StationsFile)) {
Logger.LogDebug("Converting StationsFile");
// NOTE(review): the result may be null or empty here; presumably InsertMany rejects
// that — confirm against the driver documentation.
var stations = JsonConvert.DeserializeObject<List<StationListing>>(File.ReadAllText(StationsFile));
stationListingsCollection.InsertMany(stations);
File.Delete(StationsFile);
}
if (File.Exists(TrainsFile)) {
Logger.LogDebug("Converting TrainsFile");
// NOTE(review): same null/empty concern as for the stations file above.
var trains = JsonConvert.DeserializeObject<List<TrainListing>>(File.ReadAllText(TrainsFile));
trainListingsCollection.InsertMany(trains);
File.Delete(TrainsFile);
}
File.Delete(DbFile);
try {
Directory.Delete(DbDir);
}
catch (Exception) {
// Deleting of the directory is optional; may not be allowed in Docker or similar
}
// Any version records already present in MongoDB are backed up to "db-backup" and removed
// before the version 3 record is written.
var x = dbRecordCollection.FindSync(_ => true).ToList()!;
if (x.Count != 0) {
Logger.LogWarning("db collection contained data when migrating to V3");
using (var _ = Logger.BeginScope("Already existing data:")) {
foreach (var dbRecord in x) {
Logger.LogInformation("Id: {Id}, Version: {Version}", dbRecord.Id, dbRecord.Version);
}
}
Logger.LogInformation("Backing up existing data");
var backupDbRecordCollection = db.GetCollection<DbRecord>("db-backup");
backupDbRecordCollection.InsertMany(x);
Logger.LogDebug("Removing existing data");
dbRecordCollection.DeleteMany(_ => true);
}
dbRecordCollection.InsertOne(new(3));
// db.json is gone now; re-run to load and validate the version record from MongoDB.
Migration();
}
else {
throw new("Unexpected Database version, only DB Version 2 uses DbFile");
}
}
else {
// No db.json: the version record lives in MongoDB.
var datas = dbRecordCollection.FindSync(_ => true).ToList();
if (datas.Count == 0) {
Logger.LogInformation("No db record found, new database");
dbRecordCollection.InsertOne(DbData);
}
else {
// Only the first record is considered.
DbData = datas[0];
}
if (DbData.Version == 3) {
Logger.LogInformation("Using MongoDB Database Version 3; noop");
}
else {
throw new($"Unexpected Database version: {DbData.Version}");
}
}
}
/// <summary>
/// Warms the in-memory station name → listing id map from the station alias collection.
/// </summary>
private async Task Initialize() {
    await foreach (var entry in await stationAliasCollection.FindAsync(_ => true)) {
        if (entry?.ListingId is null) continue;
        // Indexer assignment instead of Add: this runs as an unobserved background task while
        // requests may already be filling the map, and Add would throw on a name that is already
        // present (or on a duplicated alias), silently aborting the rest of the warm-up.
        stationObjectIds[entry.Name] = entry.ListingId;
    }
}
// Serialises the look-up-then-insert sequence in FoundTrain.
private readonly SemaphoreSlim insertTrainLock = new(1, 1);

/// <summary>
/// Records that a train exists, inserting a listing for it when none is stored yet.
/// </summary>
/// <returns>The train number reduced to its leading run of ASCII digits.</returns>
public async Task<string> FoundTrain(string rank, string number, string company) {
    var digits = string.Concat(number.TakeWhile(ch => ch is >= '0' and <= '9'));

    // A cached listing id means the train is already in the database.
    if (trainObjectIds.ContainsKey(digits)) {
        return digits;
    }

    await insertTrainLock.WaitAsync();
    try {
        var cursor = await throttle.MakeRequest(() => trainListingsCollection.FindAsync(
            Builders<TrainListing>.Filter.Eq("number", digits)
        ));
        var matches = await cursor.ToListAsync();

        if (matches.Count != 0) {
            // Already stored: only remember the ids.
            foreach (var match in matches) {
                trainObjectIds[match.Number] = match.Id!;
            }
            return digits;
        }

        Logger.LogDebug("Found train {Rank} {Number} from {Company}", rank, digits, company);
        TrainListing listing = new(number: digits, rank: rank, company: company);
        await throttle.MakeRequest(() => trainListingsCollection.InsertOneAsync(listing));
        if (listing.Id != null) {
            trainObjectIds[digits] = listing.Id;
        }
        return digits;
    }
    finally {
        insertTrainLock.Release();
    }
}
// Serialises the upsert sequence in FoundStation.
private readonly SemaphoreSlim insertStationLock = new(1, 1);

/// <summary>
/// Records that a station exists: upserts a listing with an empty stoppedAtBy list and,
/// when a new listing was inserted, caches its id and stores a matching alias.
/// </summary>
public async Task FoundStation(string name) {
    // If there is a matching ObjectId, then it's already in the database
    if (stationObjectIds.ContainsKey(name)) return;

    await insertStationLock.WaitAsync();
    UpdateResult update;
    try {
        update = await stationListingsCollection.UpdateOneAsync(
            Builders<StationListing>.Filter.Eq("name", name),
            Builders<StationListing>.Update.Combine(
                Builders<StationListing>.Update.SetOnInsert("name", name),
                Builders<StationListing>.Update.SetOnInsert("stoppedAtBy", new List<string>())
            ),
            new UpdateOptions {
                IsUpsert = true,
            }
        );
        // UpsertedId is only populated when the upsert inserted a new document. The update
        // consists solely of $setOnInsert operators, so ModifiedCount stays 0 both on insert
        // and on match and cannot be used to detect the insert.
        if (update.IsAcknowledged && update.UpsertedId is not null) {
            var listingId = update.UpsertedId.AsObjectId.ToString();
            stationObjectIds[name] = listingId;
            await stationAliasCollection.UpdateOneAsync(
                Builders<StationAlias>.Filter.Eq("name", name),
                Builders<StationAlias>.Update.Combine(
                    Builders<StationAlias>.Update.SetOnInsert("name", name),
                    Builders<StationAlias>.Update.SetOnInsert("listingId", listingId)
                ),
                new UpdateOptions { IsUpsert = true }
            );
        }
    }
    finally {
        insertStationLock.Release();
    }

    // Nothing matched the filter, so the station was new.
    if (update.IsAcknowledged && update.MatchedCount == 0) {
        Logger.LogDebug("Found station {StationName}", name);
    }
}
/// <summary>
/// Bulk variant of FoundStation: makes sure every given station has a listing and that
/// its listing id is cached. Stations that are not stored yet are inserted in one batch.
/// </summary>
public async Task FoundStations(IEnumerable<string> names) {
    // Distinct: a name passed twice (not every caller de-duplicates) would otherwise be
    // inserted as two separate listings below.
    var unknownStations = names
        .Distinct()
        .Where(s => !stationObjectIds.ContainsKey(s))
        .ToList();
    if (unknownStations.Count == 0) return;

    // Stations that exist in the database but are not cached yet.
    var existingStations = await (await stationListingsCollection.FindAsync(
        Builders<StationListing>.Filter.StringIn("name", unknownStations.Select((n) => new StringOrRegularExpression(n)))
    )).ToListAsync();
    var existingNames = new HashSet<string>();
    foreach (var existingStation in existingStations) {
        stationObjectIds[existingStation.Name] = existingStation.Id!;
        existingNames.Add(existingStation.Name);
    }

    unknownStations.RemoveAll(existingNames.Contains);
    if (unknownStations.Count == 0) return;

    var unknownStationListings = unknownStations.Select((s) => new StationListing(s, new())).ToList();
    await stationListingsCollection.InsertManyAsync(unknownStationListings);
    foreach (var listing in unknownStationListings) {
        stationObjectIds[listing.Name] = listing.Id!;
    }
    Logger.LogDebug("Found stations {StationNames}", unknownStations);
}
/// <summary>
/// Records that the given train stops at the given station by adding the train number
/// (leading ASCII digits only) to the station's stoppedAtBy set.
/// </summary>
public async Task FoundTrainAtStation(string stationName, string trainNumber) {
    var digits = string.Concat(trainNumber.TakeWhile(ch => ch is >= '0' and <= '9'));
    await FoundStation(stationName);

    // Address the listing by id when it is cached, otherwise by name.
    var byIdOrName = stationObjectIds.ContainsKey(stationName)
        ? Builders<StationListing>.Filter.Eq("_id", ObjectId.Parse(stationObjectIds[stationName]))
        : Builders<StationListing>.Filter.Eq("name", stationName);
    var addTrain = Builders<StationListing>.Update.AddToSet("stoppedAtBy", digits);

    var outcome = await throttle.MakeRequest(
        () => stationListingsCollection.UpdateOneAsync(byIdOrName, addTrain)
    );
    if (!outcome.IsAcknowledged || outcome.ModifiedCount <= 0) {
        return;
    }
    Logger.LogDebug("Found train {TrainNumber} at station {StationName}", digits, stationName);
}
/// <summary>
/// Records that the given train stops at every given station by adding the train number
/// (leading ASCII digits only) to each station's stoppedAtBy set.
/// </summary>
public async Task FoundTrainAtStations(IEnumerable<string> stationNames, string trainNumber) {
    trainNumber = string.Join("", trainNumber.TakeWhile(c => c is >= '0' and <= '9'));
    // Materialise once: the sequence is used several times below and may be lazy.
    var names = stationNames as string[] ?? stationNames.ToArray();
    await FoundStations(names);

    var objectIds = names
        .Select<string, ObjectId?>((stationName) => stationObjectIds.TryGetValue(stationName, out var id) ? ObjectId.Parse(id) : null)
        .ToList();

    UpdateResult updateResult;
    if (!objectIds.Any((id) => id is null)) {
        // Every listing id is cached: address the listings by id.
        updateResult = await throttle.MakeRequest(() => stationListingsCollection.UpdateManyAsync(
            Builders<StationListing>.Filter.In("_id", objectIds),
            Builders<StationListing>.Update.AddToSet("stoppedAtBy", trainNumber)
        ));
    }
    else {
        // At least one id is missing from the cache: fall back to matching by name.
        updateResult = await throttle.MakeRequest(() => stationListingsCollection.UpdateManyAsync(
            Builders<StationListing>.Filter.StringIn("name", names.Select(sn => new StringOrRegularExpression(sn))),
            Builders<StationListing>.Update.AddToSet("stoppedAtBy", trainNumber)
        ));
    }

    if (updateResult.IsAcknowledged && updateResult.ModifiedCount > 0) {
        // Log the materialised array rather than enumerating the caller's sequence again.
        Logger.LogDebug("Found train {TrainNumber} at stations {StationNames}", trainNumber, names);
    }
}
/// <summary>
/// Stores what a train scrape reveals: the train itself and every station it stops at.
/// </summary>
public async Task OnTrainData(InfoferScraper.Models.Train.ITrainScrapeResult trainData) {
    var normalizedNumber = await FoundTrain(trainData.Rank, trainData.Number, trainData.Operator);

    var stopNames =
        from trainGroup in trainData.Groups
        from stop in trainGroup.Stations
        select stop.Name;

    await FoundTrainAtStations(stopNames.Distinct(), normalizedNumber);
}
/// <summary>
/// Stores what a station scrape reveals: every arriving/departing train (once per train
/// number) together with the scraped station and the stations on the train's route.
/// </summary>
public async Task OnStationData(InfoferScraper.Models.Station.IStationScrapeResult stationData) {
    var scrapedStation = stationData.StationName;

    // Arrivals first, then departures; either list may be missing.
    List<IStationArrDep> entries = new();
    if (stationData.Arrivals != null) {
        entries.AddRange(stationData.Arrivals);
    }
    if (stationData.Departures != null) {
        entries.AddRange(stationData.Departures);
    }

    foreach (var entry in entries.DistinctBy((e) => e.Train.Number)) {
        var normalizedNumber = await FoundTrain(entry.Train.Rank, entry.Train.Number, entry.Train.Operator);
        var stationsOnRoute = new[] { scrapedStation }.Concat(entry.Train.Route).Distinct();
        await FoundTrainAtStations(stationsOnRoute, normalizedNumber);
    }
}
/// <summary>
/// Stores what an itinerary scrape reveals: for every train of every itinerary, the
/// intermediate stops plus the boarding and alighting stations.
/// </summary>
public async Task OnItineraries(IReadOnlyList<IItinerary> itineraries) {
    var allTrains = itineraries.SelectMany(itinerary => itinerary.Trains);
    foreach (var train in allTrains) {
        var endpoints = new[] { train.From, train.To };
        await FoundTrainAtStations(train.IntermediateStops.Concat(endpoints), train.TrainNumber);
    }
}
}
/// <summary>
/// Version marker of the stored data; kept in the "db" collection and, for older
/// versions, serialized to db.json.
/// </summary>
/// <param name="Id">Document id, represented as an ObjectId in BSON; omitted from JSON when null.</param>
/// <param name="Version">Schema version of the stored data.</param>
public record DbRecord(
[property: BsonId]
[property: BsonRepresentation(BsonType.ObjectId)]
[property: JsonProperty(NullValueHandling = NullValueHandling.Ignore)]
string? Id,
int Version
) {
/// <summary>Creates a record without an id.</summary>
public DbRecord(int version) : this(null, version) { }
}

14
server/Services/Interfaces/IDataManager.cs

@ -1,14 +0,0 @@
using System;
using System.Collections.Generic;
using System.Threading.Tasks;
using InfoferScraper.Models.Train;
using InfoferScraper.Models.Station;
using scraper.Models.Itinerary;
namespace Server.Services.Interfaces;
/// <summary>Entry point for fetching scraped station, train and itinerary data.</summary>
public interface IDataManager {
    /// <summary>Fetches the data of the named station for the given date.</summary>
    Task<IStationScrapeResult?> FetchStation(string stationName, DateTimeOffset date);

    /// <summary>Fetches the data of the train with the given number for the given date.</summary>
    Task<ITrainScrapeResult?> FetchTrain(string trainNumber, DateTimeOffset date);

    /// <summary>Fetches the itineraries between two stations; <paramref name="date"/> is optional.</summary>
    Task<IReadOnlyList<IItinerary>?> FetchItineraries(string from, string to, DateTimeOffset? date = null);
}

20
server/Services/Interfaces/IDatabase.cs

@ -1,20 +0,0 @@
using System.Collections.Generic;
using System.Threading.Tasks;
using InfoferScraper.Models.Train;
using InfoferScraper.Models.Station;
using scraper.Models.Itinerary;
using Server.Models.Database;
namespace Server.Services.Interfaces;
/// <summary>Store of the stations and trains discovered while scraping.</summary>
public interface IDatabase {
    /// <summary>All known station listings.</summary>
    IReadOnlyList<StationListing> Stations { get; }

    /// <summary>All known train listings.</summary>
    IReadOnlyList<TrainListing> Trains { get; }

    /// <summary>Records a train and returns a train number as a string.</summary>
    Task<string> FoundTrain(string rank, string number, string company);

    /// <summary>Records a station.</summary>
    Task FoundStation(string name);

    /// <summary>Records that a train stops at a station.</summary>
    Task FoundTrainAtStation(string stationName, string trainName);

    /// <summary>Records everything contained in a train scrape result.</summary>
    Task OnTrainData(ITrainScrapeResult trainData);

    /// <summary>Records everything contained in a station scrape result.</summary>
    Task OnStationData(IStationScrapeResult stationData);

    /// <summary>Records everything contained in a list of itineraries.</summary>
    Task OnItineraries(IReadOnlyList<IItinerary> itineraries);
}

105
server/Startup.cs

@ -1,105 +0,0 @@
using System;
using System.Net;
using Microsoft.AspNetCore.Builder;
using Microsoft.AspNetCore.Hosting;
using Microsoft.AspNetCore.Http;
using Microsoft.AspNetCore.HttpOverrides;
using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Hosting;
using Microsoft.OpenApi.Models;
using MongoDB.Bson.Serialization.Conventions;
using Newtonsoft.Json.Serialization;
using Server.Models;
using Server.Models.Database;
using Server.Services.Implementations;
using Server.Services.Interfaces;
namespace Server {
    /// <summary>Configures dependency injection and the HTTP request pipeline.</summary>
    public class Startup {
        public Startup(IConfiguration configuration) {
            Configuration = configuration;
        }

        public IConfiguration Configuration { get; }

        // This method gets called by the runtime. Use this method to add services to the container.
        public void ConfigureServices(IServiceCollection services) {
            // All forwarded-header settings live here so that the parameterless
            // UseForwardedHeaders() in Configure picks them up. Passing a separate options
            // object to UseForwardedHeaders would bypass this configuration, including
            // the known proxy added below.
            services.Configure<ForwardedHeadersOptions>(options => {
                options.ForwardedHeaders = ForwardedHeaders.XForwardedFor | ForwardedHeaders.XForwardedProto;
                if (string.IsNullOrEmpty(Environment.GetEnvironmentVariable("INSIDE_DOCKER"))) {
                    return;
                }
                try {
                    options.KnownProxies.Add(Dns.GetHostAddresses("host.docker.internal")[0]);
                }
                catch (Exception e) when (e is System.Net.Sockets.SocketException or IndexOutOfRangeException) {
                    // Best effort: a host that cannot be resolved must not prevent startup.
                    Console.Error.WriteLine($"Unable to resolve host.docker.internal, not trusting it as a proxy: {e.Message}");
                }
            });

            services.Configure<ProxySettings>(Configuration.GetSection("Proxy"));
            services.Configure<MongoSettings>(Configuration.GetSection("TrainDataMongo"));

            // Store BSON element names in camelCase for every type.
            var conventionPack = new ConventionPack { new CamelCaseElementNameConvention() };
            ConventionRegistry.Register("camelCase", conventionPack, _ => true);

            services.AddSingleton<IDataManager, DataManager>();
            services.AddSingleton<IDatabase, Database>();
            services.AddSingleton(NodaTime.DateTimeZoneProviders.Tzdb);

            services.AddControllers()
                .AddNewtonsoftJson(options => {
                    options.SerializerSettings.ContractResolver = new DefaultContractResolver {
                        NamingStrategy = new CamelCaseNamingStrategy(),
                    };
                });
            services.AddSwaggerGen(c => {
                c.SwaggerDoc("v1", new OpenApiInfo { Title = "InfoTren Scraper", Version = "v1" });
                c.SwaggerDoc("v2", new OpenApiInfo { Title = "InfoTren Scraper", Version = "v2" });
                c.SwaggerDoc("v3", new OpenApiInfo { Title = "InfoTren Scraper", Version = "v3" });
            });
        }

        // This method gets called by the runtime. Use this method to configure the HTTP request pipeline.
        public void Configure(IApplicationBuilder app, IWebHostEnvironment env) {
            // Uses the ForwardedHeadersOptions registered in ConfigureServices.
            app.UseForwardedHeaders();

            if (env.IsDevelopment()) {
                app.UseDeveloperExceptionPage();
            }

            app.UseSwagger();
            app.UseSwaggerUI(c => {
                c.SwaggerEndpoint("/swagger/v3/swagger.json", "InfoTren Scraper v3");
                c.SwaggerEndpoint("/swagger/v2/swagger.json", "InfoTren Scraper v2");
                c.SwaggerEndpoint("/swagger/v1/swagger.json", "InfoTren Scraper v1");
            });

            // Serve a RapiDoc page for the v3 API description under /rapidoc.
            app.MapWhen(x => x.Request.Path.StartsWithSegments("/rapidoc"), appBuilder => {
                appBuilder.Run(async context => {
                    context.Response.ContentType = "text/html";

                    await context.Response.WriteAsync(
                        """
                        <!doctype html> <!-- Important: must specify -->
                        <html>
                        <head>
                        <meta charset="utf-8"> <!-- Important: rapi-doc uses utf8 characters -->
                        <script type="module" src="https://unpkg.com/rapidoc/dist/rapidoc-min.js"></script>
                        </head>
                        <body>
                        <rapi-doc
                        spec-url="/swagger/v3/swagger.json"
                        theme = "dark"
                        > </rapi-doc>
                        </body>
                        </html>
                        """
                    );
                });
            });

            // app.UseHttpsRedirection();

            app.UseRouting();

            app.UseAuthorization();

            app.UseEndpoints(endpoints => { endpoints.MapControllers(); });
        }
    }
}

15
server/Utils/ActionDisposable.cs

@ -1,15 +0,0 @@
using System;
namespace Server.Utils;
/// <summary>
/// Adapts a delegate to <see cref="IDisposable"/>: disposing the object invokes the delegate.
/// </summary>
public class ActionDisposable : IDisposable {
    /// <summary>The delegate invoked by every call to <see cref="Dispose"/>.</summary>
    public Action Action { get; init; }

    public ActionDisposable(Action action) => Action = action;

    public void Dispose() => Action.Invoke();
}

38
server/Utils/AsyncThrottle.cs

@ -1,38 +0,0 @@
using System;
using System.Threading;
using System.Threading.Tasks;
namespace Server.Utils;
// Inspired from: https://stackoverflow.com/a/57517920
/// <summary>
/// Limits how many asynchronous operations started through MakeRequest run at the same time.
/// </summary>
public class AsyncThrottle {
    private readonly SemaphoreSlim openConnectionSemaphore;

    /// <param name="limit">Maximum number of operations allowed to run concurrently.</param>
    public AsyncThrottle(int limit) => openConnectionSemaphore = new SemaphoreSlim(limit, limit);

    /// <summary>Waits for a free slot, then awaits an already created task.</summary>
    public Task<T> MakeRequest<T>(Task<T> task) => MakeRequest(() => task);

    /// <summary>Waits for a free slot, then creates and awaits the task; the slot is released afterwards.</summary>
    public async Task<T> MakeRequest<T>(Func<Task<T>> taskCreator) {
        await openConnectionSemaphore.WaitAsync();
        try {
            return await taskCreator();
        }
        finally {
            openConnectionSemaphore.Release();
        }
    }

    /// <summary>Waits for a free slot, then awaits an already created task.</summary>
    public Task MakeRequest(Task task) => MakeRequest(() => task);

    /// <summary>Waits for a free slot, then creates and awaits the task; the slot is released afterwards.</summary>
    public async Task MakeRequest(Func<Task> taskCreator) {
        await openConnectionSemaphore.WaitAsync();
        try {
            await taskCreator();
        }
        finally {
            openConnectionSemaphore.Release();
        }
    }
}

69
server/Utils/Cache.cs

@ -1,69 +0,0 @@
using System;
using System.Collections.Generic;
using System.Threading.Tasks;
namespace Server.Utils;
/// <summary>
/// Time-limited cache in front of a synchronous fetcher.
/// </summary>
/// <typeparam name="TKey">Key type used to look up entries.</typeparam>
/// <typeparam name="TValue">Type of the cached values.</typeparam>
public class Cache<TKey, TValue> where TKey: notnull {
    private readonly IDictionary<TKey, (TValue Data, DateTimeOffset FetchTime)> cache;
    // Guards the dictionary only; the fetcher itself runs outside the lock.
    private readonly object cacheLock = new();

    /// <summary>Produces the value for a key that is missing or expired.</summary>
    public Func<TKey, TValue> Fetcher { get; init; }
    /// <summary>How long a fetched value is served before it is fetched again.</summary>
    public TimeSpan Validity { get; init; }
    /// <summary>When true, null results are cached too; otherwise a null result is fetched again on every call.</summary>
    public bool StoreNull { get; init; }

    public Cache(Func<TKey, TValue> fetcher, TimeSpan validity, bool storeNull = false) {
        this.cache = new Dictionary<TKey, (TValue Data, DateTimeOffset FetchTime)>();
        Fetcher = fetcher;
        Validity = validity;
        StoreNull = storeNull;
    }

    /// <summary>Returns the cached value for <paramref name="key"/>, fetching it when missing or expired.</summary>
    public TValue GetItem(TKey key) {
        lock (cacheLock) {
            if (cache.TryGetValue(key, out var entry)) {
                if (entry.FetchTime + Validity > DateTimeOffset.Now) {
                    return entry.Data;
                }
                cache.Remove(key);
            }
        }

        var data = Fetcher(key);
        if (data != null || StoreNull) {
            lock (cacheLock) {
                cache[key] = (data, DateTimeOffset.Now);
            }
        }
        return data;
    }
}
/// <summary>
/// Time-limited cache in front of an asynchronous fetcher.
/// </summary>
/// <typeparam name="TKey">Key type used to look up entries.</typeparam>
/// <typeparam name="TValue">Type of the cached values.</typeparam>
public class AsyncCache<TKey, TValue> where TKey: notnull {
    private readonly IDictionary<TKey, (TValue Data, DateTimeOffset FetchTime)> cache;
    // Guards the dictionary only and is never held across an await; concurrent callers
    // asking for the same missing key may therefore each run the fetcher.
    private readonly object cacheLock = new();

    /// <summary>Produces the value for a key that is missing or expired.</summary>
    public Func<TKey, Task<TValue>> Fetcher { get; init; }
    /// <summary>How long a fetched value is served before it is fetched again.</summary>
    public TimeSpan Validity { get; init; }
    /// <summary>When true, null results are cached too; otherwise a null result is fetched again on every call.</summary>
    public bool StoreNull { get; init; }

    public AsyncCache(Func<TKey, Task<TValue>> fetcher, TimeSpan validity, bool storeNull = false) {
        this.cache = new Dictionary<TKey, (TValue Data, DateTimeOffset FetchTime)>();
        Fetcher = fetcher;
        Validity = validity;
        StoreNull = storeNull;
    }

    /// <summary>Returns the cached value for <paramref name="key"/>, fetching it when missing or expired.</summary>
    public async Task<TValue> GetItem(TKey key) {
        lock (cacheLock) {
            if (cache.TryGetValue(key, out var entry)) {
                if (entry.FetchTime + Validity > DateTimeOffset.Now) {
                    return entry.Data;
                }
                cache.Remove(key);
            }
        }

        var data = await Fetcher(key);
        if (data != null || StoreNull) {
            lock (cacheLock) {
                cache[key] = (data, DateTimeOffset.Now);
            }
        }
        return data;
    }
}

7
server/Utils/Constants.cs

@ -1,7 +0,0 @@
using NodaTime;
namespace Server.Utils;
/// <summary>Application-wide constants.</summary>
public static class Constants {
/// <summary>The "Europe/Bucharest" time zone, resolved from the TZDB provider.</summary>
public static readonly DateTimeZone BucharestTz = DateTimeZoneProviders.Tzdb["Europe/Bucharest"];
}

33
server/Utils/IAsyncCusorAsyncAdapter.cs

@ -1,33 +0,0 @@
using System.Collections.Generic;
using System.Threading.Tasks;
using MongoDB.Driver;
namespace Server.Utils;
/// <summary>
/// Exposes the documents of an <see cref="IAsyncCursor{T}"/> one at a time through
/// <c>MoveNextAsync</c>/<c>Current</c>, flattening the cursor's batches.
/// </summary>
public record IAsyncCusorAsyncEnumerator<T>(IAsyncCursor<T> Cursor) {
    // Enumerator over the batch currently being consumed; null before the first batch
    // and after the cursor is exhausted.
    private IEnumerator<T>? enumerator = null;

    /// <summary>The current document; only valid after MoveNextAsync returned true.</summary>
    public T Current => enumerator!.Current;

    /// <summary>Advances to the next document, fetching further batches as needed.</summary>
    /// <returns>false once the cursor has no more documents.</returns>
    public async Task<bool> MoveNextAsync() {
        while (true) {
            if (enumerator != null) {
                if (enumerator.MoveNext()) return true;
                // Current batch exhausted.
                enumerator.Dispose();
                enumerator = null;
            }

            if (!await Cursor.MoveNextAsync()) return false;

            // Loop again so the new batch enumerator is advanced before Current is read;
            // an empty batch simply leads to fetching the next one.
            enumerator = Cursor.Current.GetEnumerator();
        }
    }
}
/// <summary>Extension that wraps an <see cref="IAsyncCursor{T}"/> in an IAsyncCusorAsyncEnumerator.</summary>
public static class IAsyncCursorExtensions {
    public static IAsyncCusorAsyncEnumerator<T> GetAsyncEnumerator<T>(this IAsyncCursor<T> cursor) =>
        new IAsyncCusorAsyncEnumerator<T>(cursor);
}

13
server/appsettings.Development.json

@ -1,13 +0,0 @@
{
"Logging": {
"LogLevel": {
"Default": "Debug",
"Microsoft": "Warning",
"Microsoft.Hosting.Lifetime": "Information"
}
},
"TrainDataMongo": {
"ConnectionString": "mongodb://localhost:27017",
"DatabaseName": "NewInfoferScraper"
}
}

17
server/appsettings.json

@ -1,17 +0,0 @@
{
"ConnectionStrings": {
"caching": "Data Source=./caching.sqlite"
},
"Logging": {
"LogLevel": {
"Default": "Information",
"Microsoft": "Warning",
"Microsoft.Hosting.Lifetime": "Information"
}
},
"TrainDataMongo": {
"ConnectionString": "mongodb://mongo:27017",
"DatabaseName": "NewInfoferScraper"
},
"AllowedHosts": "*"
}

18
server/main.py

@ -0,0 +1,18 @@
from gevent.pywsgi import WSGIServer
from server.server import app
def main():
    """Start the gevent WSGI server for the application.

    The listening port is read from the ``PORT`` environment variable and
    falls back to 5000 when the variable is unset or not a valid integer.
    """
    import os

    port = 5000
    try:
        port = int(os.environ['PORT'])
    except (KeyError, ValueError):
        # PORT is unset or not an integer: keep the default. Anything else
        # (e.g. KeyboardInterrupt) is deliberately not swallowed.
        pass

    print(f'Starting server on port {port}')
    http_server = WSGIServer(('', port), app)
    http_server.serve_forever()
if __name__ == '__main__':
main()

1
server/omnisharp.json

@ -1 +0,0 @@
../omnisharp.json

25
server/server.csproj

@ -1,25 +0,0 @@
<Project Sdk="Microsoft.NET.Sdk.Web">
<PropertyGroup>
<Nullable>enable</Nullable>
<AssemblyName>Server</AssemblyName>
<RootNamespace>Server</RootNamespace>
<LangVersion>11</LangVersion>
<TargetFrameworks>net6.0;net7.0;net8.0</TargetFrameworks>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="Microsoft.AspNetCore.Mvc.NewtonsoftJson" Version="6.0.21" />
<PackageReference Include="Microsoft.Data.Sqlite" Version="6.0.1" />
<PackageReference Include="Microsoft.EntityFrameworkCore.Sqlite" Version="5.0.13" />
<PackageReference Include="MongoDB.Analyzer" Version="1.1.0" />
<PackageReference Include="MongoDB.Driver" Version="2.19.1" />
<PackageReference Include="Nanoid" Version="2.1.0" />
<PackageReference Include="Swashbuckle.AspNetCore" Version="5.6.3" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\scraper\scraper.csproj" />
</ItemGroup>
</Project>

1
server/server/__init__.py

@ -0,0 +1 @@
__all__ = ['server']

18
server/server/cache.py

@ -0,0 +1,18 @@
from datetime import date, datetime, timedelta
# Sentinel meaning "no initial data was supplied".
_NO_DEFAULT = object()


class CachedData:
    """Callable that caches the result of ``getter`` for a limited time.

    Calling the instance returns ``(data, last_refresh_date)``; ``getter`` is
    invoked again once the cached value is older than ``validity``.
    """

    def __init__(self, getter, initial_data=_NO_DEFAULT, validity=1000):
        """Create the cache.

        Args:
            getter: Zero-argument callable producing fresh data.
            initial_data: Value served until the first refresh; when omitted,
                the first call fetches through ``getter``.
            validity: Lifetime of a cached value, in milliseconds.
        """
        self.getter = getter
        self.data = initial_data
        self.last_refresh_date = datetime.now()
        self.validity = timedelta(milliseconds=validity)
        # Identity check: `==` would invoke the initial data's own __eq__.
        if initial_data is _NO_DEFAULT:
            # Back-date the refresh so that the first call is already expired.
            self.last_refresh_date -= self.validity

    def __call__(self, *args, **kwds):
        """Return ``(data, last_refresh_date)``, refreshing the data if expired.

        Positional and keyword arguments are accepted but ignored.
        """
        expired = self.last_refresh_date + self.validity < datetime.now()
        # Never hand the sentinel to the caller, even if the clock has not
        # advanced since construction.
        if expired or self.data is _NO_DEFAULT:
            self.data = self.getter()
            self.last_refresh_date = datetime.now()
        return self.data, self.last_refresh_date

173
server/server/db.py

@ -0,0 +1,173 @@
# In-memory state, persisted as JSON files in the database directory.
stations = []
trains = []
db_data = {'version': 2}

# Sample records showing the shape of the entries.
# NOTE: these show the pre-migration layout; the version 1 -> 2 migration
# stores the numbers in `stoppedAtBy` as strings and replaces a train's
# `number` with its `numberString`.
example_station = {
    'name': 'Gară',
    'stoppedAtBy': [123, 456],
}

example_train = {
    'rank': 'IR',
    'numberString': '74',
    'number': 74,
    'company': 'CFR Călători',
}
# Init
import json
import os
from os import path, stat
from contextlib import contextmanager
from .utils import take_while
# Directory holding the JSON files; overridable through the DB_DIR
# environment variable (an empty value counts as unset).
DB_DIR = os.environ.get('DB_DIR', '') or './db'
# makedirs(exist_ok=True) also creates missing parent directories and does not
# fail when another process creates the directory at the same time.
os.makedirs(DB_DIR, exist_ok=True)

DB_FILE = path.join(DB_DIR, 'db.json')

STATIONS_FILE = path.join(DB_DIR, 'stations.json')

TRAINS_FILE = path.join(DB_DIR, 'trains.json')
def migration():
    """Bring the JSON files up to database version 2.

    Without a db.json the data is treated as version 1: train numbers in
    ``stoppedAtBy`` become strings, every train's ``number`` is replaced by
    its ``numberString``, db.json is written with version 2 and the function
    runs once more. With a db.json present its content is loaded and version
    2 is reported as up to date.
    """
    global db_data
    global trains
    global stations

    if path.exists(DB_FILE):
        with open(DB_FILE) as db_file:
            db_data = json.load(db_file)
        if db_data['version'] == 2:
            print('[Migration] DB Version: 2, noop')
        return

    print('[Migration] Migrating DB version 1 -> 2')

    if path.exists(STATIONS_FILE):
        with open(STATIONS_FILE) as stations_file:
            stations = json.load(stations_file)
        for station in stations:
            station['stoppedAtBy'] = [str(num) for num in station['stoppedAtBy']]
        with open(STATIONS_FILE, 'w') as stations_file:
            json.dump(stations, stations_file)

    if path.exists(TRAINS_FILE):
        with open(TRAINS_FILE) as trains_file:
            trains = json.load(trains_file)
        for train in trains:
            train['number'] = train.pop('numberString')
        with open(TRAINS_FILE, 'w') as trains_file:
            json.dump(trains, trains_file)

    db_data = {
        'version': 2,
    }
    with open(DB_FILE, 'w') as db_file:
        json.dump(db_data, db_file)

    # db.json exists now; run again to take the "already migrated" path.
    migration()
# Migrate the stored files first, then load them into the module-level state.
migration()

if not path.exists(DB_FILE):
    # Safety net: persist the default version record.
    with open(DB_FILE, 'w') as db_file:
        json.dump(db_data, db_file)
else:
    with open(DB_FILE) as db_file:
        db_data = json.load(db_file)

if path.exists(STATIONS_FILE):
    with open(STATIONS_FILE) as stations_file:
        stations = json.load(stations_file)

if path.exists(TRAINS_FILE):
    with open(TRAINS_FILE) as trains_file:
        trains = json.load(trains_file)
_should_commit_on_every_change = True
@contextmanager
def db_transaction():
    """Batch database changes and persist them once when the block ends.

    While the ``with`` block runs, per-change commits are switched off. On a
    normal exit the metadata, stations (sorted by how many trains stop there,
    busiest first) and trains are written to their files. If the block raises,
    nothing is written and the exception propagates. In both cases the
    per-change commit flag is restored to the value it had on entry, so a
    failure cannot leave automatic commits disabled.
    """
    global _should_commit_on_every_change
    previous_flag = _should_commit_on_every_change
    _should_commit_on_every_change = False
    try:
        yield
        # Sort before opening the file so a failing sort cannot leave a
        # truncated stations file behind.
        stations.sort(key=lambda s: len(s['stoppedAtBy']), reverse=True)
        with open(DB_FILE, 'w') as f:
            json.dump(db_data, f)
        with open(STATIONS_FILE, 'w') as f:
            json.dump(stations, f)
        with open(TRAINS_FILE, 'w') as f:
            json.dump(trains, f)
    finally:
        _should_commit_on_every_change = previous_flag
def found_train(rank: str, number: str, company: str) -> str:
    """Register a train in the known-trains list and return its number.

    Only the leading numeric characters of *number* are kept. If no train
    with that number is known yet, a new record is appended. The trains file
    is rewritten unless a transaction has disabled per-change commits.

    Returns:
        The normalised train number, as a string.
    """
    number = ''.join(take_while(lambda s: str(s).isnumeric(), number))
    if not any(tr['number'] == number for tr in trains):
        trains.append({
            'number': number,
            'company': company,
            'rank': rank,
        })
    if _should_commit_on_every_change:
        with open(TRAINS_FILE, 'w') as f:
            json.dump(trains, f)
    return number
def found_station(name: str):
    """Make sure a station called *name* is in the known-stations list.

    An unknown station is appended with an empty ``stoppedAtBy`` list. Unless
    a transaction has disabled per-change commits, the list is sorted by
    number of stopping trains (most first) and written to the stations file.
    """
    already_known = any(entry['name'] == name for entry in stations)
    if not already_known:
        stations.append({
            'name': name,
            'stoppedAtBy': [],
        })

    if not _should_commit_on_every_change:
        return
    stations.sort(key=lambda s: len(s['stoppedAtBy']), reverse=True)
    with open(STATIONS_FILE, 'w') as out:
        json.dump(stations, out)
def found_train_at_station(station_name: str, train_number: str):
    """Record that train *train_number* stops at station *station_name*.

    The train number is reduced to its leading numeric characters, the
    station is registered if needed, and the number is added to the first
    matching station's ``stoppedAtBy`` list when not already there. Unless a
    transaction has disabled per-change commits, the stations are sorted by
    number of stopping trains (most first) and written to the stations file.
    """
    train_number = ''.join(take_while(lambda s: str(s).isnumeric(), train_number))
    found_station(station_name)

    match = next((st for st in stations if st['name'] == station_name), None)
    if match is not None and train_number not in match['stoppedAtBy']:
        match['stoppedAtBy'].append(train_number)

    if not _should_commit_on_every_change:
        return
    stations.sort(key=lambda s: len(s['stoppedAtBy']), reverse=True)
    with open(STATIONS_FILE, 'w') as out:
        json.dump(stations, out)
def on_train_data(train_data: dict):
    """Store a scraped train and every station it stops at, in one transaction.

    Reads ``rank``, ``number``, ``operator`` and ``stations`` (each entry
    having a ``name``) from *train_data*.
    """
    with db_transaction():
        train_no = found_train(
            train_data['rank'],
            train_data['number'],
            train_data['operator'],
        )
        for stop in train_data['stations']:
            found_train_at_station(stop['name'], train_no)
def on_train_lookup_failure(train_no: str):
    """Hook for a failed train lookup; currently does nothing."""
    return None
def on_station(station_data: dict):
    """Store every train listed in a scraped station board, in one transaction.

    Reads ``stationName``, ``arrivals`` and ``departures`` from
    *station_data*. Each board entry's ``train`` is registered as stopping at
    this station and at every station named in its ``route``, when it has one.
    """
    station_name = station_data['stationName']

    def process_train(train_data: dict):
        train = train_data['train']
        raw_number = train['number']
        train_number = found_train(train['rank'], raw_number, train['operator'])
        found_train_at_station(station_name, train_number)
        # 'route' may be missing or empty; either way there is nothing to add.
        for route_station in train.get('route') or ():
            found_train_at_station(route_station, train_number)

    with db_transaction():
        for board in ('arrivals', 'departures'):
            entries = station_data[board]
            if entries:
                for entry in entries:
                    process_train(entry)

29
server/server/flask_utils.py

@ -0,0 +1,29 @@
from flask import request as _f_request
from .utils import filter_result as _filter_result
def filtered_data(fn):
    """Decorate *fn* so its result is filtered by the current request's query.

    Two query parameters are read from the active Flask request:

    * ``filters`` - comma-separated ``key:value`` entries. Dotted keys
      (``a.b:1``) are nested, giving a tree where each level keeps its own
      conditions under the ``'.'`` key. Entries without a ``:`` are ignored.
    * ``properties`` - comma-separated list of property names to keep.

    The wrapped function's return value is passed, with the parsed
    ``properties`` and ``filters``, to ``filter_result``.
    """
    from functools import wraps

    def add_to(obj, key, value):
        # Walk (and create) one nesting level per dotted key segment.
        if '.' in key:
            prop, rest = key.split('.', 1)
            # Check the level being extended, not the root, so sibling
            # filters under the same prefix do not overwrite each other.
            if prop not in obj:
                obj[prop] = {'.': []}
            add_to(obj[prop], rest, value)
        else:
            obj['.'].append({key: value})

    @wraps(fn)
    def filterer(*args, **kwargs):
        filters = _f_request.args.get('filters', None)
        if filters:
            parsed = {'.': []}
            for entry in filters.split(','):
                key, separator, value = entry.partition(':')
                if not separator:
                    # Malformed entry (no "key:value"); skip instead of failing.
                    continue
                add_to(parsed, key, value)
            filters = parsed

        properties = _f_request.args.get('properties', None)
        if properties:
            properties = properties.split(',')

        data = fn(*args, **kwargs)
        return _filter_result(data, properties, filters)

    return filterer

1
server/server/scraper

@ -0,0 +1 @@
../../scraper

65
server/server/server.py

@ -0,0 +1,65 @@
print(f'Server {__name__=}')
import datetime
from flask import Flask, jsonify, url_for
from jsonschema import validate
from .cache import CachedData
from .scraper.schemas import TRAIN_INFO_SCHEMA
from .utils import get_hostname
app = Flask(__name__)
from .v2 import v2
app.register_blueprint(v2.bp)
@app.route('/')
def root():
    """Serve a fixed placeholder body at the site root."""
    return 'Test'
@app.route('/train/.schema.json')
def get_train_info_schema():
    """Return the version-1 train info JSON schema."""
    schema_v1 = TRAIN_INFO_SCHEMA['v1']
    return jsonify(schema_v1)
train_data_cache = {}
@app.route('/train/<int:train_no>')
def get_train_info(train_no: int):
    """Return version-1 train information for *train_no*.

    The scraped data is recorded in the database and converted to the v1
    shape: schedule times become ``h:mm`` strings and ``stoppingTime`` is
    integer-divided by 60. Results are kept per train number in
    ``train_data_cache``. The response carries the schema URL under
    ``$schema`` and the fetch time in the ``X-Last-Fetched`` header.
    """
    def as_clock_time(iso_timestamp):
        moment = datetime.datetime.fromisoformat(iso_timestamp)
        return f'{moment.hour}:{moment.minute:02}'

    def get_data():
        from .scraper.scraper import scrape_train
        use_yesterday = False
        result = scrape_train(train_no, use_yesterday=use_yesterday)

        from . import db
        db.on_train_data(result)

        # Convert to v1
        for stop in result['stations']:
            # datetime ISO string to hh:mm
            if stop['arrival']:
                stop['arrival']['scheduleTime'] = as_clock_time(stop['arrival']['scheduleTime'])
            if stop['departure']:
                stop['departure']['scheduleTime'] = as_clock_time(stop['departure']['scheduleTime'])
            if 'stoppingTime' in stop and stop['stoppingTime']:
                stop['stoppingTime'] //= 60
        return result

    if train_no not in train_data_cache:
        train_data_cache[train_no] = CachedData(get_data, validity=1000 * 30)
    cached = train_data_cache[train_no]
    data, fetch_time = cached()

    data['$schema'] = get_hostname() + url_for('.get_train_info_schema')
    validate(data, schema=TRAIN_INFO_SCHEMA['v1'])

    resp = jsonify(data)
    resp.headers['X-Last-Fetched'] = fetch_time.isoformat()
    return resp
@app.route('/trains')
def get_trains():
    """Return the train numbers that currently have a cache entry."""
    cached_numbers = list(train_data_cache.keys())
    return jsonify(cached_numbers)
if __name__ == '__main__':
    # The announced port and the bound port come from one variable so they
    # cannot drift apart (the message used to say 5001 while binding 5000).
    debug_port = 5000
    print(f'Starting debug server on port {debug_port}')
    app.run(port=debug_port)

41
server/server/utils.py

@ -0,0 +1,41 @@
def take_while(predicate, input):
    """Yield leading elements of *input* for as long as *predicate* holds.

    Iteration stops at the first element for which *predicate* is falsy;
    that element and everything after it are not yielded.
    """
    for item in input:
        if predicate(item):
            yield item
        else:
            return
# Sentinel meaning "no default supplied", so that None (or any other value)
# can be passed as a real default.
_NO_DEFAULT = object()


def check_yes_no(input: str, default=_NO_DEFAULT, considered_yes=None) -> bool:
    """Interpret *input* as a yes/no answer.

    The input is converted to a string, stripped and lower-cased before being
    looked up in *considered_yes*.

    Args:
        input: The raw answer.
        default: Returned as-is when the normalised input is empty.
        considered_yes: Values that count as "yes"; defaults to
            ``['y', 'yes', 't', 'true', '1']``.

    Returns:
        True if the normalised input is one of the "yes" values, False
        otherwise, or *default* for empty input.

    Raises:
        Exception: If the input is empty and no default was given.
    """
    input = str(input).strip().lower()
    if not input:
        # Identity check: a default with a permissive __eq__ must not be
        # mistaken for the sentinel.
        if default is _NO_DEFAULT:
            raise Exception('Empty input with no default')
        return default
    if not considered_yes:
        considered_yes = ['y', 'yes', 't', 'true', '1']
    return input in considered_yes
def get_hostname():
    """Return this machine's host name.

    The ``HOSTNAME`` environment variable wins if set, then ``COMPUTERNAME``,
    and finally ``platform.node()``.
    """
    import os
    import platform

    node_name = platform.node()
    computer_name = os.getenv('COMPUTERNAME', node_name)
    return os.getenv('HOSTNAME', computer_name)
def filter_result(data, properties=None, filters=None):
    """Project *data* onto the requested *properties*.

    *data* is either a single mapping (anything with a ``get`` attribute) or
    a sequence of mappings; the return value has the same shape. The input is
    never modified: a sequence is copied and projected items are new dicts,
    so shared state passed in by callers stays intact.

    Args:
        data: A mapping or a sequence of mappings.
        properties: Names to keep in each item; names an item lacks map to
            None. A falsy value keeps items unchanged.
        filters: Accepted for forward compatibility; not applied yet.

    Returns:
        The projected mapping, or a list of projected mappings.
    """
    is_array = not hasattr(data, 'get')
    # Work on a copy so the caller's list is not rewritten.
    result = list(data) if is_array else [data]

    if filters:
        # TODO: implement filters
        pass

    if properties:
        result = [{p: item.get(p, None) for p in properties} for item in result]

    return result if is_array else result[0]

1
server/server/v2/__init__.py

@ -0,0 +1 @@
__all__ = ['v2']

94
server/server/v2/v2.py

@ -0,0 +1,94 @@
from datetime import date, datetime
import json
from flask import Blueprint, jsonify, request
from flask.helpers import url_for
from jsonschema import validate
from .. import db
from ..cache import CachedData
from ..utils import check_yes_no, get_hostname
from ..flask_utils import filtered_data
from ..scraper.utils import ro_letters_to_en
from ..scraper.schemas import STATION_SCHEMA, TRAIN_INFO_SCHEMA
bp = Blueprint('v2', __name__, url_prefix='/v2')
@bp.get('/trains')
def get_known_trains():
    """Return the trains stored in the database, after request filtering."""
    @filtered_data
    def known_trains():
        return db.trains

    return jsonify(known_trains())
@bp.get('/stations')
def get_known_stations():
    """Return the stations stored in the database, after request filtering."""
    @filtered_data
    def known_stations():
        return db.stations

    return jsonify(known_stations())
train_data_cache = {}
@bp.route('/train/.schema.json')
def get_train_info_schema():
    """Return the version-2 train info JSON schema."""
    schema_v2 = TRAIN_INFO_SCHEMA['v2']
    return jsonify(schema_v2)
@bp.route('/train/<train_no>')
def get_train_info(train_no: str):
    """Return version-2 train information for *train_no*.

    Query parameters:
        use_yesterday: yes/no flag passed on to the scraper (default: no).
        date: optional ISO date/datetime passed to the scraper as
            ``date_override``; a missing or unparsable value is ignored.
        filters / properties: applied to the response by ``filtered_data``.

    The unfiltered scraper result is cached per (train, use_yesterday, date)
    and validated against the v2 schema. Request filtering is applied only to
    the copy sent back, so one request's filters never end up in the cache.
    The fetch time is reported in the ``X-Last-Fetched`` header.
    """
    use_yesterday = check_yes_no(request.args.get('use_yesterday', ''), default=False)

    date_override = request.args.get('date', default=None)
    if date_override is not None:
        # fromisoformat() raises TypeError for None, so only parse real input.
        try:
            date_override = datetime.fromisoformat(date_override)
        except ValueError:
            date_override = None

    def get_data():
        from ..scraper.scraper import scrape_train
        result = scrape_train(train_no, use_yesterday=use_yesterday, date_override=date_override)
        db.on_train_data(result)
        return result

    # Use one key for both the lookup and the store; it covers every input
    # the cached getter closes over.
    cache_key = (train_no, use_yesterday, date_override)
    if cache_key not in train_data_cache:
        train_data_cache[cache_key] = CachedData(get_data, validity=1000 * 30)
    data, fetch_time = train_data_cache[cache_key]()

    data['$schema'] = get_hostname() + url_for('.get_train_info_schema')
    validate(data, schema=TRAIN_INFO_SCHEMA['v2'])

    @filtered_data
    def apply_request_filters(data):
        return data

    resp = jsonify(apply_request_filters(data))
    resp.headers['X-Last-Fetched'] = fetch_time.isoformat()
    return resp
station_cache = {}
@bp.route('/station/.schema.json')
def get_station_schema():
    """Return the version-2 station JSON schema."""
    schema_v2 = STATION_SCHEMA['v2']
    return jsonify(schema_v2)
@bp.route('/station/<station_name>')
def get_station(station_name: str):
    """Return the version-2 arrivals/departures board for a station.

    The name is lower-cased, spaces become dashes and Romanian letters are
    mapped through ``ro_letters_to_en`` before scraping. The unfiltered
    result is cached per normalised name in ``station_cache``, recorded in
    the database and validated against the v2 station schema. Request
    filtering (``filters`` / ``properties``) is applied to the response only.
    The fetch time is reported in the ``X-Last-Fetched`` header.
    """
    station_name = ro_letters_to_en(station_name.lower().replace(' ', '-'))

    def get_data():
        from ..scraper.scraper import scrape_station
        result = scrape_station(station_name)
        db.on_station(result)
        return result

    # Look in the station cache (not the train cache) so entries are reused
    # instead of being recreated on every request.
    if station_name not in station_cache:
        station_cache[station_name] = CachedData(get_data, validity=1000 * 30)
    data, fetch_time = station_cache[station_name]()

    data['$schema'] = get_hostname() + url_for('.get_station_schema')
    validate(data, schema=STATION_SCHEMA['v2'])

    @filtered_data
    def apply_request_filters(data):
        return data

    resp = jsonify(apply_request_filters(data))
    resp.headers['X-Last-Fetched'] = fetch_time.isoformat()
    return resp
Loading…
Cancel
Save