You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
308 lines
8.8 KiB
308 lines
8.8 KiB
#! /usr/bin/env ruby

require 'erb'
require 'json'
require 'open3'
require 'set'

# Date of the archive being built: the previous local calendar day, formatted
# as YYYY-MM-DD. It is computed as "one second before today's local midnight"
# instead of "now minus 86400 seconds", because a fixed 24-hour offset lands
# two days back when the script runs shortly after midnight following a
# 23-hour (daylight-saving) day.
today = Time.now
$datestr = (Time.new(today.year, today.month, today.day) - 1).strftime('%F')
|
|
|
# Scope-function helpers made available on every object.
class Object
  # Yields the receiver to the block, ignores the block's result and returns
  # the receiver itself.
  def also(&block)
    tap(&block)
  end

  # Yields the receiver to the block and returns whatever the block returns.
  def let
    yield(self)
  end
end
|
|
|
# Produces the normalized form of a station name: lower-cased, every
# whitespace character replaced by a plain space, and the Romanian letters
# ă, â, î, ș, ț replaced by a, a, i, s, t.
#
# @param [String] station_name
# @return [String]
def normalize_station(station_name)
  lowered = station_name.downcase
  spaced = lowered.gsub(/\s/, ' ')
  spaced.tr('ăâîșț', 'aaist')
end
|
|
|
# Station name => link name, filled in by populate_link_name_cache and
# consulted by get_station when building the request URL.
$station_link_name_cache = {}

# Loads the data of one station for $datestr.
#
# The archived copy `<$datestr>/stations/<normalized name>.json` is tried
# first; when it cannot be read or parsed the station is fetched from the
# scraper API with curl.
#
# @param [String] station_name
# @return [Object, nil] the parsed JSON document (hash keys symbolized), or
#   nil when curl did not exit successfully
def get_station(station_name)
  # Try getting from file
  begin
    archived = File.join($datestr, 'stations', "#{normalize_station(station_name)}.json")
    return JSON.parse(File.read(archived), symbolize_names: true)
  rescue StandardError
    # Deliberate best effort: a missing, unreadable or corrupt archive file
    # simply means the station is fetched from the network below.
  end

  normalized = normalize_station(station_name)
  link_name = $station_link_name_cache.fetch(normalized, normalized)
  url = "https://scraper.infotren.dcdev.ro/v3/stations/#{ERB::Util.url_encode(link_name)}?date=#{$datestr}"
  stdout, status = Open3.capture2('curl', '--silent', '--fail', '--show-error', url)
  # success? is nil/false for any non-zero exit or abnormal termination.
  return nil unless status.success?

  JSON.parse(stdout, symbolize_names: true)
end
|
|
|
# Loads the data of one train for $datestr.
#
# The archived copy `<$datestr>/trains/<train_number>.json` is tried first;
# when it cannot be read or parsed the train is fetched from the scraper API
# with curl.
#
# @param [String] train_number
# @return [Object, nil] the parsed JSON document (hash keys symbolized), or
#   nil when curl did not exit successfully
def get_train(train_number)
  # Try getting from file
  begin
    archived = File.join($datestr, 'trains', "#{train_number}.json")
    return JSON.parse(File.read(archived), symbolize_names: true)
  rescue StandardError
    # Deliberate best effort: a missing, unreadable or corrupt archive file
    # simply means the train is fetched from the network below.
  end

  url = "https://scraper.infotren.dcdev.ro/v3/trains/#{ERB::Util.url_encode(train_number)}?date=#{$datestr}"
  stdout, status = Open3.capture2('curl', '--silent', '--fail', '--show-error', url)
  # success? is nil/false for any non-zero exit or abnormal termination.
  return nil unless status.success?

  JSON.parse(stdout, symbolize_names: true)
end
|
|
|
# Records, for every station of every group of +train+, the station's
# :linkName under its :name in $station_link_name_cache.
#
# NOTE(review): keys are stored exactly as they appear in :name, while
# get_station looks entries up by the normalized station name — confirm the
# two are meant to match.
#
# @param [Hash] train  parsed train document with :groups -> :stations
# @return the value of train[:groups]
def populate_link_name_cache(train)
  groups = train[:groups]
  groups.each do |group|
    group[:stations].each { |stop| $station_link_name_cache[stop[:name]] = stop[:linkName] }
  end
end
|
|
|
# Collects every route entry of every train listed under the station's
# :arrivals and :departures.
#
# @param [Hash] station  parsed station document
# @return [Set] the distinct route entries, arrivals first
def get_stations_from_station(station)
  route_stops = Set.new
  %i[arrivals departures].each do |direction|
    station[direction].each do |entry|
      entry[:train][:route].each { |stop| route_stops << stop }
    end
  end
  route_stops
end
|
|
|
# Calls the given block up to +times+ times and returns the first result that
# is not nil; returns nil when every attempt produced nil (or +times+ is 0).
#
# @param [Integer] times
def reattempt(times)
  times.times do
    outcome = yield
    return outcome unless outcome.nil?
  end

  nil
end
|
|
|
# Collects the number of every train listed under the station's :arrivals
# and :departures.
#
# @param [Hash] station  parsed station document
# @return [Set] the distinct train numbers, arrivals first
def get_trains_from_station(station)
  numbers = Set.new
  %i[arrivals departures].each do |direction|
    station[direction].each { |entry| numbers << entry[:train][:number] }
  end
  numbers
end
|
|
|
# Lists the :name of every station of every group of +train+, in document
# order; duplicates are kept.
#
# @param [Hash] train  parsed train document with :groups -> :stations
# @return [Array]
def get_stations_from_train(train)
  train[:groups].each_with_object([]) do |group, names|
    group[:stations].each { |stop| names << stop[:name] }
  end
end
|
|
|
# Console logger that distinguishes permanent lines from temporary status
# lines; temporary lines are wiped from the terminal when the next permanent
# line is added.
class Log
  def initialize
    # Number of temporary lines currently on screen.
    @temporary_lines = 0
  end

  # Erases the temporary lines printed since the last call, then prints
  # +message+ as a permanent line.
  def add(message)
    # "\e[1A" moves the cursor up one line, "\e[2K" clears that line.
    wipe = "\e[1A\e[2K" * @temporary_lines
    print wipe
    @temporary_lines = 0
    puts message
  end

  # Prints +message+ as a temporary line. Does nothing unless standard output
  # is a terminal.
  def temporary_add(message)
    return unless $stdout.isatty

    puts message
    @temporary_lines += 1
  end
end
|
|
|
# Names of the entries found in +dir+, with a trailing ".json" removed.
def archived_entry_names(dir)
  Dir.children(dir).map { |entry| entry.delete_suffix('.json') }
end

# Appends to +queue+ every candidate that is neither in +visited+ nor already
# queued, and returns how many were appended.
def enqueue_unseen(queue, visited, candidates)
  before = queue.count
  candidates.each do |candidate|
    queue << candidate unless visited.include?(candidate) || queue.include?(candidate)
  end
  queue.count - before
end

# Builds (or resumes) the archive directory named $datestr.
#
# Starting from a fixed list of root stations plus whatever is already in the
# archive, it alternates between fetching queued trains and queued stations,
# writing each document to `<$datestr>/trains` or `<$datestr>/stations` and
# queueing every station/train those documents mention, until both queues are
# empty. It then writes start/end times, the failure lists and
# `./last_executed_run.txt`.
def main
  log = Log.new

  log.add "Creating archive for yesterday, #{$datestr}"

  stations_dir = File.join($datestr, 'stations')
  trains_dir = File.join($datestr, 'trains')

  existing = Dir.exist?($datestr)
  log.add 'Archive already exists. Merging.' if existing
  # Create whatever is missing, so a partially created archive can be resumed.
  [$datestr, stations_dir, trains_dir].each do |dir|
    Dir.mkdir(dir) unless Dir.exist?(dir)
  end

  # @type [Array<String>]
  roots = ['București Nord', 'Brașov', 'Iași', 'Titan Sud', 'Oltenița']

  # @type [Array<String>]
  visited_stations = []
  archived_entry_names(stations_dir).each do |entry|
    station = get_station(entry)
    name = station && station[:stationName]
    if name.nil?
      # An archived file that can no longer be loaded must not abort the run.
      log.add "Could not reload archived station #{entry}; skipping"
    else
      visited_stations << name
    end
  end
  log.add "#{visited_stations.count} visited stations" if existing
  # @type [Array<String>]
  unvisited_stations = []
  # @type [Set<String>]
  failed_stations = Set.new

  # @type [Array<String>]
  visited_trains = archived_entry_names(trains_dir)
  log.add "#{visited_trains.count} visited trains" if existing
  # @type [Array<String>]
  unvisited_trains = []
  # @type [Set<String>]
  failed_trains = Set.new

  enqueue_unseen(unvisited_stations, visited_stations, roots)

  # Get unvisited from visited
  visited_trains.each do |train_number|
    train = get_train train_number
    if train.nil?
      log.add "Could not reload archived train #{train_number}; skipping"
      next
    end
    populate_link_name_cache train
    enqueue_unseen(unvisited_stations, visited_stations, get_stations_from_train(train))
  end
  visited_stations.each do |station_name|
    station = get_station station_name
    if station.nil?
      log.add "Could not reload archived station #{station_name}; skipping"
      next
    end
    enqueue_unseen(unvisited_stations, visited_stations, get_stations_from_station(station))
    enqueue_unseen(unvisited_trains, visited_trains, get_trains_from_station(station))
  end
  log.add "#{unvisited_stations.count} unvisited stations" unless unvisited_stations.empty?
  log.add "#{unvisited_trains.count} unvisited trains" unless unvisited_trains.empty?

  start_time = Time.now
  File.write(File.join($datestr, 'start_time.txt'), start_time.strftime('%FT%T%:z'))

  # The two progress lines, rebuilt from the current counters on every call.
  progress = lambda do
    [
      "Stations: #{visited_stations.count} visited, #{failed_stations.count} failed, #{unvisited_stations.count} remaining",
      "Trains: #{visited_trains.count} visited, #{failed_trains.count} failed, #{unvisited_trains.count} remaining",
    ]
  end

  # Failed items are only recorded in the failed_* sets; they are in neither
  # the visited nor the unvisited list, so a later document that mentions
  # them queues them again.
  until unvisited_stations.empty? && unvisited_trains.empty?
    if !unvisited_trains.empty?
      # First visit all trains
      # @type [String]
      train_number = unvisited_trains.shift
      log.temporary_add "Getting train #{train_number}"
      train = reattempt(3) { get_train train_number }
      if train.nil?
        # Failed to get
        failed_trains << train_number
        log.add "Failed to get train #{train_number}"
      else
        populate_link_name_cache train
        visited_trains << train_number
        File.write(File.join(trains_dir, "#{train_number}.json"), JSON.dump(train))
        log.add "Got train #{train_number}"
        new_stations = enqueue_unseen(unvisited_stations, visited_stations, get_stations_from_train(train))
        log.add "Found #{new_stations} new stations" unless new_stations.zero?
      end
    else
      # Then visit stations (the loop condition guarantees one is queued)
      # @type [String]
      station_name = unvisited_stations.shift
      log.temporary_add "Getting station #{station_name}"
      station = reattempt(3) { get_station station_name }
      if station.nil?
        # Failed to get
        failed_stations << station_name
        log.add "Failed to get station #{station_name}"
      else
        visited_stations << station_name
        filename = normalize_station(station_name)
        filename = $station_link_name_cache.fetch(filename, filename)
        File.write(File.join(stations_dir, "#{filename}.json"), JSON.dump(station))
        log.add "Got station #{station_name}"
        new_stations = enqueue_unseen(unvisited_stations, visited_stations, get_stations_from_station(station))
        new_trains = enqueue_unseen(unvisited_trains, visited_trains, get_trains_from_station(station))
        log.add "Found #{new_stations} new stations" unless new_stations.zero?
        log.add "Found #{new_trains} new trains" unless new_trains.zero?
      end
    end
    progress.call.each { |line| log.temporary_add line }
  end

  progress.call.each { |line| log.add line }

  end_time = Time.now
  File.write(File.join($datestr, 'end_time.txt'), end_time.strftime('%FT%T%:z'))
  timespan = (end_time - start_time).to_i
  log.add "Finished in #{timespan}s (#{timespan / 3600}h #{timespan / 60 % 60}m #{timespan % 60}s)"

  # Finally, write failures log
  File.open(File.join($datestr, 'failed_stations.txt'), 'w') do |f|
    failed_stations.each { |name| f.puts name }
  end
  File.open(File.join($datestr, 'failed_trains.txt'), 'w') do |f|
    failed_trains.each { |number| f.puts number }
  end
  # Convert the sets to arrays: JSON.dump has no Set support and would
  # otherwise emit each set's to_s text as a single JSON string.
  File.write(File.join($datestr, 'failures.json'), JSON.dump({
    stations: failed_stations.to_a,
    trains: failed_trains.to_a,
  }))
  File.write('./last_executed_run.txt', $datestr)
end
|
|
|
# Run the archiver only when this file is executed directly, not when it is
# loaded by another script.
main if __FILE__ == $PROGRAM_NAME
|
|
|