From 0a7e2b25687bd6f46a94b5ef507dd751d188adac Mon Sep 17 00:00:00 2001
From: Dan Cojocaru
Date: Fri, 27 Aug 2021 15:38:24 +0300
Subject: [PATCH] Added station arr/dep scraper

Added scraper for arrivals and departures at a station
---
 scraper/schemas.py                            |  20 ++
 scraper/scrape_station.py                     |  87 +++++++++
 scraper/scrape_station_schema_v2.json         | 137 ++++++++++++++
 scraper/scrape_train.py                       | 143 +++++++++++++++
 scraper/scrape_train_schema.json              | 134 ++++++++++++++
 ...chema.json => scrape_train_schema_v2.json} |  22 ++-
 scraper/scraper.py                            | 171 +-----------------
 scraper/utils.py                              |  79 ++++++++
 server/Pipfile                                |   1 +
 server/Pipfile.lock                           | 114 +++++-------
 server/server/db.py                           | 126 +++++++++++--
 server/server/flask_utils.py                  |  29 +++
 server/server/server.py                       |  16 +-
 server/server/utils.py                        |  23 +++
 server/server/v2/v2.py                        |  67 ++++++-
 15 files changed, 897 insertions(+), 272 deletions(-)
 create mode 100644 scraper/schemas.py
 create mode 100644 scraper/scrape_station.py
 create mode 100644 scraper/scrape_station_schema_v2.json
 create mode 100644 scraper/scrape_train.py
 create mode 100644 scraper/scrape_train_schema.json
 rename scraper/{trainInfoScrapResultSchema.json => scrape_train_schema_v2.json} (88%)
 create mode 100644 scraper/utils.py
 create mode 100644 server/server/flask_utils.py

diff --git a/scraper/schemas.py b/scraper/schemas.py
new file mode 100644
index 0000000..6509204
--- /dev/null
+++ b/scraper/schemas.py
@@ -0,0 +1,20 @@
+from contextlib import ExitStack as _ExitStack
+
+_es = _ExitStack()
+
+def _load_file(name: str):
+    import json
+    from os.path import join, dirname
+    dir = dirname(__file__)
+
+    return json.load(_es.enter_context(open(join(dir, name))))
+
+TRAIN_INFO_SCHEMA = {
+    'v1': _load_file('scrape_train_schema.json'),
+    'v2': _load_file('scrape_train_schema_v2.json'),
+}
+STATION_SCHEMA = {
+    'v2': _load_file('scrape_station_schema_v2.json'),
+}
+
+_es.close()
diff --git a/scraper/scrape_station.py b/scraper/scrape_station.py
new file mode 100644
index 0000000..83bf7fa
--- /dev/null
+++ b/scraper/scrape_station.py
@@ -0,0 +1,87 @@
+import re
+
+from datetime import datetime, timedelta
+
+import pytz
+import requests
+from bs4 import BeautifulSoup
+
+from .utils import *
+
+# region regex definitions
+
+RO_LETTERS = r'A-Za-zăâîșțĂÂÎȚȘ'
+
+STATION_INFO_REGEX = re.compile(rf'^([{RO_LETTERS} ]+) în ([0-9.]+)$')
+
+STOPPING_TIME_REGEX = re.compile(r'^(necunoscută \(stație terminus\))|(?:([0-9]+) min \((?:începând cu|până la) ([0-9]{1,2}:[0-9]{2})\))$')
+
+# endregion
+
+def scrape(station_name: str):
+    station_name = ro_letters_to_en(station_name)
+    # Start scraping session
+    s = requests.Session()
+
+    r = s.get(build_url(
+        'https://mersultrenurilor.infofer.ro/ro-RO/Statie/{station}',
+        station=station_name.replace(' ', '-'),
+    ))
+
+    soup = BeautifulSoup(r.text, features='html.parser')
+    sform = soup.find(id='form-search')
+    result_data = { elem['name']: elem['value'] for elem in sform('input') }
+
+    r = s.post('https://mersultrenurilor.infofer.ro/ro-RO/Stations/StationsResult', data=result_data)
+    soup = BeautifulSoup(r.text, features='html.parser')
+
+    scraped = {}
+
+    station_info_div, _, departures_div, arrivals_div, *_ = soup('div', recursive=False)
+
+    scraped['stationName'], scraped['date'] = STATION_INFO_REGEX.match(collapse_space(station_info_div.h2.text)).groups()
+    date_d, date_m, date_y = (int(comp) for comp in scraped['date'].split('.'))
+    date = datetime(date_y, date_m, date_d)
+    dt_seq = DateTimeSequencer(date.year, date.month, date.day)
+    tz = 
pytz.timezone('Europe/Bucharest') + + def parse_arrdep_list(elem, end_station_field_name): + def parse_item(elem): + result = {} + + try: + data_div, status_div = elem('div', recursive=False) + except ValueError: + data_div, *_ = elem('div', recursive=False) + status_div = None + data_main_div, data_details_div = data_div('div', recursive=False) + time_div, dest_div, train_div, *_ = data_main_div('div', recursive=False) + operator_div, route_div, stopping_time_div = data_details_div.div('div', recursive=False) + + result['time'] = collapse_space(time_div.div.div('div', recursive=False)[1].text) + st_hr, st_min = (int(comp) for comp in result['time'].split(':')) + result['time'] = tz.localize(dt_seq(st_hr, st_min)).isoformat() + + unknown_st, st, st_opposite_time = STOPPING_TIME_REGEX.match( + collapse_space(stopping_time_div.div('div', recursive=False)[1].text) + ).groups() + if unknown_st: + result['stoppingTime'] = None + elif st: + result['stoppingTime'] = int(st) + + result['train'] = {} + result['train']['rank'] = collapse_space(train_div.div.div('div', recursive=False)[1].span.text) + result['train']['number'] = collapse_space(train_div.div.div('div', recursive=False)[1].a.text) + result['train'][end_station_field_name] = collapse_space(dest_div.div.div('div', recursive=False)[1].text) + result['train']['operator'] = collapse_space(operator_div.div('div', recursive=False)[1].text) + result['train']['route'] = collapse_space(route_div.div('div', recursive=False)[1].text).split(' - ') + + return result + + return [parse_item(elem) for elem in elem.div.ul('li', recursive=False)] + + scraped['departures'] = parse_arrdep_list(departures_div, 'destination') + scraped['arrivals'] = parse_arrdep_list(arrivals_div, 'origin') + + return scraped diff --git a/scraper/scrape_station_schema_v2.json b/scraper/scrape_station_schema_v2.json new file mode 100644 index 0000000..75e36c8 --- /dev/null +++ b/scraper/scrape_station_schema_v2.json @@ -0,0 +1,137 @@ +{ + "$schema": "http://json-schema.org/schema", + "title": "Train Info InfoFer Scrap Station Schema", + "description": "Results of scrapping InfoFer website for station arrival/departure info", + "definitions": { + "arrDepItem": { + "type": "object", + "properties": { + "time": { + "description": "Time of arrival/departure", + "type": "string", + "format": "date-time" + }, + "train": { + "type": "object", + "properties": { + "rank": { + "type": "string", + "examples": [ + "R", + "R-E", + "IR", + "IRN" + ] + }, + "number": { + "type": "string", + "examples": [ + "74", + "15934" + ] + }, + "operator": { + "type": "string", + "examples": [ + "CFR Călători", + "Softrans", + "Regio Călători" + ] + }, + "route": { + "description": "All the stations the train stops at", + "type": "array", + "items": { + "type": "string" + } + } + }, + "required": [ + "rank", + "number", + "operator" + ] + }, + "stoppingTime": { + "type": [ + "integer", + "null" + ], + "minimum": 1 + } + }, + "required": [ + "time", + "train", + "stoppingTime" + ] + } + }, + "type": "object", + "properties": { + "arrivals": { + "type": "array", + "items": { + "allOf": [ + { + "$ref": "#/definitions/arrDepItem" + }, + { + "type": "object", + "properties": { + "train": { + "type": "object", + "properties": { + "origin": { + "type": "string" + } + }, + "required": ["origin"] + } + }, + "required": ["train"] + } + ] + } + }, + "departures": { + "type": "array", + "items": { + "allOf": [ + { + "$ref": "#/definitions/arrDepItem" + }, + { + "type": "object", + "properties": { + "train": { + 
"type": "object", + "properties": { + "destination": { + "type": "string" + } + }, + "required": ["destination"] + } + }, + "required": ["train"] + } + ] + } + }, + "stationName": { + "type": "string" + }, + "date": { + "description": "Date for which the data is provided (likely today)", + "type": "string", + "pattern": "^[0-9]{1,2}\\.[0-9]{2}\\.[0-9]{4}$" + } + }, + "required": [ + "arrivals", + "departures", + "stationName", + "date" + ] +} \ No newline at end of file diff --git a/scraper/scrape_train.py b/scraper/scrape_train.py new file mode 100644 index 0000000..df9c7ff --- /dev/null +++ b/scraper/scrape_train.py @@ -0,0 +1,143 @@ +import re + +from datetime import datetime, timedelta + +import pytz +import requests +from bs4 import BeautifulSoup + +from .utils import * + +# region regex definitions + +TRAIN_INFO_REGEX = re.compile(r'^([A-Z-]+) ([0-9]+) în ([0-9.]+)$') + +OPERATOR_REGEX = re.compile(r'^Operat de (.+)$') + +SL_REGEX = re.compile(r'^(?:Fără|([0-9]+) min) (întârziere|mai devreme) la (trecerea fără oprire prin|sosirea în|plecarea din) (.+)\.$') +SL_STATE_MAP = { + 't': 'passing', + 's': 'arrival', + 'p': 'departure', +} + +RO_LETTERS = r'A-Za-zăâîșțĂÂÎȚȘ' + +ROUTE_REGEX = re.compile(rf'^Parcurs tren ([{RO_LETTERS} ]+)[-–]([{RO_LETTERS} ]+)$') + +KM_REGEX = re.compile(r'^km ([0-9]+)$') + +PLATFORM_REGEX = re.compile(r'^linia (.+)$') + +STOPPING_TIME_REGEX = re.compile(r'^([0-9]+) min oprire$') + +STATION_DEPARR_STATUS_REGEX = re.compile(r'^(?:(la timp)|(?:((?:\+|-)[0-9]+) min \((?:(?:întârziere)|(?:mai devreme))\)))(\*?)$') + +# endregion + +def scrape(train_no: int, use_yesterday=False, date_override=None): + # Start scrapping session + s = requests.Session() + + date = datetime.today() + if use_yesterday: + date -= timedelta(days=1) + if date_override: + date = date_override + + r = s.get(build_url( + 'https://mersultrenurilor.infofer.ro/ro-RO/Tren/{train_no}', + train_no=train_no, + query=[ + ('Date', date.strftime('%d.%m.%Y')), + ], + )) + + soup = BeautifulSoup(r.text, features='html.parser') + sform = soup.find(id='form-search') + result_data = { elem['name']: elem['value'] for elem in sform('input') } + + r = s.post('https://mersultrenurilor.infofer.ro/ro-RO/Trains/TrainsResult', data=result_data) + soup = BeautifulSoup(r.text, features='html.parser') + + scraped = {} + + train_info_div, _, _, results_div, *_ = soup('div', recursive=False) + + train_info_div = train_info_div.div('div', recursive=False)[0] + + scraped['rank'], scraped['number'], scraped['date'] = TRAIN_INFO_REGEX.match(collapse_space(train_info_div.h2.text)).groups() + date_d, date_m, date_y = (int(comp) for comp in scraped['date'].split('.')) + date = datetime(date_y, date_m, date_d) + + scraped['operator'] = OPERATOR_REGEX.match(collapse_space(train_info_div.p.text)).groups()[0] + + results_div = results_div.div + status_div = results_div('div', recursive=False)[0] + route_text = collapse_space(status_div.h4.text) + route_from, route_to = ROUTE_REGEX.match(route_text).groups() + scraped['route'] = { + 'from': route_from, + 'to': route_to, + } + try: + status_line_match = SL_REGEX.match(collapse_space(status_div.div.text)) + slm_delay, slm_late, slm_arrival, slm_station = status_line_match.groups() + scraped['status'] = { + 'delay': (int(slm_delay) if slm_late == 'întârziere' else -int(slm_delay)) if slm_delay else 0, + 'station': slm_station, + 'state': SL_STATE_MAP[slm_arrival[0]], + } + except Exception: + scraped['status'] = None + + stations = status_div.ul('li', recursive=False) + 
scraped['stations'] = [] + dt_seq = DateTimeSequencer(date.year, date.month, date.day) + tz = pytz.timezone('Europe/Bucharest') + for station in stations: + station_scraped = {} + + left, middle, right = station.div('div', recursive=False) + station_scraped['name'] = collapse_space(middle.div.div('div', recursive=False)[0]('div', recursive=False)[0].text) + station_scraped['km'] = collapse_space(middle.div.div('div', recursive=False)[0]('div', recursive=False)[1].text) + station_scraped['km'] = int(KM_REGEX.match(station_scraped['km']).groups()[0]) + station_scraped['stoppingTime'] = collapse_space(middle.div.div('div', recursive=False)[0]('div', recursive=False)[2].text) + if not station_scraped['stoppingTime']: + station_scraped['stoppingTime'] = None + else: + station_scraped['stoppingTime'] = int(STOPPING_TIME_REGEX.match(station_scraped['stoppingTime']).groups()[0]) + station_scraped['platform'] = collapse_space(middle.div.div('div', recursive=False)[0]('div', recursive=False)[3].text) + if not station_scraped['platform']: + station_scraped['platform'] = None + else: + station_scraped['platform'] = PLATFORM_REGEX.match(station_scraped['platform']).groups()[0] + + def scrape_time(elem, setter): + parts = elem.div.div('div', recursive=False) + if parts: + result = {} + + time, *_ = parts + result['scheduleTime'] = collapse_space(time.text) + st_hr, st_min = (int(comp) for comp in result['scheduleTime'].split(':')) + result['scheduleTime'] = tz.localize(dt_seq(st_hr, st_min)).isoformat() + if len(parts) >= 2: + _, status, *_ = parts + result['status'] = {} + on_time, delay, approx = STATION_DEPARR_STATUS_REGEX.match(collapse_space(status.text)).groups() + result['status']['delay'] = 0 if on_time else int(delay) + result['status']['real'] = not approx + else: + result['status'] = None + + setter(result) + else: + setter(None) + + scrape_time(left, lambda value: station_scraped.update(arrival=value)) + scrape_time(right, lambda value: station_scraped.update(departure=value)) + + scraped['stations'].append(station_scraped) + + return scraped diff --git a/scraper/scrape_train_schema.json b/scraper/scrape_train_schema.json new file mode 100644 index 0000000..541a657 --- /dev/null +++ b/scraper/scrape_train_schema.json @@ -0,0 +1,134 @@ +{ + "$schema": "http://json-schema.org/schema", + "title": "Train Info InfoFer Scrap Train Schema", + "description": "Results of scrapping InfoFer website for train info", + "definitions": { + "delayType": { + "description": "Delay of the train (negative for being early)", + "type": "integer" + }, + "stationArrDepTime": { + "description": "Time of arrival at/departure from station", + "type": ["object", "null"], + "properties": { + "scheduleTime": { + "description": "The time the train is scheduled to arrive/depart", + "type": "string", + "pattern": "^[0-9]{1,2}:[0-9]{2}$" + }, + "status": { + "type": ["object", "null"], + "properties": { + "delay": { + "$ref": "#/definitions/delayType" + }, + "real": { + "description": "Determines whether delay was actually reported or is an approximation", + "type": "boolean" + } + }, + "required": ["delay", "real"] + } + }, + "required": ["scheduleTime"] + } + }, + "type": "object", + "properties": { + "rank": { + "description": "The rank of the train", + "type": "string", + "examples": [ + "R", + "R-E", + "IR", + "IRN" + ] + }, + "number": { + "description": "The number of the train", + "type": "string", + "examples": [ + "74", + "15934" + ] + }, + "date": { + "description": "Date of departure from the first station 
(dd.mm.yyyy)", + "type": "string", + "pattern": "^[0-9]{1,2}\\.[0-9]{2}\\.[0-9]{4}$" + }, + "operator": { + "description": "Operator of the train", + "type": "string", + "examples": [ + "CFR Călători", + "Softrans", + "Regio Călători" + ] + }, + "route": { + "description": "Route of the train", + "type": "object", + "properties": { + "from": { + "type": "string" + }, + "to": { + "type": "string" + } + }, + "required": ["from", "to"] + }, + "status": { + "description": "Current status of the train", + "type": ["object", "null"], + "properties": { + "delay": { + "$ref": "#/definitions/delayType" + }, + "station": { + "type": "string" + }, + "state": { + "type": "string", + "enum": ["passing", "arrival", "departure"] + } + }, + "required": ["delay", "station", "state"] + }, + "stations": { + "description": "List of stations the train stops at", + "type": "array", + "items": { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "km": { + "description": "The distance the train travelled until reaching this station", + "type": "integer" + }, + "stoppingTime": { + "description": "The number of minutes the train is scheduled to stop in this station", + "type": ["integer", "null"], + "minimum": 1 + }, + "platform": { + "description": "The platform the train stopped at", + "type": ["string", "null"] + }, + "arrival": { + "$ref": "#/definitions/stationArrDepTime" + }, + "departure": { + "$ref": "#/definitions/stationArrDepTime" + } + }, + "required": ["name", "km"] + } + } + }, + "required": ["route", "stations", "rank", "number", "date", "operator"] +} \ No newline at end of file diff --git a/scraper/trainInfoScrapResultSchema.json b/scraper/scrape_train_schema_v2.json similarity index 88% rename from scraper/trainInfoScrapResultSchema.json rename to scraper/scrape_train_schema_v2.json index d25307d..837ea40 100644 --- a/scraper/trainInfoScrapResultSchema.json +++ b/scraper/scrape_train_schema_v2.json @@ -1,6 +1,6 @@ { "$schema": "http://json-schema.org/schema", - "title": "Train Info InfoFer Scrap Result Schema", + "title": "Train Info InfoFer Scrap Train Schema", "description": "Results of scrapping InfoFer website for train info", "definitions": { "delayType": { @@ -13,7 +13,8 @@ "properties": { "scheduleTime": { "description": "The time the train is scheduled to arrive/depart", - "type": "string" + "type": "string", + "format": "date-time" }, "status": { "type": ["object", "null"], @@ -38,23 +39,24 @@ "description": "The rank of the train", "type": "string", "examples": [ - "74", - "15934" + "R", + "R-E", + "IR", + "IRN" ] }, "number": { "description": "The number of the train", "type": "string", "examples": [ - "R", - "R-E", - "IR", - "IRN" + "74", + "15934" ] }, "date": { - "description": "Date of departure from the first station", - "type": "string" + "description": "Date of departure from the first station (dd.mm.yyyy)", + "type": "string", + "pattern": "^[0-9]{1,2}\\.[0-9]{2}\\.[0-9]{4}$" }, "operator": { "description": "Operator of the train", diff --git a/scraper/scraper.py b/scraper/scraper.py index 9545b31..8a594d9 100644 --- a/scraper/scraper.py +++ b/scraper/scraper.py @@ -1,177 +1,12 @@ #! 
/usr/bin/env python3 - -from datetime import datetime, timedelta -import re - -import pytz -import requests -from bs4 import BeautifulSoup -from urllib.parse import quote, urlencode - -TRAIN_INFO_REGEX = re.compile(r'^([A-Z-]+) ([0-9]+) în ([0-9.]+)$') - -OPERATOR_REGEX = re.compile(r'^Operat de (.+)$') - -SL_REGEX = re.compile(r'^(?:Fără|([0-9]+) min) (întârziere|mai devreme) la (trecerea fără oprire prin|sosirea în|plecarea din) (.+)\.$') -SL_STATE_MAP = { - 't': 'passing', - 's': 'arrival', - 'p': 'departure', -} - -RO_LETTERS = r'A-Za-zăâîșțĂÂÎȚȘ' - -ROUTE_REGEX = re.compile(rf'^Parcurs tren ([{RO_LETTERS} ]+)[-–]([{RO_LETTERS} ]+)$') - -KM_REGEX = re.compile(r'^km ([0-9]+)$') - -PLATFORM_REGEX = re.compile(r'^linia (.+)$') - -STOPPING_TIME_REGEX = re.compile(r'^([0-9]+) min oprire$') - -STATION_DEPARR_STATUS_REGEX = re.compile(r'^(?:(la timp)|(?:((?:\+|-)[0-9]+) min \((?:(?:întârziere)|(?:mai devreme))\)))(\*?)$') - -class DateTimeSequencer: - def __init__(self, year: int, month: int, day: int) -> None: - self.current = datetime(year, month, day, 0, 0, 0) - self.current -= timedelta(seconds=1) - - def __call__(self, hour: int, minute: int = 0, second: int = 0) -> datetime: - potential_new_date = datetime(self.current.year, self.current.month, self.current.day, hour, minute, second) - if (self.current > potential_new_date): - potential_new_date += timedelta(days=1) - self.current = potential_new_date - return self.current - -def collapse_space(string: str) -> str: - return re.sub( - rf'[{BeautifulSoup.ASCII_SPACES}]+', - ' ', - string, - flags=re.MULTILINE - ).strip() - -def build_url(base: str, /, query: dict, **kwargs): - result = base.format(**{ k: quote(str(v)) for k, v in kwargs.items() }) - if query: - result += '?' - result += urlencode(query) - return result - -def scrape(train_no: int, use_yesterday=False, date_override=None): - # Start scrapping session - s = requests.Session() - - date = datetime.today() - if use_yesterday: - date -= timedelta(days=1) - if date_override: - date = date_override - - r = s.get(build_url( - 'https://mersultrenurilor.infofer.ro/ro-RO/Tren/{train_no}', - train_no=train_no, - query=[ - ('Date', date.strftime('%d.%m.%Y')), - ], - )) - - soup = BeautifulSoup(r.text, features='html.parser') - sform = soup.find(id='form-search') - result_data = { elem['name']: elem['value'] for elem in sform('input') } - - r = s.post('https://mersultrenurilor.infofer.ro/ro-RO/Trains/TrainsResult', data=result_data) - soup = BeautifulSoup(r.text, features='html.parser') - - scraped = {} - - train_info_div, _, _, results_div, *_ = soup('div', recursive=False) - - train_info_div = train_info_div.div('div', recursive=False)[0] - - scraped['rank'], scraped['number'], scraped['date'] = TRAIN_INFO_REGEX.match(collapse_space(train_info_div.h2.text)).groups() - date_d, date_m, date_y = (int(comp) for comp in scraped['date'].split('.')) - date = datetime(date_y, date_m, date_d) - - scraped['operator'] = OPERATOR_REGEX.match(collapse_space(train_info_div.p.text)).groups()[0] - - results_div = results_div.div - status_div = results_div('div', recursive=False)[0] - route_text = collapse_space(status_div.h4.text) - route_from, route_to = ROUTE_REGEX.match(route_text).groups() - scraped['route'] = { - 'from': route_from, - 'to': route_to, - } - try: - status_line_match = SL_REGEX.match(collapse_space(status_div.div.text)) - slm_delay, slm_late, slm_arrival, slm_station = status_line_match.groups() - scraped['status'] = { - 'delay': (int(slm_delay) if slm_late == 'întârziere' else 
-int(slm_delay)) if slm_delay else 0, - 'station': slm_station, - 'state': SL_STATE_MAP[slm_arrival[0]], - } - except Exception: - scraped['status'] = None - - stations = status_div.ul('li', recursive=False) - scraped['stations'] = [] - dt_seq = DateTimeSequencer(date.year, date.month, date.day) - tz = pytz.timezone('Europe/Bucharest') - for station in stations: - station_scraped = {} - - left, middle, right = station.div('div', recursive=False) - station_scraped['name'] = collapse_space(middle.div.div('div', recursive=False)[0]('div', recursive=False)[0].text) - station_scraped['km'] = collapse_space(middle.div.div('div', recursive=False)[0]('div', recursive=False)[1].text) - station_scraped['km'] = int(KM_REGEX.match(station_scraped['km']).groups()[0]) - station_scraped['stoppingTime'] = collapse_space(middle.div.div('div', recursive=False)[0]('div', recursive=False)[2].text) - if not station_scraped['stoppingTime']: - station_scraped['stoppingTime'] = None - else: - station_scraped['stoppingTime'] = int(STOPPING_TIME_REGEX.match(station_scraped['stoppingTime']).groups()[0]) - station_scraped['platform'] = collapse_space(middle.div.div('div', recursive=False)[0]('div', recursive=False)[3].text) - if not station_scraped['platform']: - station_scraped['platform'] = None - else: - station_scraped['platform'] = PLATFORM_REGEX.match(station_scraped['platform']).groups()[0] - - def scrape_time(elem, setter): - parts = elem.div.div('div', recursive=False) - if parts: - result = {} - - time, *_ = parts - result['scheduleTime'] = collapse_space(time.text) - st_hr, st_min = (int(comp) for comp in result['scheduleTime'].split(':')) - result['scheduleTime'] = tz.localize(dt_seq(st_hr, st_min)).isoformat() - if len(parts) >= 2: - _, status, *_ = parts - result['status'] = {} - on_time, delay, approx = STATION_DEPARR_STATUS_REGEX.match(collapse_space(status.text)).groups() - result['status']['delay'] = 0 if on_time else int(delay) - result['status']['real'] = not approx - else: - result['status'] = None - - setter(result) - else: - setter(None) - - scrape_time(left, lambda value: station_scraped.update(arrival=value)) - scrape_time(right, lambda value: station_scraped.update(departure=value)) - - scraped['stations'].append(station_scraped) - - return scraped - +from .scrape_train import scrape as scrape_train +from .scrape_station import scrape as scrape_station def main(): train_no = 1538 print(f'Testing package with train number {train_no}') from pprint import pprint - # pprint(scrape('473')) - pprint(scrape(train_no)) + pprint(scrape_train(train_no)) if __name__ == '__main__': main() diff --git a/scraper/utils.py b/scraper/utils.py new file mode 100644 index 0000000..ad314be --- /dev/null +++ b/scraper/utils.py @@ -0,0 +1,79 @@ +import re + +from datetime import datetime, timedelta +from urllib.parse import urlencode, quote + +# From: https://en.wikipedia.org/wiki/Whitespace_character#Unicode +ASCII_WHITESPACE = [ + '\u0009', # HT; Character Tabulation + '\u000a', # LF + '\u000b', # VT; Line Tabulation + '\u000c', # FF; Form Feed + '\u000d', # CR + '\u0020', # Space +] + +WHITESPACE = ASCII_WHITESPACE + [ + '\u0085', # NEL; Next Line + '\u00a0', # No-break Space;   + '\u1680', # Ogham Space Mark + '\u2000', # En Quad + '\u2001', # Em Quad + '\u2002', # En Space + '\u2003', # Em Space + '\u2004', # Three-per-em Space + '\u2005', # Four-per-em Space + '\u2006', # Six-per-em Space + '\u2007', # Figure Space + '\u2008', # Punctuation Space + '\u2009', # Thin Space + '\u200A', # Hair Space + '\u2028', # 
Line Separator + '\u2029', # Paragraph Separator + '\u202f', # Narrow No-break Space + '\u205d', # Meduam Mathematical Space + '\u3000', # Ideographic Space +] + +WHITESPACE_REGEX = re.compile(rf'[{"".join(WHITESPACE)}]+', flags=re.MULTILINE) + +class DateTimeSequencer: + def __init__(self, year: int, month: int, day: int) -> None: + self.current = datetime(year, month, day, 0, 0, 0) + self.current -= timedelta(seconds=1) + + def __call__(self, hour: int, minute: int = 0, second: int = 0) -> datetime: + potential_new_date = datetime(self.current.year, self.current.month, self.current.day, hour, minute, second) + if (self.current > potential_new_date): + potential_new_date += timedelta(days=1) + self.current = potential_new_date + return self.current + +def collapse_space(string: str) -> str: + return WHITESPACE_REGEX.sub( + ' ', + string, + ).strip() + +def build_url(base: str, /, query: dict = {}, **kwargs): + result = base.format(**{ k: quote(str(v)) for k, v in kwargs.items() }) + if query: + result += '?' + result += urlencode(query) + return result + +RO_TO_EN = { + 'ă': 'a', + 'Ă': 'A', + 'â': 'a', + 'Â': 'A', + 'î': 'i', + 'Î': 'I', + 'ș': 's', + 'Ș': 'S', + 'ț': 't', + 'Ț': 'T', +} + +def ro_letters_to_en(string: str) -> str: + return ''.join((RO_TO_EN.get(letter, letter) for letter in string)) diff --git a/server/Pipfile b/server/Pipfile index 48a585d..3bcaa3b 100644 --- a/server/Pipfile +++ b/server/Pipfile @@ -7,6 +7,7 @@ name = "pypi" flask = "*" gevent = "*" scraper = { editable = true, path = '../scraper' } +jsonschema = "*" [dev-packages] diff --git a/server/Pipfile.lock b/server/Pipfile.lock index b1bfb9f..7de0a9d 100644 --- a/server/Pipfile.lock +++ b/server/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "9d422680ab15ce184b043276f5d0d2cac228ff60dfc66ec193b6314bdc0f6ce2" + "sha256": "3c7f09679bdd68674754a714ee39503cf1a3ae265400eea074fec83559246dff" }, "pipfile-spec": 6, "requires": { @@ -16,6 +16,14 @@ ] }, "default": { + "attrs": { + "hashes": [ + "sha256:149e90d6d8ac20db7a955ad60cf0e6881a3f20d37096140088356da6c716b0b1", + "sha256:ef6aaac3ca6cd92904cdd0d83f629a15f18053ec84e6432106f7a4d04ae4f5fb" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", + "version": "==21.2.0" + }, "beautifulsoup4": { "hashes": [ "sha256:4c98143716ef1cb40bf7f39a8e3eec8f8b009509e74904ba3a7b315431577e35", @@ -31,57 +39,6 @@ ], "version": "==2021.5.30" }, - "cffi": { - "hashes": [ - "sha256:06c54a68935738d206570b20da5ef2b6b6d92b38ef3ec45c5422c0ebaf338d4d", - "sha256:0c0591bee64e438883b0c92a7bed78f6290d40bf02e54c5bf0978eaf36061771", - "sha256:19ca0dbdeda3b2615421d54bef8985f72af6e0c47082a8d26122adac81a95872", - "sha256:22b9c3c320171c108e903d61a3723b51e37aaa8c81255b5e7ce102775bd01e2c", - "sha256:26bb2549b72708c833f5abe62b756176022a7b9a7f689b571e74c8478ead51dc", - "sha256:33791e8a2dc2953f28b8d8d300dde42dd929ac28f974c4b4c6272cb2955cb762", - "sha256:3c8d896becff2fa653dc4438b54a5a25a971d1f4110b32bd3068db3722c80202", - "sha256:4373612d59c404baeb7cbd788a18b2b2a8331abcc84c3ba40051fcd18b17a4d5", - "sha256:487d63e1454627c8e47dd230025780e91869cfba4c753a74fda196a1f6ad6548", - "sha256:48916e459c54c4a70e52745639f1db524542140433599e13911b2f329834276a", - "sha256:4922cd707b25e623b902c86188aca466d3620892db76c0bdd7b99a3d5e61d35f", - "sha256:55af55e32ae468e9946f741a5d51f9896da6b9bf0bbdd326843fec05c730eb20", - "sha256:57e555a9feb4a8460415f1aac331a2dc833b1115284f7ded7278b54afc5bd218", - "sha256:5d4b68e216fc65e9fe4f524c177b54964af043dde734807586cf5435af84045c", 
- "sha256:64fda793737bc4037521d4899be780534b9aea552eb673b9833b01f945904c2e", - "sha256:6d6169cb3c6c2ad50db5b868db6491a790300ade1ed5d1da29289d73bbe40b56", - "sha256:7bcac9a2b4fdbed2c16fa5681356d7121ecabf041f18d97ed5b8e0dd38a80224", - "sha256:80b06212075346b5546b0417b9f2bf467fea3bfe7352f781ffc05a8ab24ba14a", - "sha256:818014c754cd3dba7229c0f5884396264d51ffb87ec86e927ef0be140bfdb0d2", - "sha256:8eb687582ed7cd8c4bdbff3df6c0da443eb89c3c72e6e5dcdd9c81729712791a", - "sha256:99f27fefe34c37ba9875f224a8f36e31d744d8083e00f520f133cab79ad5e819", - "sha256:9f3e33c28cd39d1b655ed1ba7247133b6f7fc16fa16887b120c0c670e35ce346", - "sha256:a8661b2ce9694ca01c529bfa204dbb144b275a31685a075ce123f12331be790b", - "sha256:a9da7010cec5a12193d1af9872a00888f396aba3dc79186604a09ea3ee7c029e", - "sha256:aedb15f0a5a5949ecb129a82b72b19df97bbbca024081ed2ef88bd5c0a610534", - "sha256:b315d709717a99f4b27b59b021e6207c64620790ca3e0bde636a6c7f14618abb", - "sha256:ba6f2b3f452e150945d58f4badd92310449876c4c954836cfb1803bdd7b422f0", - "sha256:c33d18eb6e6bc36f09d793c0dc58b0211fccc6ae5149b808da4a62660678b156", - "sha256:c9a875ce9d7fe32887784274dd533c57909b7b1dcadcc128a2ac21331a9765dd", - "sha256:c9e005e9bd57bc987764c32a1bee4364c44fdc11a3cc20a40b93b444984f2b87", - "sha256:d2ad4d668a5c0645d281dcd17aff2be3212bc109b33814bbb15c4939f44181cc", - "sha256:d950695ae4381ecd856bcaf2b1e866720e4ab9a1498cba61c602e56630ca7195", - "sha256:e22dcb48709fc51a7b58a927391b23ab37eb3737a98ac4338e2448bef8559b33", - "sha256:e8c6a99be100371dbb046880e7a282152aa5d6127ae01783e37662ef73850d8f", - "sha256:e9dc245e3ac69c92ee4c167fbdd7428ec1956d4e754223124991ef29eb57a09d", - "sha256:eb687a11f0a7a1839719edd80f41e459cc5366857ecbed383ff376c4e3cc6afd", - "sha256:eb9e2a346c5238a30a746893f23a9535e700f8192a68c07c0258e7ece6ff3728", - "sha256:ed38b924ce794e505647f7c331b22a693bee1538fdf46b0222c4717b42f744e7", - "sha256:f0010c6f9d1a4011e429109fda55a225921e3206e7f62a0c22a35344bfd13cca", - "sha256:f0c5d1acbfca6ebdd6b1e3eded8d261affb6ddcf2186205518f1428b8569bb99", - "sha256:f10afb1004f102c7868ebfe91c28f4a712227fe4cb24974350ace1f90e1febbf", - "sha256:f174135f5609428cc6e1b9090f9268f5c8935fddb1b25ccb8255a2d50de6789e", - "sha256:f3ebe6e73c319340830a9b2825d32eb6d8475c1dac020b4f0aa774ee3b898d1c", - "sha256:f627688813d0a4140153ff532537fbe4afea5a3dffce1f9deb7f91f848a832b5", - "sha256:fd4305f86f53dfd8cd3522269ed7fc34856a8ee3709a5e28b2836b2db9d4cd69" - ], - "markers": "platform_python_implementation == 'CPython' and sys_platform == 'win32'", - "version": "==1.14.6" - }, "charset-normalizer": { "hashes": [ "sha256:0c8911edd15d19223366a194a513099a302055a962bca2cec0f54b8b63175d8b", @@ -98,14 +55,6 @@ "markers": "python_version >= '3.6'", "version": "==8.0.1" }, - "colorama": { - "hashes": [ - "sha256:5941b2b48a20143d2267e95b1c2a7603ce057ee39fd88e7329b0c292aa16869b", - "sha256:9f47eda37229f68eee03b24b9748937c7dc3868f906e8ba69fbcbdd3bc5dc3e2" - ], - "markers": "platform_system == 'Windows'", - "version": "==0.4.4" - }, "flask": { "hashes": [ "sha256:1c4c257b1892aec1398784c63791cbaa43062f1f7aeb555c4da961b20ee68f55", @@ -230,6 +179,14 @@ "markers": "python_version >= '3.6'", "version": "==3.0.1" }, + "jsonschema": { + "hashes": [ + "sha256:4e5b3cf8216f577bee9ce139cbe72eca3ea4f292ec60928ff24758ce626cd163", + "sha256:c8a85b28d377cc7737e46e2d9f2b4f44ee3c0e1deac6bf46ddefc7187d30797a" + ], + "index": "pypi", + "version": "==3.2.0" + }, "markupsafe": { "hashes": [ "sha256:01a9b8ea66f1658938f65b93a85ebe8bc016e6769611be228d797c9d998dd298", @@ -290,13 +247,32 @@ "markers": "python_version >= '3.6'", "version": 
"==2.0.1" }, - "pycparser": { + "pyrsistent": { "hashes": [ - "sha256:2d475327684562c3a96cc71adf7dc8c4f0565175cf86b6d7a404ff4c771f15f0", - "sha256:7582ad22678f0fcd81102833f60ef8d0e57288b6b5fb00323d101be910e35705" + "sha256:097b96f129dd36a8c9e33594e7ebb151b1515eb52cceb08474c10a5479e799f2", + "sha256:2aaf19dc8ce517a8653746d98e962ef480ff34b6bc563fc067be6401ffb457c7", + "sha256:404e1f1d254d314d55adb8d87f4f465c8693d6f902f67eb6ef5b4526dc58e6ea", + "sha256:48578680353f41dca1ca3dc48629fb77dfc745128b56fc01096b2530c13fd426", + "sha256:4916c10896721e472ee12c95cdc2891ce5890898d2f9907b1b4ae0f53588b710", + "sha256:527be2bfa8dc80f6f8ddd65242ba476a6c4fb4e3aedbf281dfbac1b1ed4165b1", + "sha256:58a70d93fb79dc585b21f9d72487b929a6fe58da0754fa4cb9f279bb92369396", + "sha256:5e4395bbf841693eaebaa5bb5c8f5cdbb1d139e07c975c682ec4e4f8126e03d2", + "sha256:6b5eed00e597b5b5773b4ca30bd48a5774ef1e96f2a45d105db5b4ebb4bca680", + "sha256:73ff61b1411e3fb0ba144b8f08d6749749775fe89688093e1efef9839d2dcc35", + "sha256:772e94c2c6864f2cd2ffbe58bb3bdefbe2a32afa0acb1a77e472aac831f83427", + "sha256:773c781216f8c2900b42a7b638d5b517bb134ae1acbebe4d1e8f1f41ea60eb4b", + "sha256:a0c772d791c38bbc77be659af29bb14c38ced151433592e326361610250c605b", + "sha256:b29b869cf58412ca5738d23691e96d8aff535e17390128a1a52717c9a109da4f", + "sha256:c1a9ff320fa699337e05edcaae79ef8c2880b52720bc031b219e5b5008ebbdef", + "sha256:cd3caef37a415fd0dae6148a1b6957a8c5f275a62cca02e18474608cb263640c", + "sha256:d5ec194c9c573aafaceebf05fc400656722793dac57f254cd4741f3c27ae57b4", + "sha256:da6e5e818d18459fa46fac0a4a4e543507fe1110e808101277c5a2b5bab0cd2d", + "sha256:e79d94ca58fcafef6395f6352383fa1a76922268fa02caa2272fff501c2fdc78", + "sha256:f3ef98d7b76da5eb19c37fda834d50262ff9167c65658d1d8f974d2e4d90676b", + "sha256:f4c8cabb46ff8e5d61f56a037974228e978f26bfefce4f61a4b1ac0ba7a2ab72" ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", - "version": "==2.20" + "markers": "python_version >= '3.6'", + "version": "==0.18.0" }, "pytz": { "hashes": [ @@ -317,6 +293,14 @@ "editable": true, "path": "../scraper" }, + "six": { + "hashes": [ + "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926", + "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", + "version": "==1.16.0" + }, "soupsieve": { "hashes": [ "sha256:052774848f448cf19c7e959adf5566904d525f33a3f8b6ba6f6f8f26ec7de0cc", diff --git a/server/server/db.py b/server/server/db.py index aa30109..05ec249 100644 --- a/server/server/db.py +++ b/server/server/db.py @@ -1,6 +1,9 @@ # Globals stations = [] trains = [] +db_data = { + 'version': 2, +} # Examples example_station = { @@ -20,38 +23,100 @@ example_train = { import json import os from os import path, stat +from contextlib import contextmanager + from .utils import take_while DB_DIR = os.environ.get('DB_DIR', '') or './db' if not path.exists(DB_DIR): os.mkdir(DB_DIR) +DB_FILE = path.join(DB_DIR, 'db.json') + STATIONS_FILE = path.join(DB_DIR, 'stations.json') +TRAINS_FILE = path.join(DB_DIR, 'trains.json') + +def migration(): + global db_data + global trains + global stations + if not path.exists(DB_FILE): + print('[Migration] Migrating DB version 1 -> 2') + if path.exists(STATIONS_FILE): + with open(STATIONS_FILE) as f: + stations = json.load(f) + for i in range(len(stations)): + stations[i]['stoppedAtBy'] = [str(num) for num in stations[i]['stoppedAtBy']] + with open(STATIONS_FILE, 'w') as f: + 
json.dump(stations, f) + if path.exists(TRAINS_FILE): + with open(TRAINS_FILE) as f: + trains = json.load(f) + for i in range(len(trains)): + trains[i]['number'] = trains[i]['numberString'] + del trains[i]['numberString'] + with open(TRAINS_FILE, 'w') as f: + json.dump(trains, f) + db_data = { + 'version': 2, + } + with open(DB_FILE, 'w') as f: + json.dump(db_data, f) + migration() + else: + with open(DB_FILE) as f: + db_data = json.load(f) + if db_data['version'] == 2: + print('[Migration] DB Version: 2, noop') + +migration() + +if path.exists(DB_FILE): + with open(DB_FILE) as f: + db_data = json.load(f) +else: + with open(DB_FILE, 'w') as f: + json.dump(db_data, f) + if path.exists(STATIONS_FILE): with open(STATIONS_FILE) as f: stations = json.load(f) -TRAINS_FILE = path.join(DB_DIR, 'trains.json') - if path.exists(TRAINS_FILE): with open(TRAINS_FILE) as f: trains = json.load(f) +_should_commit_on_every_change = True + +@contextmanager +def db_transaction(): + global _should_commit_on_every_change + _should_commit_on_every_change = False + yield + with open(DB_FILE, 'w') as f: + json.dump(db_data, f) + with open(STATIONS_FILE, 'w') as f: + stations.sort(key=lambda s: len(s['stoppedAtBy']), reverse=True) + json.dump(stations, f) + with open(TRAINS_FILE, 'w') as f: + json.dump(trains, f) + _should_commit_on_every_change = True + def found_train(rank: str, number: str, company: str) -> int: - number_int = int(''.join(take_while(lambda s: str(s).isnumeric(), number))) + number = ''.join(take_while(lambda s: str(s).isnumeric(), number)) try: - next(filter(lambda tr: tr['number'] == number_int, trains)) + next(filter(lambda tr: tr['number'] == number, trains)) except StopIteration: trains.append({ - 'number': number_int, - 'numberString': number, + 'number': number, 'company': company, 'rank': rank, }) - with open(TRAINS_FILE, 'w') as f: - json.dump(trains, f) - return number_int + if _should_commit_on_every_change: + with open(TRAINS_FILE, 'w') as f: + json.dump(trains, f) + return number def found_station(name: str): try: @@ -61,25 +126,46 @@ def found_station(name: str): 'name': name, 'stoppedAtBy': [], }) - stations.sort(key=lambda s: len(s['stoppedAtBy']), reverse=True) - with open(STATIONS_FILE, 'w') as f: - json.dump(stations, f) + if _should_commit_on_every_change: + stations.sort(key=lambda s: len(s['stoppedAtBy']), reverse=True) + with open(STATIONS_FILE, 'w') as f: + json.dump(stations, f) -def found_train_at_station(station_name: str, train_number: int): +def found_train_at_station(station_name: str, train_number: str): + train_number = ''.join(take_while(lambda s: str(s).isnumeric(), train_number)) found_station(station_name) for i in range(len(stations)): if stations[i]['name'] == station_name: if train_number not in stations[i]['stoppedAtBy']: stations[i]['stoppedAtBy'].append(train_number) - stations.sort(key=lambda s: len(s['stoppedAtBy']), reverse=True) - with open(STATIONS_FILE, 'w') as f: - json.dump(stations, f) break + if _should_commit_on_every_change: + stations.sort(key=lambda s: len(s['stoppedAtBy']), reverse=True) + with open(STATIONS_FILE, 'w') as f: + json.dump(stations, f) def on_train_data(train_data: dict): - train_no = found_train(train_data['rank'], train_data['number'], train_data['operator']) - for station in train_data['stations']: - found_train_at_station(station['name'], train_no) + with db_transaction(): + train_no = found_train(train_data['rank'], train_data['number'], train_data['operator']) + for station in train_data['stations']: + 
found_train_at_station(station['name'], train_no) -def on_train_lookup_failure(train_no: int): +def on_train_lookup_failure(train_no: str): pass + +def on_station(station_data: dict): + station_name = station_data['stationName'] + + def process_train(train_data: dict): + train_number = train_data['train']['number'] + train_number = found_train(train_data['train']['rank'], train_number, train_data['train']['operator']) + found_train_at_station(station_name, train_number) + if 'route' in train_data['train'] and train_data['train']['route']: + for station in train_data['train']['route']: + found_train_at_station(station, train_number) + + with db_transaction(): + for train in station_data['arrivals']: + process_train(train) + for train in station_data['departures']: + process_train(train) diff --git a/server/server/flask_utils.py b/server/server/flask_utils.py new file mode 100644 index 0000000..c43f00c --- /dev/null +++ b/server/server/flask_utils.py @@ -0,0 +1,29 @@ +from flask import request as _f_request + +from .utils import filter_result as _filter_result + +def filtered_data(fn): + def filterer(*args, **kwargs): + filters = _f_request.args.get('filters', None) + if filters: + filters_raw = [f.split(':', 1) for f in filters.split(',')] + filters = {'.': []} + for key, value in filters_raw: + def add_to(obj, key, value): + if '.' in key: + prop, key = key.split('.', 1) + if prop not in filters: + obj[prop] = {'.': []} + add_to(obj[prop], key, value) + else: + obj['.'].append({key: value}) + add_to(filters, key, value) + properties = _f_request.args.get('properties', None) + if properties: + properties = properties.split(',') + + data = fn(*args, **kwargs) + + return _filter_result(data, properties, filters) + + return filterer diff --git a/server/server/server.py b/server/server/server.py index aaf5a46..3380fcb 100644 --- a/server/server/server.py +++ b/server/server/server.py @@ -1,9 +1,13 @@ print(f'Server {__name__=}') import datetime -from flask import Flask, json, request, jsonify + +from flask import Flask, jsonify, url_for +from jsonschema import validate from .cache import CachedData +from .scraper.schemas import TRAIN_INFO_SCHEMA +from .utils import get_hostname app = Flask(__name__) @@ -14,14 +18,18 @@ app.register_blueprint(v2.bp) def root(): return 'Test' +@app.route('/train/.schema.json') +def get_train_info_schema(): + return jsonify(TRAIN_INFO_SCHEMA['v1']) + train_data_cache = {} @app.route('/train/') def get_train_info(train_no: int): def get_data(): - from .scraper.scraper import scrape + from .scraper.scraper import scrape_train use_yesterday = False - result = scrape(train_no, use_yesterday=use_yesterday) + result = scrape_train(train_no, use_yesterday=use_yesterday) from . 
import db db.on_train_data(result) @@ -40,6 +48,8 @@ def get_train_info(train_no: int): if train_no not in train_data_cache: train_data_cache[train_no] = CachedData(get_data, validity=1000 * 30) data, fetch_time = train_data_cache[train_no]() + data['$schema'] = get_hostname() + url_for('.get_train_info_schema') + validate(data, schema=TRAIN_INFO_SCHEMA['v1']) resp = jsonify(data) resp.headers['X-Last-Fetched'] = fetch_time.isoformat() return resp diff --git a/server/server/utils.py b/server/server/utils.py index 81fcb30..8ebc85d 100644 --- a/server/server/utils.py +++ b/server/server/utils.py @@ -16,3 +16,26 @@ def check_yes_no(input: str, default=_NO_DEFAULT, considered_yes=None) -> bool: considered_yes = ['y', 'yes', 't', 'true', '1'] return input in considered_yes +def get_hostname(): + import os + import platform + return os.getenv('HOSTNAME', os.getenv('COMPUTERNAME', platform.node())) + +def filter_result(data, properties=None, filters=None): + is_array = not hasattr(data, 'get') + result = data if is_array else [data] + + if filters: + # Todo: implement filters + pass + # def f(lst, filters): + # def condition(item): + + # return list(filter(condition, lst)) + # result = f(result, filters) + + if properties: + for i in range(len(result)): + result[i] = {p:result[i].get(p, None) for p in properties} + + return result if is_array else result[0] diff --git a/server/server/v2/v2.py b/server/server/v2/v2.py index d9a3fd1..2261fa4 100644 --- a/server/server/v2/v2.py +++ b/server/server/v2/v2.py @@ -1,32 +1,87 @@ +import json from flask import Blueprint, jsonify, request +from flask.helpers import url_for +from jsonschema import validate from .. import db from ..cache import CachedData -from ..utils import check_yes_no +from ..utils import check_yes_no, get_hostname +from ..flask_utils import filtered_data +from ..scraper.utils import ro_letters_to_en +from ..scraper.schemas import STATION_SCHEMA, TRAIN_INFO_SCHEMA bp = Blueprint('v2', __name__, url_prefix='/v2') @bp.get('/trains') def get_known_trains(): - return jsonify(db.trains) + @filtered_data + def get_data(): + return db.trains + + result = get_data() + + return jsonify(result) @bp.get('/stations') def get_known_stations(): - return jsonify(db.stations) + @filtered_data + def get_data(): + return db.stations + + result = get_data() + + return jsonify(result) train_data_cache = {} +@bp.route('/train/.schema.json') +def get_train_info_schema(): + return jsonify(TRAIN_INFO_SCHEMA['v2']) + @bp.route('/train/') def get_train_info(train_no: int): use_yesterday = check_yes_no(request.args.get('use_yesterday', ''), default=False) + @filtered_data def get_data(): - from ..scraper.scraper import scrape - result = scrape(train_no, use_yesterday=use_yesterday) + from ..scraper.scraper import scrape_train + result = scrape_train(train_no, use_yesterday=use_yesterday) db.on_train_data(result) return result - if train_no not in train_data_cache: + if (train_no, use_yesterday) not in train_data_cache: train_data_cache[(train_no, use_yesterday)] = CachedData(get_data, validity=1000 * 30) data, fetch_time = train_data_cache[(train_no, use_yesterday)]() + data['$schema'] = get_hostname() + url_for('.get_train_info_schema') + validate(data, schema=TRAIN_INFO_SCHEMA['v2']) resp = jsonify(data) resp.headers['X-Last-Fetched'] = fetch_time.isoformat() return resp + +station_cache = {} + +@bp.route('/station/.schema.json') +def get_station_schema(): + return jsonify(STATION_SCHEMA['v2']) + +@bp.route('/station/') +def get_station(station_name: str): + 
station_name = ro_letters_to_en(station_name.lower().replace(' ', '-'))
+
+    def get_data():
+        from ..scraper.scraper import scrape_station
+        result = scrape_station(station_name)
+        db.on_station(result)
+        return result
+    if station_name not in station_cache:
+        station_cache[station_name] = CachedData(get_data, validity=1000 * 30)
+    data, fetch_time = station_cache[station_name]()
+    data['$schema'] = get_hostname() + url_for('.get_station_schema')
+    validate(data, schema=STATION_SCHEMA['v2'])
+
+    @filtered_data
+    def filter(data):
+        return data
+
+    resp = jsonify(filter(data))
+    resp.headers['X-Last-Fetched'] = fetch_time.isoformat()
+    return resp
+
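
For reviewers who want to exercise the new station scraper outside Flask, a minimal sketch follows. Assumptions: the scraper package added by this patch is installed editable (as the server's Pipfile does) and is importable as `scraper`; the station name is an arbitrary example; the call performs live requests against mersultrenurilor.infofer.ro.

# Minimal usage sketch, assuming the package is importable as `scraper`.
from pprint import pprint

from scraper.scrape_station import scrape as scrape_station

# Example station; scrape() strips diacritics itself via ro_letters_to_en(),
# so 'București Nord' and 'Bucuresti Nord' resolve to the same page.
data = scrape_station('Bucuresti Nord')

# Top-level keys per scrape_station_schema_v2.json:
# stationName, date, arrivals, departures.
print(data['stationName'], data['date'])

# Each departure carries a localised ISO 8601 time plus train details.
for dep in data['departures'][:3]:
    train = dep['train']
    print(dep['time'], train['rank'], train['number'], '->', train['destination'])

pprint(data['arrivals'][:1])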