Fixed parsing + added support for seconds stopping time

3 years ago · ddf9c27cc3
6 changed files with 15 additions and 8 deletions
--- a/scraper/scrape_station.py
+++ b/scraper/scrape_station.py
@@ -14,7 +14,7 @@ RO_LETTERS = r'A-Za-zăâîșțĂÂÎȚȘ'
 STATION_INFO_REGEX = re.compile(rf'^([{RO_LETTERS}. ]+) în ([0-9.]+)$')
-STOPPING_TIME_REGEX = re.compile(r'^(necunoscută \(stație terminus\))|(?:([0-9]+) min \((?:începând cu|până la) ([0-9]{1,2}:[0-9]{2})\))$')
+STOPPING_TIME_REGEX = re.compile(r'^(necunoscută \(stație terminus\))|(?:([0-9]+) (min|sec) \((?:începând cu|până la) ([0-9]{1,2}:[0-9]{2})\))$')
 # endregion
@@ -62,13 +62,14 @@ def scrape(station_name: str):
 			st_hr, st_min = (int(comp) for comp in result['time'].split(':'))
 			result['time'] = tz.localize(dt_seq(st_hr, st_min)).isoformat()
-			unknown_st, st, st_opposite_time = STOPPING_TIME_REGEX.match(
+			unknown_st, st, minsec, st_opposite_time = STOPPING_TIME_REGEX.match(
 				collapse_space(stopping_time_div.div('div', recursive=False)[1].text)
 			).groups()
 			if unknown_st:
 				result['stoppingTime'] = None
 			elif st:
-				result['stoppingTime'] = int(st)
+				minutes = minsec == 'min'
+				result['stoppingTime'] = int(st) * 60 if minutes else int(st)
 			result['train'] = {}
 			result['train']['rank'] = collapse_space(train_div.div.div('div', recursive=False)[1].span.text)
--- a/scraper/scrape_station_schema_v2.json
+++ b/scraper/scrape_station_schema_v2.json
@@ -53,11 +53,12 @@
 					]
 				},
 				"stoppingTime": {
 					"description": "The number of seconds the train stops in the station",
 					"type": [
 						"integer",
 						"null"
 					],
-					"minimum": 1
+					"minimum": 0
 				}
 			},
 			"required": [
--- a/scraper/scrape_train.py
+++ b/scraper/scrape_train.py
@@ -29,7 +29,7 @@ KM_REGEX = re.compile(r'^km ([0-9]+)$')
 PLATFORM_REGEX = re.compile(r'^linia (.+)$')
-STOPPING_TIME_REGEX = re.compile(r'^([0-9]+) min oprire$')
+STOPPING_TIME_REGEX = re.compile(r'^([0-9]+) (min|sec) oprire$')
 STATION_DEPARR_STATUS_REGEX = re.compile(r'^(?:(la timp)|(?:((?:\+|-)[0-9]+) min \((?:(?:întârziere)|(?:mai devreme))\)))(\*?)$')
@@ -106,7 +106,10 @@ def scrape(train_no: int, use_yesterday=False, date_override=None):
 		if not station_scraped['stoppingTime']:
 			station_scraped['stoppingTime'] = None
 		else:
-			station_scraped['stoppingTime'] = int(STOPPING_TIME_REGEX.match(station_scraped['stoppingTime']).groups()[0])
+			st_value, st_minsec = STOPPING_TIME_REGEX.match(station_scraped['stoppingTime']).groups()
+			station_scraped['stoppingTime'] = int(st_value)
+			if st_minsec == 'min':
+				station_scraped['stoppingTime'] *= 60
 		station_scraped['platform'] = collapse_space(middle.div.div('div', recursive=False)[0]('div', recursive=False)[3].text)
 		if not station_scraped['platform']:
 			station_scraped['platform'] = None
--- a/scraper/scrape_train_schema.json
+++ b/scraper/scrape_train_schema.json
@@ -113,7 +113,7 @@
 					"stoppingTime": {
 						"description": "The number of minutes the train is scheduled to stop in this station",
 						"type": ["integer", "null"],
-						"minimum": 1
+						"minimum": 0
 					},
 					"platform": {
 						"description": "The platform the train stopped at",
--- a/scraper/scrape_train_schema_v2.json
+++ b/scraper/scrape_train_schema_v2.json
@@ -111,7 +111,7 @@
 						"type": "integer"
 					},
 					"stoppingTime": {
-						"description": "The number of minutes the train is scheduled to stop in this station",
+						"description": "The number of seconds the train is scheduled to stop in this station",
 						"type": ["integer", "null"],
 						"minimum": 1
 					},
--- a/server/server/server.py
+++ b/server/server/server.py
@@ -43,6 +43,8 @@ def get_train_info(train_no: int):
 			if result['stations'][i]['departure']:
 				date = datetime.datetime.fromisoformat(result['stations'][i]['departure']['scheduleTime'])
 				result['stations'][i]['departure']['scheduleTime'] = f'{date.hour}:{date.minute:02}'
+			if 'stoppingTime' in result['stations'][i] and result['stations'][i]['stoppingTime']:
+				result['stations'][i]['stoppingTime'] //= 60
 		return result
 	if train_no not in train_data_cache: