Browse Source

Fixed parsing + added support for stopping times given in seconds

python3
Dan Cojocaru 3 years ago
parent
commit
ddf9c27cc3
Signed by: kbruen
GPG Key ID: 818A889458EDC937
  1. 7
      scraper/scrape_station.py
  2. 3
      scraper/scrape_station_schema_v2.json
  3. 7
      scraper/scrape_train.py
  4. 2
      scraper/scrape_train_schema.json
  5. 2
      scraper/scrape_train_schema_v2.json
  6. 2
      server/server/server.py

7
scraper/scrape_station.py

@ -14,7 +14,7 @@ RO_LETTERS = r'A-Za-zăâîșțĂÂÎȚȘ'
STATION_INFO_REGEX = re.compile(rf'^([{RO_LETTERS}. ]+) în ([0-9.]+)$')
STOPPING_TIME_REGEX = re.compile(r'^(necunoscută \(stație terminus\))|(?:([0-9]+) min \((?:începând cu|până la) ([0-9]{1,2}:[0-9]{2})\))$')
STOPPING_TIME_REGEX = re.compile(r'^(necunoscută \(stație terminus\))|(?:([0-9]+) (min|sec) \((?:începând cu|până la) ([0-9]{1,2}:[0-9]{2})\))$')
# endregion
@ -62,13 +62,14 @@ def scrape(station_name: str):
st_hr, st_min = (int(comp) for comp in result['time'].split(':'))
result['time'] = tz.localize(dt_seq(st_hr, st_min)).isoformat()
unknown_st, st, st_opposite_time = STOPPING_TIME_REGEX.match(
unknown_st, st, minsec, st_opposite_time = STOPPING_TIME_REGEX.match(
collapse_space(stopping_time_div.div('div', recursive=False)[1].text)
).groups()
if unknown_st:
result['stoppingTime'] = None
elif st:
result['stoppingTime'] = int(st)
minutes = minsec == 'min'
result['stoppingTime'] = int(st) * 60 if minutes else int(st)
result['train'] = {}
result['train']['rank'] = collapse_space(train_div.div.div('div', recursive=False)[1].span.text)

3
scraper/scrape_station_schema_v2.json

@ -53,11 +53,12 @@
]
},
"stoppingTime": {
"description": "The number of seconds the train stops in the station",
"type": [
"integer",
"null"
],
"minimum": 1
"minimum": 0
}
},
"required": [

7
scraper/scrape_train.py

@ -29,7 +29,7 @@ KM_REGEX = re.compile(r'^km ([0-9]+)$')
PLATFORM_REGEX = re.compile(r'^linia (.+)$')
STOPPING_TIME_REGEX = re.compile(r'^([0-9]+) min oprire$')
STOPPING_TIME_REGEX = re.compile(r'^([0-9]+) (min|sec) oprire$')
STATION_DEPARR_STATUS_REGEX = re.compile(r'^(?:(la timp)|(?:((?:\+|-)[0-9]+) min \((?:(?:întârziere)|(?:mai devreme))\)))(\*?)$')
@ -106,7 +106,10 @@ def scrape(train_no: int, use_yesterday=False, date_override=None):
if not station_scraped['stoppingTime']:
station_scraped['stoppingTime'] = None
else:
station_scraped['stoppingTime'] = int(STOPPING_TIME_REGEX.match(station_scraped['stoppingTime']).groups()[0])
st_value, st_minsec = STOPPING_TIME_REGEX.match(station_scraped['stoppingTime']).groups()
station_scraped['stoppingTime'] = int(st_value)
if st_minsec == 'min':
station_scraped['stoppingTime'] *= 60
station_scraped['platform'] = collapse_space(middle.div.div('div', recursive=False)[0]('div', recursive=False)[3].text)
if not station_scraped['platform']:
station_scraped['platform'] = None

2
scraper/scrape_train_schema.json

@ -113,7 +113,7 @@
"stoppingTime": {
"description": "The number of minutes the train is scheduled to stop in this station",
"type": ["integer", "null"],
"minimum": 1
"minimum": 0
},
"platform": {
"description": "The platform the train stopped at",

2
scraper/scrape_train_schema_v2.json

@ -111,7 +111,7 @@
"type": "integer"
},
"stoppingTime": {
"description": "The number of minutes the train is scheduled to stop in this station",
"description": "The number of seconds the train is scheduled to stop in this station",
"type": ["integer", "null"],
"minimum": 1
},

2
server/server/server.py

@ -43,6 +43,8 @@ def get_train_info(train_no: int):
if result['stations'][i]['departure']:
date = datetime.datetime.fromisoformat(result['stations'][i]['departure']['scheduleTime'])
result['stations'][i]['departure']['scheduleTime'] = f'{date.hour}:{date.minute:02}'
if 'stoppingTime' in result['stations'][i] and result['stations'][i]['stoppingTime']:
result['stations'][i]['stoppingTime'] //= 60
return result
if train_no not in train_data_cache:

Loading…
Cancel
Save