import datetime

import lxml.html
import requests
from peewee import *
from playhouse.shortcuts import model_to_dict

import config
from database import BaseModel
from plugins.plugin import Plugin

# Page holding the table that RoborPlugin scrapes.
_ROBOR_URL = 'https://www.bnro.ro/StatisticsReportHTML.aspx?icid=801&table=642'

# Seconds to wait for the remote server before giving up; without a timeout
# requests.get() can block indefinitely.
_REQUEST_TIMEOUT = 30


class Robor(BaseModel):
    """A single stored reading: the date, the source column name and its value."""

    # Pass the callable itself (not its result) so the default is computed
    # each time a row is created instead of once at import time.
    date = DateField(index=True, default=datetime.date.today, null=False)
    field = TextField(null=False)
    value = FloatField(null=False)


class RoborPlugin(Plugin):
    """Plugin that scrapes the statistics table and stores new Robor rows."""

    models = [Robor]

    def __init__(self):
        self.__table = None

    def get_column_index(self, table, column_name):
        """Return the position of the header cell whose text contains *column_name*.

        Only the first ``tr`` directly under *table* is inspected.  Returns
        ``None`` when the table has no such row or no ``th`` matches.
        """
        header_row = table.find('tr')
        if header_row is None:
            return None

        for elem in header_row.iter('th'):
            if column_name in elem.text_content():
                return header_row.index(elem)

        return None

    def values_newer_than(self, table, oldest_date: datetime.date, col_date, col_value):
        """Yield ``(date, value)`` pairs for rows dated after *oldest_date*.

        Rows are expected newest-first, so iteration stops at the first row
        whose date is on or before *oldest_date*.  When *oldest_date* is
        ``None`` every parseable row is yielded.  Rows that are too short or
        whose cells do not parse (e.g. the header row) are skipped.
        """
        for row in table.iter('tr'):
            try:
                # strptime() rejects surrounding whitespace, so trim it first.
                dt_str = row[col_date].text_content().strip()
                val_str = row[col_value].text_content()
                dt = datetime.datetime.strptime(dt_str, '%d/%b/%Y').date()
                val = float(val_str)
            except (ValueError, IndexError):
                # Not a data row (header, spacer, short/merged row): ignore it.
                continue

            if (oldest_date is not None) and (dt <= oldest_date):
                break

            yield (dt, val)

    def execute(self):
        """Fetch the remote table and save every reading not yet stored.

        Raises:
            requests.RequestException: the download failed or returned an
                HTTP error status.
            LookupError: the page has no ``stat_table`` table, or the table
                lacks the ``Date`` column or one of ``config.ROBOR_FIELDS``.
        """
        # Fetch & parse data.
        # NOTE(review): verify=False disables TLS certificate validation.  It
        # is kept to preserve existing behaviour, but should be removed once
        # the server's certificate chain validates.
        response = requests.get(_ROBOR_URL, verify=False, timeout=_REQUEST_TIMEOUT)
        response.raise_for_status()
        html = lxml.html.fromstring(response.text)

        # Locate the table and resolve every column before writing anything,
        # so a layout change fails fast instead of after a partial import.
        table = html.find('.//table[@class="stat_table"]')
        if table is None:
            raise LookupError('stat_table not found in response')

        col_date = self.get_column_index(table, 'Date')
        if col_date is None:
            raise LookupError("column 'Date' not found in stat_table")

        columns = []
        for field in config.ROBOR_FIELDS:
            col_value = self.get_column_index(table, field)
            if col_value is None:
                raise LookupError('column %r not found in stat_table' % (field,))
            columns.append((field, col_value))

        for field, col_value in columns:
            # Use the newest stored date for this particular field, so a field
            # that was added later (or missed by an interrupted run) still
            # gets its history imported.
            latest_date = Robor.select(Robor.date) \
                .where(Robor.field == field) \
                .order_by(Robor.date.desc()) \
                .limit(1) \
                .scalar()

            for date, value in self.values_newer_than(table, latest_date, col_date, col_value):
                entry = Robor()
                entry.date = date
                entry.field = field
                entry.value = value
                entry.save()

                print(model_to_dict(entry))