# Source code for uk_covid19.api_interface

#!/usr/bin/env python3

# Imports
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Python:
from typing import Iterable, Dict, Union, Iterator
from json import dumps
from http import HTTPStatus
from datetime import datetime
from xml.etree.ElementTree import Element as XMLElement

# 3rd party:
from requests import request, Response
import certifi

# Internal:
from uk_covid19.utils import save_data
from uk_covid19.data_format import DataFormat
from uk_covid19.exceptions import FailedRequestError

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

# Public API of this module: only the ``Cov19API`` class is exported by
# ``from uk_covid19.api_interface import *``.
__all__ = [
    'Cov19API'
]


# Type of the ``structure`` argument: maps the output field names to the
# metric names requested from the API.
StructureType = Dict[str, Union[dict, str]]
# Type of the ``filters`` argument: an iterable of filter strings
# (the docstring examples use values such as ``"areaType=region"``).
FiltersType = Iterable[str]


class Cov19API:
    """
    Interface to access the API service for COVID-19 data in the United Kingdom.

    Parameters
    ----------
    filters: Iterable[str]
        API filters. See the API documentations for additional
        information. A single string is treated as one filter.

    structure: Dict[str, Union[dict, str]]
        Structure parameter. See the API documentations for
        additional information. The structure must be flat.

    latest_by: Union[str, None]
        Retrieves the latest value for a specific metric. [Default: ``None``]

    Raises
    ------
    TypeError
        If ``structure`` contains nested ``list`` or ``dict`` values.
    """

    endpoint = "https://api.coronavirus.data.gov.uk/v1/data"
    release_timestamp_endpoint = "https://api.coronavirus.data.gov.uk/v1/timestamp"

    # Raw ``Last-Modified`` header of the most recent response (if any).
    _last_update: Union[str, None] = None
    # Number of pages found by the most recent paginated request (if any).
    _total_pages: Union[int, None] = None

    def __init__(self, filters: "FiltersType", structure: "StructureType",
                 latest_by: Union[str, None] = None):
        self.filters = filters

        # Iterating over a ``dict`` yields its keys, so the values must be
        # inspected explicitly; otherwise a nested mapping is never detected.
        values = structure.values() if isinstance(structure, dict) else structure

        if any(isinstance(value, (list, dict)) for value in values):
            raise TypeError(
                "Nested structures are no longer supported. Please define a flat "
                "structure instead."
            )

        self.structure = structure
        self.latest_by = latest_by

    @property
    def total_pages(self) -> Union[int, None]:
        """
        :property: Produces the total number of pages for a given set of
                   parameters (only after the data are requested).

        Returns
        -------
        Union[int, None]
        """
        return self._total_pages

    @property
    def last_update(self) -> str:
        """
        :property: Produces the timestamp for the last update in GMT.

        This property supplies the API time - i.e. the time at which the data were
        deployed to the database. Please note that there will always be a difference
        between this time and the timestamp that is displayed on the website, which may
        be accessed via the ``.get_release_timestamp()`` method. The website timestamp
        signifies the time at which the data were released to the API, and by extension
        the website.

        .. note::

            The output is extracted from the header and is accurate to
            the second.

        .. warning::

            The ISO-8601 standard requires a ``"Z"`` character to be added
            to the end of the timestamp. This is a timezone feature and is
            not recognised by Python's ``datetime`` library. It is, however,
            recognised by most other libraries; e.g. ``pandas``. If you wish
            to parse the timestamp using the ``datetime`` library, make sure
            that you remove the trailing ``"Z"`` character.

        Returns
        -------
        str
            Timestamp, formatted as ISO-8601.

        Examples
        --------
        >>> filters = ["areaType=region"]
        >>> structure = {
        ...     "name": "areaName",
        ...     "newCases": "newCasesBySpecimenDate"
        ... }
        >>> data = Cov19API(
        ...     filters=filters,
        ...     structure=structure,
        ...     latest_by='newCasesBySpecimenDate'
        ... )
        >>> timestamp = data.last_update
        >>> print(timestamp)
        2020-07-27T20:29:16.000000Z

        >>> from datetime import datetime
        >>> parsed_timestamp = datetime.fromisoformat(timestamp.strip("Z"))
        >>> print(parsed_timestamp)
        2020-07-27 20:29:16
        """
        # No response has been seen yet: ask the API for the headers only.
        if self._last_update is None:
            self._last_update = self.head()['Last-Modified']

        timestamp = datetime.strptime(self._last_update, "%a, %d %b %Y %H:%M:%S GMT")

        # The header carries no sub-second precision, hence the fixed suffix.
        return timestamp.isoformat() + ".000000Z"

    @staticmethod
    def get_release_timestamp() -> str:
        """
        :staticmethod: Produces the website timestamp in GMT.

        .. versionadded:: 1.2.0

        This method supplies the website timestamp - i.e. the time at which the data
        were released to the API and by extension the website. Please note that there
        will be a difference between this timestamp and the timestamp produced using
        the ``last_update`` property. The latter signifies the time at which the data
        were deployed to the database, not the time at which they were released.

        .. note::

            The output is extracted from the response body and is accurate
            to the milliseconds.

        .. warning::

            The ISO-8601 standard requires a ``"Z"`` character to be added
            to the end of the timestamp. This is a timezone feature and is
            not recognised by Python's ``datetime`` library. It is, however,
            recognised by most other libraries; e.g. ``pandas``. If you wish
            to parse the timestamp using the ``datetime`` library, make sure
            that you remove the trailing ``"Z"`` character.

        Returns
        -------
        str
            Timestamp, formatted as ISO-8601.

        Raises
        ------
        requests.HTTPError
            When the API responds with an error status.

        Examples
        --------
        >>> release_timestamp = Cov19API.get_release_timestamp()
        >>> print(release_timestamp)
        2020-08-08T15:00:09.977840Z

        >>> from datetime import datetime
        >>> release_timestamp = Cov19API.get_release_timestamp()
        >>> parsed_timestamp = datetime.fromisoformat(release_timestamp.strip("Z"))
        >>> print(parsed_timestamp)
        2020-08-08 15:00:09.977840
        """
        # Same certificate bundle and status check as ``head`` / ``options``.
        with request("GET", Cov19API.release_timestamp_endpoint,
                     verify=certifi.where()) as response:
            response.raise_for_status()
            json_data = response.json()

        return json_data['websiteTimestamp']

    @property
    def api_params(self) -> dict:
        """
        :property: API parameters, constructed based on ``filters``,
                   ``structure``, and ``latest_by`` arguments as
                   defined by the user.

        Returns
        -------
        Dict[str, str]
        """
        # A bare string is one filter; joining it directly would insert a
        # separator between every character.
        filters = [self.filters] if isinstance(self.filters, str) else self.filters

        api_params = {
            "filters": ";".join(filters),
            "structure": dumps(self.structure, separators=(",", ":")),
        }

        if self.latest_by is not None:
            api_params["latestBy"] = self.latest_by

        return api_params

    def head(self):
        """
        Response headers for the given input arguments (``filters``,
        ``structure``, and ``latest_by``), obtained via a ``HEAD`` request.

        Returns
        -------
        Dict[str, str]

        Raises
        ------
        requests.HTTPError
            When the API responds with an error status.

        Examples
        --------
        >>> filters = ["areaType=region"]
        >>> structure = {
        ...     "name": "areaName",
        ...     "newCases": "newCasesBySpecimenDate"
        ... }
        >>> data = Cov19API(
        ...     filters=filters,
        ...     structure=structure,
        ...     latest_by='newCasesBySpecimenDate'
        ... )
        >>> head = data.head()
        >>> print(head)
        {'Cache-Control': 'public, max-age=60', 'Content-Length': '0', ... }
        """
        api_params = self.api_params

        with request("HEAD", self.endpoint, params=api_params,
                     verify=certifi.where()) as response:
            response.raise_for_status()
            return response.headers

    @staticmethod
    def options():
        """
        :staticmethod: Provides the options by calling the ``OPTIONS`` method
                       of the API.

        Returns
        -------
        dict
            API options.

        Raises
        ------
        requests.HTTPError
            When the API responds with an error status.

        Examples
        --------
        >>> from pprint import pprint
        >>> options = Cov19API.options()
        >>> pprint(options)
        {'info': {'description': "Public Health England's Coronavirus Dashboard API",
                  'title': 'Dashboard API',
                  'version': '1.0'},
         'openapi': '3.0.1',
         ... }
        """
        with request("OPTIONS", Cov19API.endpoint, verify=certifi.where()) as response:
            response.raise_for_status()
            return response.json()

    def _get(self, format_as: "DataFormat") -> "Iterator[Response]":
        """
        Extracts paginated data by requesting all of the pages, yielding
        one response per page.

        When ``latest_by`` is set, no ``page`` parameter is sent and at most
        one response is yielded. A ``204 No Content`` response is never
        yielded; it ends the iteration and sets the total number of pages.

        Parameters
        ----------
        format_as: DataFormat
            Response format.

        Returns
        -------
        Iterator[Response]

        Raises
        ------
        FailedRequestError
            When the request fails.
        """
        api_params = self.api_params
        api_params["format"] = format_as.value

        paginated = self.latest_by is None
        if paginated:
            api_params["page"] = 1

        while True:
            with request("GET", self.endpoint, params=api_params,
                         verify=certifi.where()) as response:
                if response.status_code >= HTTPStatus.BAD_REQUEST:
                    raise FailedRequestError(response=response, params=api_params)

                # Checked before yielding so that consumers never have to
                # parse an empty body, including for ``latest_by`` requests
                # (which carry no ``page`` parameter).
                if response.status_code == HTTPStatus.NO_CONTENT:
                    self._total_pages = api_params.get("page", 1) - 1
                    break

                if paginated:
                    self._last_update = response.headers["Last-Modified"]

                yield response

            if not paginated:
                break

            api_params["page"] += 1

    def get_json(self, save_as: Union[str, None] = None,
                 as_string: bool = False) -> Union[dict, str]:
        """
        Provides full data (all pages) in JSON.

        Parameters
        ----------
        save_as: Union[str, None]
            If defined, the results will (also) be saved as a
            file. [Default: ``None``]

            The value must be a path to a file with the correct
            extension -- i.e. ``.json`` for JSON.

        as_string: bool
            .. versionadded:: 1.1.4

            If ``False`` (default), returns the data as a dictionary.
            Otherwise, returns the data as a JSON string.

        Returns
        -------
        Union[Dict, str]

        Examples
        --------
        >>> filters = ["areaType=region"]
        >>> structure = {
        ...     "name": "areaName",
        ...     "newCases": "newCasesBySpecimenDate"
        ... }
        >>> data = Cov19API(
        ...     filters=filters,
        ...     structure=structure,
        ...     latest_by='newCasesBySpecimenDate'
        ... )
        >>> result = data.get_json()
        >>> print(result)
        {'data': [{'name': 'East Midlands', 'newCases': 0}, ... }
        """
        resp = {
            "data": []
        }

        for response in self._get(DataFormat.JSON):
            resp["data"].extend(response.json()['data'])

        resp["lastUpdate"] = self.last_update
        resp["length"] = len(resp["data"])
        resp["totalPages"] = self._total_pages

        if save_as is None and not as_string:
            return resp

        data = dumps(resp, separators=(",", ":"))

        # Saving is independent of the return format: ``as_string`` must
        # not suppress a requested ``save_as``.
        if save_as is not None:
            save_data(data, save_as, DataFormat.JSON)

        return data if as_string else resp

    def get_xml(self, save_as: Union[str, None] = None,
                as_string: bool = False) -> Union[XMLElement, str]:
        """
        Provides full data (all pages) in XML.

        Parameters
        ----------
        save_as: Union[str, None]
            If defined, the results will (also) be saved as a
            file. [Default: ``None``]

            The value must be a path to a file with the correct
            extension -- i.e. ``.xml`` for XML.

        as_string: bool
            .. versionadded:: 1.1.4

            If ``False`` (default), returns an ``ElementTree``
            object. Otherwise, returns the data as an XML string.

        Returns
        -------
        Union[xml.etree.ElementTree.Element, str]

        Examples
        --------
        >>> from xml.etree.ElementTree import tostring
        >>> filters = ["areaType=region"]
        >>> structure = {
        ...     "name": "areaName",
        ...     "newCases": "newCasesBySpecimenDate"
        ... }
        >>> data = Cov19API(
        ...     filters=filters,
        ...     structure=structure,
        ...     latest_by='newCasesBySpecimenDate'
        ... )
        >>> result_xml = data.get_xml()
        >>> result_str = tostring(result_xml, encoding='unicode', method='xml')
        >>> print(result_str)
        <document>
            <data>
                <name>East Midlands</name>
                <newCases>0</newCases>
            </data>
            ...
        </document>
        """
        from xml.etree.ElementTree import SubElement, fromstring, tostring

        resp = XMLElement("document")

        for response in self._get(DataFormat.XML):
            # Parsing the XML and extracting "data" elements from the tree:
            parsed_data = fromstring(response.content.decode())
            resp.extend(parsed_data.findall(".//data"))

        extras = {
            "lastUpdate": self.last_update,
            "length": len(resp.findall(".//data")),
            "totalPages": self._total_pages
        }

        for elm_name, value in extras.items():
            elm = SubElement(resp, elm_name)
            elm.text = str(value)

        if save_as is None and not as_string:
            return resp

        str_data = tostring(resp, encoding='unicode', method='xml')

        # Saving is independent of the return format: ``as_string`` must
        # not suppress a requested ``save_as``.
        if save_as is not None:
            save_data(str_data, save_as, DataFormat.XML)

        return str_data if as_string else resp

    def get_csv(self, save_as: Union[str, None] = None) -> str:
        """
        Provides full data (all pages) in CSV.

        .. warning::

            Please make sure that the ``structure`` is not hierarchical as
            CSV outputs are defined as 2D tables and as such, do not support
            hierarchies.

        Parameters
        ----------
        save_as: Union[str, None]
            If defined, the results will (also) be saved as a
            file. [Default: ``None``]

            The value must be a path to a file with the correct
            extension -- i.e. ``.csv`` for CSV.

        Returns
        -------
        str

        Raises
        ------
        ValueError
            If the structure is nested.

        Examples
        --------
        >>> filters = ["areaType=region"]
        >>> structure = {
        ...     "name": "areaName",
        ...     "newCases": "newCasesBySpecimenDate"
        ... }
        >>> data = Cov19API(
        ...     filters=filters,
        ...     structure=structure,
        ...     latest_by='newCasesBySpecimenDate'
        ... )
        >>> result = data.get_csv()
        >>> print(result)
        name,newCases
        East Midlands,0
        ...
        """
        # Checks to ensure that the structure is not hierarchical
        # (``structure`` is a public attribute and may have been
        # reassigned after construction).
        if isinstance(self.structure, dict):
            if any(not isinstance(val, str) for val in self.structure.values()):
                struct = dumps(self.structure, indent=4)
                raise ValueError("CSV structure cannot be nested. Received:\n%s" % struct)

        linebreak = "\n"
        pages = []

        for page_num, response in enumerate(self._get(DataFormat.CSV), start=1):
            decoded_content = response.content.decode()

            # Removing CSV header (column names) where page
            # number is greater than 1.
            if page_num > 1:
                data_lines = decoded_content.split(linebreak)[1:]
                decoded_content = linebreak.join(data_lines)

            pages.append(decoded_content.strip() + linebreak)

        resp = "".join(pages)

        if save_as is not None:
            save_data(resp, save_as, DataFormat.CSV)

        return resp

    def get_dataframe(self):
        """
        Provides the data as a ``pandas.DataFrame`` object.

        .. versionadded:: 1.2.0

        .. warning::

            The ``pandas`` library is not included in the dependencies of this
            library and must be installed separately.

        Returns
        -------
        DataFrame

        Raises
        ------
        ImportError
            If the ``pandas`` library is not installed.
        """
        try:
            from pandas import DataFrame
        except ImportError as err:
            raise ImportError(
                "The `pandas` library is not installed as a part of the `uk-covid19` "
                "library. Please install the library and try again."
            ) from err

        data = self.get_json()
        return DataFrame(data["data"])

    def __str__(self):
        resp = "COVID-19 in the UK - API Service\nCurrent parameters: \n"
        return resp + dumps(self.api_params, indent=4)

    __repr__ = __str__