# Source code for neutronbraggedge.material_handler.retrieve_metadata_table

"""
This class will retrieve the table from the URL and reformat it to be able to
quickly retrieve the metadata for a given material
"""

import configparser
import io
import urllib.request

import pandas as pd

from ..config import config_file as config_config_file
from ..config import local_table as config_local_table


class RetrieveMetadataTable:
    """Metadata table retriever.

    This class retrieves the metadata table that allows us to get the lattice
    parameter and the crystal structure for a given material.

    By default the local version of the table is used; the web version can be
    retrieved instead by passing ``use_local_table=False``. In that case the
    table is downloaded from the URL stored under ``material_metadata_url`` in
    the ``DEFAULT`` section of the package config file. According to the
    original documentation this is the following web page:
    `Lattice constant <https://en.wikipedia.org/wiki/Lattice_constant>`_.

    >>> from neutronbraggedge.material_handler.retrieve_metadata_table import RetrieveMetadataTable
    >>> retrieve_local_meta = RetrieveMetadataTable()
    >>> _table = retrieve_local_meta.get_table()
    >>> retrieve_url_meta = RetrieveMetadataTable(use_local_table=False)
    >>> _table = retrieve_url_meta.get_table()

    Attributes:
        use_local_table: True to read the local table, False to download it.
        url: address of the web table (set once the config file has been read).
        table: formatted table, indexed by the ``Material`` column.
        raw_table: first table parsed from the web page, before formatting.
    """

    use_local_table: bool
    url: str
    table: pd.DataFrame
    raw_table: pd.DataFrame
    _config_file: str
    _local_table_file: str

    # Upper bound (in seconds) on the web request, so that an unresponsive
    # server cannot block the caller forever.
    _URL_TIMEOUT_SECONDS: float = 30.0

    def __init__(self, use_local_table: bool = True) -> None:
        """Store the source selection and, for the web source, load the URL.

        Args:
            use_local_table: use the local table (default) or the table found
                at the URL defined in the config file.
        """
        self.use_local_table = use_local_table
        if not use_local_table:
            self._retrieve_url()

    def _retrieve_url(self) -> None:
        """Read the default URL defined in the top config file into ``self.url``.

        Raises:
            KeyError: if ``material_metadata_url`` is missing from the
                ``DEFAULT`` section (including when the file cannot be read).
        """
        self._config_file = config_config_file
        config_obj = configparser.ConfigParser()
        config_obj.read(self._config_file)
        self.url = config_obj["DEFAULT"]["material_metadata_url"]

    def retrieve_table(self) -> None:
        """Retrieve the table that contains the material/lattice parameters.

        The local version is used when ``use_local_table`` is True, otherwise
        the web version is downloaded. The result is stored in ``self.table``.
        """
        if self.use_local_table:
            self.retrieve_table_local()
        else:
            self.retrieve_table_from_url()

    def retrieve_table_local(self) -> None:
        """Load the local table and index it by the ``Material`` column."""
        self._local_table_file = config_local_table
        local_table = pd.read_csv(self._local_table_file)
        self.table = local_table.set_index("Material")

    def retrieve_table_from_url(self) -> None:
        """Download the table from the URL defined in the config file.

        The first table found in the page is stored in ``self.raw_table`` and
        then formatted into ``self.table``.
        """
        # The URL is only loaded by __init__ when use_local_table is False;
        # load it on demand so this method also works on a default instance
        # or after the flag has been changed.
        if getattr(self, "url", None) is None:
            self._retrieve_url()

        # Wikipedia blocks requests without proper User-Agent headers
        request = urllib.request.Request(
            self.url,
            headers={"User-Agent": "neutronbraggedge/2.0 (scientific research tool)"},
        )
        with urllib.request.urlopen(
            request, timeout=self._URL_TIMEOUT_SECONDS
        ) as response:
            html_content = response.read().decode("utf-8")

        table_list = pd.read_html(io.StringIO(html_content))
        self.raw_table = table_list[0]
        self.format_table_from_url()

    def format_table_from_url(self) -> None:
        """Reformat the table from the URL to easily extract the metadata.

        Reads ``self.raw_table`` and stores the result, indexed by the
        ``Material`` column, in ``self.table``. ``self.raw_table`` itself is
        left untouched.
        """
        raw = self.raw_table

        if "Material" in raw.columns:
            # pandas already extracted the headers correctly
            formatted = raw
        else:
            # Fallback for older table format: first row contains headers.
            # Work on a copy so the header promotion does not rename the
            # columns of the raw table as a side effect.
            formatted = raw.iloc[1:].copy()
            formatted.columns = list(raw.iloc[0])

        self.table = formatted.set_index("Material")

    def get_table(self) -> pd.DataFrame:
        """Return the table (via URL or locally) according to the flag used.

        The source is selected by the ``use_local_table`` attribute given at
        construction time; the table is retrieved again on every call.

        Returns:
            Pandas table of material/lattice parameters, indexed by material.
        """
        self.retrieve_table()
        return self.table