Source code for neutronbraggedge.material_handler.retrieve_metadata_table
"""
This class will retrieve the table from the URL and reformat it to be able to
quickly retrieve the metadata for a given material
"""
import configparser
import io
import urllib.request
import pandas as pd
from ..config import config_file as config_config_file
from ..config import local_table as config_local_table
[docs]
class RetrieveMetadataTable:
    """Metadata table retriever.

    This class retrieves the metadata table that will allow us to get the
    lattice parameter and the crystal structure for a given material.

    By default the program will retrieve the local version, but the web
    version can be retrieved by using the ``use_local_table=False`` flag. In
    this case, the table is retrieved from the following web page:
    `Lattice constant <https://en.wikipedia.org/wiki/Lattice_constant>`_.

    >>> from neutronbraggedge.material_handler.retrieve_metadata_table import RetrieveMetadataTable
    >>> retrieve_local_meta = RetrieveMetadataTable()
    >>> _table = retrieve_local_meta.get_table()
    >>> retrieve_url_meta = RetrieveMetadataTable(use_local_table=False)
    >>> _table = retrieve_url_meta.get_table()
    """

    # True -> read the bundled local table, False -> download it from ``url``
    use_local_table: bool
    # value of "material_metadata_url" read from the DEFAULT section of the config file
    url: str
    # formatted table, indexed by the "Material" column
    table: pd.DataFrame
    # first table parsed from the downloaded page, kept exactly as parsed
    raw_table: pd.DataFrame
    # path of the config file the url is read from
    _config_file: str
    # path of the local table file
    _local_table_file: str

    def __init__(self, use_local_table: bool = True) -> None:
        """Record which source to use and, for the web source, load its url.

        Args:
            use_local_table: use the local table (True, default) or the
                table found at the url defined in the config file (False).
        """
        self.use_local_table = use_local_table
        if not use_local_table:
            self._retrieve_url()

    def _retrieve_url(self) -> None:
        """retrieve the default url defined in the top config file

        Raises:
            KeyError: if ``material_metadata_url`` is not defined in the
                DEFAULT section (``ConfigParser.read`` silently ignores a
                file it cannot open, so a missing file also ends up here).
        """
        self._config_file = config_config_file
        config_obj = configparser.ConfigParser()
        config_obj.read(self._config_file)
        self.url = config_obj["DEFAULT"]["material_metadata_url"]

    def retrieve_table(self) -> None:
        """retrieve the table that contains the material/lattice parameters

        By default the local version is retrieved, but the web version is
        selected instead when the ``use_local_table`` flag is False.
        The result is stored in ``self.table``.
        """
        if self.use_local_table:
            self.retrieve_table_local()
        else:
            self.retrieve_table_from_url()

    def retrieve_table_local(self) -> None:
        """retrieve the local table and store it, indexed by "Material", in ``self.table``"""
        self._local_table_file = config_local_table
        local_table = pd.read_csv(self._local_table_file)
        self.table = local_table.set_index("Material")

    def retrieve_table_from_url(self) -> None:
        """retrieve the table using the url defined in the config.cfg file

        The first table found in the page is stored untouched in
        ``self.raw_table`` and its formatted version in ``self.table``.
        """
        # __init__ only loads the url when use_local_table is False; resolve it
        # here so that this public method also works on a default instance
        if not hasattr(self, "url"):
            self._retrieve_url()

        # Wikipedia blocks requests without proper User-Agent headers
        request = urllib.request.Request(
            self.url,
            headers={"User-Agent": "neutronbraggedge/2.0 (scientific research tool)"},
        )
        with urllib.request.urlopen(request) as response:
            html_content = response.read().decode("utf-8")

        table_list = pd.read_html(io.StringIO(html_content))
        self.raw_table = table_list[0]
        self.format_table_from_url()

    def format_table_from_url(self) -> None:
        """reformat the table from the url to easily extract the metadata

        Reads ``self.raw_table`` and stores the result, indexed by the
        "Material" column, in ``self.table``. ``self.raw_table`` itself is
        left untouched, so the method can safely be called more than once.

        Raises:
            KeyError: if no "Material" column can be found.
        """
        raw = self.raw_table
        # Check if pandas already extracted headers correctly
        if "Material" in raw.columns:
            formatted = raw
        else:
            # Fallback for older table format: first row contains headers.
            # Work on a copy of the data rows so the raw table is not renamed
            # in place (which would let the header row leak in as data on a
            # second call).
            formatted = raw.iloc[1:].copy()
            formatted.columns = raw.iloc[0].tolist()
        self.table = formatted.set_index("Material")

    def get_table(self) -> pd.DataFrame:
        """return the table (via url or locally) according to flag used

        The source is selected by the ``use_local_table`` flag given to the
        constructor; the table is retrieved again on every call.

        Returns:
            Pandas table of material/lattice parameters, indexed by "Material"
        """
        self.retrieve_table()
        return self.table