Source code for rics.translation.offline.types

"""Types used for offline translation."""
from dataclasses import dataclass
from typing import Any, Dict, Generic, Sequence, Tuple, TypeVar, Union

import pandas as pd

NameType = TypeVar("NameType", int, str)
IdType = TypeVar("IdType", int, str)
SourceType = TypeVar("SourceType", int, str)

PlaceholdersTuple = Tuple[str, ...]
NameToSourceDict = Dict[NameType, SourceType]  # {name: source}
TranslatedIds = Dict[IdType, str]  # {id: translation}
# {"shared": {from_placeholder: to_placeholder}, "source-specific": {source: {from_placeholder: to_placeholder}}
PlaceholderOverridesDict = Dict[str, Union[Dict[str, str], Dict[str, Dict[str, str]]]]
DefaultTranslationsDict = Dict[str, Union[Dict[str, Any], Dict[str, Dict[str, Any]]]]
SourcePlaceholderTranslations = Dict[SourceType, "PlaceholderTranslations"]  # {source: PlaceholderTranslations}


[docs]@dataclass class PlaceholderTranslations(Generic[SourceType]): """Matrix of ID translation components returned by fetchers. Attributes: source: Source for the placeholders. placeholders: Names of placeholders in the order in which they appear in `records`. records: Response matrix of shape `N x M` where `N` is the number of IDs returned and `M` is the length of `placeholders`. id_pos: Position if the `"id"` placeholder in `placeholders`. """ MakeTypes = Union["PlaceholderTranslations", pd.DataFrame, Dict[str, Sequence[Any]]] source: SourceType placeholders: PlaceholdersTuple records: Sequence[Sequence[Any]] id_pos: int = -1
[docs] @classmethod def make(cls, source: SourceType, data: MakeTypes) -> "PlaceholderTranslations": """Try to make in instance from arbitrary input data. Args: source: Source label for the translations. data: Some data to convert to a PlaceholderTranslations instance. Returns: A new PlaceholderTranslations instance. Raises: TypeError: If `data` cannot be converted. """ if isinstance(data, PlaceholderTranslations): # pragma: no cover return data if isinstance(data, pd.DataFrame): return cls.from_dataframe(source, data) if isinstance(data, dict): return cls.from_dict(source, data) raise TypeError(data) # pragma: no cover
[docs] def to_dict(self, max_rows: int = 0) -> Dict[str, Sequence[Any]]: """Create a dict representation of the translations.""" records = self.records[:max_rows] if max_rows else self.records return {placeholder: [row[i] for row in records] for i, placeholder in enumerate(self.placeholders)}
[docs] @staticmethod def to_dicts( source_placeholder_translations: "SourcePlaceholderTranslations", max_rows: int = 0, ) -> Dict[SourceType, Dict[str, Sequence[Any]]]: """Create a nested dict representation of the translations.""" return { source: placeholder_translations.to_dict(max_rows) for source, placeholder_translations in source_placeholder_translations.items() }
[docs] @classmethod def from_dataframe(cls, source: SourceType, data: pd.DataFrame) -> "PlaceholderTranslations": """Create instance from a pandas DataFrame.""" return cls( source, placeholders=tuple(data), records=list(map(list, data.to_records(index=False))), id_pos=data.columns.get_loc("id") if "id" in data else -1, )
[docs] @classmethod def from_dict(cls, source: SourceType, data: Dict[str, Sequence[Any]]) -> "PlaceholderTranslations": """Create instance from a dict.""" return cls.from_dataframe(source, pd.DataFrame.from_dict(data))