"""Types used for offline translation."""
import dataclasses
from typing import TYPE_CHECKING, Any, Dict
from typing import Generic as _Generic
from typing import Sequence, Tuple, Union
import pandas as pd
from rics.translation.types import ID, IdType, NameType, SourceType
if TYPE_CHECKING:
from rics.translation.offline._format import Format
FormatType = Union[str, "Format"]
PlaceholdersTuple = Tuple[str, ...]
NameToSourceDict = Dict[NameType, SourceType] # {name: source}
TranslatedIds = Dict[IdType, str] # {id: translation}
# {"shared": {from_placeholder: to_placeholder}, "source-specific": {source: {from_placeholder: to_placeholder}}
SourcePlaceholderTranslations = Dict[SourceType, "PlaceholderTranslations"] # {source: PlaceholderTranslations}
[docs]@dataclasses.dataclass
class PlaceholderTranslations(_Generic[SourceType]):
"""Matrix of ID translation components returned by fetchers."""
MakeTypes = Union["PlaceholderTranslations", pd.DataFrame, Dict[str, Sequence[Any]]]
source: SourceType
"""Source from which translations were retrieved."""
placeholders: PlaceholdersTuple
"""Names of placeholders in the order in which they appear in `records`."""
records: Sequence[Sequence[Any]]
"""Matrix of shape `N x M` where `N` is the number of IDs returned and `M` is the length of `placeholders`."""
id_pos: int = -1
"""Position if the the ID placeholder in `placeholders`."""
[docs] @classmethod
def make(cls, source: SourceType, data: MakeTypes) -> "PlaceholderTranslations":
"""Try to make in instance from arbitrary input data.
Args:
source: Source label for the translations.
data: Some data to convert to a PlaceholderTranslations instance.
Returns:
A new PlaceholderTranslations instance.
Raises:
TypeError: If `data` cannot be converted.
"""
if isinstance(data, PlaceholderTranslations): # pragma: no cover
return data
if isinstance(data, pd.DataFrame):
return cls.from_dataframe(source, data)
if isinstance(data, dict):
return cls.from_dict(source, data)
raise TypeError(data) # pragma: no cover
[docs] def to_dict(self, max_rows: int = 0) -> Dict[str, Sequence[Any]]:
"""Create a dict representation of the translations."""
records = self.records[:max_rows] if max_rows else self.records
return {placeholder: [row[i] for row in records] for i, placeholder in enumerate(self.placeholders)}
[docs] @staticmethod
def to_dicts(
source_translations: "SourcePlaceholderTranslations",
max_rows: int = 0,
) -> Dict[SourceType, Dict[str, Sequence[Any]]]:
"""Create a nested dict representation of the translations."""
return {source: translations.to_dict(max_rows) for source, translations in source_translations.items()}
[docs] @classmethod
def from_dataframe(cls, source: SourceType, data: pd.DataFrame) -> "PlaceholderTranslations":
"""Create instance from a pandas DataFrame."""
return cls(
source,
placeholders=tuple(data),
records=list(map(list, data.to_records(index=False))),
id_pos=data.columns.get_loc(ID) if ID in data else -1,
)
[docs] @classmethod
def from_dict(cls, source: SourceType, data: Dict[str, Sequence[Any]]) -> "PlaceholderTranslations":
"""Create instance from a dict."""
return cls.from_dataframe(source, pd.DataFrame.from_dict(data))