Source code for rics.mapping.heuristic_functions

"""Functions which perform heuristics for score functions.

See Also:
    The :class:`~.HeuristicScore` class.
"""
from __future__ import annotations

import re
from typing import Any, Iterable, List, Optional, Set, Tuple, Union

from . import filter_functions as ff
from .types import ContextType

VERBOSE: bool = False
"""If ``True`` enable optional DEBUG-level log messages on each heuristic function invocation.

Notes:
    Not all functions have verbose messages.
"""


[docs]def like_database_table( name: str, candidates: Iterable[str], context: Optional[ContextType], ) -> Tuple[str, List[str]]: """Try to make `value` look like the name of a database table.""" def apply(s: str) -> str: s = s.lower().replace("_", "").replace(".", "") s = s[: -len("id")] if s.endswith("id") else s s = s if s.endswith("s") else s + "s" return s return apply(name), list(map(apply, candidates))
[docs]def short_circuit_to_value( value: str, candidates: Iterable[str], context: Optional[str], regex: Union[str, re.Pattern[str]], target: str, ) -> Set[str]: """Short circuit candidates which match a given `regex` a given to-value. Args: value: A value to map. candidates: Candidates for `value`. context: Context in which the function is being called. regex: A pattern in `candidates` which should trigger forced short-circuit matching. target: The target value. If ``value != target``, an empty set is always returned. Returns: Candidates which match `regex`, or an empty set. Notes: This is technically a filter function and may be used as such. """ return ( set() if value != target else ff.require_regex_match( value, candidates, context, regex=regex, where="candidate", purpose=f"short-circuiting to value-{target=}", ) )
[docs]def short_circuit_to_candidate( value: str, candidates: Iterable[str], context: Optional[str], regex: Union[str, re.Pattern[str]], target: str, ) -> Set[str]: """Short circuit candidates which match a given `regex` to a given to-candidate. Args: value: A value to map. candidates: Candidates for `value`. context: Context in which the function is being called. regex: A pattern in `candidates` which should trigger forced short-circuit matching. target: A target candidate. Must be present in `candidates`, or empty set is always returned. Returns: Candidates which match `regex`, or an empty set. Notes: This is technically a filter function and may be used as such. """ return ( set() if target not in candidates else ff.require_regex_match( value, [target], context, regex=regex, where="name", purpose=f"short-circuiting to candidate-{target=}", ) )
[docs]def force_lower_case( value: str, candidates: Iterable[str], context: Optional[ContextType] ) -> Tuple[str, Iterable[str]]: """Force lower-case in `value` and `candidates`.""" return value.lower(), list(map(str.lower, candidates))
[docs]def value_fstring_alias( value: str, candidates: Iterable[str], context: Optional[ContextType], fstring: str, for_value: str = None, **kwargs: Any, ) -> Tuple[str, Iterable[str]]: """Return a value formatted by `fstring`. Args: value: An element to find matches for. candidates: Potential matches for `value`. Not used (returned as given). context: Context in which the function is being called. fstring: The format string to use. Can use `value` and `context` as placeholders. for_value: If given, apply only if ``value == for_value``. When `if_value_equals` is given, `fstring` arguments which do not use the `value` as a placeholder key are permitted. **kwargs: Additional keyword placeholders in `fstring`. Returns: A tuple ``(formatted_value, candidates)``. Raises: ValueError: If `fstring` does not contain a placeholder `'value'` and `for_value` is not given. """ if not for_value and "{value}" not in fstring: # No longer a function of the value. raise ValueError( f"Invalid {fstring=} passed to value_fstring_alias(); does not contain {{value}}. " "To allow, the 'for_value' parameter must be given as well." ) if for_value and value != for_value: return value, candidates return fstring.format(value=value, context=context, **kwargs), candidates
[docs]def candidate_fstring_alias( value: str, candidates: Iterable[str], context: Optional[ContextType], fstring: str, **kwargs: Any, ) -> Tuple[str, Iterable[str]]: """Return candidates formatted by `fstring`. Args: value: An element to find matches for. Not used (returned as given). candidates: Potential matches for `value`. context: Context in which the function is being called. fstring: The format string to use. Can use `value`, `context`, and elements of `candidates` as placeholders. **kwargs: Additional keyword placeholders in `fstring`. Returns: A tuple ``(value, formatted_candidates)``. Raises: ValueError: If `fstring` does not contain a placeholder `'candidate'`. """ if "{candidate}" not in fstring: raise ValueError(f"Invalid {fstring=} passed to candidate_fstring_alias(); does not contain {{candidate}}.") return value, map(lambda c: fstring.format(value=value, candidate=c, context=context, **kwargs), candidates)