Source code for realtabformer.rtf_validators
from typing import Any, Dict, Optional, Tuple, Union
import numpy as np
import pandas as pd
from shapely.geometry import MultiPolygon, Point, Polygon
class ValidatorBase:
    """Common parent for all validators.

    The class carries no state of its own. Subclasses are expected to
    override :meth:`validate`; the implementation here always raises.
    """

    def __init__(self) -> None:
        """Create the validator; the base class has nothing to set up."""

    def validate(self, *args: Any, **kwargs: Any) -> bool:
        """Decide whether the supplied value(s) are valid.

        Args:
            *args: Positional values to check (meaning defined by subclasses).
            **kwargs: Keyword values to check (meaning defined by subclasses).

        Raises:
            NotImplementedError: Always, because the base class defines no
                validation rule.
        """
        raise NotImplementedError()
class RangeValidator(ValidatorBase):
    """Validator that accepts values lying inside a closed interval.

    Both bounds are inclusive: a value equal to ``min_val`` or ``max_val``
    is considered valid.
    """

    def __init__(
        self,
        min_val: Union[float, int, np.number],
        max_val: Union[float, int, np.number],
    ) -> None:
        """Store the interval bounds.

        Args:
            min_val: Smallest accepted value (inclusive).
            max_val: Largest accepted value (inclusive).
        """
        super().__init__()
        self.min_val = min_val
        self.max_val = max_val

    def validate(  # type: ignore
        self, val: Union[float, int, np.number], *args: Any, **kwargs: Any
    ) -> bool:
        """Return ``True`` when ``min_val <= val <= max_val``.

        Args:
            val: The value to check.
            *args: Accepted for interface compatibility; ignored.
            **kwargs: Accepted for interface compatibility; ignored.

        Returns:
            A plain Python ``bool``.
        """
        # Comparisons involving NumPy scalars produce ``np.bool_``; coerce so
        # the result matches the annotated return type (``np.bool_`` fails
        # ``is True`` checks and is not JSON serialisable).
        return bool(self.min_val <= val <= self.max_val)
class GeoValidator(ValidatorBase):
    """Validator that checks a coordinate pair against a bounding geometry."""

    def __init__(self, geo_bound: Union[Polygon, MultiPolygon]) -> None:
        """Store the bounding geometry.

        Args:
            geo_bound: Geometry whose ``contains`` method decides validity.
        """
        super().__init__()
        self.geo_bound = geo_bound

    def validate(self, lon: float, lat: float) -> bool:  # type: ignore
        """Report whether the point ``(lon, lat)`` is contained in the bound.

        Args:
            lon: First coordinate passed to ``Point`` (x / longitude).
            lat: Second coordinate passed to ``Point`` (y / latitude).

        Returns:
            Whatever ``geo_bound.contains`` reports for the constructed point.
        """
        # NOTE(review): shapely's ``contains`` is expected to exclude points
        # lying exactly on the boundary -- confirm if edge points matter.
        return self.geo_bound.contains(Point(lon, lat))
class ObservationValidator(ValidatorBase):
    """Apply a named collection of validators to a single observation (row).

    ``self.validators`` maps a name to a ``(validator, cols)`` pair. When an
    observation is checked, the values found under ``cols`` are passed, in
    order, as positional arguments to ``validator.validate``.
    """

    def __init__(
        self,
        validators: Optional[
            Dict[str, Tuple[ValidatorBase, Tuple[str, ...]]]
        ] = None,
    ) -> None:
        """Store the initial validators.

        Args:
            validators: Mapping of name to ``(validator, cols)``. ``None`` or
                an empty mapping results in a fresh empty dict.
        """
        super().__init__()
        self.validators = validators or {}

    def validate(self, series: pd.Series) -> bool:  # type: ignore
        """Check one observation against every registered validator.

        Evaluation stops at the first validator that rejects the observation.
        With no validators registered the observation is considered valid.

        Args:
            series: The observation; indexed with each column name in ``cols``.

        Returns:
            ``True`` if every validator accepts the observation, else ``False``.
        """
        for validator, cols in self.validators.values():
            # A bare string would otherwise be iterated character by
            # character; treat it as a single column name.
            if isinstance(cols, str):
                cols = (cols,)
            if not validator.validate(*(series[c] for c in cols)):
                return False
        return True

    def validate_df(self, df: pd.DataFrame) -> pd.Series:
        """Validate every row of ``df``.

        Args:
            df: Frame whose rows are passed one by one to :meth:`validate`.

        Returns:
            A Series aligned with ``df.index`` holding the per-row result.
        """
        # ``DataFrame.apply`` takes a special path for empty frames and is not
        # guaranteed to return a boolean Series there, so handle the zero-row
        # case explicitly.
        if len(df.index) == 0:
            return pd.Series([], index=df.index, dtype=bool)
        return df.apply(self.validate, axis=1)

    def add_validator(
        self,
        name: str,
        validator: ValidatorBase,
        cols: Union[str, Tuple[str, ...]],
    ) -> None:
        """Register ``validator`` under ``name``, replacing any existing entry.

        Args:
            name: Key under which the validator is stored.
            validator: Object whose ``validate`` method is called.
            cols: Column name(s) whose values are fed to ``validator.validate``,
                in order.
        """
        self.validators[name] = (validator, cols)

    def remove_validator(self, name: str) -> Tuple:
        """Unregister and return the ``(validator, cols)`` pair for ``name``.

        Raises:
            KeyError: If no validator is registered under ``name``.
        """
        return self.validators.pop(name)