-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathnumerical_array.py
45 lines (37 loc) · 1.67 KB
/
numerical_array.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
"""This module provides the integrity checks
for the 'numerical_array' fractalis format."""
import logging
import numpy as np
import pandas as pd
from fractalis.data.check import IntegrityCheck
logger = logging.getLogger(__name__)


def _logged_value_error(message: str) -> ValueError:
    """Log *message* at error level and return a ValueError carrying it."""
    logger.error(message)
    return ValueError(message)


class NumericalArrayIntegrityCheck(IntegrityCheck):
    """Implements IntegrityCheck for 'numerical_array' data type."""

    data_type = 'numerical_array'

    def check(self, data: object) -> None:
        """Validate that *data* is a well-formed 'numerical_array' frame.

        The checks run in this order and stop at the first failure:

        1. *data* is a ``pandas.DataFrame``.
        2. Its columns are exactly 'id', 'feature' and 'value' (any order).
        3. The 'id' and 'feature' columns have dtype ``object``.
        4. The 'value' column has a NumPy integer or floating-point dtype.
        5. Grouping by ('id', 'feature') yields as many groups as there
           are rows.

        :param data: The object to validate.
        :raises ValueError: If any check fails; the message is also logged
            at error level.
        """
        if not isinstance(data, pd.DataFrame):
            raise _logged_value_error("Data must be a pandas.DataFrame.")

        # Compare as a set plus a length check rather than sorting the
        # labels: sorting raises TypeError when labels mix types (e.g. 0 and
        # 'id'), and the length check still rejects duplicated column names.
        columns = data.columns.tolist()
        if len(columns) != 3 or set(columns) != {'id', 'feature', 'value'}:
            raise _logged_value_error(
                "Data frame must contain the columns "
                "'id', 'feature', and 'value'.")

        # The builtin ``object`` is used because the ``np.object`` alias
        # was removed from NumPy (deprecated in 1.20, removed in 1.24).
        if data['id'].dtype != object:
            raise _logged_value_error(
                "'id' column must be of type 'object' ('string').")
        if data['feature'].dtype != object:
            raise _logged_value_error(
                "'feature' column must be of type 'object' ('string').")

        # ``np.int`` / ``np.float`` no longer exist either. Checking the
        # dtype kind accepts every integer ('i', 'u') and float ('f') width
        # instead of only the platform default, while still rejecting
        # bool, complex, datetime-like and non-NumPy (extension) dtypes.
        value_dtype = data['value'].dtype
        if not (isinstance(value_dtype, np.dtype)
                and value_dtype.kind in ('i', 'u', 'f')):
            raise _logged_value_error(
                "'value' column must be of type 'np.int' or 'np.float'.")

        # One group per row means no ('id', 'feature') pair occurs twice.
        if data.groupby(['id', 'feature']).count().shape[0] != data.shape[0]:
            raise _logged_value_error(
                "Every combination of 'id' and 'feature' must be unique.")