-
Notifications
You must be signed in to change notification settings - Fork 1
Usage As A Python Library es
Rafael JPD edited this page Mar 3, 2025
·
1 revision
from scielo_log_validator import validator
# Validar un solo archivo
result = validator.pipeline_validate('/home/user/2022-03-01_scielo-br.log.gz', sample_size=0.25, apply_path_validation=True, apply_content_validation=True)
# Validar todos los archivos en un directorio
for root, _, files in os.walk('/home/user'):
for file in files:
file_path = os.path.join(root, file)
results = validator.pipeline_validate(
path=file_path,
sample_size=0.1,
apply_path_validation=True,
apply_content_validation=True
)La salida del proceso de validación es un objeto JSON que proporciona información detallada sobre el archivo de registro, incluyendo un resumen del contenido, el estado de validación y los detalles de la ruta. Aquí tienes un ejemplo de salida:
{
"/home/user/2022-03-01_scielo-br.log.gz": {
"content": {
"summary": {
"datetimes": {
"(2022, 3, 1, 23)": 5,
"(2022, 3, 2, 0)": 312,
"(2022, 3, 2, 1)": 319,
"(2022, 3, 2, 2)": 321,
"(2022, 3, 2, 3)": 331,
"(2022, 3, 2, 4)": 321,
"(2022, 3, 2, 5)": 320,
"(2022, 3, 2, 6)": 324,
"(2022, 3, 2, 7)": 376,
"(2022, 3, 2, 8)": 345,
"(2022, 3, 2, 9)": 480,
"(2022, 3, 2, 10)": 416,
"(2022, 3, 2, 11)": 506,
"(2022, 3, 2, 12)": 620,
"(2022, 3, 2, 13)": 452,
"(2022, 3, 2, 14)": 419,
"(2022, 3, 2, 15)": 399,
"(2022, 3, 2, 16)": 518,
"(2022, 3, 2, 17)": 419,
"(2022, 3, 2, 18)": 406,
"(2022, 3, 2, 19)": 615,
"(2022, 3, 2, 20)": 668,
"(2022, 3, 2, 21)": 546,
"(2022, 3, 2, 22)": 683,
"(2022, 3, 2, 23)": 442
},
"invalid_lines": 0,
"ips": {
"local": 324,
"remote": 10239
},
"total_lines": 105634
}
},
"is_valid": {
"all": true,
"dates": true,
"ips": true
},
"path": {
"collection": null,
"date": "2022-03-01",
"extension": ".gz",
"mimetype": "application/gzip",
"paperboy": false
},
"probably_date": "2022-03-02T00:00:00"
}
}