Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions dev-requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
# Install testing / development requirements
coverage[toml]==6.5.0
coveralls==3.3.1
ddt==1.7.2
flake8==7.1.1
funcsigs==1.0.2
geojson-rewind==1.1.0
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@

setup(
name='formpack',
version='3.0.0',
version='3.0.1',
description='Manipulation tools for KoBo forms',
author='the formpack contributors (https://github.com/kobotoolbox/formpack/graphs/contributors)',
url='https://github.com/kobotoolbox/formpack/',
Expand Down
77 changes: 77 additions & 0 deletions src/formpack/utils/expand_content.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,84 @@ def _get_translations_from_special_cols(
return translations, set(translated_cols)


def clean_column_name(
    column_name: str, already_seen: Dict[str, str] = None
) -> str:
    """
    Return `column_name` with its case-insensitive parts lower-cased.

    Preserves ":" vs "::" and any spaces around the colons, as well as the
    case of whatever follows the separator (e.g. a language name).

    Rules, applied in order:

    * "LaBeL" -> "label", "HiNT" -> "hint"
    * "Bind:Some:Thing" -> "bind:Some:Thing", "BodY:" -> "body:"
    * "Media:Audio::ES" -> "media:audio::ES", "ViDeO : ES" -> "video : ES"
    * "Media: AuDiO" -> "media: audio", "VIDEO" -> "video"
    * "Label::ES" -> "label::ES" (a single separator: only the part before
      it is lower-cased)
    * anything else is lower-cased entirely

    :param column_name: the raw column name
    :param already_seen: optional cache mapping raw names to cleaned names.
        It is consulted first and updated with the result. When omitted,
        no caching takes place across calls.
    :return: the cleaned column name
    """
    if already_seen is None:
        already_seen = {}
    elif column_name in already_seen:
        return already_seen[column_name]

    cleaned = _normalize_column_name(column_name)
    already_seen[column_name] = cleaned
    return cleaned


def _normalize_column_name(column_name: str) -> str:
    """Compute the cleaned form of `column_name` (no caching)."""
    # The results below are assembled from the matched groups directly rather
    # than through `re.sub()` replacement templates: a template built from the
    # column name would treat any backslash in it as an escape sequence.
    lowered = column_name.lower()

    # "LaBeL" -> "label", "HiNT" -> "hint"
    if lowered in ('label', 'hint'):
        return lowered

    # "Bind:Some:Thing" -> "bind:Some:Thing", "BodY:" -> "body:"
    match = re.match(r'^(bind|body):', column_name, flags=re.IGNORECASE)
    if match:
        prefix = match.group(1)
        return prefix.lower() + column_name[len(prefix):]

    media_types = '|'.join(re.escape(name) for name in MEDIA_COLUMN_NAMES)

    # "Media:Audio::ES" -> "media:audio::ES", "ViDeO : ES" -> "video : ES"
    match = re.match(
        rf'^(media\s*::?\s*)?({media_types})(\s*::?\s*[^:]+)$',
        column_name,
        flags=re.IGNORECASE,
    )
    if match:
        media_prefix, media_type, remainder = match.groups()
        return (media_prefix or '').lower() + media_type.lower() + remainder

    # "Media: AuDiO" -> "media: audio", "VIDEO" -> "video"
    match = re.match(
        rf'^(media\s*::?\s*)?({media_types})$',
        column_name,
        flags=re.IGNORECASE,
    )
    if match:
        media_prefix, media_type = match.groups()
        # `$` may match just before a trailing newline; keep whatever
        # follows the match untouched.
        return (
            (media_prefix or '').lower()
            + media_type.lower()
            + column_name[match.end():]
        )

    # example: label::x, constraint_message::x, hint::x
    match = re.match(r'^([^:]+)(\s*::?\s*[^:]+)$', column_name)
    if match:
        shortname, remainder = match.groups()
        return shortname.lower() + remainder

    return lowered


def preprocess_columns(content: Dict[str, List[Any]]) -> None:
    """
    Normalise the column names of every row in `content`, in place.

    Each top-level value that is a list (or tuple) is treated as a sheet and
    each dict found in it as a row; every key of such a row is replaced by
    the result of `clean_column_name()`. Anything else (for example a list
    of plain strings, a dict or a scalar) is left untouched instead of
    raising.

    Every key is removed and re-inserted in its original order, so the
    relative order of the columns in a row is preserved.

    :param content: mapping of sheet names to lists of rows
    """
    # Shared across all sheets so that each distinct name is cleaned once
    seen = {}
    for rows in content.values():
        if not isinstance(rows, (list, tuple)):
            continue
        for row in rows:
            if not isinstance(row, dict):
                continue
            # Snapshot the items: the row is mutated while being walked
            for column_name, value in list(row.items()):
                cleaned_name = clean_column_name(column_name, seen)
                del row[column_name]
                row[cleaned_name] = value

def expand_content_in_place(content: Dict[str, List[Any]]) -> None:
preprocess_columns(content)

specials, translations, transl_cols = _get_special_survey_cols(content)

if len(translations) > 0:
Expand Down
29 changes: 28 additions & 1 deletion tests/test_expand_content.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,21 @@
# coding: utf-8
import copy
from collections import OrderedDict
from ddt import data, ddt, unpack
from unittest import TestCase

from formpack import FormPack
from formpack.constants import OR_OTHER_COLUMN as _OR_OTHER
from formpack.constants import UNTRANSLATED
from formpack.utils.expand_content import SCHEMA_VERSION
from formpack.utils.expand_content import SCHEMA_VERSION, clean_column_name
from formpack.utils.expand_content import _expand_tags
from formpack.utils.expand_content import _get_special_survey_cols
from formpack.utils.expand_content import expand_content, _expand_type_to_dict
from formpack.utils.flatten_content import flatten_content
from formpack.utils.string import orderable_with_none



def test_expand_selects_with_or_other():
assert _expand_type_to_dict('select_one xx or other').get(_OR_OTHER) == True
assert _expand_type_to_dict('select_one xx or_other').get(_OR_OTHER) == True
Expand Down Expand Up @@ -604,5 +607,29 @@ def test_expand_translations_null_lang():
assert s1 == s1_copy


def test_expand_ignores_case():
    """A mixed-case `Label` column is handled like `label`."""
    s1 = {'survey': [{'type': 'text', 'Label': 'hi'}]}
    expand_content(s1, in_place=True)
    # `preprocess_columns()` renames 'Label' to 'label' before the special
    # columns are collected, so the lower-cased name is the one reported.
    assert s1.get('translated') == ['label']


def _s(rows):
return {'survey': [dict([[key, 'x']]) for key in rows]}

@ddt
class ColumnTestCase(TestCase):
    """Data-driven checks of `clean_column_name()`."""

    @data(
        ('FOO', 'foo'),
        ('LABEL', 'label'),
        ('HINT', 'hint'),
        ('BIND::FOO', 'bind::FOO'),
        ('BODY : FOO', 'body : FOO'),
        ('MEDIA:AUDIO:Spanish', 'media:audio:Spanish'),
        ('VIDEO :: SPANISH', 'video :: SPANISH'),
        ('MEDIA:AUDIO', 'media:audio'),
        ('IMAGE', 'image'),
        ('LABEL : SPANISH', 'label : SPANISH'),
    )
    @unpack
    def test_clean_column_name(self, name, expected):
        # A fresh cache per case keeps the cases independent of each other
        # and satisfies the function's `already_seen` parameter.
        self.assertEqual(clean_column_name(name, {}), expected)
Loading