Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 7 additions & 3 deletions .github/workflows/audit.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,13 @@ jobs:
- uses: actions/checkout@v3
- name: install
run: |
python -m venv env/
source env/bin/activate
python -m pip install -r requirements.txt
python -m venv env
# Upgrade wheel & setuptools first
env/bin/python -m pip install --upgrade wheel setuptools
# Install patched pip from GitHub commit to fix tarfile vulnerability
env/bin/python -m pip install "git+https://github.com/pypa/pip@f2b9231"
# Install project dependencies
env/bin/python -m pip install -r requirements.txt
- uses: pypa/[email protected]
with:
virtual-environment: env/
Expand Down
15 changes: 10 additions & 5 deletions mario/hyper_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -294,11 +294,16 @@ def save_hyper_as_csv(hyper_file: str, file_path: str, **kwargs):
options = CsvOptions(**kwargs)

with tempfile.TemporaryDirectory() as temp_dir:
temp_hyper = os.path.join(temp_dir, 'temp.hyper')
shutil.copyfile(
src=hyper_file,
dst=temp_hyper
)
if options.do_not_modify_source:
logging.info('Copy the source hyper file into the temp directory.')
temp_hyper = os.path.join(temp_dir, 'temp.hyper')
shutil.copyfile(
src=hyper_file,
dst=temp_hyper
)
else:
logging.info('Use the source hyper file directly without creating a temp copy.')
temp_hyper = hyper_file

schema, table = get_default_table_and_schema(temp_hyper)

Expand Down
1 change: 1 addition & 0 deletions mario/options.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ class CsvOptions(OutputOptions):
def __init__(self, **kwargs):
super().__init__(**kwargs)
self.compress_using_gzip = kwargs.get('compress_using_gzip', False)
self.do_not_modify_source = kwargs.get('do_not_modify_source', True)


class HyperOptions(OutputOptions):
Expand Down
32 changes: 32 additions & 0 deletions test/test_data_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -558,6 +558,38 @@ def test_hyper_to_csv():
df = pd.read_csv(output_file)
assert round(df['Sales'].sum(), 4) == 2326534.3543

def test_hyper_to_csv_without_copy_to_tmp():
dataset = dataset_from_json(os.path.join('test', 'dataset.json'))
dataset.measures = []
metadata = metadata_from_json(os.path.join('test', 'metadata.json'))
# Copy the source data to avoid overwriting during other pytest runs
shutil.copyfile(
src=os.path.join('test', 'orders.hyper'),
dst=os.path.join('test', 'orders_copy.hyper')
)
configuration = Configuration(
file_path=os.path.join('test', 'orders_copy.hyper')
)
extractor = HyperFile(
dataset_specification=dataset,
metadata=metadata,
configuration=configuration
)
output_folder = os.path.join('output', 'test_hyper_to_csv')
os.makedirs(output_folder, exist_ok=True)
output_file = os.path.join(output_folder, 'orders_without_temp.csv')
extractor.save_data_as_csv(
file_path=output_file,
minimise=False,
compress_using_gzip=False,
do_not_modify_source=False
)
assert extractor.get_total() == 10194
assert extractor.get_total(measure='Sales') == 2326534.3542999607

df = pd.read_csv(output_file)
assert round(df['Sales'].sum(), 4) == 2326534.3543


def test_partitioning_extractor_partition_sql_no_data_in_partition():
# Skip this test if we don't have a connection string
Expand Down