Skip to content

Commit 7615ccc

Browse files
committed
fixup! FEAT: implemented sas7bdat adapter using Pandas
1 parent 51f1ec3 commit 7615ccc

File tree

1 file changed

+19
-5
lines changed

1 file changed

+19
-5
lines changed

larray_editor/arrayadapter.py

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3693,7 +3693,10 @@ def __init__(self, data, attributes=None):
36933693
super().__init__(data, attributes=attributes)
36943694
reader = data
36953695
index_cols = set(reader.index) if reader.index is not None else set()
3696-
self._chunk_size = reader.chunksize or 1_000
3696+
default_chunksize = max(1_000_000 // reader.row_length, 1)
3697+
chunksize = reader.chunksize or default_chunksize
3698+
logger.debug(f'{chunksize=}')
3699+
self._chunk_size = chunksize
36973700
self._colnames = [col for col in reader.column_names
36983701
if col not in index_cols]
36993702

@@ -3725,12 +3728,23 @@ def get_values(self, h_start, v_start, h_stop, v_stop):
37253728
reader.close()
37263729
reader.__init__(fpath, **kwargs)
37273730
current_row = reader._current_row_in_file_index
3731+
expected_num_rows = v_stop - v_start
3732+
expected_num_cols = h_stop - h_start
3733+
37283734
# skip to v_start
37293735
num_rows_to_skip = v_start - self._chunk_size - current_row
37303736
if num_rows_to_skip > 0:
37313737
logger.debug(f"must skip {num_rows_to_skip} rows")
3732-
if num_rows_to_skip > 10_000_000:
3733-
raise RuntimeError('File is too large to display non top rows')
3738+
if num_rows_to_skip * reader.row_length > 10_000_000:
3739+
# An exception would be eaten by the adapter so the user
3740+
# would never see it
3741+
msg = 'File is too large to display non top rows'
3742+
first_row = [[msg] + [''] * (expected_num_cols - 1)]
3743+
other_rows = [
3744+
[''] * expected_num_cols
3745+
] * (expected_num_rows - 1)
3746+
return first_row + other_rows
3747+
37343748
while current_row < v_start - self._chunk_size:
37353749
reader.read(self._chunk_size)
37363750
current_row = reader._current_row_in_file_index
@@ -3740,8 +3754,8 @@ def get_values(self, h_start, v_start, h_stop, v_stop):
37403754
logger.debug(f'{len(df)} rows read')
37413755
assert v_start >= current_row, f"{v_start} < {current_row}"
37423756
chunk = df.iloc[v_start - current_row:]
3743-
assert len(chunk) == (v_stop - v_start), \
3744-
f"{len(chunk)} != {v_stop - v_start}"
3757+
assert len(chunk) == expected_num_rows, \
3758+
f"{len(chunk)=} != {expected_num_rows=}"
37453759

37463760
chunk_columns = [chunk.iloc[:, i].values
37473761
for i in range(h_start, h_stop)]

0 commit comments

Comments
 (0)