Skip to content

Commit 52054f2

Browse files
committed
Streamline file reading for single-segment records, record headers, and annotation files. These methods now all sit in a layer between the public functions, which mainly handle I/O, and the parsing functions.
1 parent 39cd94b commit 52054f2

File tree

4 files changed

+110
-316
lines changed

4 files changed

+110
-316
lines changed

wfdb/io/_signal.py

+36-158
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,8 @@
44

55
import numpy as np
66

7-
from wfdb.io import download, _coreio, util
7+
from wfdb.io import download
8+
from wfdb.io import util
89

910

1011
MAX_I32 = 2147483647
@@ -1050,9 +1051,7 @@ def smooth_frames(self, sigtype="physical"):
10501051

10511052

10521053
def _rd_segment(
1053-
file_name,
1054-
dir_name,
1055-
pn_dir,
1054+
buf,
10561055
fmt,
10571056
n_sig,
10581057
sig_len,
@@ -1066,7 +1065,6 @@ def _rd_segment(
10661065
ignore_skew,
10671066
no_file=False,
10681067
sig_data=None,
1069-
sig_stream=None,
10701068
return_res=64,
10711069
):
10721070
"""
@@ -1075,14 +1073,6 @@ def _rd_segment(
10751073
10761074
Parameters
10771075
----------
1078-
file_name : list
1079-
The names of the dat files to be read.
1080-
dir_name : str
1081-
The full directory where the dat file(s) are located, if the dat
1082-
file(s) are local.
1083-
pn_dir : str
1084-
The PhysioNet directory where the dat file(s) are located, if
1085-
the dat file(s) are remote.
10861076
fmt : list
10871077
The formats of the dat files.
10881078
n_sig : int
@@ -1198,9 +1188,7 @@ def _rd_segment(
11981188
for fn in w_file_name:
11991189
# Get the list of all signals contained in the dat file
12001190
datsignals = _rd_dat_signals(
1201-
file_name=fn,
1202-
dir_name=dir_name,
1203-
pn_dir=pn_dir,
1191+
buf,
12041192
fmt=w_fmt[fn],
12051193
n_sig=len(datchannel[fn]),
12061194
sig_len=sig_len,
@@ -1212,7 +1200,6 @@ def _rd_segment(
12121200
sampto=sampto,
12131201
no_file=no_file,
12141202
sig_data=sig_data,
1215-
sig_stream=sig_stream,
12161203
)
12171204

12181205
# Copy over the wanted signals
@@ -1223,9 +1210,7 @@ def _rd_segment(
12231210

12241211

12251212
def _rd_dat_signals(
1226-
file_name,
1227-
dir_name,
1228-
pn_dir,
1213+
buf,
12291214
fmt,
12301215
n_sig,
12311216
sig_len,
@@ -1237,21 +1222,12 @@ def _rd_dat_signals(
12371222
sampto,
12381223
no_file=False,
12391224
sig_data=None,
1240-
sig_stream=None,
12411225
):
12421226
"""
12431227
Read all signals from a WFDB dat file.
12441228
12451229
Parameters
12461230
----------
1247-
file_name : str
1248-
The name of the dat file.
1249-
dir_name : str
1250-
The full directory where the dat file(s) are located, if the dat
1251-
file(s) are local.
1252-
pn_dir : str
1253-
The PhysioNet directory where the dat file(s) are located, if
1254-
the dat file(s) are remote.
12551231
fmt : str
12561232
The format of the dat file.
12571233
n_sig : int
@@ -1327,32 +1303,17 @@ def _rd_dat_signals(
13271303
if no_file:
13281304
data_to_read = sig_data
13291305
elif fmt in COMPRESSED_FMTS:
1330-
if sig_stream is not None:
1331-
data_to_read = _rd_compressed_stream(
1332-
fp=sig_stream,
1333-
fmt=fmt,
1334-
sample_offset=byte_offset,
1335-
n_sig=n_sig,
1336-
samps_per_frame=samps_per_frame,
1337-
start_frame=sampfrom,
1338-
end_frame=sampto,
1339-
)
1340-
else:
1341-
data_to_read = _rd_compressed_file(
1342-
file_name=file_name,
1343-
dir_name=dir_name,
1344-
pn_dir=pn_dir,
1345-
fmt=fmt,
1346-
sample_offset=byte_offset,
1347-
n_sig=n_sig,
1348-
samps_per_frame=samps_per_frame,
1349-
start_frame=sampfrom,
1350-
end_frame=sampto,
1351-
)
1352-
else:
1353-
data_to_read = _rd_dat_file(
1354-
file_name, dir_name, pn_dir, fmt, start_byte, n_read_samples, sig_stream
1306+
data_to_read = _rd_compressed_stream(
1307+
buf,
1308+
fmt=fmt,
1309+
sample_offset=byte_offset,
1310+
n_sig=n_sig,
1311+
samps_per_frame=samps_per_frame,
1312+
start_frame=sampfrom,
1313+
end_frame=sampto,
13551314
)
1315+
else:
1316+
data_to_read = _rd_dat_stream(buf, fmt, start_byte, n_read_samples)
13561317

13571318
if extra_flat_samples:
13581319
if fmt in UNALIGNED_FMTS:
@@ -1591,7 +1552,7 @@ def _required_byte_num(mode, fmt, n_samp):
15911552
return int(n_bytes)
15921553

15931554

1594-
def _rd_dat_file(file_name, dir_name, pn_dir, fmt, start_byte, n_samp, sig_stream):
1555+
def _rd_dat_stream(buf, fmt, start_byte, n_samp):
15951556
"""
15961557
Read data from a dat file, either local or remote, into a 1d numpy
15971558
array.
@@ -1602,14 +1563,6 @@ def _rd_dat_file(file_name, dir_name, pn_dir, fmt, start_byte, n_samp, sig_strea
16021563
16031564
Parameters
16041565
----------
1605-
file_name : str
1606-
The name of the dat file.
1607-
dir_name : str
1608-
The full directory where the dat file(s) are located, if the dat
1609-
file(s) are local.
1610-
pn_dir : str
1611-
The PhysioNet directory where the dat file(s) are located, if
1612-
the dat file(s) are remote.
16131566
fmt : str
16141567
The format of the dat file.
16151568
start_byte : int
@@ -1649,27 +1602,11 @@ def _rd_dat_file(file_name, dir_name, pn_dir, fmt, start_byte, n_samp, sig_strea
16491602
element_count = n_samp
16501603
byte_count = n_samp * BYTES_PER_SAMPLE[fmt]
16511604

1652-
# Memory Stream
1653-
if sig_stream is not None:
1654-
sig_stream.seek(start_byte)
1655-
sig_data = np.frombuffer(
1656-
sig_stream.read(), dtype=np.dtype(DATA_LOAD_TYPES[fmt]), count=element_count
1657-
)
1658-
# Local dat file
1659-
elif pn_dir is None:
1660-
with open(os.path.join(dir_name, file_name), "rb") as fp:
1661-
fp.seek(start_byte)
1662-
sig_data = np.fromfile(
1663-
fp, dtype=np.dtype(DATA_LOAD_TYPES[fmt]), count=element_count
1664-
)
1665-
# Stream dat file from Physionet
1666-
else:
1667-
dtype_in = np.dtype(DATA_LOAD_TYPES[fmt])
1668-
sig_data = download._stream_dat(
1669-
file_name, pn_dir, byte_count, start_byte, dtype_in
1670-
)
1605+
buf.seek(start_byte)
1606+
return np.frombuffer(
1607+
buf.read(byte_count), dtype=np.dtype(DATA_LOAD_TYPES[fmt]), count=element_count
1608+
)
16711609

1672-
return sig_data
16731610

16741611
def _blocks_to_samples(sig_data, n_samp, fmt):
16751612
"""
@@ -1790,20 +1727,28 @@ def _blocks_to_samples(sig_data, n_samp, fmt):
17901727

17911728

17921729
def _rd_compressed_stream(
1793-
fp,
1730+
buf,
17941731
fmt,
17951732
sample_offset,
17961733
n_sig,
17971734
samps_per_frame,
17981735
start_frame,
17991736
end_frame,
18001737
):
1801-
signature = fp.read(4)
1738+
import soundfile
1739+
1740+
if any(spf != samps_per_frame[0] for spf in samps_per_frame):
1741+
raise ValueError(
1742+
"All channels in a FLAC signal file must have the same "
1743+
"sampling rate and samples per frame"
1744+
)
1745+
1746+
signature = buf.read(4)
18021747
if signature != b"fLaC":
1803-
raise ValueError(f"{fp.name} is not a FLAC file")
1804-
fp.seek(0)
1748+
raise ValueError(f"{buf.name} is not a FLAC file")
1749+
buf.seek(0)
18051750

1806-
with soundfile.SoundFile(fp) as sf:
1751+
with soundfile.SoundFile(buf) as sf:
18071752
# Determine the actual resolution of the FLAC stream and the
18081753
# data type will use when reading it. Note that soundfile
18091754
# doesn't support int8.
@@ -1817,18 +1762,18 @@ def _rd_compressed_stream(
18171762
format_bits = 24
18181763
read_dtype = "int32"
18191764
else:
1820-
raise ValueError(f"unknown subtype in {fp.name} ({sf.subtype})")
1765+
raise ValueError(f"unknown subtype in {buf.name} ({sf.subtype})")
18211766

18221767
max_bits = int(fmt) - 500
18231768
if format_bits > max_bits:
18241769
raise ValueError(
1825-
f"wrong resolution in {fp.name} "
1770+
f"wrong resolution in {buf.name} "
18261771
f"({format_bits}, expected <= {max_bits})"
18271772
)
18281773

18291774
if sf.channels != n_sig:
18301775
raise ValueError(
1831-
f"wrong number of channels in {fp.name} "
1776+
f"wrong number of channels in {buf.name} "
18321777
f"({sf.channels}, expected {n_sig})"
18331778
)
18341779

@@ -1906,73 +1851,6 @@ def _rd_compressed_stream(
19061851
return sig_data.reshape(-1)
19071852

19081853

1909-
def _rd_compressed_file(
1910-
file_name,
1911-
dir_name,
1912-
pn_dir,
1913-
fmt,
1914-
sample_offset,
1915-
n_sig,
1916-
samps_per_frame,
1917-
start_frame,
1918-
end_frame,
1919-
):
1920-
"""
1921-
Read data from a compressed file into a 1D numpy array.
1922-
1923-
Parameters
1924-
----------
1925-
file_name : str
1926-
The name of the signal file.
1927-
dir_name : str
1928-
The full directory where the signal file is located, if local.
1929-
This argument is ignored if `pn_dir` is not None.
1930-
pn_dir : str or None
1931-
The PhysioNet database directory where the signal file is located.
1932-
fmt : str
1933-
The format code of the signal file.
1934-
sample_offset : int
1935-
The sample number in the signal file corresponding to sample 0 of
1936-
the WFDB record.
1937-
n_sig : int
1938-
The number of signals in the file.
1939-
samps_per_frame : list
1940-
The number of samples per frame for each signal in the file.
1941-
start_frame : int
1942-
The starting frame number to read.
1943-
end_frame : int
1944-
The ending frame number to read.
1945-
1946-
Returns
1947-
-------
1948-
signal : ndarray
1949-
The data read from the signal file. This is a one-dimensional
1950-
array in the same order the samples would be stored in a binary
1951-
signal file; `signal[(i*n_sig+j)*samps_per_frame[0]+k]` is sample
1952-
number `i*samps_per_frame[0]+k` of signal `j`.
1953-
1954-
Notes
1955-
-----
1956-
Converting the output array into "dat file order" here is inefficient,
1957-
but necessary to match the behavior of _rd_dat_file. It would be
1958-
better to reorganize _rd_dat_signals to make the reshaping unnecessary.
1959-
1960-
"""
1961-
import soundfile
1962-
1963-
if any(spf != samps_per_frame[0] for spf in samps_per_frame):
1964-
raise ValueError(
1965-
"All channels in a FLAC signal file must have the same "
1966-
"sampling rate and samples per frame"
1967-
)
1968-
1969-
if pn_dir is None:
1970-
file_name = os.path.join(dir_name, file_name)
1971-
1972-
with _coreio._open_file(pn_dir, file_name, "rb") as fp:
1973-
return _rd_compressed_stream(fp, fmt, sample_offset, n_sig, samps_per_frame, start_frame, end_frame)
1974-
1975-
19761854
def _skew_sig(
19771855
sig, skew, n_sig, read_len, fmt, nan_replace, samps_per_frame=None
19781856
):

0 commit comments

Comments
 (0)