Skip to content

Commit d7fc7f0

Browse files
committed
Streamline file reading for single-segment records, record headers, and annotation files. These methods now all sit as a layer between the public functions and the parsing functions, handling mainly I/O.
1 parent 39cd94b commit d7fc7f0

File tree

5 files changed

+120
-340
lines changed

5 files changed

+120
-340
lines changed

wfdb/io/_coreio.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,16 @@
11
import posixpath
2+
import os
23

34
from wfdb.io import _url
45
from wfdb.io.download import config
56

67

78
def _open_file(
8-
pn_dir,
99
file_name,
1010
mode="r",
1111
*,
12+
dir_name="",
13+
pn_dir=None,
1214
buffering=-1,
1315
encoding=None,
1416
errors=None,
@@ -48,7 +50,7 @@ def _open_file(
4850
"""
4951
if pn_dir is None:
5052
return open(
51-
file_name,
53+
os.path.join(dir_name, file_name),
5254
mode,
5355
buffering=buffering,
5456
encoding=encoding,

wfdb/io/_signal.py

+55-161
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,9 @@
44

55
import numpy as np
66

7-
from wfdb.io import download, _coreio, util
7+
from wfdb.io import download
8+
from wfdb.io import util
9+
from wfdb.io import _coreio
810

911

1012
MAX_I32 = 2147483647
@@ -1066,7 +1068,6 @@ def _rd_segment(
10661068
ignore_skew,
10671069
no_file=False,
10681070
sig_data=None,
1069-
sig_stream=None,
10701071
return_res=64,
10711072
):
10721073
"""
@@ -1196,24 +1197,24 @@ def _rd_segment(
11961197
signals = [None] * len(channels)
11971198

11981199
for fn in w_file_name:
1199-
# Get the list of all signals contained in the dat file
1200-
datsignals = _rd_dat_signals(
1201-
file_name=fn,
1202-
dir_name=dir_name,
1203-
pn_dir=pn_dir,
1204-
fmt=w_fmt[fn],
1205-
n_sig=len(datchannel[fn]),
1206-
sig_len=sig_len,
1207-
byte_offset=w_byte_offset[fn],
1208-
samps_per_frame=w_samps_per_frame[fn],
1209-
skew=w_skew[fn],
1210-
init_value=w_init_value[fn],
1211-
sampfrom=sampfrom,
1212-
sampto=sampto,
1213-
no_file=no_file,
1214-
sig_data=sig_data,
1215-
sig_stream=sig_stream,
1216-
)
1200+
with _coreio._open_file(
1201+
fn, "rb", pn_dir=pn_dir, dir_name=dir_name
1202+
) as io:
1203+
# Get the list of all signals contained in the dat file
1204+
datsignals = _rd_dat_signals(
1205+
io,
1206+
fmt=w_fmt[fn],
1207+
n_sig=len(datchannel[fn]),
1208+
sig_len=sig_len,
1209+
byte_offset=w_byte_offset[fn],
1210+
samps_per_frame=w_samps_per_frame[fn],
1211+
skew=w_skew[fn],
1212+
init_value=w_init_value[fn],
1213+
sampfrom=sampfrom,
1214+
sampto=sampto,
1215+
no_file=no_file,
1216+
sig_data=sig_data,
1217+
)
12171218

12181219
# Copy over the wanted signals
12191220
for cn in range(len(out_dat_channel[fn])):
@@ -1223,9 +1224,7 @@ def _rd_segment(
12231224

12241225

12251226
def _rd_dat_signals(
1226-
file_name,
1227-
dir_name,
1228-
pn_dir,
1227+
io,
12291228
fmt,
12301229
n_sig,
12311230
sig_len,
@@ -1237,21 +1236,12 @@ def _rd_dat_signals(
12371236
sampto,
12381237
no_file=False,
12391238
sig_data=None,
1240-
sig_stream=None,
12411239
):
12421240
"""
12431241
Read all signals from a WFDB dat file.
12441242
12451243
Parameters
12461244
----------
1247-
file_name : str
1248-
The name of the dat file.
1249-
dir_name : str
1250-
The full directory where the dat file(s) are located, if the dat
1251-
file(s) are local.
1252-
pn_dir : str
1253-
The PhysioNet directory where the dat file(s) are located, if
1254-
the dat file(s) are remote.
12551245
fmt : str
12561246
The format of the dat file.
12571247
n_sig : int
@@ -1327,32 +1317,17 @@ def _rd_dat_signals(
13271317
if no_file:
13281318
data_to_read = sig_data
13291319
elif fmt in COMPRESSED_FMTS:
1330-
if sig_stream is not None:
1331-
data_to_read = _rd_compressed_stream(
1332-
fp=sig_stream,
1333-
fmt=fmt,
1334-
sample_offset=byte_offset,
1335-
n_sig=n_sig,
1336-
samps_per_frame=samps_per_frame,
1337-
start_frame=sampfrom,
1338-
end_frame=sampto,
1339-
)
1340-
else:
1341-
data_to_read = _rd_compressed_file(
1342-
file_name=file_name,
1343-
dir_name=dir_name,
1344-
pn_dir=pn_dir,
1345-
fmt=fmt,
1346-
sample_offset=byte_offset,
1347-
n_sig=n_sig,
1348-
samps_per_frame=samps_per_frame,
1349-
start_frame=sampfrom,
1350-
end_frame=sampto,
1351-
)
1352-
else:
1353-
data_to_read = _rd_dat_file(
1354-
file_name, dir_name, pn_dir, fmt, start_byte, n_read_samples, sig_stream
1320+
data_to_read = _rd_compressed_stream(
1321+
io,
1322+
fmt=fmt,
1323+
sample_offset=byte_offset,
1324+
n_sig=n_sig,
1325+
samps_per_frame=samps_per_frame,
1326+
start_frame=sampfrom,
1327+
end_frame=sampto,
13551328
)
1329+
else:
1330+
data_to_read = _rd_dat_stream(io, fmt, start_byte, n_read_samples)
13561331

13571332
if extra_flat_samples:
13581333
if fmt in UNALIGNED_FMTS:
@@ -1591,7 +1566,7 @@ def _required_byte_num(mode, fmt, n_samp):
15911566
return int(n_bytes)
15921567

15931568

1594-
def _rd_dat_file(file_name, dir_name, pn_dir, fmt, start_byte, n_samp, sig_stream):
1569+
def _rd_dat_stream(io, fmt, start_byte, n_samp):
15951570
"""
15961571
Read data from a dat file, either local or remote, into a 1d numpy
15971572
array.
@@ -1602,14 +1577,6 @@ def _rd_dat_file(file_name, dir_name, pn_dir, fmt, start_byte, n_samp, sig_strea
16021577
16031578
Parameters
16041579
----------
1605-
file_name : str
1606-
The name of the dat file.
1607-
dir_name : str
1608-
The full directory where the dat file(s) are located, if the dat
1609-
file(s) are local.
1610-
pn_dir : str
1611-
The PhysioNet directory where the dat file(s) are located, if
1612-
the dat file(s) are remote.
16131580
fmt : str
16141581
The format of the dat file.
16151582
start_byte : int
@@ -1649,27 +1616,13 @@ def _rd_dat_file(file_name, dir_name, pn_dir, fmt, start_byte, n_samp, sig_strea
16491616
element_count = n_samp
16501617
byte_count = n_samp * BYTES_PER_SAMPLE[fmt]
16511618

1652-
# Memory Stream
1653-
if sig_stream is not None:
1654-
sig_stream.seek(start_byte)
1655-
sig_data = np.frombuffer(
1656-
sig_stream.read(), dtype=np.dtype(DATA_LOAD_TYPES[fmt]), count=element_count
1657-
)
1658-
# Local dat file
1659-
elif pn_dir is None:
1660-
with open(os.path.join(dir_name, file_name), "rb") as fp:
1661-
fp.seek(start_byte)
1662-
sig_data = np.fromfile(
1663-
fp, dtype=np.dtype(DATA_LOAD_TYPES[fmt]), count=element_count
1664-
)
1665-
# Stream dat file from Physionet
1666-
else:
1667-
dtype_in = np.dtype(DATA_LOAD_TYPES[fmt])
1668-
sig_data = download._stream_dat(
1669-
file_name, pn_dir, byte_count, start_byte, dtype_in
1670-
)
1619+
io.seek(start_byte)
1620+
return np.frombuffer(
1621+
io.read(byte_count),
1622+
dtype=np.dtype(DATA_LOAD_TYPES[fmt]),
1623+
count=element_count,
1624+
)
16711625

1672-
return sig_data
16731626

16741627
def _blocks_to_samples(sig_data, n_samp, fmt):
16751628
"""
@@ -1790,20 +1743,28 @@ def _blocks_to_samples(sig_data, n_samp, fmt):
17901743

17911744

17921745
def _rd_compressed_stream(
1793-
fp,
1746+
io,
17941747
fmt,
17951748
sample_offset,
17961749
n_sig,
17971750
samps_per_frame,
17981751
start_frame,
17991752
end_frame,
18001753
):
1801-
signature = fp.read(4)
1754+
import soundfile
1755+
1756+
if any(spf != samps_per_frame[0] for spf in samps_per_frame):
1757+
raise ValueError(
1758+
"All channels in a FLAC signal file must have the same "
1759+
"sampling rate and samples per frame"
1760+
)
1761+
1762+
signature = io.read(4)
18021763
if signature != b"fLaC":
1803-
raise ValueError(f"{fp.name} is not a FLAC file")
1804-
fp.seek(0)
1764+
raise ValueError(f"{io.name} is not a FLAC file")
1765+
io.seek(0)
18051766

1806-
with soundfile.SoundFile(fp) as sf:
1767+
with soundfile.SoundFile(io) as sf:
18071768
# Determine the actual resolution of the FLAC stream and the
18081769
# data type will use when reading it. Note that soundfile
18091770
# doesn't support int8.
@@ -1817,18 +1778,18 @@ def _rd_compressed_stream(
18171778
format_bits = 24
18181779
read_dtype = "int32"
18191780
else:
1820-
raise ValueError(f"unknown subtype in {fp.name} ({sf.subtype})")
1781+
raise ValueError(f"unknown subtype in {io.name} ({sf.subtype})")
18211782

18221783
max_bits = int(fmt) - 500
18231784
if format_bits > max_bits:
18241785
raise ValueError(
1825-
f"wrong resolution in {fp.name} "
1786+
f"wrong resolution in {io.name} "
18261787
f"({format_bits}, expected <= {max_bits})"
18271788
)
18281789

18291790
if sf.channels != n_sig:
18301791
raise ValueError(
1831-
f"wrong number of channels in {fp.name} "
1792+
f"wrong number of channels in {io.name} "
18321793
f"({sf.channels}, expected {n_sig})"
18331794
)
18341795

@@ -1906,73 +1867,6 @@ def _rd_compressed_stream(
19061867
return sig_data.reshape(-1)
19071868

19081869

1909-
def _rd_compressed_file(
1910-
file_name,
1911-
dir_name,
1912-
pn_dir,
1913-
fmt,
1914-
sample_offset,
1915-
n_sig,
1916-
samps_per_frame,
1917-
start_frame,
1918-
end_frame,
1919-
):
1920-
"""
1921-
Read data from a compressed file into a 1D numpy array.
1922-
1923-
Parameters
1924-
----------
1925-
file_name : str
1926-
The name of the signal file.
1927-
dir_name : str
1928-
The full directory where the signal file is located, if local.
1929-
This argument is ignored if `pn_dir` is not None.
1930-
pn_dir : str or None
1931-
The PhysioNet database directory where the signal file is located.
1932-
fmt : str
1933-
The format code of the signal file.
1934-
sample_offset : int
1935-
The sample number in the signal file corresponding to sample 0 of
1936-
the WFDB record.
1937-
n_sig : int
1938-
The number of signals in the file.
1939-
samps_per_frame : list
1940-
The number of samples per frame for each signal in the file.
1941-
start_frame : int
1942-
The starting frame number to read.
1943-
end_frame : int
1944-
The ending frame number to read.
1945-
1946-
Returns
1947-
-------
1948-
signal : ndarray
1949-
The data read from the signal file. This is a one-dimensional
1950-
array in the same order the samples would be stored in a binary
1951-
signal file; `signal[(i*n_sig+j)*samps_per_frame[0]+k]` is sample
1952-
number `i*samps_per_frame[0]+k` of signal `j`.
1953-
1954-
Notes
1955-
-----
1956-
Converting the output array into "dat file order" here is inefficient,
1957-
but necessary to match the behavior of _rd_dat_file. It would be
1958-
better to reorganize _rd_dat_signals to make the reshaping unnecessary.
1959-
1960-
"""
1961-
import soundfile
1962-
1963-
if any(spf != samps_per_frame[0] for spf in samps_per_frame):
1964-
raise ValueError(
1965-
"All channels in a FLAC signal file must have the same "
1966-
"sampling rate and samples per frame"
1967-
)
1968-
1969-
if pn_dir is None:
1970-
file_name = os.path.join(dir_name, file_name)
1971-
1972-
with _coreio._open_file(pn_dir, file_name, "rb") as fp:
1973-
return _rd_compressed_stream(fp, fmt, sample_offset, n_sig, samps_per_frame, start_frame, end_frame)
1974-
1975-
19761870
def _skew_sig(
19771871
sig, skew, n_sig, read_len, fmt, nan_replace, samps_per_frame=None
19781872
):

0 commit comments

Comments
 (0)