1
1
import math
2
2
import os
3
+ import posixpath
3
4
import sys
4
5
6
+ import fsspec
5
7
import numpy as np
6
8
7
9
from wfdb .io import download , _coreio , util
8
-
10
+ from wfdb . io . _coreio import CLOUD_PROTOCOLS
9
11
10
12
MAX_I32 = 2147483647
11
13
MIN_I32 = - 2147483648
@@ -1643,10 +1645,10 @@ def _rd_dat_file(file_name, dir_name, pn_dir, fmt, start_byte, n_samp):
1643
1645
The name of the dat file.
1644
1646
dir_name : str
1645
1647
The full directory where the dat file(s) are located, if the dat
1646
- file(s) are local.
1648
+ file(s) are local or in the cloud .
1647
1649
pn_dir : str
1648
1650
The PhysioNet directory where the dat file(s) are located, if
1649
- the dat file(s) are remote .
1651
+ the dat file(s) are on a PhysioNet server .
1650
1652
fmt : str
1651
1653
The format of the dat file.
1652
1654
start_byte : int
@@ -1686,15 +1688,22 @@ def _rd_dat_file(file_name, dir_name, pn_dir, fmt, start_byte, n_samp):
1686
1688
element_count = n_samp
1687
1689
byte_count = n_samp * BYTES_PER_SAMPLE [fmt ]
1688
1690
1689
- # Local dat file
1691
+ # Local or cloud dat file
1690
1692
if pn_dir is None :
1691
- with open (os .path .join (dir_name , file_name ), "rb" ) as fp :
1693
+ with fsspec . open (os .path .join (dir_name , file_name ), "rb" ) as fp :
1692
1694
fp .seek (start_byte )
1693
- sig_data = np .fromfile (
1695
+ sig_data = util .fromfile (
1694
1696
fp , dtype = np .dtype (DATA_LOAD_TYPES [fmt ]), count = element_count
1695
1697
)
1696
- # Stream dat file from Physionet
1698
+
1699
+ # Stream dat file from PhysioNet
1697
1700
else :
1701
+ # check to make sure a cloud path isn't being passed under pn_dir
1702
+ if any (pn_dir .startswith (proto ) for proto in CLOUD_PROTOCOLS ):
1703
+ raise ValueError (
1704
+ "Cloud paths should be passed under record_name, not under pn_dir"
1705
+ )
1706
+
1698
1707
dtype_in = np .dtype (DATA_LOAD_TYPES [fmt ])
1699
1708
sig_data = download ._stream_dat (
1700
1709
file_name , pn_dir , byte_count , start_byte , dtype_in
@@ -1840,8 +1849,9 @@ def _rd_compressed_file(
1840
1849
file_name : str
1841
1850
The name of the signal file.
1842
1851
dir_name : str
1843
- The full directory where the signal file is located, if local.
1844
- This argument is ignored if `pn_dir` is not None.
1852
+ The full directory where the signal file is located, if this
1853
+ is a local or cloud path. This argument is ignored if `pn_dir`
1854
+ is not None.
1845
1855
pn_dir : str or None
1846
1856
The PhysioNet database directory where the signal file is located.
1847
1857
fmt : str
@@ -2585,10 +2595,10 @@ def _infer_sig_len(
2585
2595
The byte offset of the dat file. None is equivalent to zero.
2586
2596
dir_name : str
2587
2597
The full directory where the dat file(s) are located, if the dat
2588
- file(s) are local.
2598
+ file(s) are local or in the cloud.
2589
2599
pn_dir : str, optional
2590
2600
The PhysioNet directory where the dat file(s) are located, if
2591
- the dat file(s) are remote .
2601
+ the dat file(s) are on a PhysioNet server .
2592
2602
2593
2603
Returns
2594
2604
-------
@@ -2600,13 +2610,29 @@ def _infer_sig_len(
2600
2610
sig_len * tsamps_per_frame * bytes_per_sample == file_size
2601
2611
2602
2612
"""
2603
- if pn_dir is None :
2604
- file_size = os .path .getsize (os .path .join (dir_name , file_name ))
2605
- else :
2613
+ from wfdb .io .record import CLOUD_PROTOCOLS
2614
+
2615
+ # If this is a cloud path, use posixpath to construct the path and fsspec to open the file
2616
+ if any (dir_name .startswith (proto ) for proto in CLOUD_PROTOCOLS ):
2617
+ with fsspec .open (posixpath .join (dir_name , file_name ), mode = "rb" ) as f :
2618
+ file_size = f .seek (0 , os .SEEK_END )
2619
+
2620
+ # If a PhysioNet database directory is provided, look up the size of the remote file
2621
+ elif pn_dir is not None :
2622
+ # check to make sure a cloud path isn't being passed under pn_dir
2623
+ if any (pn_dir .startswith (proto ) for proto in CLOUD_PROTOCOLS ):
2624
+ raise ValueError (
2625
+ "Cloud paths should be passed under record_name, not under pn_dir"
2626
+ )
2627
+
2606
2628
file_size = download ._remote_file_size (
2607
2629
file_name = file_name , pn_dir = pn_dir
2608
2630
)
2609
2631
2632
+ # If it is neither a cloud path nor a PhysioNet path, treat it as a local file
2633
+ else :
2634
+ file_size = os .path .getsize (os .path .join (dir_name , file_name ))
2635
+
2610
2636
if byte_offset is None :
2611
2637
byte_offset = 0
2612
2638
data_size = file_size - byte_offset
0 commit comments