diff --git a/pystac/stac_io.py b/pystac/stac_io.py index abe5bc26a..216249f3f 100644 --- a/pystac/stac_io.py +++ b/pystac/stac_io.py @@ -303,6 +303,7 @@ def read_text_from_href(self, href: str) -> str: except HTTPError as e: raise Exception(f"Could not read uri {href}") from e else: + href = safe_urlparse(href).path with open(href, encoding="utf-8") as f: href_contents = f.read() return href_contents @@ -328,7 +329,7 @@ def write_text_to_href(self, href: str, txt: str) -> None: """ if _is_url(href): raise NotImplementedError("DefaultStacIO cannot write to urls") - href = os.fspath(href) + href = safe_urlparse(href).path dirname = os.path.dirname(href) if dirname != "" and not os.path.isdir(dirname): os.makedirs(dirname) @@ -391,7 +392,7 @@ def _report_duplicate_object_names( def _is_url(href: str) -> bool: parsed = safe_urlparse(href) - return parsed.scheme != "" + return parsed.scheme not in ["", "file"] if HAS_URLLIB3: diff --git a/pystac/utils.py b/pystac/utils.py index 76c3f7100..4d702c187 100644 --- a/pystac/utils.py +++ b/pystac/utils.py @@ -71,6 +71,25 @@ def safe_urlparse(href: str) -> URLParseResult: query=parsed.query, fragment=parsed.fragment, ) + + # Windows drives sometimes get parsed as the netloc and sometimes + # as part of the parsed.path. + if parsed.scheme == "file" and os.name == "nt": + if parsed.netloc: + path = f"{parsed.netloc}{parsed.path}" + elif parsed.path.startswith("/") and ":" in parsed.path: + path = parsed.path[1:] + else: + path = parsed.path + + return URLParseResult( + scheme=parsed.scheme, + netloc="", + path=path, + params=parsed.params, + query=parsed.query, + fragment=parsed.fragment, + ) else: return parsed @@ -246,7 +265,7 @@ def make_relative_href( ): return source_href - if parsed_start.scheme == "": + if parsed_start.scheme in ["", "file"]: return _make_relative_href_path(parsed_source, parsed_start, start_is_dir) else: return _make_relative_href_url(parsed_source, parsed_start, start_is_dir) @@ -311,6 +330,9 @@ def _make_absolute_href_path( make_posix_style(os.path.abspath(start_dir)), start_dir ) + if parsed_source.scheme or parsed_start.scheme: + abs_path = f"file://{abs_path}" + return abs_path @@ -346,7 +368,10 @@ def make_absolute_href( parsed_start = safe_urlparse(start_href) parsed_source = safe_urlparse(source_href) - if parsed_source.scheme != "" or parsed_start.scheme != "": + if parsed_source.scheme not in ["", "file"] or parsed_start.scheme not in [ + "", + "file", + ]: return _make_absolute_href_url(parsed_source, parsed_start, start_is_dir) else: return _make_absolute_href_path(parsed_source, parsed_start, start_is_dir) @@ -364,7 +389,7 @@ def is_absolute_href(href: str) -> bool: bool: ``True`` if the given HREF is absolute, ``False`` if it is relative. """ parsed = safe_urlparse(href) - return parsed.scheme != "" or os.path.isabs(parsed.path) + return parsed.scheme not in ["", "file"] or os.path.isabs(parsed.path) def datetime_to_str(dt: datetime, timespec: str = "auto") -> str: diff --git a/tests/test_stac_io.py b/tests/test_stac_io.py index b84ea2854..97b6c1fc3 100644 --- a/tests/test_stac_io.py +++ b/tests/test_stac_io.py @@ -24,6 +24,15 @@ def test_read_write_collection(self) -> None: pystac.write_file(collection, dest_href=dest_href) self.assertTrue(os.path.exists(dest_href), msg="File was not written.") + def test_read_write_collection_with_file_protocol(self) -> None: + collection = pystac.read_file( + "file://" + TestCases.get_path("data-files/collections/multi-extent.json") + ) + with tempfile.TemporaryDirectory() as tmp_dir: + dest_href = os.path.join(tmp_dir, "collection.json") + pystac.write_file(collection, dest_href="file://" + dest_href) + self.assertTrue(os.path.exists(dest_href), msg="File was not written.") + def test_read_item(self) -> None: item = pystac.read_file(TestCases.get_path("data-files/item/sample-item.json")) with tempfile.TemporaryDirectory() as tmp_dir: diff --git a/tests/test_utils.py b/tests/test_utils.py index 5e9f85cef..8f2a9f6ac 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -33,6 +33,11 @@ def test_make_relative_href(self) -> None: ("/a/catalog.json", "/a/b/c/catalog.json", "../../catalog.json"), ("/a/b/c/d/", "/a/b/c/catalog.json", "./d/"), ("/a/b/c/d/.dotfile", "/a/b/c/d/catalog.json", "./.dotfile"), + ( + "file:///a/b/c/d/catalog.json", + "file:///a/b/c/catalog.json", + "./d/catalog.json", + ), ] for source_href, start_href, expected in test_cases: @@ -161,11 +166,22 @@ def test_make_absolute_href(self) -> None: "https://stacspec.org/a/b/item.json", ), ("http://localhost:8000", None, "http://localhost:8000"), + ("item.json", "file:///a/b/c/catalog.json", "file:///a/b/c/item.json"), + ( + "./z/item.json", + "file:///a/b/c/catalog.json", + "file:///a/b/c/z/item.json", + ), + ("file:///a/b/c/item.json", None, "file:///a/b/c/item.json"), ] for source_href, start_href, expected in test_cases: actual = make_absolute_href(source_href, start_href) - _, actual = os.path.splitdrive(actual) + if expected.startswith("file://"): + _, actual = os.path.splitdrive(actual.replace("file://", "")) + actual = f"file://{actual}" + else: + _, actual = os.path.splitdrive(actual) self.assertEqual(actual, expected) def test_make_absolute_href_on_vsitar(self) -> None: @@ -234,6 +250,7 @@ def test_is_absolute_href_os_aware(self) -> None: test_cases = [ ("/item.json", not incl_drive_letter), ("/home/someuser/Downloads/item.json", not incl_drive_letter), + ("file:///home/someuser/Downloads/item.json", not incl_drive_letter), ("d:/item.json", is_windows), ("c:/files/more_files/item.json", is_windows), ]