-
Notifications
You must be signed in to change notification settings - Fork 22
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add utils for PyFilesystem and config key matching (#351)
This lays the foundation for [supporting multiple input dirs](#352).
- Loading branch information
Showing
6 changed files
with
662 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,155 @@ | ||
import tempfile | ||
import unittest | ||
|
||
from util.file_match import match | ||
from util.filesystem import create_store | ||
|
||
|
||
class TestFileMatch(unittest.TestCase):
    """Checks `match` against files opened from temp and OS-backed stores.

    Every store is opened in a `with` block so that its context manager is
    exited even when an assertion fails part-way through a test.
    """

    def _matchers(self, file):
        """Build `(yes, no, err)` assertion helpers bound to `file`.

        `yes(pattern)` asserts that `match(file, pattern)` is truthy,
        `no(pattern)` asserts that it is falsy, and `err(pattern)` asserts
        that the call raises `ValueError`. Each helper puts the pattern in
        the failure message so the failing case is easy to identify.
        """

        def yes(pattern: str) -> None:
            self.assertTrue(match(file, pattern),
                            msg=f"expected {pattern!r} to match")

        def no(pattern: str) -> None:
            self.assertFalse(match(file, pattern),
                             msg=f"expected {pattern!r} not to match")

        def err(pattern: str) -> None:
            with self.assertRaises(
                    ValueError,
                    msg=f"expected {pattern!r} to raise ValueError"):
                match(file, pattern)

        return yes, no, err

    def test_match_nested_dir(self):
        """Patterns against a file nested two directories below the root."""
        with create_store("temp://") as store:
            file = store.as_dir().open_file("path/to/foo.csv")
            yes, no, err = self._matchers(file)

            # File path: temp://path/to/foo.csv
            # Absolute path: /<generated>/path/to/foo.csv

            # No slashes: match file name
            yes("foo.csv")
            no("bar.csv")
            no("oo.csv")
            no("oo*.csv")
            yes("foo*.csv")  # Wildcard can be no characters
            yes("f*.csv")
            yes("*.csv")
            no("*.mcf")

            # With protocol: match protocol and full path
            yes("temp://path/to/**/*")
            yes("temp://**/foo.csv")
            yes("temp://**/*")
            no("temp://foo.csv")
            no("gs://**/*")

            # Leading single slash: partial match not allowed, match
            # relative to input dir
            no("/foo.csv")
            no("/to/foo.csv")
            yes("/path/to/foo.csv")
            no("/*")
            yes("/**/*")
            no("/*/foo.csv")  # Single wildcard is a single level of nesting
            no("/*/*/*/foo.csv")  # Wrong depth with single wildcards
            yes("/*/*/foo.csv")  # Right depth with single wildcards
            no("/*/*/oo.csv")  # Right depth, wrong filename
            yes("/*/*/*oo.csv")  # Wildcard paths, wildcard in filename
            yes("/**/foo.csv")  # Double wildcard can span multiple dirs
            no("/**/oo.csv")
            yes("/**/*/foo.csv")
            yes("/*/**/foo.csv")
            yes("/**/*/*/foo.csv")
            yes("/*/**/*/foo.csv")
            yes("/**/*/**/*/**/foo.csv")  # Convoluted, but still valid
            no("/**/*/**/*/**/*/**/foo.csv")
            no("/*/**/*/*/foo.csv")
            no("/**/*/*/*/foo.csv")

            # Leading double slash: partial match not allowed, match
            # absolute path
            abs_root = store.as_dir().syspath()
            self.assertTrue(abs_root.startswith("/"))
            self.assertFalse(abs_root.startswith("//"))
            self.assertTrue(abs_root.endswith("/"))
            no("//path/to/foo.csv")
            # abs_root already starts and ends with a single slash (asserted
            # above), so this yields "//<root>/path/to/foo.csv".
            yes(f"/{abs_root}path/to/foo.csv")
            # Leading slash means the match must be from the abs root
            no("//to/foo.csv")
            no("//*/foo.csv")  # Single wildcard is a single level of nesting
            no("//*/*/*/foo.csv")  # Wrong depth with single wildcards
            yes("//**/foo.csv")  # Double wildcard can span multiple dirs

            no("temp://*.csv")  # Single wildcard is a single level of nesting
            no("gs://**.csv")  # Wrong protocol

            yes("path/to/foo.csv")
            yes("to/foo.csv")  # Partial match allowed
            yes("*/foo.csv")  # Wrong depth, but partial match allowed
            yes("*/*/foo.csv")
            yes("*/to/foo.csv")
            yes("**/to/foo.csv")
            yes("**/foo.csv")

            # Double wildcards don't make sense in the name portion of a
            # pattern.
            err("temp://**.csv")  # Use "temp://**/*.csv" instead
            err("**.csv")  # Use "*.csv" instead
            err("//**/to/**.csv")

    def test_os_abs_path(self):
        """Patterns against a file in a store rooted at an OS temp dir."""
        with tempfile.TemporaryDirectory() as temp_dir:
            self.assertTrue(str(temp_dir).startswith("/"))
            self.assertFalse(str(temp_dir).startswith("//"))
            # Nested inside the TemporaryDirectory block so the store is
            # exited before the directory itself is removed.
            with create_store(temp_dir) as store:
                file = store.as_dir().open_file("path/to/foo.csv")
                yes, no, _ = self._matchers(file)

                yes(f"/{temp_dir}/path/to/foo.csv")
                no("//path/to/foo.csv")
                yes("/path/to/foo.csv")
                no("/to/foo.csv")
                yes("to/foo.csv")

    def test_match_in_input_dir(self):
        """Patterns against a file that sits directly in the store root."""
        with create_store("temp://") as store:
            file = store.as_dir().open_file("foo.csv")
            yes, no, err = self._matchers(file)

            # File path: temp://foo.csv

            yes("foo.csv")
            yes("foo*.csv")  # Wildcard can be no characters
            yes("*.csv")
            no("*.mcf")

            yes("/foo.csv")
            yes("/*foo.csv")
            no("/*/foo.csv")
            no("*/foo.csv")

            yes("**/foo.csv")
            yes("/**/foo.csv")
            yes("/*")
            yes("/**/*")

            no("/to/foo.csv")  # Extra dir
            yes("temp://**/foo.csv")
            no("gs://**/foo.csv")
            yes("temp://foo.csv")
            yes("temp://*.csv")

            err("**.csv")
            err("/**.csv")
            err("temp://**.csv")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,109 @@ | ||
import os | ||
import tempfile | ||
import unittest | ||
|
||
from util.filesystem import create_store | ||
|
||
|
||
class TestFilesystem(unittest.TestCase):
    """Exercises `create_store` and the store, dir and file objects it yields."""

    def test_create_store_dir_new(self):
        """A newly created in-memory root reports itself as a directory."""
        root_path = "mem://"
        with create_store(root_path, create_if_missing=True) as store:
            self.assertTrue(store.isdir())
            self.assertEqual(store.full_path(), root_path)
            self.assertEqual(store.as_dir().full_path(), root_path)

    def test_create_store_file_new(self):
        """New file stores are not directories and keep their full path."""
        # The second path also requires a subdirectory to be created.
        for file_path in ("mem://foo.txt", "mem://path/to/foo.txt"):
            with create_store(file_path,
                              create_if_missing=True,
                              treat_as_file=True) as store:
                self.assertFalse(store.isdir())
                self.assertEqual(store.full_path(), file_path)
                self.assertEqual(store.as_file().full_path(), file_path)

    def test_missing_file(self):
        """Opening a missing file with create_if_missing=False must fail."""
        with create_store("mem://") as store, \
                self.assertRaises(FileNotFoundError):
            store.as_dir().open_file("nonexistent.txt",
                                     create_if_missing=False)

    def test_create_store_defaults_to_dir(self):
        """Without treat_as_file, a new path is created as a directory."""
        dir_path = "mem://bar"
        with create_store(dir_path, create_if_missing=True) as store:
            self.assertTrue(store.isdir())
            self.assertEqual(store.full_path(), dir_path)

    def test_create_store_file_existing(self):
        """An existing file is opened as a file, with or without the hint."""
        with tempfile.TemporaryDirectory() as temp_dir:
            # Create a file "foo.txt" in temp_dir
            file_path = os.path.join(temp_dir, "foo.txt")
            with open(file_path, "w") as handle:
                handle.write("hello")

            # First with treat_as_file=True, then with the param omitted.
            for extra_kwargs in ({"treat_as_file": True}, {}):
                with create_store(file_path,
                                  create_if_missing=False,
                                  **extra_kwargs) as store:
                    self.assertFalse(store.isdir())
                    self.assertEqual(store.full_path(), file_path)
                    self.assertEqual(store.as_file().full_path(), file_path)

    def test_create_store_dir_existing(self):
        """An existing directory is opened as a directory store."""
        with tempfile.TemporaryDirectory() as temp_dir:
            dir_path = str(temp_dir)
            with create_store(dir_path, create_if_missing=False) as store:
                self.assertEqual(store.full_path(), dir_path)
                self.assertTrue(store.isdir())
                self.assertEqual(store.as_dir().full_path(), dir_path)

    def test_file(self):
        """Round-trips text and bytes through the read/write methods on File."""
        with create_store("mem://dir/foo.txt",
                          create_if_missing=True,
                          treat_as_file=True) as store:
            target = store.as_file()

            text = "hello"
            target.write(text)
            self.assertEqual(target.read(), text)
            with target.read_string_io() as text_stream:
                self.assertEqual(text_stream.read(), text)
            self.assertEqual(target.size(), 5)

            payload = b"bytes"
            target.write_bytes(payload)
            self.assertEqual(target.read_bytes(), payload)

    def test_dir(self):
        """Covers the open_dir, open_file and all_files methods on Dir."""
        with create_store("mem://") as store:
            root = store.as_dir()

            nested = root.open_dir("dir1/dir2")
            self.assertEqual(nested.full_path(), "mem://dir1/dir2")

            deep_file = nested.open_file("dir3/foo.txt")
            self.assertEqual(deep_file.full_path(),
                             "mem://dir1/dir2/dir3/foo.txt")

            root.open_file("bar.txt")
            nested.open_file("baz.txt")

            expected_paths = [
                "mem://bar.txt",
                "mem://dir1/dir2/baz.txt",
                "mem://dir1/dir2/dir3/foo.txt",
            ]
            found_paths = [entry.full_path() for entry in root.all_files()]
            self.assertListEqual(found_paths, expected_paths)

    def test_copy_to(self):
        """File.copy_to makes the source's content readable from the target."""
        with create_store("mem://") as store:
            source = store.as_dir().open_file("foo.txt")
            source.write("hello")
            destination = store.as_dir().open_file("bar.txt")
            source.copy_to(destination)
            self.assertEqual(destination.read(), "hello")
Oops, something went wrong.