Skip to content

Commit 7a4fe2a

Browse files
authored
Merge pull request #20 from pyiron/resources
Add Resolvers to unify finding of resources
2 parents 553d8d4 + cdc9123 commit 7a4fe2a

17 files changed

+394
-0
lines changed

pyiron_snippets/resources.py

Lines changed: 302 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,302 @@
1+
"""
2+
Classes to find data files and executables in global paths.
3+
"""
4+
5+
from abc import ABC, abstractmethod
6+
from collections.abc import Iterator, Iterable
7+
import os
8+
import os.path
9+
from fnmatch import fnmatch
10+
from glob import glob
11+
import re
12+
from typing import Any
13+
14+
# Default file ending of executable wrapper scripts: batch files on Windows ("nt"), shell scripts everywhere else.
EXE_SUFFIX = "bat" if os.name == "nt" else "sh"
18+
19+
20+
class ResourceNotFound(RuntimeError):
    """Raised when a resolver is asked for a resource but finds no match."""
22+
23+
24+
class AbstractResolver(ABC):
25+
"""
26+
Interface for resolvers.
27+
28+
Implementations must define :meth:`._search`, taking a tuple of names to search for and yielding instances of any
29+
type. Implementations should pick a single type to yield, e.g. :class:`.ResourceResolver` always yields absolute
30+
paths, while :class:`.ExecutableResolver` always yields 2-tuples of a version tag and absolute paths.
31+
"""
32+
33+
@abstractmethod
34+
def _search(self, name: tuple[str]) -> Iterator[Any]:
35+
pass
36+
37+
def search(self, name: Iterable[str] | str = "*") -> Iterator[Any]:
38+
"""
39+
Yield all matches.
40+
41+
When `name` is given as an iterable, returned results match at least one of the `name` globs.
42+
43+
Args:
44+
name (str, iterable of str): file name to search for; can be an exact file name, a glob or list of those
45+
46+
Yields:
47+
object: resources matching `name`
48+
"""
49+
if name is not None and not isinstance(name, str):
50+
name = tuple(name)
51+
else:
52+
name = (name,)
53+
yield from self._search(name)
54+
55+
def list(self, name: Iterable[str] | str = "*") -> list[Any]:
56+
"""
57+
Return all matches.
58+
59+
Args:
60+
name (str, iterable of str): file name to search for; can be an exact file name, a glob or list of those
61+
62+
Returns:
63+
list: all matches returned by :meth:`.search`.
64+
"""
65+
return list(self.search(name))
66+
67+
def first(self, name: Iterable[str] | str = "*") -> Any:
68+
"""
69+
Return first match.
70+
71+
Args:
72+
name (str, iterable of str): file name to search for; can be an exact file name, a glob or list of those
73+
74+
Returns:
75+
object: the first match returned by :meth:`.search`.
76+
77+
Raises:
78+
:class:`~.ResourceNotFound`: if no matches are found.
79+
"""
80+
try:
81+
return next(iter(self.search(name)))
82+
except StopIteration:
83+
raise ResourceNotFound(f"Could not find {name} in {self}!") from None
84+
85+
def chain(self, *resolvers: "AbstractResolver") -> "ResolverChain":
86+
"""
87+
Return a new resolver that searches this and all given resolvers sequentially.
88+
89+
You will likely want to ensure that all given resolvers yield the same types and e.g. not mix ExecutableResolver
90+
and ResourceResolver, but this is not checked.
91+
92+
The advantage of using :meth:`.chain` rather than adding more paths to one resolver is when different paths have
93+
different internal sub structure, such as when combining resources from pyiron resources and conda data
94+
packages. When searching for lammps potential files, e.g. we have some folders that are set up as
95+
96+
<resources>/lammps/potentials/...
97+
98+
but iprpy conda package that ships the NIST potentials doesn't have the lammps/potentials
99+
100+
<iprpy>/...
101+
102+
With chaining we can do very easily
103+
104+
>>> ResourceResolver([<resources>], "lammps", "potentials").chain(
105+
... ResourceResolver([<iprpy>])) # doctest: +SKIP
106+
107+
without we'd need to modify the resource paths ourselves explicitly
108+
109+
>>> ResourceResolver([r + '/lammps/potentials' for r in <resources>] + [<iprpy>]) # doctest: +SKIP
110+
111+
which is a bit more awkward.
112+
113+
Args:
114+
resolvers (:class:`.AbstractResolver`): any number of sub resolvers
115+
116+
Returns:
117+
self: if `resolvers` is empty
118+
:class:`.ResolverChain`: otherwise
119+
"""
120+
if resolvers == ():
121+
return self
122+
return ResolverChain(self, *resolvers)
123+
124+
125+
class ResolverChain(AbstractResolver):
    """
    Resolver that combines several sub resolvers.

    The sub resolvers are queried one after another, in the order in which they were given, and their matches are
    yielded in that same order.
    """

    __slots__ = ("_resolvers",)

    def __init__(self, *resolvers):
        """
        Args:
            *resolvers (:class:`.AbstractResolver`): sub resolvers to query, in order
        """
        self._resolvers = resolvers

    def __repr__(self):
        arguments = ", ".join(map(repr, self._resolvers))
        return f"{type(self).__name__}({arguments})"

    def _search(self, name):
        # Go through the public search() of every sub resolver so that each one normalizes `name` on its own.
        for sub_resolver in self._resolvers:
            yield from sub_resolver.search(name)
146+
147+
148+
class ResourceResolver(AbstractResolver):
    """
    Generic resolver for files and directories.

    Resources are expected to conform to the following format:
        <resource_path>/<module>/<subdir0>/<subdir1>/...

    *All* entries within this final `subdir` are yielded by :meth:`.search`, whether they are files or directories.
    Search results can be restricted by passing a (list of) globs.  If a list is given, entries matching at least one of
    them are returned; an entry that matches several globs is returned only once.

    >>> res = ResourceResolver(["/my/resources"], "lammps")
    >>> res.list()  # doctest: +SKIP
    [
        "/my/resources/lammps/bin",
        "/my/resources/lammps/potentials",
        "/my/resources/lammps/potentials.csv"
    ]
    """

    __slots__ = "_resource_paths", "_module", "_subdirs"

    def __init__(self, resource_paths, module, *subdirs):
        """
        Args:
            resource_paths (list of str): base paths for resource locations; a single path is accepted as well
            module (str): name of the module
            *subdirs (str): additional sub directories to descend into
        """
        # A lone path would otherwise be iterated character by character (str) or fail to iterate (PathLike).
        if isinstance(resource_paths, (str, os.PathLike)):
            resource_paths = [resource_paths]
        self._resource_paths = resource_paths
        self._module = module
        self._subdirs = subdirs

    def __repr__(self):
        # Join all constructor arguments with the same separator so that subdirs are set apart from the module.
        arguments = [self._resource_paths, self._module, *self._subdirs]
        return f"{type(self).__name__}({', '.join(map(repr, arguments))})"

    def _search(self, name):
        """
        Yield paths of entries below every existing `<resource_path>/<module>/<subdirs...>` matching `name`.

        Args:
            name (tuple of str): globs, evaluated relative to the final sub directory

        Yields:
            str: matching paths, sorted per glob and without repetitions
        """
        seen = set()
        for base in self._resource_paths:
            directory = os.path.join(base, self._module, *self._subdirs)
            if not os.path.exists(directory):
                continue
            for pattern in name:
                for match in sorted(glob(os.path.join(directory, pattern))):
                    # overlapping globs (e.g. "*" and "a*") must not report the same entry twice
                    if match in seen:
                        continue
                    seen.add(match)
                    yield match
193+
194+
195+
class ExecutableResolver(AbstractResolver):
    """
    A resolver for executable scripts.

    Executables are expected to conform to the following format:
        <resource_path>/<module>/bin/run_<code>_<version_string>.<suffix>

    and have the executable bit set.  :meth:`.search` yields tuples of version strings and full paths to the executable
    instead of plain strings.  If the same version string is found more than once, only the first hit is yielded.

    >>> exe = ExecutableResolver(["/my/resources"], "lammps")
    >>> exe.list()  # doctest: +SKIP
    [
        ('v1', '/my/resources/lammps/bin/run_lammps_v1.sh'),
        ('v1_mpi', '/my/resources/lammps/bin/run_lammps_v1_mpi.sh'),
        ('v2_default', '/my/resources/lammps/bin/run_lammps_v2_default.sh'),
    ]
    >>> exe.default_version  # doctest: +SKIP
    "v2_default"
    >>> exe.dict("v1*")  # doctest: +SKIP
    {
        'v1': '/my/resources/lammps/bin/run_lammps_v1.sh',
        'v1_mpi': '/my/resources/lammps/bin/run_lammps_v1_mpi.sh'
    }
    """

    __slots__ = "_regex", "_glob", "_code", "_suffix", "_resolver"

    def __init__(self, resource_paths, code, module=None, suffix=EXE_SUFFIX):
        """
        Args:
            resource_paths (list of str): base paths for resource locations
            code (str): name of the simulation code
            module (str): name of the module the code is part of, same as `code` by default
            suffix (str, optional): file ending; if `None`, 'bat' on Windows 'sh' elsewhere
        """
        if suffix is None:
            suffix = EXE_SUFFIX
        if module is None:
            module = code
        # kept verbatim so that __repr__ can show the actual constructor arguments
        self._code = code
        self._suffix = suffix
        # code and suffix are literal text, not regex syntax; escaping keeps names like "c++" from breaking the pattern
        self._regex = re.compile(f"run_{re.escape(code)}_(.*)\\.{re.escape(suffix)}$")
        self._glob = f"run_{code}_*.{suffix}"
        self._resolver = ResourceResolver(
            resource_paths,
            module,
            "bin",
        )

    def __repr__(self):
        arguments = (
            self._resolver._resource_paths,
            self._code,
            self._resolver._module,
            self._suffix,
        )
        return f"{type(self).__name__}({', '.join(map(repr, arguments))})"

    def _search(self, name):
        """
        Yield `(version, path)` for every executable script whose version string matches one of `name`.

        Args:
            name (tuple of str): globs that are matched against the version strings, not the file names

        Yields:
            tuple of str: version string and path of the executable
        """

        def is_executable_file(path):
            return os.path.isfile(path) and os.access(
                path, os.X_OK, effective_ids=os.access in os.supports_effective_ids
            )

        seen = set()
        for path in self._resolver.search(self._glob):
            if not is_executable_file(path):
                continue
            # Only look at the file name: a directory called e.g. `run_<code>_x` further up in the path must not
            # leak into the version string.
            match = self._regex.match(os.path.basename(path))
            if match is None:
                # glob and regex can disagree (e.g. glob metacharacters in `code`); skip instead of crashing
                continue
            version = match.group(1)
            if version in seen:
                continue
            if any(fnmatch(version, pattern) for pattern in name):
                seen.add(version)
                yield (version, path)

    def dict(self, name="*") -> dict[str, str]:
        """
        Construct dict from :meth:`.search` results.

        Args:
            name (str or list of str): glob(s) to filter the version strings

        Returns:
            dict: mapping version strings to full paths
        """
        # `dict` here is the builtin: method bodies do not see names bound in the class namespace.
        return dict(self.search(name=name))

    @property
    def available_versions(self):
        """
        list of str: all found versions
        """
        return [version for version, _ in self.search("*")]

    @property
    def default_version(self):
        """
        str: the first version found in resources

        If a version matching `*default*` exists, the first matching is returned.

        Raises:
            :class:`.ResourceNotFound`: if no executables are found at all
        """
        try:
            return self.first("*default*")[0]
        except ResourceNotFound:
            pass
        # try again outside the except clause to avoid nested error in case this fails as well
        return self.first("*")[0]

tests/unit/static/resources/res1/module1/bin/run_code1_version1.bat

Whitespace-only changes.

tests/unit/static/resources/res1/module1/bin/run_code1_version1.sh

Whitespace-only changes.

tests/unit/static/resources/res1/module1/bin/run_code1_version2.bat

Whitespace-only changes.

tests/unit/static/resources/res1/module1/bin/run_code1_version2.sh

Whitespace-only changes.

tests/unit/static/resources/res1/module1/bin/run_code1_versionnonexec.bat

Whitespace-only changes.

tests/unit/static/resources/res1/module1/bin/run_code1_versionnonexec.sh

Whitespace-only changes.

tests/unit/static/resources/res1/module1/bin/run_code2_version1.bat

Whitespace-only changes.

tests/unit/static/resources/res1/module1/bin/run_code2_version1.sh

Whitespace-only changes.

tests/unit/static/resources/res1/module1/bin/run_code2_version2_default.bat

Whitespace-only changes.

0 commit comments

Comments
 (0)