|  | 
"""
Classes to find data files and executables in global paths.
"""
|  | 4 | + | 
|  | 5 | +from abc import ABC, abstractmethod | 
|  | 6 | +from collections.abc import Iterator, Iterable | 
|  | 7 | +import os | 
|  | 8 | +import os.path | 
|  | 9 | +from fnmatch import fnmatch | 
|  | 10 | +from glob import glob | 
|  | 11 | +import re | 
|  | 12 | +from typing import Any | 
|  | 13 | + | 
# Default file suffix of executable wrapper scripts: batch files on Windows, shell scripts everywhere else.
EXE_SUFFIX = "bat" if os.name == "nt" else "sh"
|  | 18 | + | 
|  | 19 | + | 
class ResourceNotFound(RuntimeError):
    """Raised when a resolver is asked for a resource but yields no match."""
|  | 22 | + | 
|  | 23 | + | 
|  | 24 | +class AbstractResolver(ABC): | 
|  | 25 | +    """ | 
|  | 26 | +    Interface for resolvers. | 
|  | 27 | +
 | 
|  | 28 | +    Implementations must define :meth:`._search`, taking a tuple of names to search for and yielding instances of any | 
|  | 29 | +    type.  Implementations should pick a single type to yield, e.g. :class:`.ResourceResolver` always yields absolute | 
|  | 30 | +    paths, while :class:`.ExecutableResolver` always yields 2-tuples of a version tag and absolute paths. | 
|  | 31 | +    """ | 
|  | 32 | + | 
|  | 33 | +    @abstractmethod | 
|  | 34 | +    def _search(self, name: tuple[str]) -> Iterator[Any]: | 
|  | 35 | +        pass | 
|  | 36 | + | 
|  | 37 | +    def search(self, name: Iterable[str] | str = "*") -> Iterator[Any]: | 
|  | 38 | +        """ | 
|  | 39 | +        Yield all matches. | 
|  | 40 | +
 | 
|  | 41 | +        When `name` is given as an iterable, returned results match at least one of the `name` globs. | 
|  | 42 | +
 | 
|  | 43 | +        Args: | 
|  | 44 | +            name (str, iterable of str): file name to search for; can be an exact file name, a glob or list of those | 
|  | 45 | +
 | 
|  | 46 | +        Yields: | 
|  | 47 | +            object: resources matching `name` | 
|  | 48 | +        """ | 
|  | 49 | +        if name is not None and not isinstance(name, str): | 
|  | 50 | +            name = tuple(name) | 
|  | 51 | +        else: | 
|  | 52 | +            name = (name,) | 
|  | 53 | +        yield from self._search(name) | 
|  | 54 | + | 
|  | 55 | +    def list(self, name: Iterable[str] | str = "*") -> list[Any]: | 
|  | 56 | +        """ | 
|  | 57 | +        Return all matches. | 
|  | 58 | +
 | 
|  | 59 | +        Args: | 
|  | 60 | +            name (str, iterable of str): file name to search for; can be an exact file name, a glob or list of those | 
|  | 61 | +
 | 
|  | 62 | +        Returns: | 
|  | 63 | +            list: all matches returned by :meth:`.search`. | 
|  | 64 | +        """ | 
|  | 65 | +        return list(self.search(name)) | 
|  | 66 | + | 
|  | 67 | +    def first(self, name: Iterable[str] | str = "*") -> Any: | 
|  | 68 | +        """ | 
|  | 69 | +        Return first match. | 
|  | 70 | +
 | 
|  | 71 | +        Args: | 
|  | 72 | +            name (str, iterable of str): file name to search for; can be an exact file name, a glob or list of those | 
|  | 73 | +
 | 
|  | 74 | +        Returns: | 
|  | 75 | +            object: the first match returned by :meth:`.search`. | 
|  | 76 | +
 | 
|  | 77 | +        Raises: | 
|  | 78 | +            :class:`~.ResourceNotFound`: if no matches are found. | 
|  | 79 | +        """ | 
|  | 80 | +        try: | 
|  | 81 | +            return next(iter(self.search(name))) | 
|  | 82 | +        except StopIteration: | 
|  | 83 | +            raise ResourceNotFound(f"Could not find {name} in {self}!") from None | 
|  | 84 | + | 
|  | 85 | +    def chain(self, *resolvers: "AbstractResolver") -> "ResolverChain": | 
|  | 86 | +        """ | 
|  | 87 | +        Return a new resolver that searches this and all given resolvers sequentially. | 
|  | 88 | +
 | 
|  | 89 | +        You will likely want to ensure that all given resolvers yield the same types and e.g. not mix ExecutableResolver | 
|  | 90 | +        and ResourceResolver, but this is not checked. | 
|  | 91 | +
 | 
|  | 92 | +        The advantage of using :meth:`.chain` rather than adding more paths to one resolver is when different paths have | 
|  | 93 | +        different internal sub structure, such as when combining resources from pyiron resources and conda data | 
|  | 94 | +        packages.  When searching for lammps potential files, e.g. we have some folders that are set up as | 
|  | 95 | +
 | 
|  | 96 | +            <resources>/lammps/potentials/... | 
|  | 97 | +
 | 
|  | 98 | +        but iprpy conda package that ships the NIST potentials doesn't have the lammps/potentials | 
|  | 99 | +
 | 
|  | 100 | +            <iprpy>/... | 
|  | 101 | +
 | 
|  | 102 | +        With chaining we can do very easily | 
|  | 103 | +
 | 
|  | 104 | +        >>> ResourceResolver([<resources>], "lammps", "potentials").chain( | 
|  | 105 | +        ...     ResourceResolver([<iprpy>])) # doctest: +SKIP | 
|  | 106 | +
 | 
|  | 107 | +        without we'd need to modify the resource paths ourselves explicitly | 
|  | 108 | +
 | 
|  | 109 | +        >>> ResourceResolver([r + '/lammps/potentials' for r in <resources>] + [<iprpy>]) # doctest: +SKIP | 
|  | 110 | +
 | 
|  | 111 | +        which is a bit more awkward. | 
|  | 112 | +
 | 
|  | 113 | +        Args: | 
|  | 114 | +            resolvers (:class:`.AbstractResolver`): any number of sub resolvers | 
|  | 115 | +
 | 
|  | 116 | +        Returns: | 
|  | 117 | +            self: if `resolvers` is empty | 
|  | 118 | +            :class:`.ResolverChain`: otherwise | 
|  | 119 | +        """ | 
|  | 120 | +        if resolvers == (): | 
|  | 121 | +            return self | 
|  | 122 | +        return ResolverChain(self, *resolvers) | 
|  | 123 | + | 
|  | 124 | + | 
class ResolverChain(AbstractResolver):
    """
    Resolver that delegates to several sub resolvers.

    The sub resolvers are queried one after another in the order they were given, and their matches are yielded in
    that same order.
    """

    __slots__ = ("_resolvers",)

    def __init__(self, *resolvers):
        """
        Args:
            *resolvers (:class:`.AbstractResolver`): sub resolvers to query, in order
        """
        self._resolvers = resolvers

    def _search(self, name):
        # Go through the public `search` of each sub resolver, forwarding the already normalized tuple of names.
        for sub_resolver in self._resolvers:
            for match in sub_resolver.search(name):
                yield match

    def __repr__(self):
        class_name = type(self).__name__
        members = ", ".join(map(repr, self._resolvers))
        return f"{class_name}({members})"
|  | 146 | + | 
|  | 147 | + | 
class ResourceResolver(AbstractResolver):
    """
    Generic resolver for files and directories.

    Resources are expected to conform to the following format:
        <resource_path>/<module>/<subdir0>/<subdir1>/...

    *All* entries within this final `subdir` are yielded by :meth:`.search`, whether they are files or directories.
    Search results can be restricted by passing a (list of) globs.  If a list is given, entries matching at least one of
    them are returned.  Results are the paths of the entries, i.e. they include the resource path they were found in.

    >>> res = ResourceResolver(["/my/resources"], "lammps")
    >>> res.list() # doctest: +SKIP
    [
        "/my/resources/lammps/bin",
        "/my/resources/lammps/potentials",
        "/my/resources/lammps/potentials.csv"
    ]
    """

    __slots__ = "_resource_paths", "_module", "_subdirs"

    def __init__(self, resource_paths, module, *subdirs):
        """
        Args:
            resource_paths (list of str): base paths for resource locations
            module (str): name of the module
            *subdirs (str): additional sub directories to descend into
        """
        self._resource_paths = resource_paths
        self._module = module
        self._subdirs = subdirs

    def __repr__(self):
        # Mirror the constructor call; joining all arguments at once guarantees a separator between the module and
        # the first sub directory.
        args = [self._resource_paths, self._module, *self._subdirs]
        inner = ", ".join(repr(a) for a in args)
        return f"{type(self).__name__}({inner})"

    def _search(self, name):
        """
        Yield paths below every resource path that match at least one of the globs in `name`.

        Resource paths are visited in the order given; for each of them the globs are evaluated in order and the
        matches of each glob are yielded sorted.
        """
        for p in self._resource_paths:
            sub = os.path.join(p, self._module, *self._subdirs)
            # Not every resource path has to provide this module/sub directory.
            if os.path.exists(sub):
                for n in name:
                    yield from sorted(glob(os.path.join(sub, n)))
|  | 193 | + | 
|  | 194 | + | 
class ExecutableResolver(AbstractResolver):
    """
    A resolver for executable scripts.

    Executables are expected to conform to the following format:
        <resource_path>/<module>/bin/run_<code>_<version_string>.<suffix>

    and have the executable bit set. :meth:`.search` yields tuples of version strings and full paths to the executable
    instead of plain strings.  If the same version string is found more than once, only the first hit is yielded.

    >>> exe = ExecutableResolver(..., "lammps")
    >>> exe.list() # doctest: +SKIP
    [
        ('v1', '/my/resources/lammps/bin/run_lammps_v1.sh'),
        ('v1_mpi', '/my/resources/lammps/bin/run_lammps_v1_mpi.sh'),
        ('v2_default', '/my/resources/lammps/bin/run_lammps_v2_default.sh'),
    ]
    >>> exe.default_version # doctest: +SKIP
    "v2_default"
    >>> exe.dict("v1*") # doctest: +SKIP
    {
        'v1': '/my/resources/lammps/bin/run_lammps_v1.sh',
        'v1_mpi': '/my/resources/lammps/bin/run_lammps_v1_mpi.sh'
    }
    """

    __slots__ = "_regex", "_glob", "_code", "_suffix", "_resolver"

    def __init__(self, resource_paths, code, module=None, suffix=EXE_SUFFIX):
        """
        Args:
            resource_paths (list of str): base paths for resource locations
            code (str): name of the simulation code
            module (str): name of the module the code is part of, same as `code` by default
            suffix (str, optional): file ending; if `None`, 'bat' on Windows 'sh' elsewhere
        """
        if suffix is None:
            suffix = EXE_SUFFIX
        if module is None:
            module = code
        # Kept verbatim so that __repr__ can reproduce the constructor call.
        self._code = code
        self._suffix = suffix
        # `code` and `suffix` are literal text, so escape them; the pattern is matched against the whole file name.
        self._regex = re.compile(f"run_{re.escape(code)}_(.*)\\.{re.escape(suffix)}")
        self._glob = f"run_{code}_*.{suffix}"
        self._resolver = ResourceResolver(
            resource_paths,
            module,
            "bin",
        )

    def __repr__(self):
        args = (
            self._resolver._resource_paths,
            self._code,
            self._resolver._module,
            self._suffix,
        )
        inner = ", ".join(repr(a) for a in args)
        return f"{type(self).__name__}({inner})"

    def _search(self, name):
        """
        Yield `(version, path)` for every executable script whose version string matches one of the globs in `name`.
        """
        # effective_ids is not supported on every platform, so only request it where available.
        use_effective_ids = os.access in os.supports_effective_ids
        seen = set()

        for path in self._resolver.search(self._glob):
            if not os.path.isfile(path):
                continue
            if not os.access(path, os.X_OK, effective_ids=use_effective_ids):
                continue
            # Only look at the file name: a parent directory that happens to contain "run_<code>_" must not leak
            # into the version string.
            match = self._regex.fullmatch(os.path.basename(path))
            if match is None:
                # The glob can be more permissive than the regex (e.g. case-insensitive matching on some
                # platforms); such files do not follow the naming scheme and are skipped.
                continue
            version = match.group(1)
            # Earlier resource paths take precedence over later ones for the same version.
            if version in seen:
                continue
            if any(fnmatch(version, n) for n in name):
                seen.add(version)
                yield (version, path)

    def dict(self, name="*") -> dict[str, str]:
        """
        Construct dict from :meth:`.search` results.

        Args:
            name (str or list of str): glob(s) to filter the version strings

        Returns:
            dict: mapping version strings to full paths
        """
        return dict(self.search(name=name))

    @property
    def available_versions(self):
        """
        list of str: all found versions
        """
        return [version for version, _ in self.search("*")]

    @property
    def default_version(self):
        """
        str: the first version found in resources

        If a version matching `*default*` exists, the first matching is returned.

        Raises:
            :class:`.ResourceNotFound`: if no executables are found at all
        """
        try:
            return self.first("*default*")[0]
        except ResourceNotFound:
            pass
        # try again outside the except clause to avoid nested error in case this fails as well
        return self.first("*")[0]
0 commit comments