-
Notifications
You must be signed in to change notification settings - Fork 1
/
retrieval.py
43 lines (36 loc) · 1.69 KB
/
retrieval.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
from subprocess import run
def find_repository_urls(user_input: str) -> dict[str, float]:
    """Given user input, resolve it to proper git URL(s).

    For the expected URL
    https://github.com/krassowski/multi-omics-state-of-the-field,
    the following user inputs should be accepted and harmonized:

    - `https://github.com/krassowski/multi-omics-state-of-the-field`
    - `github.com/krassowski/multi-omics-state-of-the-field`
    - `krassowski/multi-omics-state-of-the-field`
        - should check both github.com and gitlab.com; if the repository
          exists on both, both URLs should be returned
    - `git@github.com:krassowski/multi-omics-state-of-the-field.git`
    - `https://doi.org/10.3389/fgene.2020.610798`
        - should use a JSON API and then Entrez (or something else) to scan
          the abstract and then the full text for git URLs (there may be
          multiple matches; for now each should get a confidence
          proportional to its number of matches; in the future some more
          intelligent criteria could be used).

    NOTE: the harmonization described above is not implemented yet. For now
    the input, stripped of surrounding whitespace, is returned as the only
    candidate with full confidence.

    Returns:
        a mapping between the resolved/guessed URL and the confidence score
        of the match in (0, 1]; the mapping should be sorted by confidence
        from the best match to the worst match

    Raises:
        `ValueError` if the repository cannot be found (currently: if the
        input is empty or blank), with an informative error message to be
        shown to the user.
    """
    # TODO (setup pytest → write unit → implement)
    # Blank input can never point at a repository, so reject it here instead
    # of reporting it as a match with full confidence.
    candidate = user_input.strip() if isinstance(user_input, str) else ""
    if not candidate:
        raise ValueError(
            "No repository address was provided; please enter a repository"
            " URL, an `owner/name` identifier, or a DOI link."
        )
    # The score is a float to agree with the declared `dict[str, float]`.
    return {candidate: 1.0}
def fetch_repository(address: str, temp_dir: str):
    """Clone the repository into the specified directory.

    Runs a shallow (`--depth=1`) `git clone` of `address` into `temp_dir`.

    Args:
        address: location of the repository, passed to `git clone` as-is.
        temp_dir: directory that `git clone` should create the clone in.

    Returns:
        the `subprocess.CompletedProcess` of the finished `git clone` call.

    Raises:
        `subprocess.CalledProcessError` if `git clone` exits with a non-zero
        status (because of `check=True`).
    """
    return run(
        # `--` ends option parsing, so an address (or directory) starting
        # with `-` is never interpreted by git as a command-line option.
        ['git', 'clone', '--depth=1', '--', address, temp_dir],
        check=True,
        # TODO: uncomment (for now the visible git output helps debugging)
        # capture_output=True
    )