-
Notifications
You must be signed in to change notification settings - Fork 1
/
check_many_authors_keys.py
121 lines (95 loc) · 3.9 KB
/
check_many_authors_keys.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
#!/usr/bin/env python3
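"""
Check the BibTeX keys of the papers that have more than 6 authors.

This script needs to be run from the root folder containing the
folders "lib" and "db": the .bib files are opened with paths relative
to the current working directory (e.g. "db/crypto_db.bib").
"""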
import collections
import sys
import os
scriptdir = os.path.dirname(os.path.realpath(__file__))
sys.path.append(os.path.join(scriptdir, "..", "lib"))
sys.path.append(os.path.join(scriptdir, "..", "db"))
import mybibtex.parser
import mybibtex.database
import mybibtex.generator
import confs_years
import argparse
import logging
import re
import shutil
import time
from pybtex.bibtex.utils import split_name_list
import config
from config import *
# Bind the local `config` module as the `config` attribute of
# mybibtex.generator (presumably read by the generator code -- confirm there).
mybibtex.generator.config = config
# Configure the root logger so that messages down to DEBUG level are emitted.
logging.basicConfig(level=logging.DEBUG)
# Status labels wrapped in ANSI SGR escape sequences: black foreground (30) on
# a red (41), yellow (43) or green (42) background; "\x1b[0m" resets the
# terminal attributes after the label.
color_texts = {
    "Error": "\x1b[6;30;41mError\x1b[0m",
    "Warning": "\x1b[6;30;43mWarning\x1b[0m",
    "Success": "\x1b[6;30;42mSuccess\x1b[0m",
}
# Capture the author part of a key, between the ":" and the year.
# Expected key shape: <alphanumeric prefix>:<letters><2-digit year><optional
# lowercase letter>, e.g. "CONF:ABCDEFG20a"; group 1 is the run of letters.
re_author_part_key = re.compile(r"^[a-zA-Z0-9]+:([a-zA-Z]+)[0-9]{2}[a-z]?$")
def check_more_6_authors(args):
    """
    Report how the keys of papers with more than 6 authors are built.

    The original rule of cryptobib is to have at most 6 initials in a key,
    so every paper with more than 6 authors is sorted into one of three
    groups depending on whether the author part of its key has fewer than,
    exactly, or more than 6 characters.

    The databases "db/abbrev3.bib", "db/crypto_db.bib" and
    "db/crypto_conf_list.bib" are parsed relative to the current working
    directory.

    Parameters:
        args: parsed command-line namespace. `args.filter`, when truthy,
            restricts the analysis through `mybibtex.generator.FilterConf`;
            `args.verbose`, when truthy, lists every key of each group
            before the summary.

    Returns:
        None. The three summary lines are printed to stdout. If a key does
        not match `re_author_part_key`, an error line is printed and the
        function returns immediately, without printing any summary.
    """
    bib_parser = mybibtex.parser.Parser()
    bib_parser.parse_file("db/abbrev3.bib")
    bib_parser.parse_file("db/crypto_db.bib")
    db = bib_parser.parse_file("db/crypto_conf_list.bib")

    # Start from the paper filter and optionally narrow it to one conference
    paper_filter = mybibtex.generator.FilterPaper()
    conf_name = args.filter
    if conf_name:
        paper_filter = mybibtex.generator.FilterConf(conf_name, paper_filter)
    entries = dict(paper_filter.filter(db.entries))

    # key -> author list, one dictionary per size class of the author part
    fewer_than_6 = {}
    exactly_6 = {}
    more_than_6 = {}
    # number of papers with more than 6 authors
    total = 0

    sorted_entries = mybibtex.generator.SortConfYearPage().sort(iter(entries.items()))
    for keybib, entry in sorted_entries:
        key = str(keybib)
        authors = split_name_list(entry.fields["author"].expand())
        if not len(authors) > 6:
            continue
        total += 1

        # The author part of the key sits between the ":" and the year
        matched = re_author_part_key.match(key)
        if matched is None:
            print(f"{color_texts['Error']}: key {key} cannot be parsed")
            return

        nb_initials = len(matched.group(1))
        if nb_initials > 6:
            more_than_6[key] = authors
        elif nb_initials == 6:
            exactly_6[key] = authors
        else:
            fewer_than_6[key] = authors

    if args.verbose:
        sections = (
            ("Papers with >6 authors and <6 initials in key:", fewer_than_6),
            ("Papers with >6 authors and =6 initials in key:", exactly_6),
            ("Papers with >6 authors and >6 initials in key:", more_than_6),
        )
        for title, group in sections:
            print(title)
            for key, authors in group.items():
                print(f" {key:20s}: {authors}")
            print()

    print(f"{len(fewer_than_6):4d} / {total:4d} papers with >6 authors have <6 initials in key")
    print(f"{len(exactly_6):4d} / {total:4d} papers with >6 authors have =6 initials in key")
    print(f"{len(more_than_6):4d} / {total:4d} papers with >6 authors have >6 initials in key")
def main():
    """
    Parse the command-line options and run the analysis.

    Options:
        --filter CONF  restrict the analysis to a specific conference
        --verbose      display all the papers with more than 6 authors

    The parsed namespace is handed to `check_more_6_authors`.
    """
    # The text must be passed as `description`: the first positional
    # parameter of ArgumentParser is `prog`, so a positional string would
    # replace the program name in the usage line instead of describing
    # what the script does.
    parser = argparse.ArgumentParser(
        description="Analyze papers with >6 authors: count how many use key with >6 initials"
    )
    parser.add_argument("--filter", help="filter a specific conference")
    parser.add_argument("--verbose", action="store_true", help="display all the papers with >6 authors")
    args = parser.parse_args()
    check_more_6_authors(args)


if __name__ == "__main__":
    main()