Skip to content

Commit 5c43405

Browse files
committed
Add script to add overlay annotations
1 parent 30ab9b7 commit 5c43405

File tree

1 file changed

+273
-0
lines changed

1 file changed

+273
-0
lines changed

config/add-overlay-annotations.py

Lines changed: 273 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,273 @@
1+
# This script is used to annotate .qll files without any existing overlay annotations
2+
# with overlay[local?] and overlay[caller] annotations. Maintenance of overlay annotations
3+
# in annotated files will be handled by QL-for-QL queries.
4+
5+
# It will walk the directory tree and annotate most .qll files, skipping only
6+
# some specific cases (e.g., empty files, files that configure dataflow for queries).
7+
8+
# The script takes a list of languages and processes the corresponding directories.
9+
# If the optional --check argument is provided, the script checks for missing annotations,
10+
# but does not modify any files.
11+
12+
# Usage: python3 add-overlay-annotations.py [--check] <language1> <language2> ...
13+
14+
# The script will modify the files in place and print the changes made.
15+
# The script is designed to be run from the root of the repository.
16+
17+
#!/usr/bin/python3
18+
import sys
19+
import os
20+
from difflib import *
21+
22+
23+
def has_overlay_annotations(lines):
    '''
    Report whether any of the given lines already carries an overlay annotation.

    A line counts as annotated when it contains one of the literal markers
    overlay[local], overlay[local?], overlay[global] or overlay[caller]
    anywhere in its text (plain substring match).
    '''
    markers = tuple(f"overlay[{kind}]" for kind in ("local", "local?", "global", "caller"))
    for marker in markers:
        for text in lines:
            if marker in text:
                return True
    return False
30+
31+
32+
def is_line_comment(line):
    '''
    Check whether the given (already stripped) line is a comment that starts
    and ends on that same line: either a `//` comment, or a block comment that
    opens with `/*` and closes with `*/`.
    '''
    if line.startswith("//"):
        return True
    # Require at least four characters so that the opening "/*" and the closing
    # "*/" cannot share the same "*": "/*/" only opens a block comment.
    return len(line) >= 4 and line.startswith("/*") and line.endswith("*/")
34+
35+
36+
def find_file_level_module_declaration(lines):
    '''
    Look for an explicit file-level module declaration.

    Returns the index of the first line that ends with `module;` and is not
    inside a multi-line block comment. Returns None if there is no such line.
    '''
    inside_block_comment = False

    for index, raw in enumerate(lines):
        text = raw.strip()

        # Comments that open and close on the same line never change state.
        if is_line_comment(text):
            continue

        if text.startswith("/*"):
            inside_block_comment = True
            continue

        if inside_block_comment:
            if text.endswith("*/"):
                inside_block_comment = False
            continue

        if text.endswith("module;"):
            return index

    return None
55+
56+
57+
def is_file_module_qldoc(i, lines):
    '''
    Decide whether the qldoc that ended on line i documents the implicit
    file-level module.

    The lines after i are scanned in order. If an import (`import ...` or
    `private import ...`) or the start of another qldoc (`/**`) comes first,
    the qldoc belongs to the file-level module. If any other non-empty,
    non-comment line comes first, it is assumed to be a declaration that owns
    the qldoc. When nothing but comments and blank lines follow, the qldoc is
    treated as belonging to the file-level module.
    '''
    inside_block_comment = False

    for raw in lines[i + 1:]:
        text = raw.strip()

        if text.startswith(("import ", "private import ", "/**")):
            return True
        if not text or is_line_comment(text):
            continue

        if text.startswith("/*"):
            inside_block_comment = True
        elif inside_block_comment:
            if text.endswith("*/"):
                inside_block_comment = False
        else:
            # First real line of code is not an import: it owns the qldoc.
            return False

    return True
82+
83+
84+
def find_file_module_qldoc_declaration(lines):
    '''
    Returns the index of the last line of the implicit file module qldoc if one
    exists. Returns None otherwise.

    Only the leading part of the file is inspected: the scan stops at the first
    qldoc that ends (and asks is_file_module_qldoc whether it belongs to the
    file-level module), or at the first non-empty line that is neither a
    comment nor part of one.
    '''
    qldoc = False
    comment = False

    for i, line in enumerate(lines):
        trimmed = line.strip()

        if trimmed.startswith("//"):
            continue
        elif (qldoc or trimmed.startswith("/**")) and trimmed.endswith("*/"):
            # A qldoc just ended; determine if it belongs to the implicit file module.
            return i if is_file_module_qldoc(i, lines) else None
        elif trimmed.startswith("/**"):
            qldoc = True
        elif trimmed.startswith("/*"):
            # A block comment that also closes on this line ("/* ... */") must not
            # leave us in comment state, otherwise all following code lines would
            # be skipped and a later qldoc could be mistaken for the file-level one.
            # The length check keeps "/*/" (opener only) from counting as closed.
            comment = not (len(trimmed) >= 4 and trimmed.endswith("*/"))
        elif comment and trimmed.endswith("*/"):
            comment = False
        elif (not qldoc and not comment) and trimmed:
            # Real code before any qldoc: there is no file-level qldoc.
            return None

    return None
113+
114+
115+
def only_comments(lines):
    '''
    Tell whether the file content consists solely of comments and blank lines.

    Returns False as soon as a non-empty line is found that is neither a
    single-line comment nor part of a multi-line block comment.
    '''
    inside_block_comment = False

    for raw in lines:
        text = raw.strip()

        if not text or is_line_comment(text):
            continue

        if text.startswith("/*"):
            inside_block_comment = True
        elif not inside_block_comment:
            # Non-empty text outside of any comment: the file has real content.
            return False
        elif text.endswith("*/"):
            inside_block_comment = False

    return True
136+
137+
138+
def insert_toplevel_maybe_local_annotation(filename, lines):
    '''
    Find a suitable place to insert an overlay[local?] annotation at the top of the file.

    Three placements are tried in order:
      1. directly before an existing file-level `module;` declaration;
      2. directly after the file-level module qldoc, followed by a new `module;` line;
      3. at the very top of the file, as `overlay[local?]`, `module;` and a blank line.

    Returns a pair consisting of a description and the modified lines, or None if no
    overlay annotation is necessary (e.g., for files that only contain comments).
    The input list is not modified.
    '''
    if only_comments(lines):
        return None

    # Index 0 is a valid position, so the result must be compared against None
    # explicitly rather than tested for truthiness.
    i = find_file_level_module_declaration(lines)
    if i is not None:
        out_lines = lines[:i]
        out_lines.append("overlay[local?]\n")
        out_lines.extend(lines[i:])
        return (f"Annotating \"{filename}\" via existing file-level module statement", out_lines)

    i = find_file_module_qldoc_declaration(lines)
    if i is not None:
        # Keep the qldoc (which ends on line i) attached to the new module declaration.
        out_lines = lines[:i + 1]
        out_lines.append("overlay[local?]\n")
        out_lines.append("module;\n")
        out_lines.extend(lines[i + 1:])
        return (f"Annotating \"{filename}\" which has a file-level module qldoc", out_lines)

    out_lines = ["overlay[local?]\n", "module;\n", "\n"] + lines
    return (f"Annotating \"{filename}\" without file-level module qldoc", out_lines)
164+
165+
166+
def insert_overlay_caller_annotations(lines):
    '''
    Mark pragma[inline] predicates as overlay[caller] if they are not declared private.

    For every line that consists solely of `pragma[inline]` (ignoring surrounding
    whitespace), an `overlay[caller]` line with the same leading whitespace is
    inserted before it, unless the following line contains the text "private".
    A `pragma[inline]` on the last line is left alone because there is no
    following line to inspect.

    Returns a new list of lines; the input list is not modified.
    '''
    out_lines = []
    for i, line in enumerate(lines):
        if line.strip() == "pragma[inline]":
            has_next = i + 1 < len(lines)
            if has_next and "private" not in lines[i + 1]:
                # Reuse the pragma's own indentation for the inserted annotation.
                whitespace = line[:len(line) - len(line.lstrip())]
                out_lines.append(f"{whitespace}overlay[caller]\n")
        out_lines.append(line)
    return out_lines
179+
180+
181+
def annotate_as_appropriate(filename, lines):
    '''
    Apply the overlay annotation heuristics to a file that has no overlay
    annotations yet.

    Returns None if no annotations are needed. Otherwise, returns a pair consisting of a
    string describing the action taken and the modified content as a list of lines.
    '''
    if has_overlay_annotations(lines):
        return None

    # These simple heuristics filter out those .qll files that we do _not_ want to annotate
    # as overlay[local?]. It is not clear that these heuristics are exactly what we want,
    # but they seem to work well enough for now (as determined by speed and accuracy numbers).
    if filename.endswith("Test.qll"):
        return None

    if filename.endswith(("Query.qll", "Config.qll")):
        if any("implements DataFlow::ConfigSig" in line for line in lines):
            return None

    # Files without any non-blank line need no annotation.
    if all(not line.strip() for line in lines):
        return None

    with_caller_annotations = insert_overlay_caller_annotations(lines)
    return insert_toplevel_maybe_local_annotation(filename, with_caller_annotations)
204+
205+
206+
def process_single_file(write, filename):
    '''
    Process a single file, annotating it as appropriate.

    If write is set, the changes are written back to the file and a context
    diff of the change is printed. If write is not set, the file is only
    inspected and never modified.

    Returns True if the file requires changes, False otherwise.
    '''
    # Read inside a context manager so the handle is closed before the same
    # path is reopened for writing below.
    with open(filename) as in_file:
        old = in_file.readlines()

    annotate_result = annotate_as_appropriate(filename, old)
    if annotate_result is None:
        return False

    if not write:
        return True

    description, new = annotate_result

    diff = list(context_diff(old, new, fromfile=filename, tofile=filename))
    if diff:
        print(description)
        for line in diff:
            print(line.rstrip())
        with open(filename, "w") as out_file:
            out_file.writelines(new)

    return True
234+
235+
236+
def main(argv):
    '''
    Command-line entry point.

    argv is the full argument vector (as in sys.argv): an optional leading
    `--check` flag followed by language names. For every given language the
    `<language>/ql/lib` directory is processed, plus the `shared` directory
    when at least one language was given. All `.qll` files are handled, except
    those located in a directory whose path ends with "tutorial".

    Without `--check`, files are annotated in place. With `--check`, no file is
    modified; files that would need annotations are reported and the process
    exits with status -1.

    Raises Exception for an unknown language name.
    '''
    args = argv[1:]
    check = bool(args) and args[0] == "--check"
    langs = args[1:] if check else args

    known_langs = ["cpp", "go", "csharp", "java", "javascript", "python", "ruby", "rust", "swift"]

    dirs = []
    for lang in langs:
        if lang not in known_langs:
            raise Exception(f"Unknown language \"{lang}\".")
        dirs.append(f"{lang}/ql/lib")

    if dirs:
        dirs.append("shared")

    missing_annotations = []

    for root in dirs:
        for dirpath, _, filenames in os.walk(root):
            for filename in filenames:
                if filename.endswith(".qll") and not dirpath.endswith("tutorial"):
                    path = os.path.join(dirpath, filename)
                    res = process_single_file(not check, path)
                    if check and res:
                        missing_annotations.append(path)

    if missing_annotations:
        print("The following files have no overlay annotations:")
        # Only list the first ten files to keep the output readable.
        for path in missing_annotations[:10]:
            print("- " + path)
        if len(missing_annotations) > 10:
            print(f"and {len(missing_annotations) - 10} additional files.")
        print()
        print("Please manually add overlay annotations or use the config/add-overlay-annotations.py script to automatically add sensible default overlay annotations.")
        # sys.exit instead of the site-provided exit(), which is meant for
        # interactive use and is absent when Python runs without the site module.
        sys.exit(-1)


if __name__ == "__main__":
    main(sys.argv)

0 commit comments

Comments
 (0)