Skip to content

Commit e1f48f2

Browse files
author
Jason Sewall
authored
Merge pull request #1 from jasonsewall-intel/external-update-new
Better handle line continuation & other usability fixes
2 parents 1673301 + a046504 commit e1f48f2

File tree

13 files changed

+922
-134
lines changed

13 files changed

+922
-134
lines changed

codebasin.py

Lines changed: 19 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@
99
optional arguments:
1010
-h, --help show this help message and exit
1111
-c FILE, --config FILE
12-
configuration file (default: config.yaml)
12+
configuration file (default: <DIR>/config.yaml)
1313
-v, --verbose verbosity level
1414
-q, --quiet quiet level
1515
-r DIR, --rootdir DIR
@@ -25,7 +25,7 @@
2525

2626
from codebasin import config, finder, report, util, walkers
2727

28-
version = 1.0
28+
version = 1.05
2929

3030

3131
def report_enabled(name):
@@ -60,18 +60,20 @@ def guess_project_name(config_path):
6060

6161
# Read command-line arguments
6262
parser = argparse.ArgumentParser(description="Code Base Investigator v" + str(version))
63+
parser.add_argument('-r', '--rootdir', dest="rootdir", metavar='DIR',
64+
default=os.getcwd(), type=str,
65+
help="Set working root directory (default .)")
6366
parser.add_argument('-c', '--config', dest='config_file', metavar='FILE', action='store',
64-
default='config.yaml', help='configuration file (default: config.yaml)')
67+
help='configuration file (default: <DIR>/config.yaml)')
6568
parser.add_argument('-v', '--verbose', dest='verbose',
6669
action='count', default=0, help='increase verbosity level')
6770
parser.add_argument('-q', '--quiet', dest='quiet',
6871
action='count', default=0, help='decrease verbosity level')
69-
parser.add_argument('-r', '--rootdir', dest="rootdir", metavar='DIR',
70-
default=os.getcwd(), type=str,
71-
help="Set working root directory (default .)")
7272
parser.add_argument('-R', '--report', dest='reports', metavar='REPORT', default=['all'],
7373
choices=['all', 'summary', 'clustering'], nargs='+',
7474
help='desired output reports (default: all)')
75+
parser.add_argument('--batchmode', dest='batchmode', action='store_true', default=False,
76+
help="Set batch mode (additional output for bulk operation.)")
7577
args = parser.parse_args()
7678

7779
stdout_log = logging.StreamHandler(sys.stdout)
@@ -81,12 +83,16 @@ def guess_project_name(config_path):
8183
max(1, logging.WARNING - 10 * (args.verbose - args.quiet)))
8284
rootdir = os.path.realpath(args.rootdir)
8385

86+
if args.config_file is None:
87+
config_file = os.path.join(rootdir, "config.yaml")
88+
else:
89+
config_file = args.config_file
8490
# Load the configuration file into a dict
85-
if not util.ensure_yaml(args.config_file):
91+
if not util.ensure_yaml(config_file):
8692
logging.getLogger("codebasin").error(
8793
"Configuration file does not have YAML file extension.")
8894
sys.exit(1)
89-
codebase, configuration = config.load(args.config_file, rootdir)
95+
codebase, configuration = config.load(config_file, rootdir)
9096

9197
# Parse the source tree, and determine source line associations.
9298
# The trees and associations are housed in state.
@@ -96,7 +102,11 @@ def guess_project_name(config_path):
96102
platform_mapper = walkers.PlatformMapper(codebase)
97103
setmap = platform_mapper.walk(state)
98104

99-
output_prefix = os.path.realpath(guess_project_name(args.config_file))
105+
output_prefix = os.path.realpath(guess_project_name(config_file))
106+
107+
if args.batchmode and (report_enabled("summary") or report_enabled("clustering")):
108+
print(f"Config file: {config_file}")
109+
print(f"Root: {rootdir}")
100110

101111
# Print summary report
102112
if report_enabled("summary"):

codebasin/config.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,7 @@ def load_database(dbpath, rootdir):
134134
Return a list of compilation commands, where each command is
135135
represented as a compilation database entry.
136136
"""
137-
with open(dbpath, 'r') as fi:
137+
with util.safe_open_read_nofollow(dbpath, 'r') as fi:
138138
db = yaml.safe_load(fi)
139139

140140
configuration = []
@@ -244,7 +244,7 @@ def load(config_file, rootdir):
244244
Return a (codebase, platform configuration) tuple of dicts.
245245
"""
246246
if os.path.isfile(config_file):
247-
with open(config_file, 'r') as f:
247+
with util.safe_open_read_nofollow(config_file, 'r') as f:
248248
config = yaml.safe_load(f)
249249
else:
250250
raise RuntimeError("Could not open {!s}.".format(config_file))

codebasin/file_parser.py

Lines changed: 47 additions & 118 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,10 @@
55
and building a tree of nodes from it.
66
"""
77

8-
from os.path import splitext
9-
8+
import os
9+
from codebasin.file_source import get_file_source
1010
from . import preprocessor # pylint : disable=no-name-in-module
11+
from . import util # pylint : disable=no-name-in-module
1112

1213

1314
class LineGroup:
@@ -30,25 +31,19 @@ def empty(self):
3031
return False
3132
return True
3233

33-
def add_line(self, line_num, is_countable=False):
34+
def add_line(self, phys_int, sloc_count):
3435
"""
3536
Add a line to this line group. Update the extent appropriately,
3637
and if it's a countable line, add it to the line count.
3738
"""
3839

39-
if self.start_line == -1:
40-
self.start_line = line_num
41-
42-
self.end_line = line_num
43-
44-
if self.start_line == -1 or line_num < self.start_line:
45-
self.start_line = line_num
40+
if self.start_line == -1 or phys_int[0] < self.start_line:
41+
self.start_line = phys_int[0]
4642

47-
if line_num > self.end_line:
48-
self.end_line = line_num
43+
if phys_int[1] - 1 > self.end_line:
44+
self.end_line = phys_int[1] - 1
4945

50-
if is_countable:
51-
self.line_count += 1
46+
self.line_count += sloc_count
5247

5348
def reset(self):
5449
"""
@@ -58,13 +53,12 @@ def reset(self):
5853
self.start_line = -1
5954
self.end_line = -1
6055

61-
def merge(self, line_group, count=False):
56+
def merge(self, line_group):
6257
"""
6358
Merge another line group into this line group, and reset the
6459
other group.
6560
"""
66-
if count:
67-
self.line_count += line_group.line_count
61+
self.line_count += line_group.line_count
6862

6963
if self.start_line == -1:
7064
self.start_line = line_group.start_line
@@ -84,63 +78,25 @@ class FileParser:
8478
"""
8579

8680
def __init__(self, _filename):
87-
self._filename = _filename
88-
self.full_line = ''
89-
90-
split = splitext(_filename)
91-
if len(split) == 2:
92-
self._file_extension = split[1].lower()
93-
else:
94-
self._file_extension = None
81+
self._filename = os.path.realpath(_filename)
9582

9683
@staticmethod
97-
def line_info(line):
98-
"""
99-
Determine if the input line is a directive by checking if the
100-
first by looking for a '#' as the first non-whitespace
101-
character. Also determine if the last character before a new
102-
line is a line continuation character '\'.
103-
104-
Return a (directive, line_continue) tuple.
105-
"""
106-
107-
directive = False
108-
line_continue = False
109-
110-
for c in line:
111-
if c == '#':
112-
directive = True
113-
break
114-
elif c not in [' ', '\t']:
115-
break
116-
117-
if line.rstrip("\n\r")[-1:] == '\\':
118-
line_continue = True
119-
120-
return (directive, line_continue)
121-
122-
def handle_directive(self, out_tree, line_num, comment_cleaner, groups):
84+
def handle_directive(out_tree, groups, logical_line):
12385
"""
12486
Handle inserting code and directive nodes, where appropriate.
12587
Update the file group, and reset the code and directive groups.
12688
"""
12789
# We will actually use this directive, if it is not empty
128-
self.full_line = comment_cleaner.strip_comments(self.full_line)
129-
if self.full_line.strip():
130-
# We need to finalize the previously started
131-
# CodeNode (if there was one) before processing
132-
# this DirectiveNode
133-
if not groups['code'].empty():
134-
groups['code'].add_line(line_num - 1)
135-
self.insert_code_node(out_tree, groups['code'])
136-
137-
groups['file'].merge(groups['code'])
90+
# We need to finalize the previously started
91+
# CodeNode (if there was one) before processing
92+
# this DirectiveNode
93+
if not groups['code'].empty():
94+
FileParser.insert_code_node(out_tree, groups['code'])
95+
groups['file'].merge(groups['code'])
13896

139-
self.insert_directive_node(out_tree, groups['directive'])
97+
FileParser.insert_directive_node(out_tree, groups['directive'], logical_line)
14098

141-
groups['file'].merge(groups['directive'])
142-
else:
143-
groups['code'].merge(groups['directive'])
99+
groups['file'].merge(groups['directive'])
144100

145101
@staticmethod
146102
def insert_code_node(tree, line_group):
@@ -151,13 +107,14 @@ def insert_code_node(tree, line_group):
151107
line_group.start_line, line_group.end_line, line_group.line_count)
152108
tree.insert(new_node)
153109

154-
def insert_directive_node(self, tree, line_group):
110+
@staticmethod
111+
def insert_directive_node(tree, line_group, logical_line):
155112
"""
156113
Build a directive node by parsing a directive line, and insert a
157114
new directive node into the tree.
158115
"""
159116
new_node = preprocessor.DirectiveParser(preprocessor.Lexer(
160-
self.full_line, line_group.start_line).tokenize()).parse()
117+
logical_line, line_group.start_line).tokenize()).parse()
161118
new_node.start_line = line_group.start_line
162119
new_node.end_line = line_group.end_line
163120
new_node.num_lines = line_group.line_count
@@ -169,73 +126,45 @@ def parse_file(self):
169126
representing this file, and return it.
170127
"""
171128

172-
file_comment_cleaner = preprocessor.CommentCleaner(self._file_extension)
173-
if file_comment_cleaner.filetype == 'c':
174-
cpp_comment_cleaner = file_comment_cleaner
175-
else:
176-
cpp_comment_cleaner = preprocessor.CommentCleaner('.c')
177-
178129
out_tree = preprocessor.SourceTree(self._filename)
179-
with open(self._filename, mode='r', errors='replace') as source_file:
180-
previous_continue = False
130+
file_source = get_file_source(self._filename)
131+
if not file_source:
132+
raise RuntimeError(f"{self._filename} doesn't appear " +
133+
"to be a language this tool can process")
134+
with util.safe_open_read_nofollow(self._filename, mode='r', errors='replace') as source_file:
181135

182136
groups = {'code': LineGroup(),
183137
'directive': LineGroup(),
184-
'file': LineGroup()
185-
}
138+
'file': LineGroup()}
186139

187140
groups['file'].start_line = 1
188141

189-
lines = source_file.readlines()
190-
for (line_num, line) in enumerate(lines, 1):
191-
# Determine if this line starts with a # (directive)
192-
# and/or ends with a \ (line continuation)
193-
(in_directive, continue_line) = self.line_info(line)
194-
195-
# Only follow continuation for directives
196-
if previous_continue or in_directive:
142+
source = file_source(source_file)
143+
try:
144+
while True:
145+
logical_line = next(source)
146+
phys_int = logical_line.phys_interval()
147+
# Only follow continuation for directives
148+
if logical_line.category == 'CPP_DIRECTIVE':
149+
# Add this into the directive lines, even if it
150+
# might not be a directive we count
197151

198-
# Add this into the directive lines, even if it
199-
# might not be a directive we count
200-
groups['directive'].add_line(line_num, True)
152+
groups['directive'].add_line(phys_int, logical_line.local_sloc)
201153

202-
# If this line starts a new directive, flush the
203-
# line buffer
204-
if in_directive and not previous_continue:
205-
self.full_line = ''
154+
FileParser.handle_directive(out_tree, groups, logical_line.flushed_line)
206155

207-
previous_continue = continue_line
208-
209-
# If this line also contains a continuation
210-
# character
211-
if continue_line:
212-
self.full_line += line.rstrip("\\\n\r")
213-
# If this line ends a previously continued line
214-
else:
215-
self.full_line += line.rstrip("\n\r")
216-
217-
self.handle_directive(out_tree, line_num, cpp_comment_cleaner,
218-
groups)
219-
220-
# FallBack is that this line is a simple code line.
221-
else:
222-
previous_continue = False
223-
224-
# If the line isn't empty after stripping comments,
225-
# count it as code
226-
if file_comment_cleaner.strip_comments(line[0:-1]).strip():
227-
groups['code'].add_line(line_num, True)
156+
# FallBack is that this line is a simple code line.
228157
else:
229-
groups['code'].add_line(line_num)
158+
groups['code'].add_line(phys_int, logical_line.local_sloc)
159+
except StopIteration as it:
160+
# pylint: disable=unpacking-non-sequence
161+
_, physical_loc = it.value
230162

231-
# Insert any code lines left at the end of the file
232163
if not groups['code'].empty():
233-
groups['code'].add_line(len(lines))
164+
groups['code'].add_line((groups['code'].start_line, physical_loc - 1), 0)
234165
self.insert_code_node(out_tree, groups['code'])
235-
236166
groups['file'].merge(groups['code'])
237167

238-
groups['file'].add_line(len(lines))
239168
out_tree.root.num_lines = groups['file'].end_line
240169
out_tree.root.total_sloc = groups['file'].line_count
241170
return out_tree

0 commit comments

Comments
 (0)