5
5
and building a tree of nodes from it.
6
6
"""
7
7
8
- from os . path import splitext
9
-
8
+ import os
9
+ from codebasin . file_source import get_file_source
10
10
from . import preprocessor # pylint : disable=no-name-in-module
11
+ from . import util # pylint : disable=no-name-in-module
11
12
12
13
13
14
class LineGroup :
@@ -30,25 +31,19 @@ def empty(self):
30
31
return False
31
32
return True
32
33
33
def add_line(self, phys_int, sloc_count):
    """
    Add a line to this line group. Update the extent appropriately,
    and add its source-line count to the group's line count.

    phys_int is indexed as a (start, end) pair of physical line
    numbers. The end is treated as exclusive: the last physical line
    covered is phys_int[1] - 1.

    sloc_count is added to line_count as-is; pass 0 to extend the
    extent without counting anything.
    """
    first_line = phys_int[0]
    last_line = phys_int[1] - 1

    # A start_line of -1 marks a group that has no extent yet.
    if self.start_line == -1 or first_line < self.start_line:
        self.start_line = first_line

    # The extent only ever grows; an earlier interval never shrinks it.
    if last_line > self.end_line:
        self.end_line = last_line

    self.line_count += sloc_count
52
47
53
48
def reset (self ):
54
49
"""
@@ -58,13 +53,12 @@ def reset(self):
58
53
self .start_line = - 1
59
54
self .end_line = - 1
60
55
61
- def merge (self , line_group , count = False ):
56
+ def merge (self , line_group ):
62
57
"""
63
58
Merge another line group into this line group, and reset the
64
59
other group.
65
60
"""
66
- if count :
67
- self .line_count += line_group .line_count
61
+ self .line_count += line_group .line_count
68
62
69
63
if self .start_line == - 1 :
70
64
self .start_line = line_group .start_line
@@ -84,63 +78,25 @@ class FileParser:
84
78
"""
85
79
86
80
def __init__(self, _filename):
    """
    Remember which file this parser reads.

    The path is canonicalized with os.path.realpath before it is
    stored in self._filename.
    """
    self._filename = os.path.realpath(_filename)
95
82
96
83
@staticmethod
def handle_directive(out_tree, groups, logical_line):
    """
    Handle inserting code and directive nodes, where appropriate.
    Update the file group with the code and directive groups.

    out_tree is the tree that receives the new nodes.
    groups is a dict of line groups keyed by 'code', 'directive'
    and 'file'.
    logical_line is the directive text handed on to
    insert_directive_node for parsing.
    """
    # We need to finalize the previously started
    # CodeNode (if there was one) before processing
    # this DirectiveNode
    if not groups['code'].empty():
        FileParser.insert_code_node(out_tree, groups['code'])
        groups['file'].merge(groups['code'])

    # The directive is always inserted; there is no emptiness check here.
    FileParser.insert_directive_node(out_tree, groups['directive'], logical_line)

    groups['file'].merge(groups['directive'])
144
100
145
101
@staticmethod
146
102
def insert_code_node (tree , line_group ):
@@ -151,13 +107,14 @@ def insert_code_node(tree, line_group):
151
107
line_group .start_line , line_group .end_line , line_group .line_count )
152
108
tree .insert (new_node )
153
109
154
- def insert_directive_node (self , tree , line_group ):
110
+ @staticmethod
111
+ def insert_directive_node (tree , line_group , logical_line ):
155
112
"""
156
113
Build a directive node by parsing a directive line, and insert a
157
114
new directive node into the tree.
158
115
"""
159
116
new_node = preprocessor .DirectiveParser (preprocessor .Lexer (
160
- self . full_line , line_group .start_line ).tokenize ()).parse ()
117
+ logical_line , line_group .start_line ).tokenize ()).parse ()
161
118
new_node .start_line = line_group .start_line
162
119
new_node .end_line = line_group .end_line
163
120
new_node .num_lines = line_group .line_count
@@ -169,73 +126,45 @@ def parse_file(self):
169
126
representing this file, and return it.
170
127
"""
171
128
172
- file_comment_cleaner = preprocessor .CommentCleaner (self ._file_extension )
173
- if file_comment_cleaner .filetype == 'c' :
174
- cpp_comment_cleaner = file_comment_cleaner
175
- else :
176
- cpp_comment_cleaner = preprocessor .CommentCleaner ('.c' )
177
-
178
129
out_tree = preprocessor .SourceTree (self ._filename )
179
- with open (self ._filename , mode = 'r' , errors = 'replace' ) as source_file :
180
- previous_continue = False
130
+ file_source = get_file_source (self ._filename )
131
+ if not file_source :
132
+ raise RuntimeError (f"{ self ._filename } doesn't appear " +
133
+ "to be a language this tool can process" )
134
+ with util .safe_open_read_nofollow (self ._filename , mode = 'r' , errors = 'replace' ) as source_file :
181
135
182
136
groups = {'code' : LineGroup (),
183
137
'directive' : LineGroup (),
184
- 'file' : LineGroup ()
185
- }
138
+ 'file' : LineGroup ()}
186
139
187
140
groups ['file' ].start_line = 1
188
141
189
- lines = source_file .readlines ()
190
- for (line_num , line ) in enumerate (lines , 1 ):
191
- # Determine if this line starts with a # (directive)
192
- # and/or ends with a \ (line continuation)
193
- (in_directive , continue_line ) = self .line_info (line )
194
-
195
- # Only follow continuation for directives
196
- if previous_continue or in_directive :
142
+ source = file_source (source_file )
143
+ try :
144
+ while True :
145
+ logical_line = next (source )
146
+ phys_int = logical_line .phys_interval ()
147
+ # Only follow continuation for directives
148
+ if logical_line .category == 'CPP_DIRECTIVE' :
149
+ # Add this into the directive lines, even if it
150
+ # might not be a directive we count
197
151
198
- # Add this into the directive lines, even if it
199
- # might not be a directive we count
200
- groups ['directive' ].add_line (line_num , True )
152
+ groups ['directive' ].add_line (phys_int , logical_line .local_sloc )
201
153
202
- # If this line starts a new directive, flush the
203
- # line buffer
204
- if in_directive and not previous_continue :
205
- self .full_line = ''
154
+ FileParser .handle_directive (out_tree , groups , logical_line .flushed_line )
206
155
207
- previous_continue = continue_line
208
-
209
- # If this line also contains a continuation
210
- # character
211
- if continue_line :
212
- self .full_line += line .rstrip ("\\ \n \r " )
213
- # If this line ends a previously continued line
214
- else :
215
- self .full_line += line .rstrip ("\n \r " )
216
-
217
- self .handle_directive (out_tree , line_num , cpp_comment_cleaner ,
218
- groups )
219
-
220
- # FallBack is that this line is a simple code line.
221
- else :
222
- previous_continue = False
223
-
224
- # If the line isn't empty after stripping comments,
225
- # count it as code
226
- if file_comment_cleaner .strip_comments (line [0 :- 1 ]).strip ():
227
- groups ['code' ].add_line (line_num , True )
156
+ # FallBack is that this line is a simple code line.
228
157
else :
229
- groups ['code' ].add_line (line_num )
158
+ groups ['code' ].add_line (phys_int , logical_line .local_sloc )
159
+ except StopIteration as it :
160
+ # pylint: disable=unpacking-non-sequence
161
+ _ , physical_loc = it .value
230
162
231
- # Insert any code lines left at the end of the file
232
163
if not groups ['code' ].empty ():
233
- groups ['code' ].add_line (len ( lines ) )
164
+ groups ['code' ].add_line (( groups [ 'code' ]. start_line , physical_loc - 1 ), 0 )
234
165
self .insert_code_node (out_tree , groups ['code' ])
235
-
236
166
groups ['file' ].merge (groups ['code' ])
237
167
238
- groups ['file' ].add_line (len (lines ))
239
168
out_tree .root .num_lines = groups ['file' ].end_line
240
169
out_tree .root .total_sloc = groups ['file' ].line_count
241
170
return out_tree
0 commit comments