-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathTokenScanner.py
169 lines (139 loc) · 5.58 KB
/
TokenScanner.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
# Reserved words. The scanner reads each of these like an identifier first
# and only afterwards labels it as a "Key" token instead of an "ID".
keys = {
    "EOF", "identifier", "integer",
    "public", "class", "static", "void", "main", "extends",
    "boolean", "int", "true", "false",
    "return", "if", "else", "while", "for",
    "System.out.println",
}

# Every other terminal: punctuation and operators, one or two characters long.
simpleTerms = {
    "{", "}", "(", ")", ";", ",", ".",
    "=", "+=", "==",
    "+", "-", "*",
    "&&", "<",
}

# Characters that can open a simple term (first character of each entry).
startOfST = {term[0] for term in simpleTerms}

# Characters that can close a two-character simple term (second character).
endOfST = {term[1] for term in simpleTerms if len(term) > 1}
class tokenScanner(object):
    """Character-driven scanner: feed it one character at a time through
    getChar() and it hands back a token each time one is completed.

    Meaning of self.state:
        0  between tokens
        1  inside an identifier / keyword
        2  inside an integer literal
        3  just read '+' or '-'
        4  read the first character of a simple term
        5  read the second character of a simple term
        6  read '/', waiting for '/' or '*' to open a comment
        7  inside a // line comment
        8  inside a /* block comment
        9  inside a block comment, right after a '*'

    When a comment ends the scanner resumes the state it was in before the
    comment started, so a comment does not terminate the token around it.
    """

    state = 0
    tempStr = ""  # Text of the token currently being built
    last_constructed_token = (None, None)
    lastChar = ""  # Terminating character that has not been scanned yet
    we_already_have_a_char = False  # True while lastChar is still pending
    # For comments: the state to go back to once the comment is over
    last_state = -1

    def constructToken(self, lastChar):
        """Turn the text collected in tempStr into a token and reset.

        lastChar is the character that ended the token. Unless it is a space
        or a newline it is stored so that the next getChar() call scans it
        before its own argument.

        Returns a (kind, value) tuple whose kind is "Key", "ID", "INT", "Opr"
        or "ST", or None when the collected text is not a valid token (for
        example a dotted name that is not a keyword, or a symbol sequence
        missing from simpleTerms).
        """
        output = None
        if self.state == 1:  # Identifier or keyword
            if self.tempStr in keys:
                output = ("Key", self.tempStr)
            elif '.' not in self.tempStr:
                output = ("ID", self.tempStr)
        elif self.state == 2:  # Integer, possibly with a leading sign
            output = ("INT", int(self.tempStr))
        elif self.state == 3:  # Lone '+' or '-'
            output = ("Opr", self.tempStr)
        elif self.state in (4, 5):  # ST: Simple Term
            if self.tempStr in simpleTerms:
                output = ("ST", self.tempStr)

        # Resetting the vars
        self.tempStr = ""
        self.state = 0
        if lastChar not in (" ", "\n"):
            self.we_already_have_a_char = True
            self.lastChar = lastChar
        if output is not None:
            self.last_constructed_token = output
        return output

    def getChar(self, inputChar):
        """Feed one character to the scanner.

        inputChar is a single character; "" or None marks the end of input.

        Returns the token tuple built by constructToken() when inputChar
        completes a token, ("STOP", "END OF FILE") when the end of input is
        reached between tokens, ("ERROR", message) for '$', and None in every
        other case.
        """
        if self.we_already_have_a_char:  # There is a char left to process!
            # Clear the flag first so the nested call cannot recurse again.
            self.we_already_have_a_char = False
            # The pending char is always scanned from state 0, where it can
            # only start a token, so the discarded result is never a token.
            self.getChar(self.lastChar)

        if inputChar is None:
            # Treat None like "" so the str methods below are safe to call.
            inputChar = ""
        if inputChar == '$':
            return ("ERROR", "Wronginput: " + inputChar)

        if inputChar == "" and self.state in (7, 8, 9):
            # End of input also ends an open comment; fall through so the
            # resumed state gets to handle the end-of-input marker itself.
            self.state = self.last_state

        if inputChar == '/' and self.state in (0, 1, 2, 3, 4, 5):
            # Possible start of a comment: remember where to come back to.
            self.last_state = self.state
            self.state = 6
            return None

        if self.state == 0:
            if inputChar == "":
                return ("STOP", "END OF FILE")
            if inputChar.isdigit():
                self.tempStr += inputChar
                self.state = 2
            elif inputChar.isalpha():
                self.tempStr += inputChar
                self.state = 1
            elif inputChar == '+' or inputChar == '-':
                self.tempStr += inputChar
                self.state = 3
            elif inputChar in startOfST:
                self.tempStr += inputChar
                self.state = 4
            # Anything else (whitespace, unknown symbols) is skipped.
        elif self.state == 1:  # ID
            if inputChar.isalpha() or inputChar.isdigit():
                self.tempStr += inputChar
            elif inputChar == '.' and self.tempStr in ("System", "System.out"):
                # Only System.out.println may contain dots.
                self.tempStr += inputChar
            else:
                return self.constructToken(inputChar)
        elif self.state == 2:  # Digit
            if inputChar.isdigit():
                self.tempStr += inputChar
            else:
                return self.constructToken(inputChar)
        elif self.state == 3:  # +|-
            previous_kind = self.last_constructed_token[0]
            if inputChar.isdigit() and previous_kind not in ("ID", "INT"):
                # The sign belongs to the number unless it follows an
                # operand, as in c=1+1 where '+' is a binary operator.
                self.tempStr += inputChar
                self.state = 2
            elif inputChar == '=':
                self.tempStr += inputChar
                self.state = 4
            else:
                return self.constructToken(inputChar)
        elif self.state == 4:
            if inputChar in endOfST:
                self.tempStr += inputChar
                self.state = 5
            else:
                return self.constructToken(inputChar)
        elif self.state == 5:
            return self.constructToken(inputChar)
        # Here is the comments section:
        elif self.state == 6:
            if inputChar == '/':
                self.state = 7
            elif inputChar == '*':
                self.state = 8
            else:  # ERROR
                print("Error maybe in defining the comment")
                self.state = self.last_state
        elif self.state == 7:
            if inputChar == '\n':  # Back to work...
                self.state = self.last_state
        elif self.state == 8:
            if inputChar == '*':
                self.state = 9
        elif self.state == 9:
            if inputChar == '/':  # Back to work...
                self.state = self.last_state
            elif inputChar != '*':
                # A further '*' keeps us here so that "**/" still closes.
                self.state = 8
        return None