-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathutils.py
141 lines (113 loc) · 6.02 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
# encoding=utf-8
import re
# -----------------------------------------------------------------------------------------------------------#
########################################
### Parsing for Input txt
########################################
def txt_parsing(txt):
    """Split the lines of a query description into its six sections.

    ``txt`` is a list of strings (presumably the lines of the input file --
    confirm against the caller). Each section is introduced by a header line
    containing one of the markers ``(S)``, ``(n)``, ``(V)``, ``([F])``,
    ``([C])`` and ``(G)``, expected in that order. A section consists of the
    lines between its header and the next header (or the end of ``txt`` for
    the last one).

    Returns the tuple ``(S, n, V, F, C, G)`` where

    * ``S``, ``n``, ``V``, ``F`` and ``G`` are built from the FIRST line of
      their section only, split on commas with surrounding whitespace
      removed; an empty section yields ``[]``;
    * ``C`` is the list of all lines of the ``([C])`` section, unchanged.

    Raises:
        ValueError: if one of the six header markers is not found.
    """
    markers = ["(S)", "(n)", "(V)", "([F])", "([C])", "(G)"]

    # Locate the header line of every marker.  The k-th marker is only looked
    # for from line k onwards, and there is deliberately no ``break``: when a
    # marker occurs on several lines the last occurrence wins.
    header_line = {}
    for order, marker in enumerate(markers):
        for line_no in range(order, len(txt)):
            # ``in`` also accepts a marker at column 0, which a
            # ``find(...) > 0`` test would silently miss.
            if marker in txt[line_no]:
                header_line[marker] = line_no

    missing = [marker for marker in markers if marker not in header_line]
    if missing:
        raise ValueError(
            "missing section header(s) in input: " + ", ".join(missing)
        )

    # Slice out the lines belonging to each section.  A separate dict is used
    # so the header positions are never overwritten while still being read.
    sections = {}
    for marker, next_marker in zip(markers, markers[1:]):
        sections[marker] = txt[header_line[marker] + 1:header_line[next_marker]]
    last_marker = markers[-1]
    sections[last_marker] = txt[header_line[last_marker] + 1:]

    def first_line_items(marker):
        """Return the comma-separated items on the section's first line."""
        lines = sections[marker]
        if not lines:  # header present but nothing below it
            return []
        return [item.strip() for item in lines[0].split(",")]

    S = first_line_items("(S)")
    n = first_line_items("(n)")
    V = first_line_items("(V)")
    F = first_line_items("([F])")
    C = sections["([C])"]  # one condition per line, kept verbatim
    G = first_line_items("(G)")
    return S, n, V, F, C, G
########################################
### Parsing for G
########################################
def G_parsing(G):
    """Translate the condition in ``G[0]`` into a Python expression string.

    Only the first element of ``G`` is read.  It is split into individual
    conditions on the word ``and``; in each condition the comparison operator
    is rewritten to its Python spelling (``=`` -> ``==``, ``<>`` -> ``!=``),
    the operands are passed through ``compute_str_process`` and the results
    are joined back together with ``" and "``.

    Example:
        input  = ["2_count_prod+2= 1_count_prod+2 and 1_avg_quant<>0"]
        output = "MFVector[index]['2_count_prod']+2==MFVector[index]['1_count_prod']+2"
                 " and MFVector[index]['1_avg_quant']!=0"

    Other connectives such as ``or`` are not handled.

    Raises:
        ValueError: if a condition contains no comparison operator.
    """
    # (operator as written in the input, Python operator padded with spaces).
    # Order matters: the first entry found in a condition is the one used, so
    # every two-character operator must come before the single characters it
    # contains ('>=' before '=', '<>' before '>' and '<', ...).
    operators = [
        (">=", " >= "),
        ("<=", " <= "),
        ("!=", " != "),
        ("==", " == "),
        ("=", " == "),
        ("<>", " != "),
        (">", " > "),
        ("<", " < "),
    ]

    having_conditions = []
    # Split on the whole word only, so identifiers that merely contain the
    # letters "and" (e.g. 1_sum_brand) stay intact.
    for condition in re.split(r"\band\b", G[0], flags=re.IGNORECASE):
        for source_op, python_op in operators:
            if source_op in condition:
                spaced = condition.replace(source_op, python_op)
                break  # only the first matching operator is rewritten
        else:
            # Without this check the tokens of the previous condition would
            # be reused silently (or the name would be unbound).
            raise ValueError(
                "no comparison operator in condition: {!r}".format(condition)
            )

        # The padding added above makes a split on blanks separate the
        # operator from its operands, e.g.
        # ['2_count_prod+2', '==', '1_count_prod+2'].
        tokens = [tok.strip() for tok in spaced.split(" ") if tok.strip()]

        # compute_str_process returns the already-joined string for the
        # condition, with the MFVector[index][...] prefix added.
        having_conditions.append(compute_str_process(tokens))
    return " and ".join(having_conditions)
########################################
# Process the tokens of one condition and join them into a single string, e.g.
# ['1_sum_quant/2', "==", "3"] ==> "MFVector[index]['1_sum_quant']/2==3"
########################################
def compute_str_process(x):
    """Prefix column names in the tokens of a condition and join the tokens.

    ``x`` is a list of string tokens, typically ``[left, operator, right]``
    as produced after the comparison operator has been isolated.  Tokens that
    contain no underscore (``'=='``, ``'0'``, ``'cust'`` ...) are copied
    unchanged.  A token that contains an underscore is treated as an
    arithmetic expression: it is cut at ``+ - * /`` and parentheses, numeric
    literals are kept, and every other operand becomes
    ``MFVector[index]['<operand>']``.

    Returns the concatenation of all processed tokens as ONE string.

    Examples:
        ['1_sum_quant/2', '==', '3']
            -> "MFVector[index]['1_sum_quant']/2==3"
        ['2_count_prod+2', '==', '1_count_prod+2']
            -> "MFVector[index]['2_count_prod']+2==MFVector[index]['1_count_prod']+2"
        ['2*1_sum_quant', '>', '1_avg_quant*1.5']
            -> "2*MFVector[index]['1_sum_quant']>MFVector[index]['1_avg_quant']*1.5"
    """
    symbols = set("+-*/()")
    out = []
    for frac in x:
        if "_" not in frac:
            # Comparison operator, literal or plain name: leave untouched.
            out.append(frac)
            continue

        # The capturing group keeps the arithmetic symbols in the result, so
        # the expression can be rebuilt in its original order.  Raw strings
        # avoid the invalid-escape warnings of '\w', '\+', ... literals.
        for piece in re.split(r"([+\-*/()])", frac):
            operand = piece.strip()
            if not operand:
                continue  # empty string between two adjacent symbols
            if operand in symbols:
                out.append(operand)
            elif re.fullmatch(r"\d+(?:\.\d*)?|\.\d+", operand):
                out.append(operand)  # integer or decimal literal
            else:
                out.append("MFVector[index]['" + operand + "']")
    return "".join(out)
########################################
### To be done - input args
########################################
#
# parser = argparse.ArgumentParser()
# parser.add_argument("-i", "--input-file", dest="input_file", type=str, metavar='<str>', default=None)
# parser.add_argument("-o", "--output-file", dest="output_file", type=str, metavar='<str>', default="output_df")
# args = parser.parse_args()
# print(args)
#
# import argparse
# parser = argparse.ArgumentParser()
# parser.add_argument("-i", "--input-file", dest="input_file", type=str, metavar='<str>', default="input.txt")
# parser.add_argument("-o", "--output-file", dest="output_file", type=str, metavar='<str>', default="out1.py")
# # parser.add_argument("-path", "--path", dest="path", type=str, metavar='<str>', default="E:/OneDrive - stevens.edu/Stevens BIA/CS562/Proj/EMF2PY/Input_files/")
# args = parser.parse_args()
# print(args.path)