-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathdihedral_parameter_v2.py
More file actions
217 lines (154 loc) · 5.87 KB
/
dihedral_parameter_v2.py
File metadata and controls
217 lines (154 loc) · 5.87 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Nov 26 11:20:11 2021
@author: chingchinglam
#update from v2:
# introduce functions that process the dihedral angle df
"""
import numpy as np
import pandas as pd
def dihedral_para(df):
    '''
    Summarise every column of a dihedral angle table with basic statistics.

    Parameters
    ----------
    df : dataframe
        table of dihedral angle values with the rotational bond as the column index

    Returns
    -------
    pd_outcome : dataframe
        one row per column of df, with the columns 'bond' (the column label),
        'stand_dev', 'mean', 'median', 'max', 'min' and 'range' (max - min)
    '''
    ## work on the raw values; column k holds the angles of the k-th bond
    values = df.to_numpy()

    ## the number of bonds is read from the first row, so a table
    ## without any rows raises an IndexError here
    n_bonds = values[0, :].shape[0]
    per_bond = [values[:, k] for k in range(n_bonds)]

    ## one list per statistic, in the same order as the column labels
    summary = {
        'bond': list(df.columns),
        'stand_dev': [np.std(angles) for angles in per_bond],
        'mean': [np.mean(angles) for angles in per_bond],
        'median': [np.median(angles) for angles in per_bond],
        'max': [np.amax(angles) for angles in per_bond],
        'min': [np.amin(angles) for angles in per_bond],
        'range': [np.amax(angles) - np.amin(angles) for angles in per_bond],
    }
    return pd.DataFrame(summary)
def dihedral_para_ab(df):
    '''
    Statistical parameters of the absolute dihedral angle values.

    Parameters
    ----------
    df : dataframe
        table of dihedral angle values with the rotational bond as the column index

    Returns
    -------
    dataframe
        output of dihedral_para computed on the element-wise absolute values of df
    '''
    ## take the absolute value of every entry, then reuse the standard analysis
    return dihedral_para(np.absolute(df))
def gen_merge_df(df):
    '''
    Build one table with the spread of the signed and the absolute dihedral values.

    Parameters
    ----------
    df : dataframe
        table of dihedral angle values with the rotational bond as the column index

    Returns
    -------
    merge_df : dataframe
        columns 'bond', 'stand_dev', 'range' (from the signed values) and
        'stand_dev_ab', 'range_ab' (from the absolute values), joined on 'bond'
    '''
    ## only the bond label and the two spread measures are carried forward
    wanted = ['bond', 'stand_dev', 'range']

    signed_stats = dihedral_para(df)[wanted]
    ## suffix the absolute-value statistics so they do not clash in the merge
    absolute_stats = dihedral_para_ab(df)[wanted].rename(
        columns={'stand_dev': 'stand_dev_ab', 'range': 'range_ab'}
    )
    return pd.merge(signed_stats, absolute_stats, on='bond')
def list_fixed_bond(df, sd_max=2.4, range_max=8.7, wrap_range_min=359.2,
                    sd_ab_max=1.5, range_ab_max=6.6):
    '''
    List the bonds whose dihedral angle hardly varies across the table.

    A bond is reported as fixed when either of these holds:

    * the standard deviation of its dihedral values is below ``sd_max`` and
      their range is below ``range_max``; or
    * the range is above ``wrap_range_min`` while the standard deviation and
      the range of the absolute values are below ``sd_ab_max`` and
      ``range_ab_max``. (Presumably this catches angles that only flip sign
      close to +/-180 degrees - confirm with the author.)

    Parameters
    ----------
    df : dataframe
        table of dihedral angle values with the rotational bond as the column index
    sd_max : float, optional
        upper limit (exclusive) on the standard deviation of the signed values
    range_max : float, optional
        upper limit (exclusive) on the range of the signed values
    wrap_range_min : float, optional
        lower limit (exclusive) on the range of the signed values for the
        second criterion
    sd_ab_max : float, optional
        upper limit (exclusive) on the standard deviation of the absolute values
    range_ab_max : float, optional
        upper limit (exclusive) on the range of the absolute values

    Returns
    -------
    remove_bond_format : list
        list of fixed bonds - to be removed - each label split on '_' into
        its first two parts, e.g. [['C 1', 'C 2'], ...]
    remove_bond : list
        list of fixed bonds - to be removed - input for remove_fixed_bond_df
    merge_df : dataframe
        df of statistical parameters

    Raises
    ------
    IndexError
        if a selected bond label does not contain a '_' separator
    '''
    ## calculate the statistical parameters with the shared helper
    ## instead of repeating its body here
    merge_df = gen_merge_df(df)

    ## criterion 1: small standard deviation and small range
    tight = (merge_df['stand_dev'] < sd_max) & (merge_df['range'] < range_max)

    ## criterion 2: very large range, but the absolute values barely move
    wrapped = (
        (merge_df['range'] > wrap_range_min)
        & (merge_df['stand_dev_ab'] < sd_ab_max)
        & (merge_df['range_ab'] < range_ab_max)
    )

    ## columns are addressed by name, so the selection does not depend on
    ## the column order of merge_df; row order is preserved
    remove_bond = merge_df.loc[tight | wrapped, 'bond'].tolist()

    ## 'A_B' -> ['A', 'B']
    remove_bond_format = []
    for label in remove_bond:
        atoms = label.split('_')
        remove_bond_format.append([atoms[0], atoms[1]])

    return remove_bond_format, remove_bond, merge_df
##################### embedded functions above
def remove_fixed_dihedrals(df, dihedral_list, bond_list):
    '''
    Drop the fixed bonds, and the dihedrals stored at the same positions,
    from the bond and dihedral lists.

    Parameters
    ----------
    df : dataframe
        table of dihedral angle values with the rotational bond as the column index
    dihedral_list : list
        list of dihedrals [['C 1','C 2','C 3', 'C 4'],[...]]
    bond_list : list
        list of bonds [['C 1','C 2'],['C 3', 'C 4'],[...]]

    Returns
    -------
    new_dihedral_list : list
        list of dihedrals [['C 1','C 2','C 3', 'C 4'],[...]]
        after removing the fixed dihedrals
    new_bond_list : list
        list of bonds [['C 1','C 2'],['C 3', 'C 4'],[...]]
        after removing the fixed bonds
    remove_bond : list
        list of fixed bonds - to be removed - input for remove_fixed_bond_df
    '''
    ## conduct the statistical parameter analysis; the third item
    ## (the statistics table) is not needed here
    fixed_pairs, fixed_labels, _ = list_fixed_bond(df)

    ## a fixed bond that appears in bond_list points, through the position
    ## of its first occurrence, at the dihedral to be dropped
    fixed_dihedrals = [
        dihedral_list[bond_list.index(pair)]
        for pair in fixed_pairs
        if pair in bond_list
    ]

    ## generate the new dihedral and bond lists
    kept_dihedrals = [entry for entry in dihedral_list if entry not in fixed_dihedrals]
    kept_bonds = [pair for pair in bond_list if pair not in fixed_pairs]

    return kept_dihedrals, kept_bonds, fixed_labels
def remove_fixed_bond_df(df, remove_label):
    '''
    Split the dihedral angle table into the columns to keep and the columns
    to remove.

    Parameters
    ----------
    df : dataframe
        table of dihedral angle values with the rotational bond as the column index
    remove_label : list
        list of fixed bonds - to be removed ['C 1_C 2', 'C 3_C 4', ...]

    Returns
    -------
    df_new : dataframe
        table of dihedral angle values with the rotational bond as the column index;
        the columns listed in remove_label are removed and the remaining
        columns keep their order from df
    df_remove : dataframe
        the columns of df listed in remove_label, in the order of remove_label

    Raises
    ------
    KeyError
        if a label in remove_label is not a column of df
    '''
    ## select the leftover columns with a boolean mask: unlike a set
    ## difference, this keeps the column order of df and is therefore
    ## reproducible from run to run
    keep_mask = ~df.columns.isin(remove_label)

    ## final df (selected dihedrals after the filter)
    df_new = df.loc[:, keep_mask]

    ## dihedrals filtered out by the system
    df_remove = df[remove_label]

    return df_new, df_remove