-
Notifications
You must be signed in to change notification settings - Fork 5
Expand file tree
/
Copy pathstructure_factor_calc.py
More file actions
105 lines (88 loc) · 2.91 KB
/
structure_factor_calc.py
File metadata and controls
105 lines (88 loc) · 2.91 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
"""
Calculate the extrapolated structure packing factor (scattering from intermolecular interactions)
based off of a dilution series, then write the structure packing factor as a dat file for further analysis.
Usage:
python3 structure_factor_calc.py sample_directory
Author(s):
Alexander M. Wolff
"""
import pandas as pd
import numpy as np
# from scipy.stats import linregress
import scipy.optimize
# import matplotlib.pyplot as plt
import pathlib
from sys import argv
from collections import defaultdict
from trace import Trace
script, samp_dir = argv
samp_dir = pathlib.Path(samp_dir)
samp_files = list(samp_dir.glob(pattern='*.dat'))
TEMPS = []
CONCS = []
master = defaultdict(lambda: defaultdict())
for file in samp_files:
name = file.name
parent = file.parent
samp, data_type, temp, conc, n1, n2, n3 = name.split('_')
TEMPS.append(temp)
CONCS.append(conc)
data = pd.read_table(file,names=["q","SA","sigSA"], delim_whitespace=True, skiprows=1)
# data = data[data.q>=0.03]
master[temp][conc]=data
TEMPS = sorted(list(set(TEMPS)))
CONCS = sorted(list(set(CONCS)))
def linear(x,a,b):
return a*x+b
for temp in TEMPS:
little = [master[temp][CONCS[0]]['q']]
for conc in CONCS:
little.append(master[temp][conc]['SA'])
little.append(master[temp]['PC0']['sigSA'])
new = pd.concat(little, axis=1, keys=['q','I_pc0', 'I_pc1', 'I_pc2', 'sigSA_pc0'])
I_0s = []
A_s = []
I_0s_errors = []
for i in new.index:
y = np.array([1/(new["I_pc0"][i]),1/(new["I_pc1"][i]),1/(new["I_pc2"][i])])
x = np.array([0.050,0.050/3,0.050/9]) ### hard-coded protein concentrations in grams/L
popt, pcov = scipy.optimize.curve_fit(linear, x, y, method='lm', p0=[-10,10], maxfev=50000)
# I_0 = np.exp(popt[1])
I_0 = 1/popt[1]
slope = popt[0]
I_0_error = np.sqrt(pcov[1][1])
MW = 18500 ### hard-coded protein molecular weight in Da
A = slope*I_0/(2*MW)
I_0s.append(I_0)
A_s.append(A)
I_0s_errors.append(I_0_error)
iz = pd.DataFrame(I_0s)
iz.index = iz.index
az = pd.DataFrame(A_s)
az.index = az.index
ierrorz = pd.DataFrame(I_0s_errors)
ierrorz.index = ierrorz.index
new["I_0"] = iz
new["A"] = az
new["I_0s_errors"] = ierrorz
spf = new.I_pc0/new.I_0
spf_error = spf*np.sqrt((new.sigSA_pc0/new.I_pc0)**2 + (new.I_0s_errors/new.I_0)**2) ### https://terpconnect.umd.edu/~toh/models/ErrorPropagation.pdf
structure_packing_factor = Trace(new.q, np.empty_like(new.q), np.empty_like(new.q), spf_error, spf, np.empty_like(new.q))
structure_packing_factor.write_dat(samp+"_"+temp+"_spf"+".dat")
# print(new)
# plt.figure(num=1, figsize=(9,6))
# plt.scatter(new.q,new.A)
# plt.xlabel("q")
# plt.ylabel("A")
# plt.ylim(-0.001,.001)
# plt.xlim(0,1)
# plt.figure(num=2, figsize=(9,6))
# plt.scatter(new.q,new.I_0)
# # plt.scatter(new.q,new.I_pc0)
# # plt.scatter(new.q,new.I_pc1)
# # plt.scatter(new.q,new.I_pc2)
# plt.xlabel("q")
# plt.ylabel("I_0")
# plt.xscale("log")
# plt.show()
# # plt.ylim(-1,1)