DC50/Script V2.0 at main · aidanborkan/DC50 · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
# Import statements
import hillfit
import numpy as np
import matplotlib.pyplot as plt
import csv
from scipy.optimize import curve_fit

#start by defining the Hill Equation
#x is independent variable - drug concentration with scale in nanomolar (nM)
#y = bottom + ((top - bottom) * xnH) / (EC50nH+ xnH)

def HillEquation(x, bottom, top, EC50, nH):
    #setting a lower bound for EC50 - restriction of left boundary
    #non-negative x, also non-zero
    x = np.maximum(x, 0.01)

    return bottom + ((top-bottom) * (x ** nH) / (EC50 ** nH + (x ** nH)))

    #denominator = EC50 ** nH + (x ** nH)
    #we need to handle the case when x (concetration) is 0
    #denominator = np.where(denominator == 0, np.finfo(float).eps, denominator)

#Note: the ratios are extracted from imageJ analysis of band intensity of target analyte and loading control stored in a CSV
#04.02.2024: Protein Normalization module incorporated with automated JESS western which uses in-capillary electrophoresis
#Electropherogram measures chemiluminescent signal intensity of target analyte and is normalized against the total amount of protein within sample.
# Initialize am empty list for normalized total area under the curve ('Total AUC')

Total_AUC_list = []
#concentration scale can be modified - this example relies on nanomolar
#initialize an empty list for concentrations
concentrations = []

# Read data from the CSV file - modify file path accordingly
with open('PT65_DC50_03262024.csv', 'r') as file:
    f_csv = csv.reader(file)
    # Skip the header row
    header = next(f_csv)

    # Iterate over each row in the CSV file
    for row in f_csv:
        #modulate here based off location of sample identifiers
        #python begins with an index of 0
        sample = row[1]
        Total_AUC_str, concentration_str = row[2], row[3]

        # Get rid of trailing and leading whitespaces
        Total_AUC_str = Total_AUC_str.strip()
        concentration_str = concentration_str.strip()

        #extract our ratio value
        Total_AUC = float(Total_AUC_str)

        #Extract the concentration value
        concentration = float(concentration_str.split()[0])

        # Case handling if the conversion to float fails
        try:
            Total_AUC_value = float(Total_AUC_str)
        except ValueError:
            continue

        #Append concentration to concentration list
        #Append ratio to ratio list
        Total_AUC_list.append(Total_AUC_value)
        concentrations.append(concentration)

# Convert sample and ratio lists to numpy arrays
#to do this we need to split the sample name with whitespace as the delimiter and extract the numeric value
#converting this to a float

concentrations = np.array(concentrations)
Total_AUC = np.array(Total_AUC_list, dtype=float)

# Need to normalize our data
normalized_Total_AUC = (Total_AUC - np.min(Total_AUC)) / (np.max(Total_AUC) - np.min(Total_AUC))

#normalized ratio is a 2D array - reshape the concentration
concentrations_reshaped = concentrations.reshape(-1)

# Generate x values for the curve
#can modulate the axis length

x_values = np.linspace(np.min(concentrations_reshaped), np.max(concentrations_reshaped), 100)

#can set a vertifical shift parameter
def HillEquationWithInit(x, bottom, top, EC50, nH):
    return HillEquation(x, bottom_init, top_init, EC50, nH)

# Curve fitting
# Need to set initial parameter values for expected behavior
# this calculates the initial values of the parameters of the curve fitting function
#setting p0 by providing intial guesses for the parameters - [bottom, top, EC50, nH] and nH is slope which
#I will set as default 1

bottom_init = np.min(normalized_Total_AUC)
top_init = np.max(normalized_Total_AUC)
EC50_init = 30.0
nH_init = 1.0
# Initial guesses for the parameters of the curve function
p0 = [bottom_init, top_init, EC50_init, nH_init]

#must pass this as an argument to curve_fit
popt, pcov = curve_fit(HillEquationWithInit, concentrations_reshaped, normalized_Total_AUC, p0=p0, maxfev=5000)
#print
print(popt)

#Generate the curve using the fitted parameters
# Generate y values
y_values = HillEquation(x_values, *popt)

# Create scatter plot for individual data points
marker_size = 50
plt.scatter(concentrations, normalized_Total_AUC, s=marker_size, color='blue', label='Data')

#Define the Hill equation: taken from Github
# y = bottom + ((top - bottom) * xnH) / (EC50nH+ xnH)
# https://github.com/himoto/hillfit

# Define 50% reduction (This is DC50)
threshold_50 = np.min(normalized_Total_AUC) + (np.max(normalized_Total_AUC) - np.min(normalized_Total_AUC)) * 0.5
half_reduction_index = np.argmin(np.abs(normalized_Total_AUC - threshold_50))

#Tell python where the concentration = 0 (since we cannot divide by 0, x was set to 0.01 for DMSO)
zero_index = np.argmin(np.abs(concentrations - 0))

#calculate the DC50
half_reduction_index_relative = zero_index + half_reduction_index

# Define full TPD
threshold_100 = np.min(normalized_Total_AUC)
full_reduction_index = np.argmin(np.abs(normalized_Total_AUC - threshold_100))

# Plot the curve fit
plt.plot(x_values, y_values, 'r-', label='Curve Fit')

# Add vertical lines for 50% and 100% TPD
plt.axvline(x=popt[2], color='purple', linestyle='--', label='50% TPD')

#customized x-axis labels
unique_concentrations = np.unique(concentrations)
# Rotate x-axis labels for better visibility
tick_indices = np.linspace(0, len(unique_concentrations) - 1, num=len(unique_concentrations), dtype=int)
tick_labels = unique_concentrations[tick_indices]
plt.xticks(concentrations, rotation=90)
#labels and title
plt.xlabel('Dosage Concentration (nM)')
plt.ylabel('Normalized Total AUC of GSK-3α/β')
plt.title('Normalized Ratio GSK-3α/β by Dosage Concentration')
#want a legend
plt.legend(loc='best')
#grid format
plt.grid(True)
plt.show()