-
Notifications
You must be signed in to change notification settings - Fork 13
Expand file tree
/
Copy pathutils.py
More file actions
763 lines (614 loc) · 29.2 KB
/
utils.py
File metadata and controls
763 lines (614 loc) · 29.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
# import packages
import numpy as np
import pandas as pd
import os
from itertools import product
from collections.abc import Iterable
def allowed_output(value, reaction_vol_nl=20000, drop_size_nl=100, verbose=0):
"""Based on high ,low and stock concentrations and droplet size calculate how many combinations is possible
Parameters
----------
value: tuple
(low, high, stock concentration)
Returns
-------
calculated_concs:
a list of possible concentrations
calculated_vols:
a list of possible volumes
"""
if value['Conc_Values']:
if isinstance(value['Conc_Stock'], Iterable):
drop_nums = [i * reaction_vol_nl / (drop_size_nl * value['Conc_Stock'][find_stock(value['Conc_Values'], value['Conc_Stock'], i)[0]]) for i in value['Conc_Values']]
calculated_concs = value['Conc_Values']
else:
drop_nums = [i * reaction_vol_nl / (drop_size_nl * value['Conc_Stock']) for i in value['Conc_Values']]
calculated_concs = value['Conc_Values']
else:
drop_nums = list(range(int((value['Conc_Min'] * reaction_vol_nl) / (drop_size_nl * value['Conc_Stock'])),
int((value['Conc_Max'] * reaction_vol_nl) / (drop_size_nl * value['Conc_Stock'])) + 1))
calculated_concs = [drop_num * value['Conc_Stock'] * drop_size_nl / reaction_vol_nl for drop_num in drop_nums]
if verbose:
print('drops :', drop_nums)
print('volumes :', [i * drop_size_nl for i in drop_nums])
print('possible_concentrations :', calculated_concs)
else:
return calculated_concs, [i * drop_size_nl for i in drop_nums]
def percentage_possible(data, threshold=40):
"""Based on threshold volume, it calculates how many combinations of all metabolite is possible to make
Parameters
----------
data: dict
{'meatbolite name':[possible volumes], ...}
Returns
-------
percentage possible: float
total: int
total number of combinations (includes forbidden one)
"""
lists = list(data.values())
m = [len(i) for i in data.values()]
total = np.prod(np.array([len(i) for i in data.values()]))
possible = 0
for items in product(*lists):
if sum(items) <= threshold:
possible += 1
return (possible/total*100), total
def find_stock(conc_values, conc_stocks, this_value):
"""this function find each concentration value belongs to wich stock concentration for metabolites with multiple stocks
Parameters
----------
conc_values: list
a list of all possible concentration
conc_stocks: list
a list of all stocks concentration
this_value: float, int
concentration value that we find to find its stock
Returns
-------
i:
index of found stock
out:
value of found stock
"""
num = len(conc_stocks)
avg = len(conc_values) / float(num)
out = []
last = 0.0
while last < len(conc_values):
out.append(conc_values[int(last):int(last + avg)])
last += avg
for i, value in enumerate(out):
if this_value in value:
return i, out
# random combination generator function_v3.0
def random_combination_generator(concentrations_limits, number_of_combination=100, reaction_vol_nl=10000,
max_nl=None, drop_size_nl=100, check_repeat=True, rounded=2, verbose=0, make_csv=False, return_df=False):
"""this function make random combination that is safe (e.g. dont make too much or low concentrated, not excecutable based on drop size, not repetitive)
Parameters
----------
concentrations_limits: dict
{'name of metabolite': {'Conc_Min': #, 'Conc_Max': #, 'Conc_Values': #, 'Conc_Stock': #, 'Alternatives': #}, ...}
Returns
-------
data: pandas.DataFrame
a dataframe as consists of number_of_combination of random combinations
"""
# generating random combinations
combinations = []
data_point = 0
while data_point < number_of_combination:
input_data = []
input_vol = []
# verbosity
if (data_point % 10000 == 0) and verbose:
print(data_point)
# generation of random input
for key, value in concentrations_limits.items():
# Manual Concentration Value Generation
if value['Conc_Values']:
# With Alternatives
if value['Alternatives']:
num_alternative = len(value['Alternatives'])
choice_alternative = np.random.randint(0, num_alternative)
choice_list = [0 for i in range(num_alternative)]
choice_list[choice_alternative] = 1
choice_conc = np.random.choice(value['Conc_Values'])
input_data.append(choice_conc)
input_data += choice_list
if isinstance(value['Conc_Stock'], Iterable):
choice_stock, _ = find_stock(value['Conc_Values'], value['Conc_Stock'], choice_conc)
input_vol.append(choice_conc/value['Conc_Stock'][choice_stock]*reaction_vol_nl)
else:
input_vol.append(choice_conc/value['Conc_Stock']*reaction_vol_nl)
# Without Alternatives
else:
choice_conc = np.random.choice(value['Conc_Values'])
input_data.append(choice_conc)
if isinstance(value['Conc_Stock'], Iterable):
choice_stock, _ = find_stock(value['Conc_Values'], value['Conc_Stock'], choice_conc)
input_vol.append(choice_conc/value['Conc_Stock'][choice_stock]*reaction_vol_nl)
else:
input_vol.append(choice_conc/value['Conc_Stock']*reaction_vol_nl)
# Auto Concentration Value Generation
else:
# With Alternatives
if value['Alternatives']:
num_alternative = len(value['Alternatives'])
choice_alternative = np.random.randint(0, num_alternative)
choice_list = [0 for i in range(num_alternative)]
choice_list[choice_alternative] = 1
drop_num = np.random.randint(round(value['Conc_Min'] * (reaction_vol_nl / drop_size_nl) / value['Conc_Stock']),
round(value['Conc_Max'] * (reaction_vol_nl / drop_size_nl) / value['Conc_Stock']) + 1)
recalculated_conc = drop_num * value['Conc_Stock'] * drop_size_nl / reaction_vol_nl
input_data.append(recalculated_conc)
input_data += choice_list
input_vol.append(recalculated_conc/value['Conc_Stock']*reaction_vol_nl)
# Without Alternatives
else:
drop_num = np.random.randint(round(value['Conc_Min'] * (reaction_vol_nl / drop_size_nl) / value['Conc_Stock']),
round(value['Conc_Max'] * (reaction_vol_nl / drop_size_nl) / value['Conc_Stock']) + 1)
recalculated_conc = drop_num * value['Conc_Stock'] * drop_size_nl / reaction_vol_nl
input_data.append(recalculated_conc)
input_vol.append(recalculated_conc/value['Conc_Stock']*reaction_vol_nl)
# Checks
if check_repetitive and max_nl:
if input_data not in combinations and sum(input_vol)<= max_nl:
combinations.append(input_data)
data_point += 1
elif check_repetitive and not max_nl:
if input_data not in combinations:
combinations.append(input_data)
data_point += 1
elif not check_repetitive and max_nl:
if sum(input_vol)<= max_nl:
combinations.append(input_data)
data_point += 1
else:
combinations.append(input_data)
data_point += 1
# make column name:
columns_name = []
for key, value in concentrations_limits.items():
if not value['Alternatives']:
columns_name.append(key)
else:
columns_name.append(key)
alternative_name = ['{}_{}'.format(key, i) for i in value['Alternatives']]
columns_name += alternative_name
# making csv file
if make_csv:
data = pd.DataFrame(np.array(combinations), columns=columns_name)
data.to_csv('Random_Combination_1.csv', index=False)
# making dataframe
if return_df:
data = pd.DataFrame(np.array(combinations), columns=columns_name)
return data
return np.array(combinations)
# transform concentration DataFrame to volume (nanolitre) DataFrame
def concentration_to_volume(concentrations, concentrations_limits, reaction_mixture_vol_nl=10000,
fixed_parts={'Lysate': 0.33, 'Saline': 0.1}, round_deg=1, check_water=True):
"""Transform concentrations dataframe to volumes dataframe
option: add a fixed volumes to all combinations like Lysate
caution: concentrations unit and metabolite name in concentrations and concentrations_limits must be the same.
Parameters
----------
concentrations: pandas.DataFrame
random_combination_generator output
Returns
-------
data: pandas.DataFrame
a dataframe same as input in shape but volumes data
"""
# make a copy of original dataframe to avoid further change than can affect that
data = concentrations.copy(deep=True)
data_all = data.copy(deep=True)
data = data[[i for i in data.columns if '_' not in i]]
data *= reaction_mixture_vol_nl
for metabolite_name, value in concentrations_limits.items():
if isinstance(value['Conc_Stock'], Iterable):
print()
data[metabolite_name] = [round(data[metabolite_name][i] / value['Conc_Stock'][find_stock(value['Conc_Values'], value['Conc_Stock'], data_all[metabolite_name][i])[0]], round_deg) for i in range(len(data[metabolite_name]))]
else:
data[metabolite_name] = [round(data[metabolite_name][i] / value['Conc_Stock'], round_deg) for i in range(len(data[metabolite_name]))]
# add fix parts
if fixed_parts:
for key, value in fixed_parts.items():
data[key] = reaction_mixture_vol_nl * value
# add water to reach the reaction_mixture_vol_nl
data['water'] = reaction_mixture_vol_nl - data.sum(axis=1)
# for low stock concentration that is not possible to make, raise an error
# stock conc should be set in a way that doesn't raise this error to avoid further debugging
if check_water and not all(data['water'] >= 0): raise Exception("Oops, too concentrated combination!")
# add alternative
# make columns name list:
columns_name = []
Type_dic = {}
Stock_dic = {}
for key, value in concentrations_limits.items():
if value['Alternatives']:
columns_name.append(key)
columns_name.append('{}_Type'.format(key))
Type_dic[key] = []
else:
columns_name.append(key)
if isinstance(value['Conc_Stock'], Iterable):
columns_name.append('{}_Stock_Type'.format(key))
Stock_dic[key] = []
# Alternatives
for key in Type_dic.keys():
data_type = data_all[[i for i in data_all.columns if '{}_'.format(key) in i]]
for i in data_type.values:
Type_dic[key].append(concentrations_limits[key]['Alternatives'][np.where(i == 1.0)[0][0]])
Type_list = list(Type_dic.keys())
for key in Type_list:
Type_dic['{}_Type'.format(key)] = Type_dic.pop(key)
# Stock
for key in Stock_dic.keys():
Stock_dic[key] = [concentrations_limits[key]['Conc_Stock'][find_stock(concentrations_limits[key]['Conc_Values'], concentrations_limits[key]['Conc_Stock'], i)[0]] for i in data_all[key]]
Stock_list = list(Stock_dic.keys())
for key in Stock_list:
Stock_dic['{}_Stock_Type'.format(key)] = Stock_dic.pop(key)
data_final = pd.concat([data, pd.DataFrame(Type_dic), pd.DataFrame(Stock_dic)], axis=1)
return data_final[columns_name + list(fixed_parts.keys()) + ['water']]
def day_finder(file, file_format='csv'):
"""Find the first notcompleted day
Parameters
----------
file:
for now, it can only be 'Results'
Returns
-------
i: int
the first not completed day
"""
i = 1
while True:
if not os.path.isfile('{}_{}.{}'.format(file, i, file_format)):
return i
i += 1
def result_preprocess(day, desired_cols, range=20):
"""Preprocess Results.csv file to get desired columns and rows
caution: the target column name MUST be 'yield'
Parameters
----------
day:
Results_day.csv
desired_cols:
name of columns that you want from the results file
Returns
-------
data_m:
data in range
label_m:
label in range
data_specials:
other data
label_specials:
other labels
"""
results = pd.read_csv('Results_{}.csv'.format(day, day))
# m number pipeline
data_m = results[desired_cols].iloc[:range, :]
label_m = results[['yield']].iloc[:range, :]
# reference, control and specials
data_specials = results[desired_cols].iloc[range:, :]
label_specials = results[['yield']].iloc[range:, :]
return data_m, label_m, data_specials, label_specials
def check_repetitive(combination, df_main):
"""Check to avoid repetitive combinations
Parameters
----------
combination:
combinations that want to be checked
df_main: pandas.DataFrame
source dataframe
Returns
-------
boolean:
True: it exists in df_main
False: it's not
"""
comparison_df = df_main.merge(combination, indicator=True, how='outer')
if 'both' in comparison_df._merge.unique():
return False
else:
return True
def bayesian_optimization(regressors_list,
data, label,
concentrations_limits,
final_order,
df_main,
reaction_vol_nl=20000, max_nl=13200, drop_size_nl=100,
exploitation=1, exploration=1, test_size=100, pool_size=100000, verbose=0, day=1,
days_range=[20, 20, 20, 20, 20, 20, 20, 20, 20, 20],
batch_ucb=False):
"""Main bayesian optimization function
Parameters
----------
regressors_list:
a list consists of more than one regressor that has .fit and .predict feature
data: pandas.DataFrame
all previous day data
label: pandas.DataFrame
all previous day label
exploitation: 1
coefficient of focus on higher yield query
exploration: 1
coefficient of focus on a more informative query
test_size: 100
output combinations number
pool_size: 100000
how many random combinations to ask from the regressor list each round
caution: this parameter highly affects executions time
Returns
-------
chosen_combinations: pandas.DataFrame
combinations that expected to improve yield
if batch_ucb == True
Returns
-------
best sample based on ucb: pandas.Series, best sample's expected value: float
"""
# first fit training data on our models
for regressor in regressors_list:
regressor.fit(data.values, label.values)
# make random test data
df_1 = random_combination_generator(concentrations_limits, number_of_combination=pool_size,
reaction_vol_nl=reaction_vol_nl,
max_nl=max_nl, drop_size_nl=drop_size_nl, make_csv=False, return_df=True)
desired_cols = list(df_1.columns)
df_temp = df_1.copy(deep=True)
# Upper Confidence Bound
for index, regressor in enumerate(regressors_list):
df_1['pred_yield_{}'.format(index)] = regressor.predict(df_temp.values)
df_1['regressors_std'] = df_1[[str(i) for i in df_1.columns if 'pred_yield' in str(i)]].std(axis=1)
df_1['mean_vote'] = df_1[[str(i) for i in df_1.columns if 'pred_yield' in str(i)]].mean(axis=1)
df_1['UCB'] = exploitation * df_1['mean_vote'] + exploration * df_1['regressors_std']
df_1 = df_1.sort_values(['UCB'], ascending=False)
if batch_ucb:
return df_1[final_order].iloc[0:1, :], df_1['mean_vote'].values[0]
# check to don`t make repeated combinations, but it is not likely
chosen_combinations = pd.DataFrame(columns=desired_cols)
num = 0
for i in df_1[desired_cols].values:
temp_combination = pd.DataFrame([i], columns=desired_cols)
if check_repetitive(temp_combination, df_main):
num += 1
chosen_combinations = pd.concat([chosen_combinations, temp_combination]).reset_index(drop=True)
if num == test_size:
break
return chosen_combinations[final_order]
# Batch UCB on top of bayesian optimization
def batch_ucb(regressors_list,
data, label,
concentrations_limits,
final_order,
df_main,
reaction_vol_nl=20000, max_nl=13200, drop_size_nl=100,
exploitation=1, exploration=1, test_size=100, pool_size=100000, verbose=0, day=1,
days_range=[20, 20, 20, 20, 20, 20, 20, 20, 20, 20]):
"""Batch UCB on top of bayesian optimization function
Parameters
----------
regressors_list:
a list consists of more than one regressor that has .fit and .predict feature
data: pandas.DataFrame
all previous day data
label: pandas.DataFrame
all previous day label
exploitation: 1
coefficient of focus on higher yield query
exploration: 1
coefficient of focus on a more informative query
test_size: 100
output combinations number
pool_size: 100000
how many random combinations to ask from the regressor list each round
caution: this parameter highly affects executions time
Returns
-------
chosen_combinations: pandas.DataFrame
combinations that expected to improve yield
"""
final_samples = []
for i in range(test_size):
sample, expected_value = bayesian_optimization(regressors_list, data, label, concentrations_limits,
final_order=final_order,
df_main = df_main,
reaction_vol_nl=reaction_vol_nl, max_nl=max_nl,
drop_size_nl=drop_size_nl,
exploitation=exploitation, exploration=exploration, test_size=test_size, pool_size=pool_size, verbose=0, day=day, days_range = days_range,
batch_ucb=True)
final_samples.append(sample)
data = pd.concat([data, sample], axis=0).reset_index(drop=True)
label = pd.concat([label, pd.DataFrame({'yield': [expected_value]})], axis=0).reset_index(drop=True)
return pd.concat(final_samples)
# ECHO functions
def put_volumes_to_384_wells(volumes_array, starting_well='A1', vertical=False, make_csv=False):
"""Make a dataframe as a 384 well plate for each metabolite
Parameters
----------
volumes_array:
a dataframe with columns are component, each row vol of that component (e.g. volumes.csv)
starting_well: 'A1'
name of the well in 384 well plates that you want to start filling
vertical:
if True, it will fill the plate column by column top down
if False, it will fill the plate row by row, left to right
Returns
-------
all_dataframe:
a list consists of one dataframe for each metabolite that shows appropriate 384 well plate
named_volumes:
one separate dataframe that adds well name to volume dataframe
"""
if len(volumes_array) > 384: raise ValueError
all_dataframe = {}
rows_name = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P']
if not vertical:
from_well = rows_name.index(starting_well[0]) * 24 + int(starting_well[1:]) - 1
# make each metabolite`s dataframe and add it to dict
for metabolite_name in volumes_array.columns:
# first make an all zero dataframe
dataframe = pd.DataFrame(0.0, index=rows_name, columns=range(1, 25))
# add data one by one in each row
# (0, 0)--------->(0,23)
# .......................
# (15,0)--------->(15,23)
for index, value in enumerate(volumes_array[metabolite_name]):
index += from_well
dataframe.iloc[index // 24, index % 24] = value
all_dataframe[metabolite_name] = dataframe
# make named volumes dataframe
named_volumes = volumes_array.copy(deep=True)
names = ['{}{}'.format(rows_name[index // 24], index % 24 + 1) for index in named_volumes.index]
named_volumes['well_name'] = names
if vertical:
from_well = rows_name.index(starting_well[0]) + (int(starting_well[1:]) - 1) * 16
# make each metabolite`s dataframe and add it to dict
for metabolite_name in volumes_array.columns:
# first make an all zero dataframe
dataframe = pd.DataFrame(0.0, index=rows_name, columns=range(1, 25))
# add data one by one in each column
# (0, 0)---->-----(0,23)
# ||||||..........||||||
# (15,0)---->-----(15,23)
for index, value in enumerate(volumes_array[metabolite_name]):
index += from_well
dataframe.iloc[index % 16, index // 16] = value
all_dataframe[metabolite_name] = dataframe
# make named volumes dataframe
named_volumes = volumes_array.copy(deep=True)
names = ['{}{}'.format(rows_name[(index + from_well) % 16], (index + from_well) // 16 + 1) for index in
named_volumes.index]
named_volumes['well_name'] = names
# notice that this function output two value
return named_volumes, all_dataframe
def put_volumes_to_96_wells(volumes_array, starting_well='A1', vertical=False, make_csv=False):
"""Make a dataframe as a 96 well plate for each metabolite
Parameters
----------
volumes_array:
a dataframe with columns are component, each row vol of that component (e.g. volumes.csv)
starting_well: 'A1'
name of the well in 96 well plates that you want to start filling
vertical:
if True, it will fill the plate column by column top down
if False, it will fill the plate row by row, left to right
Returns
-------
all_dataframe:
a list consists of one dataframe for each metabolite that shows appropriate 384 well plate
named_volumes:
one separate dataframe that adds well name to volume dataframe
"""
if len(volumes_array) > 96: raise ValueError
all_dataframe = {}
rows_name = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H']
if not vertical:
from_well = rows_name.index(starting_well[0]) * 12 + int(starting_well[1:]) - 1
# make each metabolite`s dataframe and add it to dict
for metabolite_name in volumes_array.columns:
# first make an all zero dataframe
dataframe = pd.DataFrame(0.0, index=rows_name, columns=range(1, 13))
# add data one by one in each row
# (0, 0)--------->(0,11)
# .......................
# (7,0)--------->(7,11)
for index, value in enumerate(volumes_array[metabolite_name]):
index += from_well
dataframe.iloc[index // 12, index % 12] = value
all_dataframe[metabolite_name] = dataframe
# make named volumes dataframe
named_volumes = volumes_array.copy(deep=True)
names = ['{}{}'.format(rows_name[index // 12], index % 12 + 1) for index in named_volumes.index]
named_volumes['well_name'] = names
if vertical:
from_well = rows_name.index(starting_well[0]) + (int(starting_well[1:]) - 1) * 8
# make each metabolite`s dataframe and add it to dict
for metabolite_name in volumes_array.columns:
# first make an all zero dataframe
dataframe = pd.DataFrame(0.0, index=rows_name, columns=range(1, 13))
# add data one by one in each column
# (0, 0)---->-----(0,11)
# ||||||..........||||||
# (7,0)---->-----(7,11)
for index, value in enumerate(volumes_array[metabolite_name]):
index += from_well
dataframe.iloc[index % 8, index // 8] = value
all_dataframe[metabolite_name] = dataframe
# make named volumes dataframe
named_volumes = volumes_array.copy(deep=True)
names = ['{}{}'.format(rows_name[(index + from_well) % 8], (index + from_well) // 8 + 1) for index in
named_volumes.index]
named_volumes['well_name'] = names
# notice that this function output two value
return named_volumes, all_dataframe
# make source to destination dataframe for ECHO machine
def source_to_destination(named_volumes, desired_order=None, reset_index=True, check_zero=False):
"""Make a dataframe as a 384/96 well plate for each metabolite
Parameters
----------
named_volume:
first output of put_volumes_to_384_wells or put_volumes_to_96_wells function
Returns
-------
all_sources:
separate dataframe for each metabolite that appended in a dict
aggregated:
aggregated all_sources to one CSV file by your desired order
"""
all_sources = {}
for metabolite_name in named_volumes.drop(columns=['well_name']):
transfers = {'Source_Plate_Barcode': [], 'Source_Well': [], 'Destination_Plate_Barcode': [],
'Destination_Well': [], 'Transfer_Volume': []}
for index in range(len(named_volumes)):
if named_volumes.loc[index, metabolite_name] > 0 or check_zero == False:
transfers['Source_Plate_Barcode'].append('Plate1')
transfers['Source_Well'].append('{} well'.format(metabolite_name))
transfers['Destination_Plate_Barcode'].append('destPlate1')
transfers['Destination_Well'].append(named_volumes.loc[index, 'well_name'])
transfers['Transfer_Volume'].append(named_volumes.loc[index, metabolite_name])
transfers = pd.DataFrame(transfers)
all_sources[metabolite_name] = transfers
# aggregate all dataframe
aggregated = pd.concat(all_sources.values())
if desired_order:
aggregated = pd.concat([all_sources[i] for i in desired_order])
if reset_index:
aggregated = aggregated.reset_index(drop=True)
return all_sources, aggregated
def put_volumes_to_wells(volumes_array, plate_384_well=True, vertical=True, triplicate=False, starting_well='A1', make_csv=False):
"""it's a helper function for put_volumes_to_96_wells and put_volumes_to_384_wells that take care of creaating triplicate
Parameters
----------
volumes_array:
a dataframe with columns are component, each row vol of that component (e.g. volumes.csv)
starting_well: 'A1'
name of the well in 96 well plates that you want to start filling
vertical:
if True, it will fill the plate column by column top down
if False, it will fill the plate row by row, left to right
Returns
-------
named_volumes:
one separate dataframe that adds well name to volume dataframe
"""
if plate_384_well:
if triplicate == False:
intermediate, _ = put_volumes_to_384_wells(volumes_array, starting_well=starting_well, vertical=vertical, make_csv=make_csv)
else:
intermediate_1, _ = put_volumes_to_384_wells(volumes_array, starting_well='A1', vertical=vertical, make_csv=make_csv)
intermediate_2, _ = put_volumes_to_384_wells(volumes_array, starting_well='A9', vertical=vertical, make_csv=make_csv)
intermediate_3, _ = put_volumes_to_384_wells(volumes_array, starting_well='A17', vertical=vertical, make_csv=make_csv)
intermediate = pd.concat([intermediate_1, intermediate_2, intermediate_3]).reset_index(drop=True)
else:
if triplicate == False:
intermediate, _ = put_volumes_to_96_wells(volumes_array, starting_well=starting_well, vertical=vertical, make_csv=make_csv)
else:
intermediate_1, _ = put_volumes_to_96_wells(volumes_array, starting_well='A1', vertical=vertical, make_csv=make_csv)
intermediate_2, _ = put_volumes_to_96_wells(volumes_array, starting_well='A5', vertical=vertical, make_csv=make_csv)
intermediate_3, _ = put_volumes_to_96_wells(volumes_array, starting_well='A9', vertical=vertical, make_csv=make_csv)
intermediate = pd.concat([intermediate_1, intermediate_2, intermediate_3]).reset_index(drop=True)
return intermediate