-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathcombination.py
109 lines (84 loc) · 3.2 KB
/
combination.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
"""
Author: Carolina
Date: 02/13/20
Exploring creating combinations of amino PTMs (in this case disulfide bond mods)
"""
from itertools import combinations
from itertools import combinations_with_replacement
def rSubset(arr, r):
    """
    Enumerate every way of choosing r elements from arr without replacement.

    :param arr: a list of elements to choose from
    :param r: the number of elements in each combination
    :return: a list of tuples, one per combination, in the order itertools.combinations yields them
    """
    # Unpack the lazy iterator so the caller receives a concrete, reusable list
    return [*combinations(arr, r)]
def rSubset_rep(arr, r):
    """
    Enumerate every way of choosing r elements from arr with replacement,
    i.e. the same element may appear more than once in a combination.

    :param arr: a list of elements to choose from
    :param r: the number of elements in each combination
    :return: a list of tuples, one per combination, in the order
    itertools.combinations_with_replacement yields them
    """
    # Unpack the lazy iterator so the caller receives a concrete, reusable list
    return [*combinations_with_replacement(arr, r)]
def combo_masses(combinations_list, mods_dict=None):
    """
    Generates the total mass of each combination of PTMs

    :param combinations_list: An iterable of combinations; each combination is an iterable of
    modification names that must be keys of the mass table
    :param mods_dict: Optional mapping of modification name -> mass. When omitted (None) the
    built-in disulfide modification table defined below is used
    :return: A dictionary where the key is str(combination) and the value is the sum of the
    masses of the modifications in that combination (0 for an empty combination)
    :raises KeyError: if a combination names a modification that is not in the mass table
    """
    if mods_dict is None:
        #Default modifications of disulfides
        #NOTE(review): the heme entries subtract 1.0078 while 'h'/'hl' use 1.007825 - confirm
        #the intended precision before relying on these values
        mods_dict = {'sh': 32.97990, 'sshl': -64.95197, 'shl': -32.97990, 'chhsshl': -78.96762,
                     'h': 1.007825, 'hl': -1.007825,
                     'oxyhemeChl': 684.15273 - 2.01565 - 1.0078,
                     'semioxyhemeChl': 684.15273 - 1.0078 - 1.0078,
                     'none': 0}
    #Output dictionary (named so that the builtin `dict` is not shadowed)
    masses = {}
    #For each combination in the combination list
    for combi in combinations_list:
        #Accumulate left to right with += rather than sum(): sum() uses a different float
        #summation algorithm on Python 3.12+, which could change the resulting values
        mass_combi = 0
        for ele in combi:
            mass_combi += mods_dict[ele]
        #Key on the string form of the combination; equal string forms overwrite each other
        masses[str(combi)] = mass_combi
    return masses
def batch_combos(arr, r):
    """
    Build the combination/mass table for every combination size from 1 up to r.

    :param arr: A list of modifications
    :param r: Max number of elements in the combinations (inclusive)
    :return: a dictionary keyed by the combination size 1..r (per the original author, the
    amount of residues to be modified in a sequence, in this case cysteines); each value is
    the dictionary returned by combo_masses for the with-replacement combinations of that size
    """
    #One entry per size: generate the combinations, then compute their neutral masses
    return {
        size: combo_masses(rSubset_rep(arr, size))
        for size in range(1, r + 1)
    }
if __name__ == "__main__":
    # Earlier experiment, kept for reference: disulfide breakage modifications
    # arr = ['sh','shl','chhsshl', 'h', 'hl', 'hemeC-h', 'hemeC-2h']
    # r = 7
    # combos = rSubset_rep(arr, r)
    # for x in combos:
    #     print(x)
    #
    # combo_masses(combos)
    #
    #
    # batch_combos(arr, r)

    # Combinations of modifications to be removed from PSMs counts
    elements = [1, 2, 3, 4]
    # elements = ['C(57.0214)', 'N-term(42.0106)', 'N-term(229.1629)', 'K(229.1629)']
    size = 4

    # Show the full-size combinations first without, then with, replacement
    for make_subsets in (rSubset, rSubset_rep):
        print(make_subsets(elements, size))

    # Then the with-replacement combinations for every size from 0 through 4
    for length in range(5):
        print(rSubset_rep(elements, length))