Skip to content

Commit f77978b

Browse files
committed
Remove the deprecated use_dgl argument and its accompanying logic
1 parent 1df2300 commit f77978b

9 files changed

+60 -146 lines changed

Diff for: project/datasets/CASP_CAPRI/casp_capri_dgl_data_module.py

+7-7
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,7 @@ class CASPCAPRIDGLDataModule(LightningDataModule):
1919
casp_capri_test = None
2020

2121
def __init__(self, data_dir: str, batch_size: int, num_dataloader_workers: int, knn: int, self_loops: bool,
22-
pn_ratio: float, percent_to_use: float, use_dgl: bool, process_complexes: bool, input_indep: bool):
22+
pn_ratio: float, percent_to_use: float, process_complexes: bool, input_indep: bool):
2323
super().__init__()
2424

2525
self.data_dir = data_dir
@@ -29,26 +29,26 @@ def __init__(self, data_dir: str, batch_size: int, num_dataloader_workers: int,
2929
self.self_loops = self_loops
3030
self.pn_ratio = pn_ratio
3131
self.percent_to_use = percent_to_use # Fraction of CASP-CAPRI dataset splits to use
32-
self.use_dgl = use_dgl # Whether to process each complex into a pair of DGL graphs for its final representation
3332
self.process_complexes = process_complexes # Whether to process any unprocessed complexes before training
3433
self.input_indep = input_indep # Whether to use an input-independent pipeline to train the model
34+
self.collate_fn = dgl_picp_collate # Which collation function to use
3535

3636
def setup(self, stage: Optional[str] = None):
3737
# Assign testing dataset for use in DataLoaders - called on every GPU
3838
self.casp_capri_test = CASPCAPRIDGLDataset(mode='test', raw_dir=self.data_dir, knn=self.knn,
39-
self_loops=self.self_loops, pn_ratio=self.pn_ratio,
40-
percent_to_use=self.percent_to_use, use_dgl=self.use_dgl,
39+
geo_nbrhd_size=2, self_loops=self.self_loops, pn_ratio=self.pn_ratio,
40+
percent_to_use=self.percent_to_use,
4141
process_complexes=self.process_complexes,
4242
input_indep=self.input_indep)
4343

4444
def train_dataloader(self) -> DataLoader:
4545
return DataLoader(self.casp_capri_test, batch_size=self.batch_size, shuffle=True,
46-
num_workers=self.num_dataloader_workers, collate_fn=dgl_picp_collate, pin_memory=True)
46+
num_workers=self.num_dataloader_workers, collate_fn=self.collate_fn, pin_memory=True)
4747

4848
def val_dataloader(self) -> DataLoader:
4949
return DataLoader(self.casp_capri_test, batch_size=self.batch_size, shuffle=False,
50-
num_workers=self.num_dataloader_workers, collate_fn=dgl_picp_collate, pin_memory=True)
50+
num_workers=self.num_dataloader_workers, collate_fn=self.collate_fn, pin_memory=True)
5151

5252
def test_dataloader(self) -> DataLoader:
5353
return DataLoader(self.casp_capri_test, batch_size=self.batch_size, shuffle=False,
54-
num_workers=self.num_dataloader_workers, collate_fn=dgl_picp_collate, pin_memory=True)
54+
num_workers=self.num_dataloader_workers, collate_fn=self.collate_fn, pin_memory=True)

Diff for: project/datasets/CASP_CAPRI/casp_capri_dgl_dataset.py

+3-26
Original file line number | Diff line number | Diff line change
@@ -34,12 +34,10 @@ class CASPCAPRIDGLDataset(DGLDataset):
3434
Size of each edge's neighborhood when updating geometric edge features. Default: 2.
3535
self_loops: bool
3636
Whether to connect a given node to itself. Default: True.
37-
pn_ratio: bool
37+
pn_ratio: float
3838
The positive-negative ratio to use when assembling training labels for node-node pairs. Default: 0.1.
3939
percent_to_use: float
4040
How much of the dataset to load. Default: 1.00.
41-
use_dgl: bool
42-
Whether to process each complex into a pair of DGL graphs for its final representation. Default: True.
4341
process_complexes: bool
4442
Whether to process each unprocessed complex as we load in the dataset. Default: True.
4543
input_indep: bool
@@ -72,7 +70,6 @@ def __init__(self,
7270
self_loops=True,
7371
pn_ratio=0.1,
7472
percent_to_use=1.00,
75-
use_dgl=True,
7673
process_complexes=True,
7774
input_indep=False,
7875
force_reload=False,
@@ -87,7 +84,6 @@ def __init__(self,
8784
self.self_loops = self_loops
8885
self.pn_ratio = pn_ratio
8986
self.percent_to_use = percent_to_use # How much of the dataset (e.g. CASP-CAPRI training dataset) to use
90-
self.use_dgl = use_dgl # Whether to process each complex into a pair of DGL graphs for its final representation
9187
self.process_complexes = process_complexes # Whether to process any unprocessed complexes before training
9288
self.input_indep = input_indep # Whether to use an input-independent pipeline to train the model
9389
self.final_dir = os.path.join(*self.root.split(os.sep)[:-1])
@@ -163,9 +159,8 @@ def process(self):
163159
if not os.path.exists(processed_filepath):
164160
processed_parent_dir_to_make = os.path.join(self.processed_dir, os.path.split(raw_path[0])[0])
165161
os.makedirs(processed_parent_dir_to_make, exist_ok=True)
166-
process_complex_into_dict(raw_filepath, processed_filepath,
167-
self.knn, self.geo_nbrhd_size, self.self_loops,
168-
check_sequence=False, use_dgl=self.use_dgl)
162+
process_complex_into_dict(raw_filepath, processed_filepath, self.knn,
163+
self.geo_nbrhd_size, self.self_loops, check_sequence=False)
169164

170165
def has_cache(self):
171166
"""Check if each complex is downloaded and available for testing."""
@@ -189,28 +184,10 @@ def __getitem__(self, idx):
189184
-------
190185
:class:`dict`
191186
192-
(If process_complexes_into_dicts() was run with use_dgl=True):
193-
Protein complex, DGLGraphs for each of the complex's structures.
194-
195187
- ``complex['graph1']:`` DGLGraph (of length M) containing each of the first graph's encoded node and edge features
196188
- ``complex['graph2']:`` DGLGraph (of length N) containing each of the second graph's encoded node and edge features
197189
- ``complex['examples']:`` PyTorch Tensor (of shape (M x N) x 3) containing the labels for inter-graph node pairs
198190
- ``complex['complex']:`` Python string describing the complex's code and original pdb filename
199-
- ``complex['filepath']:`` Python string describing the complex's filepath
200-
201-
(If process_complexes_into_dicts() was run with use_dgl=False):
202-
Protein complex, feature tensors for each node and edge and indices of each node's neighboring nodes.
203-
204-
- ``complex['graph1_node_feats']:`` PyTorch Tensor containing each of the first graph's encoded node features
205-
- ``complex['graph2_node_feats']``: PyTorch Tensor containing each of the second graph's encoded node features
206-
- ``complex['graph1_node_coords']:`` PyTorch Tensor containing each of the first graph's node coordinates
207-
- ``complex['graph2_node_coords']``: PyTorch Tensor containing each of the second graph's node coordinates
208-
- ``complex['graph1_edge_feats']:`` PyTorch Tensor containing each of the first graph's edge features for each node
209-
- ``complex['graph2_edge_feats']:`` PyTorch Tensor containing each of the second graph's edge features for each node
210-
- ``complex['graph1_nbrhd_indices']:`` PyTorch Tensor containing each of the first graph's neighboring node indices
211-
- ``complex['graph2_nbrhd_indices']:`` PyTorch Tensor containing each of the second graph's neighboring node indices
212-
- ``complex['examples']:`` PyTorch Tensor containing the labels for inter-graph node pairs
213-
- ``complex['complex']:`` Python string describing the complex's code and original pdb filename
214191
- ``complex['filepath']:`` Python string describing the complex's filepath
215192
"""
216193
# Assemble filepath of processed protein complex

Diff for: project/datasets/DIPS/dips_dgl_data_module.py

+8-10
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,7 @@ class DIPSDGLDataModule(LightningDataModule):
2121
dips_test = None
2222

2323
def __init__(self, data_dir: str, batch_size: int, num_dataloader_workers: int, knn: int, self_loops: bool,
24-
pn_ratio: float, percent_to_use: float, use_dgl: bool, process_complexes: bool, input_indep: bool):
24+
pn_ratio: float, percent_to_use: float, process_complexes: bool, input_indep: bool):
2525
super().__init__()
2626

2727
self.data_dir = data_dir
@@ -31,32 +31,30 @@ def __init__(self, data_dir: str, batch_size: int, num_dataloader_workers: int,
3131
self.self_loops = self_loops
3232
self.pn_ratio = pn_ratio
3333
self.percent_to_use = percent_to_use # Fraction of DIPS dataset splits to use
34-
self.use_dgl = use_dgl # Whether to process each complex into a pair of DGL graphs for its final representation
3534
self.process_complexes = process_complexes # Whether to process any unprocessed complexes before training
3635
self.input_indep = input_indep # Whether to use an input-independent pipeline to train the model
36+
self.collate_fn = dgl_picp_collate # Which collation function to use
3737

3838
def setup(self, stage: Optional[str] = None):
3939
# Assign training/validation/testing data set for use in DataLoaders - called on every GPU
4040
self.dips_train = DIPSDGLDataset(mode='train', raw_dir=self.data_dir, knn=self.knn, self_loops=self.self_loops,
4141
pn_ratio=self.pn_ratio, percent_to_use=self.percent_to_use,
42-
use_dgl=self.use_dgl, process_complexes=self.process_complexes,
43-
input_indep=self.input_indep)
42+
process_complexes=self.process_complexes, input_indep=self.input_indep)
4443
self.dips_val = DIPSDGLDataset(mode='val', raw_dir=self.data_dir, knn=self.knn, self_loops=self.self_loops,
45-
pn_ratio=self.pn_ratio, percent_to_use=self.percent_to_use, use_dgl=self.use_dgl,
44+
pn_ratio=self.pn_ratio, percent_to_use=self.percent_to_use,
4645
process_complexes=self.process_complexes, input_indep=self.input_indep)
4746
self.dips_test = DIPSDGLDataset(mode='test', raw_dir=self.data_dir, knn=self.knn, self_loops=self.self_loops,
4847
pn_ratio=self.pn_ratio, percent_to_use=self.percent_to_use,
49-
use_dgl=self.use_dgl, process_complexes=self.process_complexes,
50-
input_indep=self.input_indep)
48+
process_complexes=self.process_complexes, input_indep=self.input_indep)
5149

5250
def train_dataloader(self) -> DataLoader:
5351
return DataLoader(self.dips_train, batch_size=self.batch_size, shuffle=True,
54-
num_workers=self.num_dataloader_workers, collate_fn=dgl_picp_collate, pin_memory=True)
52+
num_workers=self.num_dataloader_workers, collate_fn=self.collate_fn, pin_memory=True)
5553

5654
def val_dataloader(self) -> DataLoader:
5755
return DataLoader(self.dips_val, batch_size=self.batch_size, shuffle=False,
58-
num_workers=self.num_dataloader_workers, collate_fn=dgl_picp_collate, pin_memory=True)
56+
num_workers=self.num_dataloader_workers, collate_fn=self.collate_fn, pin_memory=True)
5957

6058
def test_dataloader(self) -> DataLoader:
6159
return DataLoader(self.dips_test, batch_size=self.batch_size, shuffle=False,
62-
num_workers=self.num_dataloader_workers, collate_fn=dgl_picp_collate, pin_memory=True)
60+
num_workers=self.num_dataloader_workers, collate_fn=self.collate_fn, pin_memory=True)

Diff for: project/datasets/DIPS/dips_dgl_dataset.py

+2-25
Original file line number | Diff line number | Diff line change
@@ -45,8 +45,6 @@ class DIPSDGLDataset(DGLDataset):
4545
The positive-negative ratio to use when assembling training labels for node-node pairs. Default: 0.1.
4646
percent_to_use: float
4747
How much of the dataset to load. Default: 1.00.
48-
use_dgl: bool
49-
Whether to process each complex into a pair of DGL graphs for its final representation. Default: True.
5048
process_complexes: bool
5149
Whether to process each unprocessed complex as we load in the dataset. Default: True.
5250
input_indep: bool
@@ -83,7 +81,6 @@ def __init__(self,
8381
self_loops=True,
8482
pn_ratio=0.1,
8583
percent_to_use=1.00,
86-
use_dgl=True,
8784
process_complexes=True,
8885
input_indep=False,
8986
train_viz=False,
@@ -99,7 +96,6 @@ def __init__(self,
9996
self.self_loops = self_loops
10097
self.pn_ratio = pn_ratio
10198
self.percent_to_use = percent_to_use # How much of the requested dataset (e.g. DIPS-Plus) to use
102-
self.use_dgl = use_dgl # Whether to process each complex into a pair of DGL graphs for its final representation
10399
self.process_complexes = process_complexes # Whether to process any unprocessed complexes before training
104100
self.input_indep = input_indep # Whether to use an input-independent pipeline to train the model
105101
self.train_viz = train_viz # Whether to curate the training loop's validation samples for visualization
@@ -183,9 +179,8 @@ def process(self):
183179
if not os.path.exists(processed_filepath):
184180
processed_parent_dir_to_make = os.path.join(self.processed_dir, os.path.split(raw_path[0])[0])
185181
os.makedirs(processed_parent_dir_to_make, exist_ok=True)
186-
process_complex_into_dict(raw_filepath, processed_filepath,
187-
self.knn, self.geo_nbrhd_size, self.self_loops,
188-
check_sequence=False, use_dgl=self.use_dgl)
182+
process_complex_into_dict(raw_filepath, processed_filepath, self.knn,
183+
self.geo_nbrhd_size, self.self_loops, check_sequence=False)
189184

190185
def has_cache(self):
191186
"""Check if each complex is downloaded and available for training, validation, or testing."""
@@ -209,28 +204,10 @@ def __getitem__(self, idx):
209204
-------
210205
:class:`dict`
211206
212-
(If process_complexes_into_dicts() was run with use_dgl=True):
213-
Protein complex, DGLGraphs for each of the complex's structures.
214-
215207
- ``complex['graph1']:`` DGLGraph (of length M) containing each of the first graph's encoded node and edge features
216208
- ``complex['graph2']:`` DGLGraph (of length N) containing each of the second graph's encoded node and edge features
217209
- ``complex['examples']:`` PyTorch Tensor (of shape (M x N) x 3) containing the labels for inter-graph node pairs
218210
- ``complex['complex']:`` Python string describing the complex's code and original pdb filename
219-
- ``complex['filepath']:`` Python string describing the complex's filepath
220-
221-
(If process_complexes_into_dicts() was run with use_dgl=False):
222-
Protein complex, feature tensors for each node and edge and indices of each node's neighboring nodes.
223-
224-
- ``complex['graph1_node_feats']:`` PyTorch Tensor containing each of the first graph's encoded node features
225-
- ``complex['graph2_node_feats']``: PyTorch Tensor containing each of the second graph's encoded node features
226-
- ``complex['graph1_node_coords']:`` PyTorch Tensor containing each of the first graph's node coordinates
227-
- ``complex['graph2_node_coords']``: PyTorch Tensor containing each of the second graph's node coordinates
228-
- ``complex['graph1_edge_feats']:`` PyTorch Tensor containing each of the first graph's edge features for each node
229-
- ``complex['graph2_edge_feats']:`` PyTorch Tensor containing each of the second graph's edge features for each node
230-
- ``complex['graph1_nbrhd_indices']:`` PyTorch Tensor containing each of the first graph's neighboring node indices
231-
- ``complex['graph2_nbrhd_indices']:`` PyTorch Tensor containing each of the second graph's neighboring node indices
232-
- ``complex['examples']:`` PyTorch Tensor containing the labels for inter-graph node pairs
233-
- ``complex['complex']:`` Python string describing the complex's code and original pdb filename
234211
- ``complex['filepath']:`` Python string describing the complex's filepath
235212
"""
236213
# Assemble filepath of processed protein complex

0 commit comments

Comments
 (0)