From b914fc8b3b77973394a8ac1fdc097efb95d2c538 Mon Sep 17 00:00:00 2001 From: mufeili Date: Wed, 30 Dec 2020 01:43:11 +0800 Subject: [PATCH 1/5] Update --- README.md | 8 ++++++-- examples/README.md | 2 ++ examples/fingerprint.py | 6 +++--- examples/gnn.py | 4 ++-- examples/utils.py | 6 +++++- 5 files changed, 18 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index bdb8ec0..cfc0c61 100644 --- a/README.md +++ b/README.md @@ -6,12 +6,16 @@ | Rank | Model | Featurization | Test ROC-AUC | Validation ROC-AUC | Contact | References | Date | | ---- | ------------- | -------------- | ---------------- | ------------------ | --------------------------------- | ---------------------------------------------------------------------------------------- | -------------- | -| 1 | Random Forest | 1024-bit ECFP2 | 0.8507 +- 0.0072 | 0.7368 +- 0.0066 | [Mufei Li](mufeili1996@gmail.com) | [Paper](https://www.stat.berkeley.edu/~breiman/randomforest2001.pdf), [Code](./examples) | Dec 2nd, 2020 | +| 1 | Random Forest | 1024-bit ECFP4 | 0.8507 +- 0.0072 | 0.7368 +- 0.0066 | [Mufei Li](mufeili1996@gmail.com) | [Paper](https://www.stat.berkeley.edu/~breiman/randomforest2001.pdf), [Code](./examples) | Dec 2nd, 2020 | | 2 | GCN | GraphConv | 0.8175 +- 0.0193 | 0.7430 +- 0.0194 | [Mufei Li](mufeili1996@gmail.com) | [Paper](https://arxiv.org/abs/1609.02907), [Code](./examples) | Dec 20th, 2020 | ### BACE Regression | Rank | Model | Featurization | Test RMSE | Validation RMSE | Contact | References | Date | | ---- | ------------- | -------------- | ---------------- | ---------------- | --------------------------------- | ---------------------------------------------------------------------------------------- | -------------- | -| 1 | Random Forest | 1024-bit ECFP2 | 1.3178 +- 0.0081 | 0.6716 +- 0.0059 | [Mufei Li](mufeili1996@gmail.com) | [Paper](https://www.stat.berkeley.edu/~breiman/randomforest2001.pdf), [Code](./examples) | Dec 26th, 2020 | +| 1 | Random Forest | 1024-bit ECFP4 | 1.3178 +- 0.0081 | 0.6716 +- 0.0059 | [Mufei Li](mufeili1996@gmail.com) | [Paper](https://www.stat.berkeley.edu/~breiman/randomforest2001.pdf), [Code](./examples) | Dec 26th, 2020 | | 2 | GCN | GraphConv | 1.6450 +- 0.1325 | 0.5244 +- 0.0200 | [Mufei Li](mufeili1996@gmail.com) | [Paper](https://arxiv.org/abs/1609.02907), [Code](./examples) | Dec 26th, 2020 | + +## Physiology + +### BBBP diff --git a/examples/README.md b/examples/README.md index 88783ff..8621cbf 100644 --- a/examples/README.md +++ b/examples/README.md @@ -17,6 +17,7 @@ The feasible arguments include: - Specifies the dataset to use, which can be one of the following: - `BACE_classification` - `BACE_regression` + - `BBBP` - **Hyperparameter Search (optional)**: `-hs` - Perform a hyperparameter search using Bayesian optimization. It determines the best hyperparameters based on the validation metric averaged across 3 runs. @@ -38,6 +39,7 @@ The feasible arguments include: - Specifies the dataset to use, which can be one of the following: - `BACE_classification` - `BACE_regression` + - `BBBP` - **Hyperparameter Search (optional)**: `-hs` - Perform a hyperparameter search using Bayesian optimization. It determines the best hyperparameters based on the validation metric averaged across 3 runs. diff --git a/examples/fingerprint.py b/examples/fingerprint.py index 4ceccb8..459c4d2 100644 --- a/examples/fingerprint.py +++ b/examples/fingerprint.py @@ -28,7 +28,7 @@ def rf_model_builder(model_dir, hyperparams, mode): def load_model(args, tasks, hyperparams): - if args['dataset'] in ['BACE_classification']: + if args['dataset'] in ['BACE_classification', 'BBBP']: mode = 'classification' elif args['dataset'] in ['BACE_regression']: mode = 'regression' @@ -100,7 +100,7 @@ def init_hyper_search_space(args): 'min_samples_split': hp.choice('min_samples_split', [2, 4, 8, 16, 32]), 'bootstrap': hp.choice('bootstrap', [True, False]), } - if args['dataset'] in ['BACE_classification']: + if args['dataset'] in ['BACE_classification', 'BBBP']: search_space['criterion'] = hp.choice('criterion', ["gini", "entropy"]) else: search_space['criterion'] = hp.choice('criterion', ["mse", "mae"]) @@ -154,7 +154,7 @@ def objective(hyperparams): parser.add_argument( '-d', '--dataset', - choices=['BACE_classification', 'BACE_regression'], + choices=['BACE_classification', 'BACE_regression', 'BBBP'], help='Dataset to use') parser.add_argument( '-m', diff --git a/examples/gnn.py b/examples/gnn.py index 15e7b58..c42b21e 100644 --- a/examples/gnn.py +++ b/examples/gnn.py @@ -11,7 +11,7 @@ def load_model(save_pth, args, tasks, hyperparams): - if args['dataset'] in ['BACE_classification']: + if args['dataset'] in ['BACE_classification', 'BBBP']: mode = 'classification' # binary classification n_classes = 2 @@ -177,7 +177,7 @@ def objective(hyperparams): parser.add_argument( '-d', '--dataset', - choices=['BACE_classification', 'BACE_regression'], + choices=['BACE_classification', 'BACE_regression', 'BBBP'], help='Dataset to use') parser.add_argument( '-m', diff --git a/examples/utils.py b/examples/utils.py index e43c340..b08a56f 100644 --- a/examples/utils.py +++ b/examples/utils.py @@ -4,7 +4,7 @@ def decide_metric(dataset): - if dataset == 'BACE_classification': + if dataset in ['BACE_classification', 'BBBP']: return 'roc_auc' elif dataset == 'BACE_regression': return 'rmse' @@ -67,6 +67,10 @@ def load_dataset(args): from deepchem.molnet import load_bace_classification tasks, all_dataset, transformers = load_bace_classification( featurizer=featurizer, splitter=splitter, reload=False) + elif args['dataset'] == 'BBBP': + from deepchem.molnet import load_bbbp + tasks, all_dataset, transformers = load_bbbp( + featurizer=featurizer, splitter=splitter, reload=False) elif args['dataset'] == 'BACE_regression': from deepchem.molnet import load_bace_regression tasks, all_dataset, transformers = load_bace_regression( From 195f5d0fadd27f2551fa3024760ff97ac272ec2c Mon Sep 17 00:00:00 2001 From: mufeili Date: Wed, 30 Dec 2020 01:49:53 +0800 Subject: [PATCH 2/5] Update --- examples/configures/RF_ECFP/BBBP.json | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 examples/configures/RF_ECFP/BBBP.json diff --git a/examples/configures/RF_ECFP/BBBP.json b/examples/configures/RF_ECFP/BBBP.json new file mode 100644 index 0000000..efa035e --- /dev/null +++ b/examples/configures/RF_ECFP/BBBP.json @@ -0,0 +1,6 @@ +{ + "bootstrap": true, + "criterion": "entropy", + "min_samples_split": 8, + "n_estimators": 100 +} \ No newline at end of file From 91f752dfce20a6439f054f9dd4c9e5da4b378c30 Mon Sep 17 00:00:00 2001 From: mufeili Date: Wed, 30 Dec 2020 01:53:16 +0800 Subject: [PATCH 3/5] Update --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index cfc0c61..4735d5c 100644 --- a/README.md +++ b/README.md @@ -19,3 +19,7 @@ ## Physiology ### BBBP + +| Rank | Model | Featurization | Test ROC-AUC | Validation ROC-AUC | Contact | References | Date | +| ---- | ------------- | -------------- | ---------------- | ------------------ | --------------------------------- | ---------------------------------------------------------------------------------------- | -------------- | +| 1 | Random Forest | 1024-bit ECFP4 | 0.9104 +- 0.0078 | 0.9422 +- 0.0014 | [Mufei Li](mufeili1996@gmail.com) | [Paper](https://www.stat.berkeley.edu/~breiman/randomforest2001.pdf), [Code](./examples) | Dec 30th, 2020 | From 40d047e885bb9d91b56d69186a71add8a05273aa Mon Sep 17 00:00:00 2001 From: mufeili Date: Wed, 30 Dec 2020 03:49:01 +0800 Subject: [PATCH 4/5] Update --- README.md | 1 + examples/configures/GCN_GC/BBBP.json | 8 ++++++++ 2 files changed, 9 insertions(+) create mode 100644 examples/configures/GCN_GC/BBBP.json diff --git a/README.md b/README.md index 4735d5c..fde1076 100644 --- a/README.md +++ b/README.md @@ -23,3 +23,4 @@ | Rank | Model | Featurization | Test ROC-AUC | Validation ROC-AUC | Contact | References | Date | | ---- | ------------- | -------------- | ---------------- | ------------------ | --------------------------------- | ---------------------------------------------------------------------------------------- | -------------- | | 1 | Random Forest | 1024-bit ECFP4 | 0.9104 +- 0.0078 | 0.9422 +- 0.0014 | [Mufei Li](mufeili1996@gmail.com) | [Paper](https://www.stat.berkeley.edu/~breiman/randomforest2001.pdf), [Code](./examples) | Dec 30th, 2020 | +| 2 | GCN | GraphConv | diff --git a/examples/configures/GCN_GC/BBBP.json b/examples/configures/GCN_GC/BBBP.json new file mode 100644 index 0000000..e4b13ac --- /dev/null +++ b/examples/configures/GCN_GC/BBBP.json @@ -0,0 +1,8 @@ +{ + "batchnorm": true, + "dropout": 0.14190910017170108, + "hidden_feats": 128, + "lr": 0.11349404033122097, + "num_gnn_layers": 3, + "residual": true +} \ No newline at end of file From de327f4d371683b75c5054fda9f2b54248085515 Mon Sep 17 00:00:00 2001 From: mufeili Date: Wed, 30 Dec 2020 04:13:07 +0800 Subject: [PATCH 5/5] Update --- README.md | 4 ++-- examples/fingerprint.py | 2 +- examples/gnn.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index fde1076..389b5b7 100644 --- a/README.md +++ b/README.md @@ -22,5 +22,5 @@ | Rank | Model | Featurization | Test ROC-AUC | Validation ROC-AUC | Contact | References | Date | | ---- | ------------- | -------------- | ---------------- | ------------------ | --------------------------------- | ---------------------------------------------------------------------------------------- | -------------- | -| 1 | Random Forest | 1024-bit ECFP4 | 0.9104 +- 0.0078 | 0.9422 +- 0.0014 | [Mufei Li](mufeili1996@gmail.com) | [Paper](https://www.stat.berkeley.edu/~breiman/randomforest2001.pdf), [Code](./examples) | Dec 30th, 2020 | -| 2 | GCN | GraphConv | +| 1 | Random Forest | 1024-bit ECFP4 | 0.9540 +- 0.0038 | 0.9062 +- 0.0079 | [Mufei Li](mufeili1996@gmail.com) | [Paper](https://www.stat.berkeley.edu/~breiman/randomforest2001.pdf), [Code](./examples) | Dec 30th, 2020 | +| 2 | GCN | GraphConv | 0.9214 +- 0.0106 | 0.9445 +- 0.0049 | [Mufei Li](mufeili1996@gmail.com) | [Paper](https://arxiv.org/abs/1609.02907), [Code](./examples) | Dec 30th, 2020 | diff --git a/examples/fingerprint.py b/examples/fingerprint.py index 459c4d2..dd1fcd9 100644 --- a/examples/fingerprint.py +++ b/examples/fingerprint.py @@ -205,7 +205,7 @@ def objective(hyperparams): else: print('Use the manually specified hyperparameters') with open('configures/{}_{}/{}.json'.format( - args['model'], args['featurizer'], args['dataset'])) as f: + args['model'], args['featurizer'], args['dataset'])) as f: default_hyperparams = json.load(f) val_metrics, test_metrics = main(args['result_path'], args, default_hyperparams) diff --git a/examples/gnn.py b/examples/gnn.py index c42b21e..9aee517 100644 --- a/examples/gnn.py +++ b/examples/gnn.py @@ -236,7 +236,7 @@ def objective(hyperparams): else: print('Use the manually specified hyperparameters') with open('configures/{}_{}/{}.json'.format( - args['model'], args['featurizer'], args['dataset'])) as f: + args['model'], args['featurizer'], args['dataset'])) as f: default_hyperparams = json.load(f) val_metrics, test_metrics = main(args['result_path'], args, default_hyperparams)