From b0dd3ab71fb3475a1eeff42258030aa35c6068a3 Mon Sep 17 00:00:00 2001
From: mufeili <mufeili1996@gmail.com>
Date: Fri, 22 Jan 2021 02:11:41 +0800
Subject: [PATCH 1/2] Add ClinTox dataset support to the example scripts

---
 README.md               | 4 ++++
 examples/README.md      | 2 ++
 examples/fingerprint.py | 6 +++---
 examples/gnn.py         | 4 ++--
 examples/utils.py       | 6 +++++-
 5 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/README.md b/README.md
index 389b5b7..52192b6 100644
--- a/README.md
+++ b/README.md
@@ -24,3 +24,7 @@
 | ---- | ------------- | -------------- | ---------------- | ------------------ | --------------------------------- | ---------------------------------------------------------------------------------------- | -------------- |
 | 1    | Random Forest | 1024-bit ECFP4 | 0.9540 +- 0.0038 | 0.9062 +- 0.0079   | [Mufei Li](mufeili1996@gmail.com) | [Paper](https://www.stat.berkeley.edu/~breiman/randomforest2001.pdf), [Code](./examples) | Dec 30th, 2020 |
 | 2    | GCN           | GraphConv      | 0.9214 +- 0.0106 | 0.9445 +- 0.0049   | [Mufei Li](mufeili1996@gmail.com) | [Paper](https://arxiv.org/abs/1609.02907), [Code](./examples)                            | Dec 30th, 2020 |
+
+### ClinTox
+
+| Rank | Model         | Featurization  | Test ROC-AUC     | Validation ROC-AUC | Contact                           | References	                                                                           | Date           |
diff --git a/examples/README.md b/examples/README.md
index 8621cbf..f9c95d0 100644
--- a/examples/README.md
+++ b/examples/README.md
@@ -18,6 +18,7 @@ The feasible arguments include:
         - `BACE_classification`
         - `BACE_regression`
         - `BBBP`
+        - `ClinTox`
 - **Hyperparameter Search (optional)**: `-hs`
     - Perform a hyperparameter search using Bayesian optimization. It determines the best 
       hyperparameters based on the validation metric averaged across 3 runs.
@@ -40,6 +41,7 @@ The feasible arguments include:
         - `BACE_classification`
         - `BACE_regression`
         - `BBBP`
+        - `ClinTox`
 - **Hyperparameter Search (optional)**: `-hs`
     - Perform a hyperparameter search using Bayesian optimization. It determines the best 
       hyperparameters based on the validation metric averaged across 3 runs.
diff --git a/examples/fingerprint.py b/examples/fingerprint.py
index dd1fcd9..5fd7161 100644
--- a/examples/fingerprint.py
+++ b/examples/fingerprint.py
@@ -28,7 +28,7 @@ def rf_model_builder(model_dir, hyperparams, mode):
 
 
 def load_model(args, tasks, hyperparams):
-  if args['dataset'] in ['BACE_classification', 'BBBP']:
+  if args['dataset'] in ['BACE_classification', 'BBBP', 'ClinTox']:
     mode = 'classification'
   elif args['dataset'] in ['BACE_regression']:
     mode = 'regression'
@@ -100,7 +100,7 @@ def init_hyper_search_space(args):
         'min_samples_split': hp.choice('min_samples_split', [2, 4, 8, 16, 32]),
         'bootstrap': hp.choice('bootstrap', [True, False]),
     }
-    if args['dataset'] in ['BACE_classification', 'BBBP']:
+    if args['dataset'] in ['BACE_classification', 'BBBP', 'ClinTox']:
       search_space['criterion'] = hp.choice('criterion', ["gini", "entropy"])
     else:
       search_space['criterion'] = hp.choice('criterion', ["mse", "mae"])
@@ -154,7 +154,7 @@ def objective(hyperparams):
   parser.add_argument(
       '-d',
       '--dataset',
-      choices=['BACE_classification', 'BACE_regression', 'BBBP'],
+      choices=['BACE_classification', 'BACE_regression', 'BBBP', 'ClinTox'],
       help='Dataset to use')
   parser.add_argument(
       '-m',
diff --git a/examples/gnn.py b/examples/gnn.py
index 9aee517..a641913 100644
--- a/examples/gnn.py
+++ b/examples/gnn.py
@@ -11,7 +11,7 @@
 
 
 def load_model(save_pth, args, tasks, hyperparams):
-  if args['dataset'] in ['BACE_classification', 'BBBP']:
+  if args['dataset'] in ['BACE_classification', 'BBBP', 'ClinTox']:
     mode = 'classification'
     # binary classification
     n_classes = 2
@@ -177,7 +177,7 @@ def objective(hyperparams):
   parser.add_argument(
       '-d',
       '--dataset',
-      choices=['BACE_classification', 'BACE_regression', 'BBBP'],
+      choices=['BACE_classification', 'BACE_regression', 'BBBP', 'ClinTox'],
       help='Dataset to use')
   parser.add_argument(
       '-m',
diff --git a/examples/utils.py b/examples/utils.py
index b08a56f..fd4c5fb 100644
--- a/examples/utils.py
+++ b/examples/utils.py
@@ -4,7 +4,7 @@
 
 
 def decide_metric(dataset):
-  if dataset in ['BACE_classification', 'BBBP']:
+  if dataset in ['BACE_classification', 'BBBP', 'ClinTox']:
     return 'roc_auc'
   elif dataset == 'BACE_regression':
     return 'rmse'
@@ -75,6 +75,10 @@ def load_dataset(args):
     from deepchem.molnet import load_bace_regression
     tasks, all_dataset, transformers = load_bace_regression(
         featurizer=featurizer, splitter=splitter, reload=False)
+  elif args['dataset'] == 'ClinTox':
+    from deepchem.molnet import load_clintox
+    tasks, all_dataset, transformers = load_clintox(
+        featurizer=featurizer, splitter=splitter, reload=False)
   else:
     raise ValueError('Unexpected dataset: {}'.format(args['dataset']))
 

From 2cc0328ec7770071ec61e397a4f4c127c05dc859 Mon Sep 17 00:00:00 2001
From: mufeili <mufeili1996@gmail.com>
Date: Fri, 22 Jan 2021 03:42:07 +0800
Subject: [PATCH 2/2] Add ClinTox leaderboard results and tuned configs

---
 README.md                                | 3 +++
 examples/configures/GCN_GC/ClinTox.json  | 8 ++++++++
 examples/configures/RF_ECFP/ClinTox.json | 6 ++++++
 3 files changed, 17 insertions(+)
 create mode 100644 examples/configures/GCN_GC/ClinTox.json
 create mode 100644 examples/configures/RF_ECFP/ClinTox.json

diff --git a/README.md b/README.md
index 52192b6..1afa93b 100644
--- a/README.md
+++ b/README.md
@@ -28,3 +28,6 @@
 ### ClinTox
 
 | Rank | Model         | Featurization  | Test ROC-AUC     | Validation ROC-AUC | Contact                           | References	                                                                           | Date           |
+| ---- | ------------- | -------------- | ---------------- | ------------------ | --------------------------------- | ---------------------------------------------------------------------------------------- | -------------- |
+| 1    | GCN           | GraphConv      | 0.9065 +- 0.0179 | 0.9880 +- 0.0073   | [Mufei Li](mailto:mufeili1996@gmail.com) | [Paper](https://arxiv.org/abs/1609.02907), [Code](./examples)                            | Jan 22nd, 2021 |
+| 2    | Random Forest | 1024-bit ECFP4 | 0.7829 +- 0.0235 | 0.8883 +- 0.0230   | [Mufei Li](mailto:mufeili1996@gmail.com) | [Paper](https://www.stat.berkeley.edu/~breiman/randomforest2001.pdf), [Code](./examples) | Jan 22nd, 2021 |
diff --git a/examples/configures/GCN_GC/ClinTox.json b/examples/configures/GCN_GC/ClinTox.json
new file mode 100644
index 0000000..bab7803
--- /dev/null
+++ b/examples/configures/GCN_GC/ClinTox.json
@@ -0,0 +1,8 @@
+{
+  "batchnorm": true,
+  "dropout": 0.04333497108612183,
+  "hidden_feats": 128,
+  "lr": 0.15302291932022413,
+  "num_gnn_layers": 2,
+  "residual": true
+}
\ No newline at end of file
diff --git a/examples/configures/RF_ECFP/ClinTox.json b/examples/configures/RF_ECFP/ClinTox.json
new file mode 100644
index 0000000..7c9475c
--- /dev/null
+++ b/examples/configures/RF_ECFP/ClinTox.json
@@ -0,0 +1,6 @@
+{
+  "bootstrap": false,
+  "criterion": "entropy",
+  "min_samples_split": 16,
+  "n_estimators": 100
+}
\ No newline at end of file