diff --git a/Houston_J/Houston_J-sub3.ipynb b/Houston_J/Houston_J-sub3.ipynb
new file mode 100644
index 0000000..da1236e
--- /dev/null
+++ b/Houston_J/Houston_J-sub3.ipynb
@@ -0,0 +1,601 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Compared to sub2, a few changes was made,\n",
+    "For feature engineering, apply a log scale to \"GR\" and \"PHIND\", which has non-sysmetric hist. \n",
+    "For xgboost model, fine tuning the parameters for better performance. \n",
+    "The code was developed in spyder, and pasted directly here. For comments, please refer to submit 2. \n",
+    "\n",
+    "The Facies classification project for SEG A test notes is appended at bottom\n",
+    "The project has three parts :\n",
+    "1. Raw Data analysis (Small data, quick statistics)\n",
+    "\n",
+    "2. Feature Engineering\n",
+    "    a. Missing \"PE\" data : Regressional fillin is better than median and mean (https://github.com/seg/2016-ml-contest/blob/master/LA_Team/Facies_classification_LA_TEAM_05.ipynb)\n",
+    "    b. How many features to include : \n",
+    "      Current tests from other groups use only pre-defined features. \n",
+    "      I found Formation has predicting power too, including Formation info give extra uplift to my model See test 8,9\n",
+    "    c. Feature augmentation : https://github.com/seg/2016-ml-contest/blob/master/ispl/facies_classification_try02.ipynb\n",
+    "      Great works, included the depth information in a nature way\n",
+    "    d. Robust model scaling \n",
+    "\n",
+    "3. Model Selection\n",
+    "   XGBOOST is superior to SVC (My benchmark)\n",
+    "   A brutal gridsearch was done on XGBOOST on top of the best feature engineering  "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "import matplotlib.pyplot as plt\n",
+    "import seaborn as sns\n",
+    "import matplotlib as mpl\n",
+    "import matplotlib.pyplot as plt\n",
+    "import matplotlib.colors as colors\n",
+    "from mpl_toolkits.axes_grid1 import make_axes_locatable\n",
+    "\n",
+    "train_raw = pd.read_csv('../01_raw_data/facies_vectors.csv')\n",
+    "train = train_raw.copy()\n",
+    "cols = train.columns.values\n",
+    "well = train[\"Well Name\"].values\n",
+    "depth = train[\"Depth\"].values\n",
+    "\n",
+    "\n",
+    "## 01 Raw data analysis\n",
+    "print(\"No. of Wells is \" + str(len(train[\"Well Name\"].unique())))\n",
+    "print(\"No. of Formation is \" + str(len(train[\"Formation\"].unique())))\n",
+    "well_PE_Miss = train.loc[train[\"PE\"].isnull(),\"Well Name\"].unique()\n",
+    "#print(\"Wells with Missing PE \" + well_PE_Miss)\n",
+    "#print(train.loc[train[\"Well Name\"] == well_PE_Miss[0],[\"PE\",\"Depth\"]].count())\n",
+    "#print(train.loc[train[\"Well Name\"] == well_PE_Miss[1],[\"PE\",\"Depth\"]].count())\n",
+    "#print(train.loc[train[\"Well Name\"] == well_PE_Miss[2],[\"PE\",\"Depth\"]].count())\n",
+    "(train.groupby(\"Well Name\"))[\"PE\"].mean()\n",
+    "(train.groupby(\"Well Name\"))[\"PE\"].median()\n",
+    "#\n",
+    "### 02 Feature definition and QC functions\n",
+    "features = ['GR', 'ILD_log10', 'DeltaPHI', \n",
+    "    'PHIND','PE','NM_M', 'RELPOS']\n",
+    "feature_vectors = train[features]\n",
+    "facies_labels = train['Facies']\n",
+    "## 1=sandstone  2=c_siltstone   3=f_siltstone \n",
+    "## 4=marine_silt_shale 5=mudstone 6=wackestone 7=dolomite\n",
+    "## 8=packstone 9=bafflestone\n",
+    "facies_colors = ['#F4D03F', '#F5B041','#DC7633','#6E2C00',\n",
+    "       '#1B4F72','#2E86C1', '#AED6F1', '#A569BD', '#196F3D']\n",
+    "\n",
+    "facies_labels = ['SS', 'CSiS', 'FSiS', 'SiSh', 'MS',\n",
+    "                 'WS', 'D','PS', 'BS']\n",
+    "#facies_color_map is a dictionary that maps facies labels\n",
+    "#to their respective colors\n",
+    "\n",
+    "facies_color_map = {}\n",
+    "for ind, label in enumerate(facies_labels):\n",
+    "    facies_color_map[label] = facies_colors[ind]\n",
+    "\n",
+    "def label_facies(row, labels):\n",
+    "    return labels[ row['Facies'] -1]\n",
+    "    \n",
+    "train.loc[:,'FaciesLabels'] = train.apply(lambda row: label_facies(row, facies_labels), axis=1)\n",
+    "##\n",
+    "#\n",
+    "def make_facies_log_plot(logs, facies_colors):\n",
+    "    #make sure logs are sorted by depth\n",
+    "    logs = logs.sort_values(by='Depth')\n",
+    "    cmap_facies = colors.ListedColormap(\n",
+    "            facies_colors[0:len(facies_colors)], 'indexed')\n",
+    "    \n",
+    "    ztop=logs.Depth.min(); zbot=logs.Depth.max()\n",
+    "    \n",
+    "    cluster=np.repeat(np.expand_dims(logs['Facies'].values,1), 100, 1)\n",
+    "    \n",
+    "    f, ax = plt.subplots(nrows=1, ncols=7, figsize=(10, 12))\n",
+    "    ax[0].plot(logs.GR, logs.Depth, '-g')\n",
+    "    ax[1].plot(logs.ILD_log10, logs.Depth, '-')\n",
+    "    ax[2].plot(logs.DeltaPHI, logs.Depth, '-', color='0.5')\n",
+    "    ax[3].plot(logs.PHIND, logs.Depth, '-', color='r')\n",
+    "    ax[4].plot(logs.PE, logs.Depth, '-', color='black')\n",
+    "    ax[5].plot(logs.NM_M, logs.Depth, '-', color='black')\n",
+    "    im=ax[6].imshow(cluster, interpolation='none', aspect='auto',\n",
+    "                    cmap=cmap_facies,vmin=1,vmax=9)\n",
+    "    \n",
+    "    divider = make_axes_locatable(ax[5])\n",
+    "    cax = divider.append_axes(\"right\", size=\"20%\", pad=0.05)\n",
+    "    cbar=plt.colorbar(im, cax=cax)\n",
+    "    cbar.set_label((17*' ').join([' SS ', 'CSiS', 'FSiS', \n",
+    "                                'SiSh', ' MS ', ' WS ', ' D  ', \n",
+    "                                ' PS ', ' BS ']))\n",
+    "    cbar.set_ticks(range(0,1)); cbar.set_ticklabels('')\n",
+    "    \n",
+    "    for i in range(len(ax)-1):\n",
+    "        ax[i].set_ylim(ztop,zbot)\n",
+    "        ax[i].invert_yaxis()\n",
+    "        ax[i].grid()\n",
+    "        ax[i].locator_params(axis='x', nbins=3)\n",
+    "    \n",
+    "    ax[0].set_xlabel(\"GR\")\n",
+    "    ax[0].set_xlim(logs.GR.min(),logs.GR.max())\n",
+    "    ax[1].set_xlabel(\"ILD_log10\")\n",
+    "    ax[1].set_xlim(logs.ILD_log10.min(),logs.ILD_log10.max())\n",
+    "    ax[2].set_xlabel(\"DeltaPHI\")\n",
+    "    ax[2].set_xlim(logs.DeltaPHI.min(),logs.DeltaPHI.max())\n",
+    "    ax[3].set_xlabel(\"PHIND\")\n",
+    "    ax[3].set_xlim(logs.PHIND.min(),logs.PHIND.max())\n",
+    "    ax[4].set_xlabel(\"PE\")\n",
+    "    ax[4].set_xlim(logs.PE.min(),logs.PE.max())\n",
+    "    ax[5].set_xlabel('NoMarine/Marine')\n",
+    "    ax[6].set_xlabel('Facies')\n",
+    "    \n",
+    "    ax[1].set_yticklabels([]); ax[2].set_yticklabels([]); ax[3].set_yticklabels([])\n",
+    "    ax[4].set_yticklabels([]); ax[5].set_yticklabels([]); ax[6].set_yticklabels([])\n",
+    "    ax[5].set_xticklabels([])\n",
+    "    f.suptitle('Well: %s'%logs.iloc[0]['Well Name'], fontsize=14,y=0.94)\n",
+    "\n",
+    "#\n",
+    "##\n",
+    "##\n",
+    "#\n",
+    "#\n",
+    "#\n",
+    "### 03 Feature Engineering tests (SVC and XGB were used to test this)\n",
+    "## a. Fill in missing PE values : Median, mean, NN regressor\n",
+    "## b. Feature augmentaions\n",
+    "## c. Additional dummy features : Formation \n",
+    "## d. Featuere scaling\n",
+    "# Feature windows concatenation function\n",
+    "def augment_features_window(X, N_neig):\n",
+    "    \n",
+    "    # Parameters\n",
+    "    N_row = X.shape[0]\n",
+    "    N_feat = X.shape[1]\n",
+    "\n",
+    "    # Zero padding\n",
+    "    X = np.vstack((np.zeros((N_neig, N_feat)), X, (np.zeros((N_neig, N_feat)))))\n",
+    "\n",
+    "    # Loop over windows\n",
+    "    X_aug = np.zeros((N_row, N_feat*(2*N_neig+1)))\n",
+    "    for r in np.arange(N_row)+N_neig:\n",
+    "        this_row = []\n",
+    "        for c in np.arange(-N_neig,N_neig+1):\n",
+    "            this_row = np.hstack((this_row, X[r+c]))\n",
+    "        X_aug[r-N_neig] = this_row\n",
+    "\n",
+    "    return X_aug\n",
+    "\n",
+    "\n",
+    "# Feature gradient computation function\n",
+    "def augment_features_gradient(X, depth):\n",
+    "    \n",
+    "    # Compute features gradient\n",
+    "    d_diff = np.diff(depth).reshape((-1, 1))\n",
+    "    d_diff[d_diff==0] = 0.001\n",
+    "    X_diff = np.diff(X, axis=0)\n",
+    "    X_grad = X_diff / d_diff\n",
+    "        \n",
+    "    # Compensate for last missing value\n",
+    "    X_grad = np.concatenate((X_grad, np.zeros((1, X_grad.shape[1]))))\n",
+    "    \n",
+    "    return X_grad\n",
+    "\n",
+    "\n",
+    "# Feature augmentation function\n",
+    "def augment_features(X, well, depth, N_neig=1):\n",
+    "    \n",
+    "    # Augment features\n",
+    "    X_aug = np.zeros((X.shape[0], X.shape[1]*(N_neig*2+2)))\n",
+    "    for w in np.unique(well):\n",
+    "        w_idx = np.where(well == w)[0]\n",
+    "        X_aug_win = augment_features_window(X[w_idx, :], N_neig)\n",
+    "        X_aug_grad = augment_features_gradient(X[w_idx, :], depth[w_idx])\n",
+    "        X_aug[w_idx, :] = np.concatenate((X_aug_win, X_aug_grad), axis=1)\n",
+    "    \n",
+    "    # Find padded rows\n",
+    "    padded_rows = np.unique(np.where(X_aug[:, 0:7] == np.zeros((1, 7)))[0])\n",
+    "    return X_aug, padded_rows\n",
+    "##\n",
+    "#### LA_Team Feature engineering\n",
+    "##train[\"PE\"] = train_raw[\"PE\"].fillna(train_raw[\"PE\"].median())\n",
+    "##X1 = train[features].values \n",
+    "##X_aug, padded_rows = augment_features(X1, well, depth,N_neig = 1)\n",
+    "##X_feat.update({\"X_aug\" : X_aug})\n",
+    "###\n",
+    "#\n",
+    "#\n",
+    "X_feat = {}\n",
+    "## Feature Engeering 1 : With dummy variable from Formation\n",
+    "## Create dummy variables for Well Name, Formation, which may have geologic or geospatial information\n",
+    "train_dummy = pd.get_dummies(train[[\"Formation\"]])\n",
+    "train_dummy.describe()\n",
+    "cols_dummy = train_dummy.columns.values\n",
+    "train[cols_dummy] = train_dummy[cols_dummy]\n",
+    "train_inp = train.drop([\"Formation\",\"Well Name\",'FaciesLabels',\"Depth\"],axis =1)\n",
+    "X_fe1 = train_inp.drop([\"Facies\"],axis = 1).values\n",
+    "X_feat.update({\"X_fe1\" : X_fe1})\n",
+    "#\n",
+    "## Feature Engeering 2 : With dummy variable from Formation and feature augmentation\n",
+    "train[\"PE\"] = train_raw[\"PE\"].fillna(train_raw[\"PE\"].median())\n",
+    "train_inp = train.drop([\"Formation\",\"Well Name\",'FaciesLabels',\"Depth\"],axis =1)\n",
+    "X_fe1 = train_inp.drop([\"Facies\"],axis = 1).values\n",
+    "X_fe1_aug, padded_rows = augment_features(X_fe1, well, depth,N_neig = 1)\n",
+    "X_feat.update({\"X_fe2\" : X_fe1_aug})\n",
+    "\n",
+    "\n",
+    "## Feature Engeering 3 : With dummy variable from Formation and feature augmentation\n",
+    "## Fill Nan PE with mean\n",
+    "train[\"PE\"] = train_raw[\"PE\"].fillna(train_raw[\"PE\"].mean())\n",
+    "train_inp = train.drop([\"Formation\",\"Well Name\",'FaciesLabels',\"Depth\"],axis =1)\n",
+    "X_fe1 = train_inp.drop([\"Facies\"],axis = 1).values\n",
+    "X_fe1_aug, padded_rows = augment_features(X_fe1, well, depth,N_neig = 1)\n",
+    "X_feat.update({\"X_fe3\" : X_fe1_aug})\n",
+    "\n",
+    "\n",
+    "### Feature Engeering 4c : Modified GR and PHIND With dummy variable from Formation and feature augmentation\n",
+    "### Fill Nan PE with MPRRegressor\n",
+    "train[\"GR\"] = train[\"GR\"].apply(lambda x : np.log(x))\n",
+    "train[\"PHIND\"] = train[\"PHIND\"].apply(lambda x : np.log(x))\n",
+    "from sklearn.neural_network import MLPRegressor\n",
+    "reg = MLPRegressor()\n",
+    "DataImpAll = train_raw.drop(['Formation', 'Well Name', 'Depth'], axis=1).copy()\n",
+    "DataImp = DataImpAll.dropna(axis = 0, inplace=False)\n",
+    "Ximp=DataImp.loc[:, DataImp.columns != 'PE']\n",
+    "Yimp=DataImp.loc[:, 'PE']\n",
+    "reg.fit(Ximp, Yimp)\n",
+    "train.loc[np.array(DataImpAll.PE.isnull()),\"PE\"] = reg.predict(DataImpAll.loc[DataImpAll.PE.isnull(),:].drop('PE',axis=1,inplace=False))\n",
+    "train_inp = train.drop([\"Formation\",\"Well Name\",'FaciesLabels',\"Depth\"],axis =1)\n",
+    "X_fe1 = train_inp.drop([\"Facies\"],axis = 1).values\n",
+    "X_fe1_aug, padded_rows = augment_features(X_fe1, well, depth,N_neig = 1)\n",
+    "X_feat.update({\"X_fe4\" : X_fe1_aug})\n",
+    "\n",
+    "\n",
+    "#### Feature Engeering 6 : Drop low-correlating feature RELPOS\n",
+    "#train_inp = train.drop([\"Formation\",\"Well Name\",'FaciesLabels',\"Depth\",\"RELPOS\"],axis =1)\n",
+    "#X_fe6 = train_inp.drop([\"Facies\"],axis = 1).values\n",
+    "#X_fe6_aug, padded_rows = augment_features(X_fe6, well, depth,N_neig = 1)\n",
+    "#X_feat.update({\"X_fe6\" : X_fe6_aug})\n",
+    "\n",
+    "### Feature Engeering 7 : Drop low-correlating feature RELPOS\n",
+    "#train[\"GR\"] = train[\"GR\"].apply(lambda x : np.log(x))\n",
+    "#train[\"PHIND\"] = train[\"PHIND\"].apply(lambda x : np.log(x))\n",
+    "#train_inp = train.drop([\"Formation\",\"Well Name\",'FaciesLabels',\"Depth\"],axis =1)\n",
+    "#X_fe7 = train_inp.drop([\"Facies\"],axis = 1).values\n",
+    "#X_fe7_aug, padded_rows = augment_features(X_fe6, well, depth,N_neig = 1)\n",
+    "#X_feat.update({\"X_fe7\" : X_fe7_aug})\n",
+    "\n",
+    "\n",
+    "## Select which feature engineering for next model test\n",
+    "# Feature enginering Selection \n",
+    "X_tr = X_feat[\"X_fe4\"]\n",
+    "y = train[\"Facies\"].values\n",
+    "## Feature Scaling\n",
+    "from sklearn import preprocessing\n",
+    "scaler = preprocessing.RobustScaler(quantile_range=(25.0, 75.0)).fit(X_tr)\n",
+    "X = scaler.transform(X_tr)\n",
+    "\n",
+    "## Removing Padded Rows\n",
+    "#X = np.delete(X,padded_rows,axis = 0)\n",
+    "#y = np.delete(y,padded_rows,axis = 0)\n",
+    "Well_no_pad = well #np.delete(well,padded_rows,axis=0)\n",
+    "\n",
+    "#\n",
+    "#\n",
+    "#\n",
+    "## Reading Test dataset and process the same way as trainning\n",
+    "test = pd.read_csv('../01_raw_data/validation_data_nofacies.csv')\n",
+    "## Test data Check\n",
+    "print(test.count())  # Make sure no missing data in test\n",
+    "print(\"No. of Formation in test is \" + str(len(test[\"Formation\"].unique())))\n",
+    "## Dummy formation\n",
+    "test_dummy = pd.get_dummies(test[[\"Formation\"]])\n",
+    "test_cols_dummy = test_dummy.columns.values\n",
+    "test[test_cols_dummy] = test_dummy[cols_dummy]\n",
+    "## Feature augmentaion\n",
+    "Well_test = test[\"Well Name\"].values\n",
+    "Depth_test = test[\"Depth\"].values\n",
+    "test[\"GR\"] = test[\"GR\"].apply(lambda x : np.log(x))\n",
+    "test[\"PHIND\"] = test[\"PHIND\"].apply(lambda x : np.log(x))\n",
+    "test_inp = test.drop([\"Formation\",\"Well Name\",\"Depth\"],axis =1)\n",
+    "test_fe = test_inp.values\n",
+    "test_aug,t_pad_row = augment_features(test_fe,Well_test,Depth_test)\n",
+    "## Scaling\n",
+    "X_test = scaler.transform(test_aug)\n",
+    "\n",
+    "\n",
+    "\n",
+    "\n",
+    "# Split Group\n",
+    "from sklearn.model_selection import LeavePGroupsOut\n",
+    "lpgo = LeavePGroupsOut(n_groups=2)\n",
+    "#split_no = lpgo.get_n_splits(X,y,wellgroups)\n",
+    "train_index=[]\n",
+    "val_index = []\n",
+    "for tr_i,val_i in lpgo.split(X, y, groups=Well_no_pad):\n",
+    "    hist_tr = np.histogram(y[tr_i], bins=np.arange(len(facies_labels)+1)+0.5)\n",
+    "    hist_val = np.histogram(y[val_i], bins=np.arange(len(facies_labels)+1)+0.5)\n",
+    "    if np.all(hist_tr[0] != 0) & np.all(hist_val[0] != 0):    \n",
+    "        train_index.append(tr_i)\n",
+    "        val_index.append(val_i)\n",
+    "split_no = len(train_index)\n",
+    "##\n",
+    "from sklearn.multiclass import OneVsOneClassifier\n",
+    "import xgboost as xgb\n",
+    "from xgboost.sklearn import XGBClassifier\n",
+    "from sklearn.model_selection import GridSearchCV\n",
+    "from sklearn.metrics import classification_report\n",
+    "from sklearn.metrics import f1_score\n",
+    "from sklearn.svm import SVC\n",
+    "from scipy.signal import medfilt\n",
+    "\n",
+    "\n",
+    "#\n",
+    "#tuned_param = {'learning_rate': [0.1],\n",
+    "# 'n_estimators': [200],\n",
+    "# 'reg_alpha': [0.6,0.8,1.2],\n",
+    "# 'reg_lambda':[1],\n",
+    "# 'colsample_bytree': [0.8], \n",
+    "# 'gamma': [0.6],\n",
+    "# 'max_depth':[3],\n",
+    "# 'min_child_weight': [1],\n",
+    "# 'subsample': [0.7]}\n",
+    "\n",
+    "#param = {'alpha': 0.2,\n",
+    "# 'colsamplebytree': 0.8,\n",
+    "# 'gamma': 0.3,\n",
+    "# 'learningrate': 0.2,\n",
+    "# 'maxdepth': 5,\n",
+    "# 'minchildweight': 1,\n",
+    "# 'n_estimators': 200,\n",
+    "# 'subsample': 0.9}\n",
+    "##\n",
+    "#param = {'alpha': 0.2,\n",
+    "# 'colsamplebytree': 0.8,\n",
+    "# 'gamma': 0.3,\n",
+    "# 'learningrate': 0.05,\n",
+    "# 'maxdepth': 3,\n",
+    "# 'minchildweight': 1,\n",
+    "# 'n_estimators': 200,\n",
+    "# 'subsample': 0.7}\n",
+    "seed = np.random.randint(100)\n",
+    "clf = XGBClassifier(\n",
+    "            learning_rate = 0.05,\n",
+    "            n_estimators=400,\n",
+    "            max_depth=3,\n",
+    "            min_child_weight=1,\n",
+    "            gamma = 0.6,\n",
+    "            subsample= 0.7,\n",
+    "            colsample_bytree=0.8,\n",
+    "            reg_alpha = 0.8,\n",
+    "            reg_lambda= 1,\n",
+    "            nthread =-1,\n",
+    "            seed = seed,\n",
+    "        ) \n",
+    "#\n",
+    "#import datetime\n",
+    "\n",
+    "\n",
+    "#scores = ['precision', 'recall']\n",
+    "#clf = GridSearchCV(XGBClassifier(nthread=-1,seed = seed),\n",
+    "#                   tuned_param, cv=lpgo.split(X, y, groups=train['Well Name'].values),\n",
+    "#                  scoring='%s_micro' % scores[0],n_jobs = -1,verbose = 10)\n",
+    "#clf.fit(X,y)\n",
+    "#print(clf.best_score_)\n",
+    "#print(clf.best_params_)\n",
+    "#\n",
+    "#\n",
+    "svc_best = SVC(C = 10, gamma = 0.01, kernel = 'rbf')\n",
+    "#\n",
+    "#\n",
+    "f1 = np.zeros((split_no,1))\n",
+    "f1_svc = np.zeros((split_no,1))\n",
+    "for i in range(split_no):\n",
+    "    split_train_no_pad = np.setdiff1d(train_index[i],padded_rows)\n",
+    "#    print(len(train_index[i]),len(split_train_no_pad))\n",
+    "    X_train = X[split_train_no_pad,:]\n",
+    "    Y_train = y[split_train_no_pad]\n",
+    "    X_val = X[val_index[i],:]\n",
+    "    Y_val = y[val_index[i]]\n",
+    "    print(i)\n",
+    "    ### XGBOOST\n",
+    "    clf.fit(X_train,Y_train)\n",
+    "    y_pred = clf.predict(X_val)\n",
+    "#    y_pred = medfilt(y_pred,kernel_size=5)\n",
+    "    f1[i] = f1_score(Y_val, y_pred, average='micro')  \n",
+    "    \n",
+    "    \n",
+    "    svc_best.fit(X_train,Y_train)\n",
+    "    Y_pred = svc_best.predict(X_val)\n",
+    "#    Y_pred = medfilt(Y_pred,kernel_size=5)\n",
+    "    f1_svc[i] = f1_score(Y_val, Y_pred, average='micro')  \n",
+    "    \n",
+    "print(\"XGBOOST score \" + str(np.mean(f1)))\n",
+    "print(\"SVC score\" + str(np.mean(f1_svc)))\n",
+    "\n",
+    "\n",
+    "#\n",
+    "# Predict for testing data\n",
+    "##Plot predicted labels\n",
+    "test_facies = clf.fit(X,y).predict(X_test)\n",
+    "test[\"Facies\"] = test_facies\n",
+    "test.to_csv(\"HoustonJ_sub3.csv\")\n",
+    "\n",
+    "make_facies_log_plot(\n",
+    "    test[test['Well Name'] == 'STUART'],\n",
+    "    facies_colors=facies_colors)\n",
+    "\n",
+    "make_facies_log_plot(\n",
+    "    test[test['Well Name'] == 'CRAWFORD'],\n",
+    "    facies_colors=facies_colors)\n",
+    "\n",
+    "\n",
+    "\n",
+    "#plt.figure(figsize=(6,4))\n",
+    "#sns.heatmap(train.corr(method='pearson', min_periods=1),cmap='seismic')\n",
+    "#plt.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Facies Classification :\n",
+    " \n",
+    "facies_labels = ['SS', 'CSiS', 'FSiS', 'SiSh', 'MS',\n",
+    "                 'WS', 'D','PS', 'BS']\n",
+    "\n",
+    "Facies Description Label Adjacen facies\n",
+    "1 Nonmarine sandstone  SS 2\n",
+    "2 Nonmarine coarse siltstone CSiS 1,3\n",
+    "3 Nonmarine fine siltstone FSiS 2\n",
+    "4 Marine siltstone and shale SiSh 5\n",
+    "5 Mudstone MS 4,6\n",
+    "6 Wackestone WS 5,7,8\n",
+    "7 Dolomite D 6,8\n",
+    "8 Packstone-grainstone PS 6,7,9\n",
+    "9 Phylloid-algal bafflestone BS 7,8\n",
+    "\n",
+    "Features : 'Facies', 'Formation', 'Well Name', 'Depth', 'GR', 'ILD_log10','DeltaPHI', 'PHIND', 'PE', 'NM_M', 'RELPOS'\n",
+    "\n",
+    "Pre-processing 1: \n",
+    "1. Well 'ALEXANDER D', 'KIMZEY A' has missing PE, filled in with median values \n",
+    "2. Map \"Formation\", \"Well Name\" into dummy features ; Effective features increase from 8 to 21\n",
+    "3. Robust normalization, dropping depth\n",
+    "\n",
+    "\n",
+    "Pre-processing 2: \n",
+    "1. Well 'ALEXANDER D', 'KIMZEY A' has missing PE, filled in with median values \n",
+    "2. Map \"Formation\", \"Well Name\" into dummy features ; Effective features increase from 8 to 21\n",
+    "3. Feature augmentation\n",
+    "4. Robust normalization, dropping depth\n",
+    "\n",
+    "Pre-processing 3: \n",
+    "1. Well 'ALEXANDER D', 'KIMZEY A' has missing PE, filled in with mean values \n",
+    "2. Map \"Formation\", \"Well Name\" into dummy features ; Effective features increase from 8 to 21\n",
+    "3. Feature augmentation\n",
+    "4. Robust normalization, dropping depth\n",
+    "\n",
+    "Pre-processing 4: \n",
+    "1. Well 'ALEXANDER D', 'KIMZEY A' has missing PE, filled in NN Regressor\n",
+    "2. Map \"Formation\", \"Well Name\" into dummy features ; Effective features increase from 8 to 21\n",
+    "3. Feature augmentation\n",
+    "4. Robust normalization, dropping depth\n",
+    "\n",
+    "Pre-processing 5: \n",
+    "1. Well 'ALEXANDER D', 'KIMZEY A' has missing PE, filled in with mean values \n",
+    "3. Feature augmentation\n",
+    "4. Robust normalization, dropping depth\n",
+    "\n",
+    "Pre-processing 6: Derive from 4, and drop the low-correlation feature RELPOS\n",
+    "1. Well 'ALEXANDER D', 'KIMZEY A' has missing PE, filled in NN Regressor\n",
+    "2. Map \"Formation\", \"Well Name\" into dummy features ; Effective features increase from 8 to 21\n",
+    "3. Feature augmentation\n",
+    "4. Robust normalization, dropping depth\n",
+    "\n",
+    "Pre-processing 6: Derive from 4, and Applied a log function to GR and PHIND\n",
+    "1. Well 'ALEXANDER D', 'KIMZEY A' has missing PE, filled in NN Regressor\n",
+    "2. Map \"Formation\", \"Well Name\" into dummy features ; Effective features increase from 8 to 21\n",
+    "3. Feature augmentation\n",
+    "4. Robust normalization, dropping depth\n",
+    "\n",
+    "\n",
+    "Model_selection_pre:\n",
+    "Split group by Well Name : 2 test 8 train\n",
+    "\n",
+    "Test 1 : The pre-processing 1\n",
+    "Radomly choose one slipt and run SVC vs XGBOOST\n",
+    "f1 score  : SVC 0.486  <  XGBOOST 0.535\n",
+    "Conclusion_pre : XGBOOST > SVC \n",
+    "\n",
+    "Feature Engineering Selection : \n",
+    "Test 2 : The pre-processing 1\n",
+    "XGBOOST for all splits : 0.535\n",
+    "Houmath Best : 0.563\n",
+    "\n",
+    "Test3 : The feature augmentation \n",
+    "XGBOOST score 0.552620109649\n",
+    "SVC score0.502307800369\n",
+    "\n",
+    "Test 4 : The feature augmentation N_neig = 2\n",
+    "XGBOOST score 0.544176923417\n",
+    "SVC score0.489872101252\n",
+    "\n",
+    "Test 5 : The pre-processing 2  \n",
+    "XGBOOST score 0.557558000862\n",
+    "SVC score0.499220019065\n",
+    "\n",
+    "Test 6 : The pre-processing 3  \n",
+    "XGBOOST score 0.557884804169\n",
+    "SVC score0.500650895029\n",
+    "\n",
+    "Test 7 : The pre-processing 3  y_pre = medfil size 5\n",
+    "XGBOOST score 0.559944170153\n",
+    "SVC score0.509190227257\n",
+    "\n",
+    "Test 8 : The pre-processing 4  y_pre = medfil size 5\n",
+    "XGBOOST score 0.566146182295\n",
+    "SVC score0.507362308656\n",
+    "\n",
+    "Test 9 : The pre-processing 5 Drop Formation dummy  y_pre = medfil size 5\n",
+    "XGBOOST score 0.555870232144\n",
+    "SVC score0.509423764916\n",
+    "\n",
+    "Test 10 : The pre-processing 6 Drop RELPOS\n",
+    "XGBOOST score 0.559226819725\n",
+    "SVC score0.509379374599\n",
+    "\n",
+    "Test 11 : The pre-processing 7 Apply Log function to GR and PHIND\n",
+    "XGBOOST score 0.559652979267\n",
+    "SVC score0.512544201578\n",
+    "\n",
+    "Test 12 : The pre-processing 8 Drop RELPOS and Apply log function to GR and PHIND (Best)\n",
+    "XGBOOST score 0.562391525842\n",
+    "SVC score0.51057543881\n",
+    "\n",
+    "\n",
+    "Removing padded rows in cross-validation\n",
+    "XGBOOST score 0.559949591075\n",
+    "SVC score0.512978370131\n",
+    "\n",
+    "\n",
+    "Model Optimization : \n",
+    "Test 10 : The pre-processing 4  y_pre = medfil size 5\n",
+    "Grid Search for XGBOOST parameters\n",
+    "\n",
+    "Learning_rate 0.05, n_estimators : 200 ; Hope not overfitting\n",
+    "XGBOOST score 0.563599674886\n",
+    "SVC score0.510516447302\n",
+    "\n",
+    "\n"
+   ]
+  }
+ ],
+ "metadata": {
+  "anaconda-cloud": {},
+  "kernelspec": {
+   "display_name": "Python [default]",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.5.2"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 1
+}