{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Week 5 Notebook: Deep Learning from Jet Images\n",
    "===============================================================\n",
    "\n",
    "Now, we'll look at a deep learning model based on jet images."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import tensorflow.keras as keras\n",
    "import numpy as np\n",
    "from sklearn.metrics import roc_curve, auc\n",
    "import matplotlib.pyplot as plt\n",
    "import uproot\n",
    "import utils"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import yaml\n",
    "\n",
    "# Parse the jet-image definitions file into a Python dictionary.\n",
    "# NOTE(review): yaml.FullLoader is kept as in the original; yaml.safe_load would\n",
    "# probably suffice if the file only holds plain lists and numbers -- confirm.\n",
    "with open('definitions_image.yml') as definitions_file:\n",
    "    definitions = yaml.load(definitions_file, Loader=yaml.FullLoader)\n",
    "\n",
    "# feature, spectator and label definitions used when reading the ntuples\n",
    "features, spectators, labels = (\n",
    "    definitions[key] for key in ('features', 'spectators', 'labels')\n",
    ")\n",
    "\n",
    "# presumably the matching counts, as stored in the definitions file\n",
    "nfeatures, nspectators, nlabels = (\n",
    "    definitions[key] for key in ('nfeatures', 'nspectators', 'nlabels')\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Jet Images\n",
    "\n",
    "Let's construct jet images {cite:p}`deOliveira:2015xxd`, which are 2D image-based representations of the spatial energy spread of jets. Each pixel's intensity is given by the sum of the transverse momentum of the particles located in that pixel."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Training sample, read remotely from the CMS open-data EOS area.\n",
    "train_file = 'root://eospublic.cern.ch//eos/opendata/cms/datascience/HiggsToBBNtupleProducerTool/HiggsToBBNTuple_HiggsToBB_QCD_RunII_13TeV_MC/train/ntuple_merged_10.root'\n",
    "\n",
    "feature_array, y, spec_array = utils.get_features_labels(\n",
    "    train_file, features, spectators, labels,\n",
    "    remove_mass_pt_window=False,\n",
    "    entry_stop=10000,\n",
    ")\n",
    "\n",
    "# turn the per-jet features into images\n",
    "X = utils.make_image(feature_array)\n",
    "print(X.shape)  # image is a 4D tensor (n_samples, n_pixels_x, n_pixels_y, n_channels)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from matplotlib.colors import LogNorm\n",
    "\n",
    "# Draw the mean image of each class on a log colour scale: column 1 of y selects\n",
    "# the jets shown as H(bb), column 0 the jets shown as QCD.\n",
    "for title, label_column in (('Average H(bb) jet', 1), ('Average QCD jet', 0)):\n",
    "    selected = y[:, label_column] == 1\n",
    "    plt.figure()\n",
    "    plt.title(title)\n",
    "    plt.imshow(np.mean(X[selected], axis=0), origin='lower', norm=LogNorm())\n",
    "    plt.colorbar()\n",
    "    plt.xlabel(r\"$\\Delta\\eta$ cell\", fontsize=15)\n",
    "    plt.ylabel(r\"$\\Delta\\phi$ cell\", fontsize=15)\n",
    "    plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2D Convolutional Neural Network Classifier"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from tensorflow.keras.models import Model\n",
    "from tensorflow.keras.layers import Input, Dense, BatchNormalization, Conv2D, Flatten, MaxPooling2D, Activation\n",
    "import tensorflow.keras.backend as K\n",
    "\n",
    "# define 2D convolutional keras model\n",
    "# NOTE(review): the input shape is assumed to match X.shape[1:] from utils.make_image -- confirm\n",
    "inputs = Input(shape=(224,224,1), name = 'input')\n",
    "x = BatchNormalization(name='bn_1')(inputs)\n",
    "\n",
    "# three convolution stages, each followed by 2x2 max pooling, batch norm and ReLU\n",
    "x = Conv2D(64, (3,3), padding='same', name = 'conv2d_1')(x)\n",
    "x = MaxPooling2D(2,2)(x)\n",
    "x = BatchNormalization(name='bn_2')(x)\n",
    "x = Activation('relu')(x)\n",
    "x = Conv2D(32, (3,3), padding='same', name = 'conv2d_2')(x)\n",
    "x = MaxPooling2D(2,2)(x)\n",
    "x = BatchNormalization(name='bn_3')(x)\n",
    "x = Activation('relu')(x)\n",
    "x = Conv2D(32, (3,3), padding='same', name = 'conv2d_3')(x)\n",
    "x = MaxPooling2D(2,2)(x)\n",
    "x = BatchNormalization(name='bn_4')(x)\n",
    "x = Activation('relu')(x)\n",
    "\n",
    "# classifier head: flatten, one hidden dense layer, softmax over the nlabels outputs\n",
    "x = Flatten(name='flatten_1')(x)\n",
    "x = Dense(256, name='dense_1', activation='relu')(x)\n",
    "outputs = Dense(nlabels, name = 'output', activation='softmax')(x)\n",
    "\n",
    "keras_model_conv2d = Model(inputs=inputs, outputs=outputs)\n",
    "keras_model_conv2d.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])\n",
    "\n",
    "# summary() prints the layer table itself and returns None, so it is called\n",
    "# directly; wrapping it in print() would append a stray 'None' to the output\n",
    "keras_model_conv2d.summary()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# define callbacks\n",
    "from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau\n",
    "\n",
    "# stop training once val_loss has gone 5 epochs without improving\n",
    "early_stopping = EarlyStopping(monitor='val_loss', patience=5)\n",
    "# halve the learning rate on a shorter plateau than early stopping waits for;\n",
    "# with an equal patience the reduction would typically only fire on the epoch\n",
    "# training stops, so it would have no effect on the fit\n",
    "reduce_lr = ReduceLROnPlateau(monitor='val_loss', patience=2, factor=0.5)\n",
    "# write the model to disk whenever val_loss reaches a new best\n",
    "model_checkpoint = ModelCheckpoint('keras_model_conv2d_best.h5', monitor='val_loss', save_best_only=True)\n",
    "callbacks = [early_stopping, model_checkpoint, reduce_lr]\n",
    "\n",
    "# fit keras model, holding out 20% of the samples for validation\n",
    "history_conv2d = keras_model_conv2d.fit(X, y, validation_split = 0.2,\n",
    "                                        epochs=20, \n",
    "                                        shuffle=True,\n",
    "                                        callbacks = callbacks, \n",
    "                                        verbose=0)\n",
    "# reload best weights (the last epoch is not necessarily the best one)\n",
    "keras_model_conv2d.load_weights('keras_model_conv2d_best.h5')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# training vs. validation loss per epoch, as recorded by fit()\n",
    "loss_history = history_conv2d.history\n",
    "\n",
    "plt.figure()\n",
    "plt.plot(loss_history['loss'], label='Loss')\n",
    "plt.plot(loss_history['val_loss'], label='Val. loss')\n",
    "plt.xlabel('Epoch')\n",
    "plt.ylabel('Loss')  # label the y axis so the figure stands on its own\n",
    "plt.legend()\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# load testing file\n",
    "test_file = 'root://eospublic.cern.ch//eos/opendata/cms/datascience/HiggsToBBNtupleProducerTool/HiggsToBBNTuple_HiggsToBB_QCD_RunII_13TeV_MC/test/ntuple_merged_0.root'\n",
    "\n",
    "# same features/spectators/labels as for training, but no entry_stop is passed here\n",
    "feature_array_test, label_array_test, spec_array_test = utils.get_features_labels(\n",
    "    test_file, features, spectators, labels,\n",
    "    remove_mass_pt_window=False,\n",
    ")\n",
    "\n",
    "# build the test images the same way as the training images\n",
    "X_test = utils.make_image(feature_array_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# evaluate the trained CNN on the test images\n",
    "predict_array_cnn2d = keras_model_conv2d.predict(X_test)\n",
    "\n",
    "# ROC curve from column 1 of the true labels and of the predicted scores\n",
    "fpr_cnn2d, tpr_cnn2d, threshold_cnn2d = roc_curve(label_array_test[:,1], predict_array_cnn2d[:,1])\n",
    "auc_percent_cnn2d = auc(fpr_cnn2d, tpr_cnn2d) * 100\n",
    "\n",
    "# false positive rate (log scale) as a function of true positive rate\n",
    "plt.figure()\n",
    "plt.plot(tpr_cnn2d, fpr_cnn2d, lw=2.5, label=f\"Conv2D, AUC = {auc_percent_cnn2d:.1f}%\")\n",
    "plt.xlabel(r'True positive rate')\n",
    "plt.ylabel(r'False positive rate')\n",
    "plt.yscale('log')\n",
    "plt.ylim(0.001, 1)\n",
    "plt.xlim(0, 1)\n",
    "plt.grid(True)\n",
    "plt.legend(loc='upper left')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}