{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "Week 7 Notebook: Optimizing Other Objectives\n", "===============================================================\n", "\n", "This week, we will look at optimizing multiple objectives simultaneously. In particular, we will look at pivoting with adversarial neural networks {cite:p}`Louppe:2016ylz,ganin2014unsupervised,Sirunyan:2019nfw`.\n", "\n", "We will borrow the implementation from: " ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import tensorflow.keras as keras\n", "import numpy as np\n", "from sklearn.metrics import roc_curve, auc\n", "import matplotlib.pyplot as plt\n", "import uproot\n", "from tqdm.notebook import tqdm" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import yaml\n", "\n", "with open('definitions.yml') as file:\n", " # The FullLoader parameter handles the conversion from YAML\n", " # scalar values to Python the dictionary format\n", " definitions = yaml.load(file, Loader=yaml.FullLoader)\n", " \n", "features = definitions['features']\n", "spectators = definitions['spectators']\n", "labels = definitions['labels']\n", "\n", "nfeatures = definitions['nfeatures']\n", "nspectators = definitions['nspectators']\n", "nlabels = definitions['nlabels']\n", "ntracks = definitions['ntracks']" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Define discriminator, regression, and combined adversarial models\n", "The combined loss function is $$L = L_\\mathrm{class} - \\lambda L_\\mathrm{reg}$$\n", "\n", "- $L_\\mathrm{class}$ is the loss function for the classification part (categorical cross entropy)\n", "- $L_\\mathrm{reg}$ is the loss function for the adversarial part (in this case a regression)\n", "- $\\lambda$ is a hyperparamter that controls how important the adversarial part of the loss is compared to the classification part, which we nominally set to 1" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from tensorflow.keras.models import Model\n", "from tensorflow.keras.layers import Input, Dense, BatchNormalization, Concatenate, GlobalAveragePooling1D\n", "import tensorflow.keras.backend as K\n", "\n", "# define Deep Sets model with Dense Keras layer\n", "inputs = Input(shape=(ntracks, nfeatures,), name='input') \n", "x = BatchNormalization(name='bn_1')(inputs)\n", "x = Dense(64, name='dense_1', activation='relu')(x)\n", "x = Dense(32, name='dense_2', activation='relu')(x)\n", "x = Dense(32, name='dense_3', activation='relu')(x)\n", "# sum over tracks\n", "x = GlobalAveragePooling1D(name='pool_1')(x)\n", "x = Dense(100, name='dense_4', activation='relu')(x)\n", "output = Dense(nlabels, name = 'output', activation='softmax')(x)\n", " \n", "keras_model_disc = Model(inputs=inputs, outputs=output)\n", "keras_model_disc.compile(optimizer='adam',\n", " loss='categorical_crossentropy')\n", "\n", "# regressor\n", "x = Dense(100, name='dense_5', activation='relu')(keras_model_disc(inputs))\n", "x = Dense(100, name='dense_6', activation='relu')(x)\n", "output_reg = Dense(2, activation='linear', name='mass_pt_reg')(x)\n", " \n", "\n", "sgd_opt = keras.optimizers.SGD(momentum=0)\n", "keras_model_reg = Model(inputs=inputs, outputs=output_reg)\n", "keras_model_reg.compile(optimizer=sgd_opt,\n", " loss='mse')\n", "\n", "# combined model\n", "lam = 1\n", "keras_model_adv = Model(inputs=inputs, outputs=[keras_model_disc(inputs), keras_model_reg(inputs)])\n", "keras_model_adv.compile(optimizer=sgd_opt, \n", " loss=['categorical_crossentropy', 'mse'],\n", " loss_weights = [1, -lam]) \n", "\n", "print(keras_model_disc.summary())\n", "print(keras_model_reg.summary())\n", "print(keras_model_adv.summary())" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Load data" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from DataGenerator import DataGenerator\n", "# load training and validation generators \n", "train_files = ['root://eospublic.cern.ch//eos/opendata/cms/datascience/HiggsToBBNtupleProducerTool/HiggsToBBNTuple_HiggsToBB_QCD_RunII_13TeV_MC/train/ntuple_merged_10.root']\n", "val_files = ['root://eospublic.cern.ch//eos/opendata/cms/datascience/HiggsToBBNtupleProducerTool/HiggsToBBNTuple_HiggsToBB_QCD_RunII_13TeV_MC/train/ntuple_merged_11.root']\n", "\n", "\n", "train_generator = DataGenerator(train_files, features, labels, spectators, batch_size=1024, n_dim=ntracks, \n", " remove_mass_pt_window=False, \n", " remove_unlabeled=True, max_entry=5000,\n", " return_spectators=True, scale_mass_pt=[100., 10000.])\n", "\n", "val_generator = DataGenerator(val_files, features, labels, spectators, batch_size=1024, n_dim=ntracks, \n", " remove_mass_pt_window=False, \n", " remove_unlabeled=True, max_entry=5000, \n", " return_spectators=True, scale_mass_pt=[100., 10000.])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Pretrain discriminator and regressor models" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# pretrain discriminator\n", "keras_model_disc.trainable = True\n", "keras_model_disc.compile(optimizer='adam',\n", " loss='categorical_crossentropy')\n", "for n_epoch in tqdm(range(20)):\n", " for t in tqdm(train_generator, total=len(train_generator), leave=bool(n_epoch==19)):\n", " keras_model_disc.fit(t[0], t[1][0],verbose=0) \n", " \n", "# pretrain regressor\n", "keras_model_reg.trainable = True\n", "keras_model_disc.trainable = False\n", "keras_model_reg.compile(optimizer=sgd_opt, loss='mse')\n", "for n_epoch in tqdm(range(20)):\n", " for t in tqdm(train_generator, total=len(train_generator), leave=bool(n_epoch==19)):\n", " keras_model_reg.fit(t[0], t[1][1], verbose=0) " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Main training loop\n", "\n", "During the main training loop, we do two things:\n", "1. Train the discriminator model with the combined loss function $$L = L_\\mathrm{class} - \\lambda L_\\mathrm{reg}$$\n", "1. Train the regression model to learn the mass from with the standard MSE loss function $$L_\\mathrm{reg}$$" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# alternate training discriminator and regressor \n", "for n_epoch in tqdm(range(40)):\n", " for t in tqdm(train_generator, total=len(train_generator), leave=bool(n_epoch==39)):\n", " # train discriminator\n", " keras_model_reg.trainable = False\n", " keras_model_disc.trainable = True\n", " keras_model_adv.compile(optimizer=sgd_opt, \n", " loss=['categorical_crossentropy', 'mse'],\n", " loss_weights=[1, -lam]) \n", " keras_model_adv.fit(t[0], t[1], verbose=0)\n", "\n", " # train regressor\n", " keras_model_reg.trainable = True\n", " keras_model_disc.trainable = False\n", " keras_model_reg.compile(optimizer=sgd_opt, loss='mse')\n", " keras_model_reg.fit(t[0], t[1][1],verbose=0)\n", "keras_model_adv.save_weights('keras_model_adv_best.h5')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Test" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# load testing file\n", "test_files = ['root://eospublic.cern.ch//eos/opendata/cms/datascience/HiggsToBBNtupleProducerTool/HiggsToBBNTuple_HiggsToBB_QCD_RunII_13TeV_MC/test/ntuple_merged_0.root']\n", "test_generator = DataGenerator(test_files, features, labels, spectators, batch_size=8192, n_dim=ntracks, \n", " remove_mass_pt_window=True, \n", " remove_unlabeled=True,\n", " return_spectators=True,\n", " max_entry=200000) # basically, no maximum" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# run model inference on test data set\n", "predict_array_adv = []\n", "label_array_test = []\n", "spec_array_test = []\n", "\n", "for t in tqdm(test_generator, total=len(test_generator)):\n", " label_array_test.append(t[1][0])\n", " spec_array_test.append(t[1][1])\n", " predict_array_adv.append(keras_model_adv.predict(t[0])[0])\n", "predict_array_adv = np.concatenate(predict_array_adv, axis=0)\n", "label_array_test = np.concatenate(label_array_test, axis=0)\n", "spec_array_test = np.concatenate(spec_array_test, axis=0)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# create ROC curves\n", "print(label_array_test.shape)\n", "print(spec_array_test.shape)\n", "print(predict_array_adv.shape)\n", "fpr_adv, tpr_adv, threshold_adv = roc_curve(label_array_test[:,1], predict_array_adv[:,1])\n", " \n", "# plot ROC curves\n", "plt.figure()\n", "plt.plot(tpr_adv, fpr_adv, lw=2.5, label=\"Adversarial, AUC = {:.1f}%\".format(auc(fpr_adv,tpr_adv)*100))\n", "plt.xlabel(r'True positive rate')\n", "plt.ylabel(r'False positive rate')\n", "plt.semilogy()\n", "plt.ylim(0.001, 1)\n", "plt.xlim(0, 1)\n", "plt.grid(True)\n", "plt.legend(loc='upper left')\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from utils import find_nearest" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "plt.figure()\n", "for wp in [1.0, 0.5, 0.3, 0.1, 0.05]:\n", " idx, val = find_nearest(fpr_adv, wp)\n", " plt.hist(spec_array_test[:,0], bins=np.linspace(40, 200, 21), \n", " weights=label_array_test[:,0]*(predict_array_adv[:,1] > threshold_adv[idx]),\n", " alpha=0.4, density=True, label='QCD, {}% FPR cut'.format(int(wp*100)),linestyle='-')\n", "plt.legend()\n", "plt.xlabel(r'$m_{SD}$')\n", "plt.ylabel(r'Normalized probability')\n", "plt.xlim(40, 200)\n", "\n", "plt.figure()\n", "for wp in [1.0, 0.5, 0.3, 0.1, 0.05]:\n", " idx, val = find_nearest(fpr_adv, wp)\n", " plt.hist(spec_array_test[:,0], bins=np.linspace(40, 200, 21), \n", " weights=label_array_test[:,1]*(predict_array_adv[:,1] > threshold_adv[idx]),\n", " alpha=0.4, density=True, label='H(bb), {}% FPR cut'.format(int(wp*100)),linestyle='-')\n", "plt.legend()\n", "plt.xlabel(r'$m_{SD}$')\n", "plt.ylabel(r'Normalized probability')\n", "plt.xlim(40, 200)\n", "plt.show()\n", "\n", "plt.figure()\n", "plt.hist(predict_array_adv[:,1], bins = np.linspace(0, 1, 21), \n", " weights=label_array_test[:,1]*0.1,\n", " alpha=0.4, linestyle='-', label='H(bb)')\n", "plt.hist(predict_array_adv[:,1], bins = np.linspace(0, 1, 21), \n", " weights=label_array_test[:,0],\n", " alpha=0.4, linestyle='-', label='QCD')\n", "plt.legend()\n", "plt.show()\n", "\n", "\n", "plt.figure()\n", "plt.hist(spec_array_test[:,0], bins = np.linspace(40, 200, 21), \n", " weights = label_array_test[:,1]*0.1,\n", " alpha=0.4, linestyle='-', label='H(bb)')\n", "plt.hist(spec_array_test[:,0], bins = np.linspace(40, 200, 21), \n", " weights = label_array_test[:,0],\n", " alpha=0.4, linestyle='-', label='QCD')\n", "plt.legend()\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.4" } }, "nbformat": 4, "nbformat_minor": 2 }