Jet Images
Contents
Jet Images#
Now, we’ll look at a deep learning model based on jet images.
import tensorflow.keras as keras
import numpy as np
from sklearn.metrics import roc_curve, auc
import matplotlib.pyplot as plt
import uproot
import utils
2022-11-08 17:40:48.519083: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-11-08 17:40:48.665878: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /opt/hostedtoolcache/Python/3.7.15/x64/lib
2022-11-08 17:40:48.665902: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2022-11-08 17:40:48.698790: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2022-11-08 17:40:49.521305: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /opt/hostedtoolcache/Python/3.7.15/x64/lib
2022-11-08 17:40:49.521405: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /opt/hostedtoolcache/Python/3.7.15/x64/lib
2022-11-08 17:40:49.521414: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.
import yaml
# Read the dataset definitions (variable-name lists and their counts).
with open("definitions_image.yml") as config_stream:
    # FullLoader converts the YAML document into plain Python objects;
    # the top level is used below as a dictionary.
    definitions = yaml.load(config_stream, Loader=yaml.FullLoader)

# Names of the input features, spectator variables and class labels.
features, spectators, labels = (
    definitions[section] for section in ("features", "spectators", "labels")
)
# Matching counts, as stored in the same file.
nfeatures, nspectators, nlabels = (
    definitions[section] for section in ("nfeatures", "nspectators", "nlabels")
)
Jet Images#
Let’s construct jet images [1], which are 2D image-based representations of the spatial energy spread of jets. Each pixel’s intensity is given by the sum of the transverse momentum of the particles located in that pixel.
# Training ntuple, addressed with a root:// URL on eospublic.cern.ch.
train_file = "root://eospublic.cern.ch//eos/opendata/cms/datascience/HiggsToBBNtupleProducerTool/HiggsToBBNTuple_HiggsToBB_QCD_RunII_13TeV_MC/train/ntuple_merged_10.root"

# Load features, one-hot labels and spectators.
# NOTE(review): entry_stop presumably caps the number of entries read - confirm in utils.
feature_array, y, spec_array = utils.get_features_labels(
    train_file,
    features,
    spectators,
    labels,
    entry_stop=10000,
    remove_mass_pt_window=False,
)

# Turn the per-jet features into jet images.
X = utils.make_image(feature_array)

# The image is a 4D tensor: (n_samples, n_pixels_x, n_pixels_y, n_channels).
print(X.shape)
Plugin No such file or directory loading sec.protocol libXrdSeckrb5-5.so
(9375, 224, 224, 1)
from matplotlib.colors import LogNorm
# Draw the class-averaged jet image for each class.
# Label column 1 selects H(bb) jets and column 0 selects QCD jets.
for plot_title, class_column in (("Average H(bb) jet", 1), ("Average QCD jet", 0)):
    in_class = y[:, class_column] == 1
    # Average over the sample axis, leaving one mean image for the class.
    mean_image = np.mean(X[in_class], axis=0)

    plt.figure()
    plt.title(plot_title)
    # Logarithmic color scale.
    plt.imshow(mean_image, origin="lower", norm=LogNorm())
    plt.colorbar()
    plt.xlabel(r"$\Delta\eta$ cell", fontsize=15)
    plt.ylabel(r"$\Delta\phi$ cell", fontsize=15)
    plt.show()
2D Convolutional Neural Network Classifier#
from tensorflow.keras.models import Model
from tensorflow.keras.layers import (
Input,
Dense,
BatchNormalization,
Conv2D,
Flatten,
MaxPooling2D,
Activation,
)
import tensorflow.keras.backend as K
# Define a 2D convolutional Keras classifier:
#   input batch norm
#   -> 3 x (Conv2D -> MaxPooling2D -> BatchNormalization -> ReLU)
#   -> Flatten -> Dense(256, relu) -> Dense(nlabels, softmax)
# The input shape is hard-coded and must match the jet images, X.shape[1:].
inputs = Input(shape=(224, 224, 1), name="input")
x = BatchNormalization(name="bn_1")(inputs)

# Stage 1: 64 filters; the 2x2 pooling halves each spatial dimension.
x = Conv2D(64, (3, 3), padding="same", name="conv2d_1")(x)
x = MaxPooling2D(2, 2)(x)
x = BatchNormalization(name="bn_2")(x)
x = Activation("relu")(x)

# Stage 2: 32 filters.
x = Conv2D(32, (3, 3), padding="same", name="conv2d_2")(x)
x = MaxPooling2D(2, 2)(x)
x = BatchNormalization(name="bn_3")(x)
x = Activation("relu")(x)

# Stage 3: 32 filters.
x = Conv2D(32, (3, 3), padding="same", name="conv2d_3")(x)
x = MaxPooling2D(2, 2)(x)
x = BatchNormalization(name="bn_4")(x)
x = Activation("relu")(x)

# Classification head: one softmax output per label.
x = Flatten(name="flatten_1")(x)
x = Dense(256, name="dense_1", activation="relu")(x)
outputs = Dense(nlabels, name="output", activation="softmax")(x)

keras_model_conv2d = Model(inputs=inputs, outputs=outputs)
keras_model_conv2d.compile(
    optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"]
)

# summary() prints the table itself and returns None, so it must not be
# wrapped in print() (that only appends a stray "None" to the output).
keras_model_conv2d.summary()
2022-11-08 17:41:33.641450: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /opt/hostedtoolcache/Python/3.7.15/x64/lib
2022-11-08 17:41:33.645636: W tensorflow/stream_executor/cuda/cuda_driver.cc:263] failed call to cuInit: UNKNOWN ERROR (303)
2022-11-08 17:41:33.649052: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (fv-az196-389): /proc/driver/nvidia/version does not exist
2022-11-08 17:41:33.671036: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
Model: "model"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input (InputLayer) [(None, 224, 224, 1)] 0
bn_1 (BatchNormalization) (None, 224, 224, 1) 4
conv2d_1 (Conv2D) (None, 224, 224, 64) 640
max_pooling2d (MaxPooling2D (None, 112, 112, 64) 0
)
bn_2 (BatchNormalization) (None, 112, 112, 64) 256
activation (Activation) (None, 112, 112, 64) 0
conv2d_2 (Conv2D) (None, 112, 112, 32) 18464
max_pooling2d_1 (MaxPooling (None, 56, 56, 32) 0
2D)
bn_3 (BatchNormalization) (None, 56, 56, 32) 128
activation_1 (Activation) (None, 56, 56, 32) 0
conv2d_3 (Conv2D) (None, 56, 56, 32) 9248
max_pooling2d_2 (MaxPooling (None, 28, 28, 32) 0
2D)
bn_4 (BatchNormalization) (None, 28, 28, 32) 128
activation_2 (Activation) (None, 28, 28, 32) 0
flatten_1 (Flatten) (None, 25088) 0
dense_1 (Dense) (None, 256) 6422784
output (Dense) (None, 2) 514
=================================================================
Total params: 6,452,166
Trainable params: 6,451,908
Non-trainable params: 258
_________________________________________________________________
None
# define callbacks
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
# Stop training once val_loss has not improved for 5 consecutive epochs.
early_stopping = EarlyStopping(monitor="val_loss", patience=5)

# Halve the learning rate when val_loss stalls. Its patience has to be
# shorter than the early-stopping patience: with equal values the rate is
# first lowered at about the same epoch training stops, so it has no effect.
reduce_lr = ReduceLROnPlateau(monitor="val_loss", patience=2, factor=0.5)

# Keep only the weights of the epoch with the lowest val_loss on disk.
model_checkpoint = ModelCheckpoint(
    "keras_model_conv2d_best.h5", monitor="val_loss", save_best_only=True
)
callbacks = [early_stopping, model_checkpoint, reduce_lr]

# Fit the model; validation_split=0.2 holds out 20% of the samples for validation.
history_conv2d = keras_model_conv2d.fit(
    X, y, validation_split=0.2, epochs=20, shuffle=True, callbacks=callbacks, verbose=0
)

# Reload the best weights saved by the checkpoint callback (the final epoch
# is not necessarily the best one).
keras_model_conv2d.load_weights("keras_model_conv2d_best.h5")
2022-11-08 17:41:49.011427: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 1505280000 exceeds 10% of free system memory.
2022-11-08 17:41:53.682555: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 411041792 exceeds 10% of free system memory.
2022-11-08 17:41:55.602288: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 411041792 exceeds 10% of free system memory.
2022-11-08 17:41:57.451003: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 411041792 exceeds 10% of free system memory.
2022-11-08 17:41:59.254947: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 411041792 exceeds 10% of free system memory.
---------------------------------------------------------------------------
KeyboardInterrupt Traceback (most recent call last)
/tmp/ipykernel_5000/313964328.py in <module>
11 # fit keras model
12 history_conv2d = keras_model_conv2d.fit(
---> 13 X, y, validation_split=0.2, epochs=20, shuffle=True, callbacks=callbacks, verbose=0
14 )
15 # reload best weights
/opt/hostedtoolcache/Python/3.7.15/x64/lib/python3.7/site-packages/keras/utils/traceback_utils.py in error_handler(*args, **kwargs)
63 filtered_tb = None
64 try:
---> 65 return fn(*args, **kwargs)
66 except Exception as e:
67 filtered_tb = _process_traceback_frames(e.__traceback__)
/opt/hostedtoolcache/Python/3.7.15/x64/lib/python3.7/site-packages/keras/engine/training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_batch_size, validation_freq, max_queue_size, workers, use_multiprocessing)
1562 ):
1563 callbacks.on_train_batch_begin(step)
-> 1564 tmp_logs = self.train_function(iterator)
1565 if data_handler.should_sync:
1566 context.async_wait()
/opt/hostedtoolcache/Python/3.7.15/x64/lib/python3.7/site-packages/tensorflow/python/util/traceback_utils.py in error_handler(*args, **kwargs)
148 filtered_tb = None
149 try:
--> 150 return fn(*args, **kwargs)
151 except Exception as e:
152 filtered_tb = _process_traceback_frames(e.__traceback__)
/opt/hostedtoolcache/Python/3.7.15/x64/lib/python3.7/site-packages/tensorflow/python/eager/def_function.py in __call__(self, *args, **kwds)
913
914 with OptionalXlaContext(self._jit_compile):
--> 915 result = self._call(*args, **kwds)
916
917 new_tracing_count = self.experimental_get_tracing_count()
/opt/hostedtoolcache/Python/3.7.15/x64/lib/python3.7/site-packages/tensorflow/python/eager/def_function.py in _call(self, *args, **kwds)
945 # In this case we have created variables on the first call, so we run the
946 # defunned version which is guaranteed to never create variables.
--> 947 return self._stateless_fn(*args, **kwds) # pylint: disable=not-callable
948 elif self._stateful_fn is not None:
949 # Release the lock early so that multiple threads can perform the call
/opt/hostedtoolcache/Python/3.7.15/x64/lib/python3.7/site-packages/tensorflow/python/eager/function.py in __call__(self, *args, **kwargs)
2495 filtered_flat_args) = self._maybe_define_function(args, kwargs)
2496 return graph_function._call_flat(
-> 2497 filtered_flat_args, captured_inputs=graph_function.captured_inputs) # pylint: disable=protected-access
2498
2499 @property
/opt/hostedtoolcache/Python/3.7.15/x64/lib/python3.7/site-packages/tensorflow/python/eager/function.py in _call_flat(self, args, captured_inputs, cancellation_manager)
1861 # No tape is watching; skip to running the function.
1862 return self._build_call_outputs(self._inference_function.call(
-> 1863 ctx, args, cancellation_manager=cancellation_manager))
1864 forward_backward = self._select_forward_and_backward_functions(
1865 args,
/opt/hostedtoolcache/Python/3.7.15/x64/lib/python3.7/site-packages/tensorflow/python/eager/function.py in call(self, ctx, args, cancellation_manager)
502 inputs=args,
503 attrs=attrs,
--> 504 ctx=ctx)
505 else:
506 outputs = execute.execute_with_cancellation(
/opt/hostedtoolcache/Python/3.7.15/x64/lib/python3.7/site-packages/tensorflow/python/eager/execute.py in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name)
53 ctx.ensure_initialized()
54 tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,
---> 55 inputs, attrs, num_outputs)
56 except core._NotOkStatusException as e:
57 if name is not None:
KeyboardInterrupt:
# Plot training and validation loss against the epoch number.
plt.figure()
for curve_label, history_key in (("Loss", "loss"), ("Val. loss", "val_loss")):
    plt.plot(history_conv2d.history[history_key], label=curve_label)
plt.xlabel("Epoch")
plt.legend()
plt.show()
# Testing ntuple, addressed with a root:// URL on eospublic.cern.ch.
test_file = "root://eospublic.cern.ch//eos/opendata/cms/datascience/HiggsToBBNtupleProducerTool/HiggsToBBNTuple_HiggsToBB_QCD_RunII_13TeV_MC/test/ntuple_merged_0.root"

# Load the test features, labels and spectators (no entry_stop is passed here).
feature_array_test, label_array_test, spec_array_test = utils.get_features_labels(
    test_file,
    features,
    spectators,
    labels,
    remove_mass_pt_window=False,
)

# Build the test images the same way as the training images.
X_test = utils.make_image(feature_array_test)

# Run model inference on the test images.
predict_array_cnn2d = keras_model_conv2d.predict(X_test)
# ROC curve built from column 1 of the labels and of the predictions
# (the column used for H(bb) jets in the plots above).
signal_truth = label_array_test[:, 1]
signal_score = predict_array_cnn2d[:, 1]
fpr_cnn2d, tpr_cnn2d, threshold_cnn2d = roc_curve(signal_truth, signal_score)

# Area under the ROC curve, shown as a percentage in the legend.
auc_percent = auc(fpr_cnn2d, tpr_cnn2d) * 100
curve_label = "Conv2D, AUC = {:.1f}%".format(auc_percent)

# Plot the ROC curve with the true positive rate on the x axis and the false
# positive rate on a logarithmic y axis.
plt.figure()
plt.plot(tpr_cnn2d, fpr_cnn2d, lw=2.5, label=curve_label)
plt.xlabel("True positive rate")
plt.ylabel("False positive rate")
plt.semilogy()
plt.ylim(0.001, 1)
plt.xlim(0, 1)
plt.grid(True)
plt.legend(loc="upper left")
plt.show()