{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Hands-on 08: Model Compression" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "! pip install --user --quiet tensorflow-model-optimization" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from tensorflow.keras.utils import to_categorical\n", "from sklearn.datasets import fetch_openml\n", "from sklearn.model_selection import train_test_split\n", "from sklearn.preprocessing import LabelEncoder, StandardScaler\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "\n", "%matplotlib inline\n", "seed = 42\n", "np.random.seed(seed)\n", "import tensorflow as tf\n", "\n", "tf.random.set_seed(seed)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Fetch the jet tagging dataset from Open ML" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "data = fetch_openml(\"hls4ml_lhc_jets_hlf\", parser=\"auto\")\n", "X, y = data[\"data\"], data[\"target\"]\n", "\n", "le = LabelEncoder()\n", "y_onehot = le.fit_transform(y)\n", "y_onehot = to_categorical(y_onehot, 5)\n", "classes = le.classes_\n", "\n", "X_train_val, X_test, y_train_val, y_test = train_test_split(X, y_onehot, test_size=0.2, random_state=42)\n", "\n", "\n", "scaler = StandardScaler()\n", "X_train_val = scaler.fit_transform(X_train_val)\n", "X_test = scaler.transform(X_test)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Now construct a model\n", "We'll use the same architecture as in part 1: 3 hidden layers with 64, then 32, then 32 neurons. Each layer will use `relu` activation.\n", "Add an output layer with 5 neurons (one for each class), then finish with Softmax activation." 
] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from tensorflow.keras.models import Sequential\n", "from tensorflow.keras.layers import Dense, Activation, BatchNormalization\n", "from tensorflow.keras.optimizers import Adam\n", "from tensorflow.keras.regularizers import l1\n", "from callbacks import all_callbacks" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "model = Sequential()\n", "model.add(Dense(64, input_shape=(16,), name=\"fc1\", kernel_initializer=\"lecun_uniform\"))\n", "model.add(Activation(activation=\"relu\", name=\"relu1\"))\n", "model.add(Dense(32, name=\"fc2\", kernel_initializer=\"lecun_uniform\"))\n", "model.add(Activation(activation=\"relu\", name=\"relu2\"))\n", "model.add(Dense(32, name=\"fc3\", kernel_initializer=\"lecun_uniform\"))\n", "model.add(Activation(activation=\"relu\", name=\"relu3\"))\n", "model.add(Dense(5, name=\"output\", kernel_initializer=\"lecun_uniform\"))\n", "model.add(Activation(activation=\"softmax\", name=\"softmax\"))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Train the unpruned model" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "adam = Adam(learning_rate=0.0001)\n", "model.compile(optimizer=adam, loss=[\"categorical_crossentropy\"], metrics=[\"accuracy\"])\n", "callbacks = all_callbacks(\n", " stop_patience=1000,\n", " lr_factor=0.5,\n", " lr_patience=10,\n", " lr_epsilon=0.000001,\n", " lr_cooldown=2,\n", " lr_minimum=0.0000001,\n", " outputDir=\"unpruned_model\",\n", ")\n", "model.fit(\n", " X_train_val, y_train_val, batch_size=1024, epochs=30, validation_split=0.25, shuffle=True, callbacks=callbacks.callbacks\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Train the pruned model\n", "This time we'll use the Tensorflow model optimization sparsity to train a sparse model (forcing many weights to '0'). 
] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from tensorflow_model_optimization.python.core.sparsity.keras import prune, pruning_callbacks, pruning_schedule\n", "from tensorflow_model_optimization.sparsity.keras import strip_pruning\n", "\n", "pruned_model = Sequential()\n", "pruned_model.add(Dense(64, input_shape=(16,), name=\"fc1\", kernel_initializer=\"lecun_uniform\", kernel_regularizer=l1(0.0001)))\n", "pruned_model.add(Activation(activation=\"relu\", name=\"relu1\"))\n", "pruned_model.add(Dense(32, name=\"fc2\", kernel_initializer=\"lecun_uniform\", kernel_regularizer=l1(0.0001)))\n", "pruned_model.add(Activation(activation=\"relu\", name=\"relu2\"))\n", "pruned_model.add(Dense(32, name=\"fc3\", kernel_initializer=\"lecun_uniform\", kernel_regularizer=l1(0.0001)))\n", "pruned_model.add(Activation(activation=\"relu\", name=\"relu3\"))\n", "pruned_model.add(Dense(5, name=\"output\", kernel_initializer=\"lecun_uniform\", kernel_regularizer=l1(0.0001)))\n", "pruned_model.add(Activation(activation=\"softmax\", name=\"softmax\"))\n", "\n", "pruning_params = {\"pruning_schedule\": pruning_schedule.ConstantSparsity(0.75, begin_step=2000, frequency=100)}\n", "pruned_model = prune.prune_low_magnitude(pruned_model, **pruning_params)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We'll use the same settings as before: Adam optimizer with categorical crossentropy loss.\n", "The callbacks will decay the learning rate and save the model into the directory `pruned_model`." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "adam = Adam(learning_rate=0.0001)\n", "pruned_model.compile(optimizer=adam, loss=[\"categorical_crossentropy\"], metrics=[\"accuracy\"])\n", "callbacks = all_callbacks(\n", "    stop_patience=1000,\n", "    lr_factor=0.5,\n", "    lr_patience=10,\n", "    lr_epsilon=0.000001,\n", "    lr_cooldown=2,\n", "    lr_minimum=0.0000001,\n", "    outputDir=\"pruned_model\",\n", ")\n", "callbacks.callbacks.append(pruning_callbacks.UpdatePruningStep())\n", "pruned_model.fit(\n", "    X_train_val,\n", "    y_train_val,\n", "    batch_size=1024,\n", "    epochs=30,\n", "    validation_split=0.25,\n", "    shuffle=True,\n", "    callbacks=callbacks.callbacks,\n", "    verbose=0,\n", ")\n", "# Save the model again, but with the pruning wrappers stripped so it uses the regular layer types\n", "pruned_model = strip_pruning(pruned_model)\n", "pruned_model.save(\"pruned_model/model_best.h5\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Check sparsity\n", "Let's check that the model was indeed trained sparse. We'll first print a per-layer summary of the zero-weight fractions (the sketch below), then histogram the weights of the first layer, where we should observe a large peak in the bin containing 0. Note the logarithmic y-axis."
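] }, { "cell_type": "markdown", "metadata": {}, "source": [ "A minimal per-layer sketch, assuming only the `pruned_model` trained above: `Activation` layers carry no weights, so only the `Dense` kernels are reported." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Fraction of exactly-zero weights in every Dense kernel of the pruned model\n", "for layer in pruned_model.layers:\n", "    weights = layer.get_weights()\n", "    if len(weights) > 0:  # Dense layers store [kernel, bias]; Activation layers store nothing\n", "        kernel = weights[0]\n", "        print(f\"{layer.name}: {np.mean(kernel == 0) * 100:.1f}% zeros\")"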
] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "bins = np.arange(-2, 2, 0.04)\n", "w_unpruned = model.layers[0].weights[0].numpy().flatten()\n", "w_pruned = pruned_model.layers[0].weights[0].numpy().flatten()\n", "\n", "plt.figure(figsize=(7, 7))\n", "\n", "plt.hist(w_unpruned, bins=bins, alpha=0.7, label=\"Unpruned layer 1\")\n", "plt.hist(w_pruned, bins=bins, alpha=0.7, label=\"Pruned layer 1\")\n", "\n", "plt.xlabel(\"Weight value\")\n", "plt.ylabel(\"Number of weights\")\n", "plt.semilogy()\n", "plt.legend()\n", "\n", "print(f\"Sparsity of unpruned model layer 1: {np.sum(w_unpruned==0)*100/np.size(w_unpruned)}% zeros\")\n", "print(f\"Sparsity of pruned model layer 1: {np.sum(w_pruned==0)*100/np.size(w_pruned)}% zeros\")\n", "plt.show()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Check performance\n", "How does this 75% sparse model compare against the unpruned model? Let's report the accuracy and make a ROC curve. The pruned model is shown with solid lines, the unpruned model is shown with dashed lines." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import plotting\n", "import matplotlib.pyplot as plt\n", "from sklearn.metrics import accuracy_score\n", "from tensorflow.keras.models import load_model\n", "\n", "unpruned_model = load_model(\"unpruned_model/model_best.h5\")\n", "\n", "y_ref = unpruned_model.predict(X_test, verbose=0)\n", "y_prune = pruned_model.predict(X_test, verbose=0)\n", "\n", "print(\"Accuracy unpruned: {}\".format(accuracy_score(np.argmax(y_test, axis=1), np.argmax(y_ref, axis=1))))\n", "print(\"Accuracy pruned: {}\".format(accuracy_score(np.argmax(y_test, axis=1), np.argmax(y_prune, axis=1))))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "fig, ax = plt.subplots(figsize=(9, 9))\n", "_ = plotting.make_roc(y_test, y_ref, classes)\n", "plt.gca().set_prop_cycle(None) # reset the colors\n", "_ = plotting.make_roc(y_test, y_prune, classes, linestyle=\"--\")\n", "\n", "from matplotlib.lines import Line2D\n", "\n", "lines = [Line2D([0], [0], ls=\"-\"), Line2D([0], [0], ls=\"--\")]\n", "from matplotlib.legend import Legend\n", "\n", "leg = Legend(ax, lines, labels=[\"Unpruned\", \"Pruned\"], loc=\"lower right\", frameon=False)\n", "ax.add_artist(leg)\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.5" } }, "nbformat": 4, "nbformat_minor": 4 }