diff --git a/Vagrantfile b/Vagrantfile
new file mode 100644
index 0000000..3022eb6
--- /dev/null
+++ b/Vagrantfile
@@ -0,0 +1,70 @@
+# -*- mode: ruby -*-
+# vi: set ft=ruby :
+
+# All Vagrant configuration is done below. The "2" in Vagrant.configure
+# sets the configuration version (we support older styles for
+# backwards compatibility). Please don't change it unless you know what
+# you're doing.
+Vagrant.configure("2") do |config|
+  # The most common configuration options are documented and commented below.
+  # For a complete reference, please see the online documentation at
+  # https://docs.vagrantup.com.
+
+  # Every Vagrant development environment requires a box. You can search for
+  # boxes at https://vagrantcloud.com/search.
+  config.vm.box = "arrikto/minikf"
+
+  # Disable automatic box update checking. If you disable this, then
+  # boxes will only be checked for updates when the user runs
+  # `vagrant box outdated`. This is not recommended.
+  # config.vm.box_check_update = false
+
+  # Create a forwarded port mapping which allows access to a specific port
+  # within the machine from a port on the host machine. In the example below,
+  # accessing "localhost:8080" will access port 80 on the guest machine.
+  # NOTE: This will enable public access to the opened port
+  # config.vm.network "forwarded_port", guest: 80, host: 8080
+
+  # Create a forwarded port mapping which allows access to a specific port
+  # within the machine from a port on the host machine and only allow access
+  # via 127.0.0.1 to disable public access
+  # config.vm.network "forwarded_port", guest: 80, host: 8080, host_ip: "127.0.0.1"
+
+  # Create a private network, which allows host-only access to the machine
+  # using a specific IP.
+  # config.vm.network "private_network", ip: "192.168.33.10"
+
+  # Create a public network, which generally matches bridged networking.
+  # Bridged networks make the machine appear as another physical device on
+  # your network.
+  # config.vm.network "public_network"
+
+  # Share an additional folder with the guest VM. The first argument is
+  # the path on the host to the actual folder. The second argument is
+  # the path on the guest to mount the folder. And the optional third
+  # argument is a set of non-required options.
+  # config.vm.synced_folder "../data", "/vagrant_data"
+
+  # Provider-specific configuration so you can fine-tune various
+  # backing providers for Vagrant. These expose provider-specific options.
+  # Example for VirtualBox:
+  #
+  # config.vm.provider "virtualbox" do |vb|
+  #   # Display the VirtualBox GUI when booting the machine
+  #   vb.gui = true
+  #
+  #   # Customize the amount of memory on the VM:
+  #   vb.memory = "1024"
+  # end
+  #
+  # View the documentation for the provider you are using for more
+  # information on available options.
+
+  # Enable provisioning with a shell script. Additional provisioners such as
+  # Ansible, Chef, Docker, Puppet and Salt are also available. Please see the
+  # documentation for more information about their specific syntax and use.
+  # config.vm.provision "shell", inline: <<-SHELL
+  #   apt-get update
+  #   apt-get install -y apache2
+  # SHELL
+end
diff --git a/kubeflow/config.yaml b/kubeflow/config.yaml
new file mode 100644
index 0000000..d4ca50b
--- /dev/null
+++ b/kubeflow/config.yaml
@@ -0,0 +1,245 @@
+# This is the config to install Kubeflow on an existing k8s cluster.
+# If the cluster already has Istio, comment out the Istio install part below.
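+#
+# Typical usage (a sketch; assumes a kfctl release that understands this
+# v1alpha1 KfDef format, e.g. the v0.6.x series, and a cluster reachable
+# via kubectl):
+#
+#   kfctl init ${KFAPP} --config=kubeflow/config.yaml
+#   cd ${KFAPP}
+#   kfctl generate all -V
+#   kfctl apply all -V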
+
+apiVersion: kfdef.apps.kubeflow.org/v1alpha1
+kind: KfDef
+metadata:
+  name: kubeflow_app
+  namespace: kubeflow
+spec:
+  repos:
+  - name: manifests
+    uri: https://github.com/kubeflow/manifests/archive/56e2fb15db286198f7a53723cb1fbfecf3fe83fb.tar.gz
+  - name: kubeflow
+    uri: https://github.com/kubeflow/kubeflow/archive/0dbd2550372c003ba69069aeee283bd59fb1341f.tar.gz
+  applications:
+  # Istio install. If not needed, comment out istio-crds and istio-install.
+  - kustomizeConfig:
+      parameters:
+      - name: namespace
+        value: istio-system
+      repoRef:
+        name: manifests
+        path: istio/istio-crds
+    name: istio-crds
+  - kustomizeConfig:
+      parameters:
+      - name: namespace
+        value: istio-system
+      repoRef:
+        name: manifests
+        path: istio/istio-install
+    name: istio-install
+  # This component provides the Istio resources Kubeflow itself needs
+  # (e.g. the gateway); it does not install Istio.
+  - kustomizeConfig:
+      parameters:
+      - name: clusterRbacConfig
+        value: "OFF"
+      repoRef:
+        name: manifests
+        path: istio/istio
+    name: istio
+  - kustomizeConfig:
+      repoRef:
+        name: manifests
+        path: application/application-crds
+    name: application-crds
+  - kustomizeConfig:
+      overlays:
+      - application
+      repoRef:
+        name: manifests
+        path: application/application
+    name: application
+  - kustomizeConfig:
+      repoRef:
+        name: manifests
+        path: metacontroller
+    name: metacontroller
+  - kustomizeConfig:
+      overlays:
+      - istio
+      repoRef:
+        name: manifests
+        path: argo
+    name: argo
+  - kustomizeConfig:
+      overlays:
+      - istio
+      repoRef:
+        name: manifests
+        path: common/centraldashboard
+    name: centraldashboard
+  - kustomizeConfig:
+      repoRef:
+        name: manifests
+        path: admission-webhook/bootstrap
+    name: bootstrap
+  - kustomizeConfig:
+      repoRef:
+        name: manifests
+        path: admission-webhook/webhook
+    name: webhook
+  - kustomizeConfig:
+      overlays:
+      - istio
+      - application
+      repoRef:
+        name: manifests
+        path: jupyter/jupyter-web-app
+    name: jupyter-web-app
+  - kustomizeConfig:
+      repoRef:
+        name: manifests
+        path: katib-v1alpha2/katib-db
+    name: katib-db
+  - kustomizeConfig:
+      repoRef:
+        name: manifests
+        path: katib-v1alpha2/katib-manager
+    name: katib-manager
+  - kustomizeConfig:
+      repoRef:
+        name: manifests
+        path: katib-v1alpha2/katib-controller
+    name: katib-controller
+  - kustomizeConfig:
+      overlays:
+      - istio
+      repoRef:
+        name: manifests
+        path: katib-v1alpha2/katib-ui
+    name: katib-ui  # Issue: https://github.com/kubeflow/manifests/issues/151
+  - kustomizeConfig:
+      overlays:
+      - istio
+      repoRef:
+        name: manifests
+        path: metadata
+    name: metadata
+  - kustomizeConfig:
+      repoRef:
+        name: manifests
+        path: katib-v1alpha2/metrics-collector
+    name: metrics-collector
+  - kustomizeConfig:
+      repoRef:
+        name: manifests
+        path: katib-v1alpha2/suggestion
+    name: suggestion
+  - kustomizeConfig:
+      overlays:
+      - istio
+      - application
+      repoRef:
+        name: manifests
+        path: jupyter/notebook-controller
+    name: notebook-controller
+  - kustomizeConfig:
+      repoRef:
+        name: manifests
+        path: pytorch-job/pytorch-job-crds
+    name: pytorch-job-crds
+  - kustomizeConfig:
+      repoRef:
+        name: manifests
+        path: pytorch-job/pytorch-operator
+    name: pytorch-operator
+  - kustomizeConfig:
+      parameters:
+      - initRequired: true
+        name: usageId
+        value:
+      - initRequired: true
+        name: reportUsage
+        value: "true"
+      repoRef:
+        name: manifests
+        path: common/spartakus
+    name: spartakus
+  - kustomizeConfig:
+      overlays:
+      - istio
+      repoRef:
+        name: manifests
+        path: tensorboard
+    name: tensorboard
+  - kustomizeConfig:
+      overlays:
+      - istio
+      repoRef:
+        name: manifests
+        path: 
tf-training/tf-job-operator + name: tf-job-operator + - kustomizeConfig: + repoRef: + name: manifests + path: pipeline/api-service + name: api-service + - kustomizeConfig: + parameters: + - name: minioPvcName + value: minio-pv-claim + repoRef: + name: manifests + path: pipeline/minio + name: minio + - kustomizeConfig: + parameters: + - name: mysqlPvcName + value: mysql-pv-claim + repoRef: + name: manifests + path: pipeline/mysql + name: mysql + - kustomizeConfig: + repoRef: + name: manifests + path: pipeline/persistent-agent + name: persistent-agent + - kustomizeConfig: + repoRef: + name: manifests + path: pipeline/pipelines-runner + name: pipelines-runner + - kustomizeConfig: + overlays: + - istio + repoRef: + name: manifests + path: pipeline/pipelines-ui + name: pipelines-ui + - kustomizeConfig: + repoRef: + name: manifests + path: pipeline/pipelines-viewer + name: pipelines-viewer + - kustomizeConfig: + repoRef: + name: manifests + path: pipeline/scheduledworkflow + name: scheduledworkflow + - kustomizeConfig: + overlays: + - istio + parameters: + - initRequired: true + name: admin + value: johnDoe@acme.com + repoRef: + name: manifests + path: profiles + name: profiles + - kustomizeConfig: + overlays: + - application + repoRef: + name: manifests + path: seldon/seldon-core-operator + name: seldon-core-operator + enableApplications: true + packageManager: kustomize + skipInitProject: true + useBasicAuth: false + useIstio: true + # version: SET_VERSION diff --git a/notebooks/vqt_qmhl.ipynb b/notebooks/vqt_qmhl.ipynb new file mode 100644 index 0000000..bc2ec66 --- /dev/null +++ b/notebooks/vqt_qmhl.ipynb @@ -0,0 +1,1489 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "TFQ_Example_VQT.ipynb", + "provenance": [], + "collapsed_sections": [], + "toc_visible": true + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "xeNLEVTAzqUY", + "colab_type": "text" + }, + "source": [ + "##### Copyright 2020 The TensorFlow Quantum Authors." + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "jlv9gRg5zmNn", + "colab_type": "code", + "colab": {} + }, + "source": [ + "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# https://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License." + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "45-jw_FfvFo4", + "colab_type": "text" + }, + "source": [ + "# VQT in TFQ" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-BRvsd9_DiCx", + "colab_type": "text" + }, + "source": [ + "Author : Antonio J. 
Martinez\n",
+ "\n",
+ "Contributors : Guillaume Verdon\n",
+ "\n",
+ "Created : 2020-Feb-06\n",
+ "\n",
+ "Last updated : 2020-Mar-06"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "YMDX5gWDExOT",
+ "colab_type": "text"
+ },
+ "source": [
+ "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/tensorflow/quantum/blob/research/vqt_qmhl/vqt_qmhl.ipynb)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "NOcn7GMuvItv",
+ "colab_type": "text"
+ },
+ "source": [
+ "In this notebook, you will explore the combination of quantum computing and classical energy-based models with TensorFlow Quantum. The system under study is the [2D Heisenberg model](https://en.wikipedia.org/wiki/Heisenberg_model_(quantum)). You will apply the Variational Quantum Thermalizer (VQT) to produce approximate thermal states of this model. VQT was first proposed in [this paper](https://arxiv.org/abs/1910.02071)."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "ADO-msNe4nFL",
+ "colab_type": "text"
+ },
+ "source": [
+ "## Install and import dependencies"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "JuXxC5fbaGAS",
+ "colab_type": "code",
+ "colab": {}
+ },
+ "source": [
+ "!pip install --upgrade tensorflow==2.1.0"
+ ],
+ "execution_count": 0,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "fyrqkto1aHQV",
+ "colab_type": "code",
+ "colab": {}
+ },
+ "source": [
+ "!pip install tensorflow-quantum"
+ ],
+ "execution_count": 0,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "6tjpeUc-_jGr",
+ "colab_type": "code",
+ "colab": {}
+ },
+ "source": [
+ "%%capture\n",
+ "import cirq\n",
+ "import itertools\n",
+ "import numpy as np\n",
+ "import random\n",
+ "from scipy import linalg\n",
+ "import seaborn\n",
+ "import sympy\n",
+ "import tensorflow as tf\n",
+ "import tensorflow_probability as tfp\n",
+ "import tensorflow_quantum as tfq\n",
+ "\n",
+ "# visualization tools\n",
+ "%matplotlib inline\n",
+ "import matplotlib.pyplot as plt\n",
+ "from cirq.contrib.svg import SVGCircuit"
+ ],
+ "execution_count": 0,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "36wDb-D64-Bf",
+ "colab_type": "text"
+ },
+ "source": [
+ "## 2D Heisenberg model"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "Kj1k345gV6vC",
+ "colab_type": "text"
+ },
+ "source": [
+ "### Hamiltonian definition"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "P7mzjpQr__nH",
+ "colab_type": "text"
+ },
+ "source": [
+ "This Hamiltonian is supported on a rectangular lattice of qubits:\n",
+ "$$\\hat{H}_{\\text{heis}} = \\sum_{\\langle ij\\rangle_h} J_{h} \\hat{S}_i \\cdot \\hat{S}_j + \\sum_{\\langle ij\\rangle_v} J_{v} \\hat{S}_i \\cdot \\hat{S}_j,$$\n",
+ "where $h$ ($v$) denotes horizontal (vertical) bonds, while $\\langle \\cdot \\rangle$ denotes nearest-neighbor pairs. You can build this Hamiltonian using Cirq `PauliString` and `PauliSum` objects:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "oWpQomEnA51w",
+ "colab_type": "code",
+ "colab": {}
+ },
+ "source": [
+ "def get_qubit_grid(rows, cols):\n",
+ "  \"\"\"Rectangle of qubits returned as a nested list.\"\"\"\n",
+ "  qubits = []\n",
+ "  for r in range(rows):\n",
+ "    qubits.append([])\n",
+ "    for c in range(cols):\n",
+ "      qubits[-1].append(cirq.GridQubit(r, c))\n",
+ "  return qubits\n",
+ "\n",
+ "def get_bond(q0, q1):\n",
+ "  \"\"\"Given two 
Cirq qubits, return the PauliSum that bonds them.\"\"\"\n", + " return cirq.PauliSum.from_pauli_strings([\n", + " cirq.PauliString(cirq.X(q0), cirq.X(q1)),\n", + " cirq.PauliString(cirq.Y(q0), cirq.Y(q1)),\n", + " cirq.PauliString(cirq.Z(q0), cirq.Z(q1))])\n", + "\n", + "def get_heisenberg_hamiltonian(qubits, jh, jv):\n", + " \"\"\"Returns the 2D Heisenberg Hamiltonian over the given grid of qubits.\"\"\"\n", + " heisenberg = cirq.PauliSum()\n", + " # Apply horizontal bonds\n", + " for r in qubits:\n", + " for q0, q1 in zip(r, r[1::]):\n", + " heisenberg += jh * get_bond(q0, q1)\n", + " # Apply vertical bonds\n", + " for r0, r1 in zip(qubits, qubits[1::]):\n", + " for q0, q1 in zip(r0, r1):\n", + " heisenberg += jv * get_bond(q0, q1)\n", + " return heisenberg" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "g07ao_-9dV6v", + "colab_type": "text" + }, + "source": [ + "For visualization and verification purposes, the following function recovers an explicit matrix from a Cirq `PauliSum` given a linear ordering of the qubits which support it:" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "KXEoSvsEdgfM", + "colab_type": "code", + "colab": {} + }, + "source": [ + "def pauli_sum_to_matrix(qubits, pauli_sum):\n", + " \"\"\"Unpacks each pauli string in the pauli sum into a matrix and sums them.\"\"\"\n", + " matrix = np.zeros((2**len(qubits), 2**len(qubits)), dtype=np.complex128)\n", + " for pauli_string in pauli_sum:\n", + " coeff = pauli_string.coefficient\n", + " bare_string = pauli_string/coeff\n", + " matrix += coeff*bare_string.dense(qubits)._unitary_()\n", + " return matrix" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "lLR_vKjwWhgP", + "colab_type": "text" + }, + "source": [ + "### Target density matrix" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "uUcPcokKcDOt", + "colab_type": "text" + }, + "source": [ + "Here you define the parameters of the system to be learned. The 2D Heisenberg model is defined by the number of rows and columns in the qubit lattice, the bond strengths in the horizontal and vertical directions, and the inverse temperature $\\beta$. 
Here, we use the same parameters as in the associated paper:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "C9L88CvDZCmw",
+ "colab_type": "code",
+ "colab": {}
+ },
+ "source": [
+ "num_rows = 2\n",
+ "num_cols = 2\n",
+ "jh = 1\n",
+ "jv = 0.6\n",
+ "beta = 2.6\n",
+ "\n",
+ "# Get the grid of qubits.\n",
+ "all_qubits = get_qubit_grid(num_rows, num_cols)\n",
+ "all_qubits_flat = [q for r in all_qubits for q in r]"
+ ],
+ "execution_count": 0,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "wu96Vy78TvER",
+ "colab_type": "text"
+ },
+ "source": [
+ "Given a Hamiltonian $\\hat{H}$ and an inverse temperature $\\beta$, the thermal state $\\rho_T$ is given by\n",
+ "$$\\rho_T = \\frac{e^{-\\beta \\hat{H}}}{\\mathrm{tr}\\left[e^{-\\beta \\hat{H}}\\right]}.$$\n",
+ "Since our target system is small, you can compute this matrix exponential directly, using the `PauliSum`-to-matrix converter defined above:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "9fKp0XaNjPSF",
+ "colab_type": "code",
+ "colab": {}
+ },
+ "source": [
+ "num_H = pauli_sum_to_matrix(\n",
+ "    all_qubits_flat, get_heisenberg_hamiltonian(all_qubits, jh, jv))\n",
+ "heisenberg_exp = linalg.expm(-beta*num_H)\n",
+ "exact_thermal_state = np.true_divide(heisenberg_exp, np.trace(heisenberg_exp))\n",
+ "seaborn.heatmap(abs(exact_thermal_state))"
+ ],
+ "execution_count": 0,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "GcqC3cXOnWBy",
+ "colab_type": "text"
+ },
+ "source": [
+ "Recall that any density matrix $\\rho$ [can be written as](https://en.wikipedia.org/wiki/Density_matrix#Definition)\n",
+ "$$\\rho = \\sum_i p_i |\\psi_i\\rangle\\langle\\psi_i|,$$\n",
+ "where $|\\psi_i\\rangle$ is a pure state and $p_i$ is the classical probability of encountering that state in the mixture. Since TFQ is a pure-state simulator, we emulate a density matrix by producing pure states according to their probabilities $p_i$; by the equation above, averaging the resulting projectors recovers the full density matrix. Here we define a function that converts such a list of pure states into the associated density matrix:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "QMnyxFTBncbG",
+ "colab_type": "code",
+ "colab": {}
+ },
+ "source": [
+ "def pure_state_list_to_density_matrix(pure_states):\n",
+ "  \"\"\"Return the uniform mixture of the given list of pure states.\"\"\"\n",
+ "  dim = len(pure_states[0].numpy())\n",
+ "  n_s = pure_states.shape[0]\n",
+ "  thermal_state = np.zeros((dim, dim), dtype=np.complex128)\n",
+ "  for i in range(n_s):\n",
+ "    psi = pure_states[i].numpy()\n",
+ "    # Accumulate the projector |psi><psi|; note the complex conjugate.\n",
+ "    thermal_state += np.outer(psi, np.conj(psi))\n",
+ "  return np.true_divide(thermal_state, n_s)"
+ ],
+ "execution_count": 0,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "vOCUwIOBav50",
+ "colab_type": "text"
+ },
+ "source": [
+ "Finally, to track the performance of our models, we need a measure of the distance of our estimated density matrix $\\tilde{\\rho}$ from the target density matrix $\\rho_T$. One common metric is the [fidelity](https://en.wikipedia.org/wiki/Fidelity_of_quantum_states), which is defined as\n",
+ "$$F(\\tilde{\\rho}, \\rho_T) = \\text{tr}\\left[\\sqrt{\\sqrt{\\tilde{\\rho}}\\rho_T\\sqrt{\\tilde{\\rho}}}\\right]^2.$$\n",
+ "This is tractable to compute because our model system is small.
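 It lies between $0$ and $1$, and equals $1$ exactly when $\\tilde{\\rho} = \\rho_T$.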
 Below we define a function that computes this quantity:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "o505UmerbsLm",
+ "colab_type": "code",
+ "colab": {}
+ },
+ "source": [
+ "def fidelity(dm1, dm2):\n",
+ "  \"\"\"Calculate the fidelity between the two given density matrices.\"\"\"\n",
+ "  dm1_sqrt = linalg.sqrtm(dm1)\n",
+ "  return abs(np.trace(linalg.sqrtm(\n",
+ "      np.matmul(dm1_sqrt, np.matmul(dm2, dm1_sqrt))))) ** 2"
+ ],
+ "execution_count": 0,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "6dHgm4SiBEB8",
+ "colab_type": "text"
+ },
+ "source": [
+ "## Energy-based models"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "y7QXAsERBHEI",
+ "colab_type": "text"
+ },
+ "source": [
+ "Energy-based models (EBMs) are a class of machine learning ansätze inspired by physics and exponential families. The advantage of using energy-based models for probabilistic modeling is that fair samples can be drawn from the distributions they define without requiring computation of their partition functions.\n",
+ "\n",
+ "One specific class of EBM is the Boltzmann machine. The energy of a spin configuration $x \\in \\{-1, 1\\}^n$ in this model is defined as:\n",
+ "$$E(x) = -\\sum_{i, j}w_{ij} x_i x_j - \\sum_i b_i x_i.$$\n",
+ "\n",
+ "This classical model can be converted into a quantum mechanical Ising model by replacing each spin with a Pauli $Z$ operator and using the usual mapping between the spin and qubit pictures, $1 \\rightarrow |0\\rangle$ and $-1 \\rightarrow |1\\rangle$.\n",
+ "\n",
+ "In the special case where the connection weights $w_{ij}$ are all zero, the Boltzmann machine reduces to a product of independent Bernoulli distributions over the set of qubits. This \"Bernoulli EBM\" has many simplifying properties, and hence you will explore this EBM first in the examples below. Later in the notebook, you will apply the full Boltzmann EBM to VQT.\n",
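+ "\n",
+ "As a quick worked example: take two spins with $w_{12} = w_{21} = 1/2$ (all other weights zero) and biases $b = (1, -1)$. Then $E([1, 1]) = -1$ while $E([-1, 1]) = 3$: configurations whose spins align with each other (and with their biases) have lower energy and are therefore more probable."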
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "DHSwa_R6WEIs",
+ "colab_type": "text"
+ },
+ "source": [
+ "### Energy functions"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "cvoLq1XOsFQu",
+ "colab_type": "text"
+ },
+ "source": [
+ "Here we define functions which compute the energy of a Boltzmann or Bernoulli EBM given the weights, biases, and spin configurations:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "colab_type": "code",
+ "id": "pYzD_xF0BJpl",
+ "colab": {}
+ },
+ "source": [
+ "def bitstring_to_spin_config(bitstring):\n",
+ "  \"\"\"Implements the mapping from the qubit to the spin picture.\"\"\"\n",
+ "  return [-1 if b == 1 else 1 for b in bitstring]\n",
+ "\n",
+ "def spin_config_to_bitstring(spin_config):\n",
+ "  \"\"\"Implements the mapping from the spin to the qubit picture.\"\"\"\n",
+ "  return [0 if s == 1 else 1 for s in spin_config]\n",
+ "\n",
+ "def ebm_energy(spin_config, biases, weights=None):\n",
+ "  \"\"\"Given a rank-2 tensor representing the weight matrix and a rank-1 tensor\n",
+ "  representing the biases, calculate the energy of the spin configuration.\"\"\"\n",
+ "  energy = 0\n",
+ "  if weights is not None:\n",
+ "    for w_row, xi in zip(weights.numpy(), spin_config):\n",
+ "      for wij, xj in zip(w_row, spin_config):\n",
+ "        energy -= wij*xi*xj\n",
+ "  for bi, xi in zip(biases.numpy(), spin_config):\n",
+ "    energy -= bi*xi\n",
+ "  return energy\n",
+ "\n",
+ "def ebm_energy_avg(spin_config_list, biases, weights=None):\n",
+ "  \"\"\"Average energy over a set of spin configuration samples.\"\"\"\n",
+ "  energy_avg = 0\n",
+ "  for spin_config in spin_config_list:\n",
+ "    energy_avg += ebm_energy(spin_config, biases, weights)\n",
+ "  energy_avg /= len(spin_config_list)\n",
+ "  return energy_avg"
+ ],
+ "execution_count": 0,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "yCK1ByUXkywN",
+ "colab_type": "text"
+ },
+ "source": [
+ "We also define functions which initialize TF Variables for our weights and biases. Initializing all weights and biases near 0 means we begin near the uniform distribution, which can also be thought of as starting with a high-temperature prior:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "E8TPAtmvkt6Q",
+ "colab_type": "code",
+ "colab": {}
+ },
+ "source": [
+ "def get_initialized_ebm_biases(num_units):\n",
+ "  return tf.Variable(\n",
+ "      tf.random.uniform(minval=-0.1, maxval=0.1, shape=[num_units],\n",
+ "                        dtype=tf.float32), dtype=tf.float32)\n",
+ "\n",
+ "def get_initialized_ebm_weights(num_units):\n",
+ "  return tf.Variable(\n",
+ "      tf.random.uniform(minval=-0.1, maxval=0.1,\n",
+ "                        shape=[num_units, num_units], dtype=tf.float32),\n",
+ "      dtype=tf.float32)"
+ ],
+ "execution_count": 0,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "zkDAxG9JWJep",
+ "colab_type": "text"
+ },
+ "source": [
+ "### EBM derivatives"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "is1yfTqVEgas",
+ "colab_type": "text"
+ },
+ "source": [
+ "The derivative of an EBM energy given a spin configuration is easy to compute. In fact, the derivatives are independent of the weights and biases:\n",
+ "$$\\nabla_{w_{ij}}E(x) = -x_ix_j\\quad \\text{and}\\quad \\nabla_{b_{i}}E(x) = -x_i.$$\n",
+ "Information about the weights and biases enters by averaging these derivatives over samples from the EBM.\n",
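+ "\n",
+ "For instance, for the two-spin example above, $\\nabla_{b}E([-1, 1]) = (1, -1)$ and $\\nabla_{w_{12}}E([-1, 1]) = 1$, whatever the current values of $w$ and $b$."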
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "OIxgPEKaEbYn",
+ "colab_type": "code",
+ "colab": {}
+ },
+ "source": [
+ "def ebm_weights_derivative(spin_config):\n",
+ "  w_deriv = np.zeros((len(spin_config), len(spin_config)))\n",
+ "  for i, x_i in enumerate(spin_config):\n",
+ "    for j, x_j in enumerate(spin_config):\n",
+ "      w_deriv[i][j] = -x_i*x_j\n",
+ "  return w_deriv\n",
+ "\n",
+ "def ebm_biases_derivative(spin_config):\n",
+ "  b_deriv = np.zeros(len(spin_config))\n",
+ "  for i, x_i in enumerate(spin_config):\n",
+ "    b_deriv[i] = -x_i\n",
+ "  return b_deriv\n",
+ "\n",
+ "def ebm_weights_derivative_avg(spin_config_list):\n",
+ "  w_deriv = np.zeros((len(spin_config_list[0]), len(spin_config_list[0])))\n",
+ "  for spin_config in spin_config_list:\n",
+ "    w_deriv += ebm_weights_derivative(spin_config)\n",
+ "  return np.true_divide(w_deriv, len(spin_config_list))\n",
+ "\n",
+ "def ebm_biases_derivative_avg(spin_config_list):\n",
+ "  b_deriv = np.zeros(len(spin_config_list[0]))\n",
+ "  for spin_config in spin_config_list:\n",
+ "    b_deriv += ebm_biases_derivative(spin_config)\n",
+ "  return np.true_divide(b_deriv, len(spin_config_list))"
+ ],
+ "execution_count": 0,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "22C0LS7GeyQh",
+ "colab_type": "text"
+ },
+ "source": [
+ "### Classical VQT loss gradients"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "8kbBeqxkpBHH",
+ "colab_type": "text"
+ },
+ "source": [
+ "As discussed in the paper, the gradient of the VQT loss function can be calculated without computing entropies or partition functions. For example, the gradient of the VQT free energy loss with respect to the classical model parameters can be written as:\n",
+ "$$\\partial_{\\theta} \\mathcal{L}_{\\text{fe}} = \\mathbb{E}_{x\\sim p_{\\theta}(x)}\\left[(E_{\\theta}(x)-\\beta H_{\\phi}(x))\\nabla_{\\theta}E_{\\theta}(x)\\right] - \\mathbb{E}_{x\\sim p_{\\theta}(x)}\\left[E_{\\theta}(x)-\\beta H_{\\phi}(x)\\right]\\,\\mathbb{E}_{y\\sim p_{\\theta}(y)}\\left[\\nabla_{\\theta}E_{\\theta}(y)\\right].$$\n",
+ "Below these gradients are defined for the general Boltzmann EBM. In the VQT gradients, each entry in `bitstring_list` corresponds to the entry with the same index in `energy_losses`, where for each bitstring $x$, we compute $\\beta\\langle x|H|x\\rangle$. 
The list of bitstrings is assumed to be sampled from the EBM.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "lNYyUNbloNO2", + "colab_type": "code", + "colab": {} + }, + "source": [ + "def get_vqt_weighted_weights_grad_product(\n", + " energy_losses, spin_config_list, biases, weights):\n", + " \"\"\"Implements the first term in the derivative of the FE loss,\n", + " for the weights of a Boltzmann EBM.\"\"\"\n", + " w_deriv = np.zeros((len(spin_config_list[0]), len(spin_config_list[0])))\n", + " for e_loss, spin_config in zip(energy_losses, spin_config_list):\n", + " w_deriv = w_deriv + (\n", + " ebm_energy(spin_config, biases, weights) - e_loss \n", + " )*ebm_weights_derivative(spin_config)\n", + " return np.true_divide(w_deriv, len(energy_losses))\n", + "\n", + "def get_vqt_weighted_biases_grad_product(\n", + " energy_losses, spin_config_list, biases, weights=None):\n", + " \"\"\"Implements the first term in the derivative of the FE loss,\n", + " for the biases of a Boltzmann EBM.\"\"\"\n", + " b_deriv = np.zeros(len(spin_config_list[0]))\n", + " for e_loss, spin_config in zip(energy_losses, spin_config_list):\n", + " b_deriv = b_deriv + (\n", + " ebm_energy(spin_config, biases, weights) - e_loss\n", + " )*ebm_biases_derivative(spin_config)\n", + " return np.true_divide(b_deriv, len(energy_losses))\n", + "\n", + "def get_vqt_factored_weights_grad_product(\n", + " energy_losses, spin_config_list, biases, weights):\n", + " \"\"\"Implements the second term in the derivative of the FE loss,\n", + " for the weights of a Boltzmann EBM.\"\"\"\n", + " energy_losses_avg = tf.reduce_mean(energy_losses)\n", + " classical_energy_avg = ebm_energy_avg(spin_config_list, biases, weights)\n", + " energy_diff_avg = classical_energy_avg - energy_losses_avg\n", + " return energy_diff_avg*ebm_weights_derivative_avg(spin_config_list)\n", + "\n", + "def get_vqt_factored_biases_grad_product(\n", + " energy_losses, spin_config_list, biases, weights=None):\n", + " \"\"\"Implements the second term in the derivative of the FE loss,\n", + " for the biases of a Boltzmann EBM.\"\"\"\n", + " energy_losses_avg = tf.reduce_mean(energy_losses)\n", + " classical_energy_avg = ebm_energy_avg(spin_config_list, biases, weights)\n", + " energy_diff_avg = classical_energy_avg - energy_losses_avg\n", + " return energy_diff_avg*ebm_biases_derivative_avg(spin_config_list)" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "jbVDJgviYO0x", + "colab_type": "text" + }, + "source": [ + "## Model components" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "G45X3XnzWdI4", + "colab_type": "text" + }, + "source": [ + "### Ansatz unitary" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "kIS8zy34qZWy", + "colab_type": "text" + }, + "source": [ + "The parameterized unitary ansatz you will use consists of alternating layers of general single qubit rotations and nearest-neighbor entangling gates:" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "2fGHmkevsUsZ", + "colab_type": "code", + "colab": {} + }, + "source": [ + "def get_rotation_1q(q, a, b, c):\n", + " \"\"\"General single qubit rotation.\"\"\"\n", + " return cirq.Circuit(cirq.X(q) ** a, cirq.Y(q) ** b, cirq.Z(q) ** c)\n", + "\n", + "def get_rotation_2q(q0, q1, a):\n", + " \"\"\"Exponent of entangling CNOT gate.\"\"\"\n", + " return cirq.Circuit(cirq.CNotPowGate(exponent=a)(q0, q1))\n", + "\n", + "def get_layer_1q(qubits, layer_num, name):\n", + " \"\"\"Apply single qubit 
rotations to all the given qubits.\"\"\"\n", + " layer_symbols = []\n", + " circuit = cirq.Circuit()\n", + " for n, q in enumerate(qubits):\n", + " a, b, c = sympy.symbols(\n", + " \"a{2}_{0}_{1} b{2}_{0}_{1} c{2}_{0}_{1}\".format(layer_num, n, name))\n", + " layer_symbols += [a, b, c]\n", + " circuit += get_rotation_1q(q, a, b, c)\n", + " return circuit, layer_symbols\n", + "\n", + "def get_layer_2q(qubits, layer_num, name):\n", + " \"\"\"Apply CNOT gates to all pairs of nearest-neighbor qubits.\"\"\"\n", + " layer_symbols = []\n", + " circuit = cirq.Circuit()\n", + " for n, (q0, q1) in enumerate(zip(qubits[::2], qubits[1::2])):\n", + " a = sympy.symbols(\"a{2}_{0}_{1}\".format(layer_num, n, name))\n", + " layer_symbols += [a]\n", + " circuit += get_rotation_2q(q0, q1, a)\n", + " shifted_qubits = qubits[1::]+[qubits[0]]\n", + " for n, (q0, q1) in enumerate(zip(shifted_qubits[::2], shifted_qubits[1::2])):\n", + " a = sympy.symbols(\"a{2}_{0}_{1}\".format(layer_num, n+1, name))\n", + " layer_symbols += [a]\n", + " circuit += get_rotation_2q(q0, q1, a)\n", + " return circuit, layer_symbols\n", + "\n", + "def get_one_full_layer(qubits, layer_num, name):\n", + " \"\"\"Stack the one- and two-qubit parameterized circuits.\"\"\"\n", + " circuit = cirq.Circuit()\n", + " all_symbols = []\n", + " new_circ, new_symb = get_layer_1q(qubits, layer_num, name)\n", + " circuit += new_circ\n", + " all_symbols += new_symb\n", + " new_circ, new_symb = get_layer_2q(qubits, layer_num + 1, name)\n", + " circuit += new_circ\n", + " all_symbols += new_symb\n", + " return circuit, all_symbols\n", + "\n", + "def get_model_unitary(qubits, num_layers, name=\"\"):\n", + " \"\"\"Build our full parameterized model unitary.\"\"\"\n", + " circuit = cirq.Circuit()\n", + " all_symbols = []\n", + " for i in range(num_layers):\n", + " new_circ, new_symb = get_one_full_layer(qubits, 2*i, name)\n", + " circuit += new_circ\n", + " all_symbols += new_symb\n", + " return circuit, all_symbols" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8xCxHubUYVXp", + "colab_type": "text" + }, + "source": [ + "### Bitstring injector" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "NaBARInB6maZ", + "colab_type": "text" + }, + "source": [ + "You also need a way to feed bitstrings into the quantum model. These bitstrings can be prepared by applying an X gate to every qubit that should be excited. 
The following function returns a parameterized circuit which prepares any given bitstring:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "ly6Rp_KL7f6r",
+ "colab_type": "code",
+ "colab": {}
+ },
+ "source": [
+ "def get_bitstring_circuit(qubits):\n",
+ "  \"\"\"Returns wall of parameterized X gates and the bits used to turn them on.\"\"\"\n",
+ "  circuit = cirq.Circuit()\n",
+ "  all_symbols = []\n",
+ "  for n, q in enumerate(qubits):\n",
+ "    new_bit = sympy.Symbol(\"bit_{}\".format(n))\n",
+ "    circuit += cirq.X(q) ** new_bit\n",
+ "    all_symbols.append(new_bit)\n",
+ "  return circuit, all_symbols"
+ ],
+ "execution_count": 0,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "JkaiSF5Ez0Hp",
+ "colab_type": "text"
+ },
+ "source": [
+ "## Factorized latent state"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "PiivYeckVi9s",
+ "colab_type": "text"
+ },
+ "source": [
+ "### Bernoulli EBM"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "AbY_ZkiqSowp",
+ "colab_type": "text"
+ },
+ "source": [
+ "The Bernoulli EBM can be used to parameterize a factorized latent state. The probability of sampling a $1$ from a unit with bias $b$ is:\n",
+ "\n",
+ "$$p = \\frac{e^b}{e^b + e^{-b}}$$\n",
+ "\n",
+ "Since the units of a Bernoulli EBM are independent, the probability of a given spin configuration is simply the product of the individual unit probabilities:\n",
+ "\n",
+ "$$p(x) = \\prod_i\\frac{e^{x_ib_i}}{e^{b_i} + e^{-b_i}}$$\n",
+ "\n",
+ "This distribution is easy to sample from."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "NE8OCbQ0gwQB",
+ "colab_type": "code",
+ "colab": {}
+ },
+ "source": [
+ "def bernoulli_spin_p1(b):\n",
+ "  return np.exp(b)/(np.exp(b) + np.exp(-b))\n",
+ "\n",
+ "def sample_spins_bernoulli(num_samples, biases):\n",
+ "  prob_list = []\n",
+ "  for bias in biases.numpy():\n",
+ "    prob_list.append(bernoulli_spin_p1(bias))\n",
+ "  # The `probs` keyword specifies the probability of a 1 event\n",
+ "  latent_dist = tfp.distributions.Bernoulli(probs=prob_list, dtype=tf.float32)\n",
+ "  bit_samples = latent_dist.sample(num_samples).numpy()\n",
+ "  spin_samples = []\n",
+ "  for sample in bit_samples:\n",
+ "    spin_samples.append([])\n",
+ "    for bit in sample:\n",
+ "      if bit == 0:\n",
+ "        spin_samples[-1].append(-1)\n",
+ "      else:\n",
+ "        spin_samples[-1].append(1)\n",
+ "  return spin_samples"
+ ],
+ "execution_count": 0,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "gJiE1ULKFMih",
+ "colab_type": "text"
+ },
+ "source": [
+ "The entropy of a single unit with bias $b$ in our Bernoulli EBM is:\n",
+ "\n",
+ "$S = \\log\\left[e^{b} + e^{-b}\\right] - \\frac{be^{b} - be^{-b}}{e^{b} + e^{-b}}$\n",
+ "\n",
+ "For a factorized latent distribution, the entropy is simply the sum of the entropies of the individual factors.\n",
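+ "\n",
+ "As a sanity check, at $b = 0$ each unit is uniform over $\\{-1, 1\\}$ and contributes $S = \\log 2$, while as $|b| \\to \\infty$ the unit becomes deterministic and $S \\to 0$."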
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "03lIN32qFqJT",
+ "colab_type": "code",
+ "colab": {}
+ },
+ "source": [
+ "def bernoulli_factor_partition(b):\n",
+ "  return np.exp(b) + np.exp(-b)\n",
+ "\n",
+ "def bernoulli_partition(biases):\n",
+ "  partition = 1\n",
+ "  for bias in biases.numpy():\n",
+ "    partition *= bernoulli_factor_partition(bias)\n",
+ "  return partition\n",
+ "\n",
+ "def bernoulli_factor_entropy(b):\n",
+ "  Z = bernoulli_factor_partition(b)\n",
+ "  # S = -sum_x p(x)*log(p(x)) with p(+1) = e^b/Z and p(-1) = e^-b/Z.\n",
+ "  return np.log(Z) - (b*np.exp(b) - b*np.exp(-b))/Z\n",
+ "\n",
+ "def bernoulli_entropy(biases):\n",
+ "  entropy = 0\n",
+ "  for bias in biases.numpy():\n",
+ "    entropy += bernoulli_factor_entropy(bias)\n",
+ "  return entropy"
+ ],
+ "execution_count": 0,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "HGc8XMBOM76Q",
+ "colab_type": "text"
+ },
+ "source": [
+ "Finally we define a function for converting the classical Bernoulli distribution into an Ising model whose expectation values can be simulated in the TFQ ops:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "MjuUUwRbNJq7",
+ "colab_type": "code",
+ "colab": {}
+ },
+ "source": [
+ "def bernoulli_ebm_to_ising(qubits, biases, bare=False):\n",
+ "  pauli_s_list = []\n",
+ "  for i, bi in enumerate(biases.numpy()):\n",
+ "    if bare:\n",
+ "      coeff = 1.0\n",
+ "    else:\n",
+ "      coeff = bi\n",
+ "    pauli_s_list.append(cirq.PauliString(coeff, cirq.Z(qubits[i])))\n",
+ "  if bare:\n",
+ "    return pauli_s_list\n",
+ "  return cirq.PauliSum.from_pauli_strings(pauli_s_list)"
+ ],
+ "execution_count": 0,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "Pq1arQTGDRy5",
+ "colab_type": "text"
+ },
+ "source": [
+ "### VQT"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "eJoZptT3mt7N",
+ "colab_type": "text"
+ },
+ "source": [
+ "Build and view our unitary model and set up the TFQ Expectation Op inputs:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "F_EvADGJZSZw",
+ "colab_type": "code",
+ "colab": {}
+ },
+ "source": [
+ "# Number of bitstring samples from our classical model to average over\n",
+ "num_samples = 300\n",
+ "\n",
+ "# Number of rotations-plus-entanglement layers to stack.\n",
+ "# Note that the depth required to reach a given fidelity increases depending on\n",
+ "# the temperature and Hamiltonian parameters.\n",
+ "num_layers = 4\n",
+ "\n",
+ "# Build the model unitary and visible state circuits\n",
+ "U, model_symbols = get_model_unitary(all_qubits_flat, num_layers)\n",
+ "V, bit_symbols = get_bitstring_circuit(all_qubits_flat)\n",
+ "visible_state = tfq.convert_to_tensor([V + U])\n",
+ "\n",
+ "# Make a copy of the visible state for each bitstring we will sample\n",
+ "tiled_visible_state = tf.tile(visible_state, [num_samples])\n",
+ "\n",
+ "# Upconvert symbols to tensors\n",
+ "vqt_symbol_names = tf.identity(tf.convert_to_tensor(\n",
+ "    [str(s) for s in bit_symbols + model_symbols], dtype=tf.dtypes.string))\n",
+ "\n",
+ "# Build and tile the Hamiltonian\n",
+ "H = get_heisenberg_hamiltonian(all_qubits, jh, jv)\n",
+ "tiled_H = tf.tile(tfq.convert_to_tensor([[H]]), [num_samples, 1])\n",
+ "\n",
+ "# Get the expectation op with a differentiator attached\n",
+ "expectation = tfq.differentiators.ForwardDifference().generate_differentiable_op(\n",
+ "    analytic_op=tfq.get_expectation_op())\n",
+ "\n",
+ "SVGCircuit(U)"
+ ],
+ "execution_count": 0,
+ "outputs": []
+ },
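+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "vqtShapeCheckMd",
+ "colab_type": "text"
+ },
+ "source": [
+ "As a quick sanity check (an optional sketch), we can run the expectation op once on random bitstrings and random model parameters to confirm that the batched tensor shapes line up:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "vqtShapeCheckCode",
+ "colab_type": "code",
+ "colab": {}
+ },
+ "source": [
+ "# Random bitstrings and random model parameters, one row per sample.\n",
+ "random_bits = tf.cast(tfp.distributions.Bernoulli(\n",
+ "    probs=[0.5]*len(bit_symbols)).sample(num_samples), tf.float32)\n",
+ "random_params = tf.random.uniform(\n",
+ "    minval=-0.1, maxval=0.1, shape=[num_samples, len(model_symbols)])\n",
+ "\n",
+ "# One batched evaluation of <H>; the result has shape [num_samples, 1].\n",
+ "test_expectations = expectation(\n",
+ "    tiled_visible_state,\n",
+ "    vqt_symbol_names,\n",
+ "    tf.concat([random_bits, random_params], 1),\n",
+ "    tiled_H)\n",
+ "print(test_expectations.shape)\n",
+ "print(tf.reduce_mean(test_expectations))"
+ ],
+ "execution_count": 0,
+ "outputs": []
+ },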
"text" + }, + "source": [ + "We can use gradient descent on the model parameters thanks to TFQ, and our classical model parameters are tractable due to our use of an energy based model. The factorized nature of our latent space also allows us to efficiently obtain a loss function." + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "5w3op8KUp61G", + "colab_type": "code", + "colab": {} + }, + "source": [ + "optimizer = tf.keras.optimizers.Adam(learning_rate=0.03)\n", + "\n", + "# Initialize our model variables\n", + "vqt_model_params = tf.Variable(\n", + " tf.random.uniform(minval=-0.1, maxval=0.1,\n", + " shape=[len(model_symbols)], dtype=tf.float32),\n", + " dtype=tf.float32)\n", + "\n", + "# Keep track of metrics during training\n", + "vqt_loss_history = []\n", + "vqt_fidelity_history = []\n", + "vqt_model_params_history = []\n", + "vqt_bias_history = []\n", + "vqt_density_matrix_history = []\n", + "\n", + "# Initialize our EBM variables\n", + "vqt_biases = get_initialized_ebm_biases(len(all_qubits_flat))\n", + "\n", + "# The innermost training step, where gradients are taken and applied\n", + "def vqt_train_step():\n", + " # Sample from our EBM\n", + " spin_config_list = sample_spins_bernoulli(num_samples, vqt_biases)\n", + " bitstring_list = [spin_config_to_bitstring(s) for s in spin_config_list]\n", + " bitstring_tensor = tf.convert_to_tensor(bitstring_list, dtype=tf.float32)\n", + "\n", + " # Use the samples to find gradient of the loss w.r.t. model parameters.\n", + " with tf.GradientTape() as tape:\n", + " tiled_vqt_model_params = tf.tile([vqt_model_params], [num_samples, 1])\n", + " sampled_expectations = expectation(\n", + " tiled_visible_state,\n", + " vqt_symbol_names,\n", + " tf.concat([bitstring_tensor, tiled_vqt_model_params], 1),\n", + " tiled_H)\n", + " energy_losses = beta*sampled_expectations\n", + " energy_losses_avg = tf.reduce_mean(energy_losses)\n", + " vqt_model_gradients = tape.gradient(energy_losses_avg, [vqt_model_params])\n", + "\n", + " # Build the classical model gradients\n", + " weighted_biases_grad = get_vqt_weighted_biases_grad_product(\n", + " energy_losses, spin_config_list, vqt_biases)\n", + " factored_biases_grad = get_vqt_factored_biases_grad_product(\n", + " energy_losses, spin_config_list, vqt_biases)\n", + " biases_grad = tf.subtract(weighted_biases_grad, factored_biases_grad)\n", + "\n", + " # Apply the gradients\n", + " optimizer.apply_gradients(zip([vqt_model_gradients[0], biases_grad],\n", + " [vqt_model_params, vqt_biases]))\n", + "\n", + " # Sample pure states to build the current estimate of the density matrix\n", + " many_states = tfq.layers.State()(\n", + " tiled_visible_state,\n", + " symbol_names=vqt_symbol_names,\n", + " symbol_values=tf.concat([bitstring_tensor, tiled_vqt_model_params], 1)\n", + " )\n", + " vqt_density_matrix_history.append(pure_state_list_to_density_matrix(many_states))\n", + "\n", + " # Record the history\n", + " vqt_loss_history.append((energy_losses_avg - bernoulli_entropy(vqt_biases)).numpy())\n", + " vqt_fidelity_history.append(\n", + " fidelity(vqt_density_matrix_history[-1], exact_thermal_state))\n", + " vqt_model_params_history.append(vqt_model_params.numpy())\n", + " vqt_bias_history.append(vqt_biases.numpy())\n", + " \n", + " print(\"Current loss:\")\n", + " print(vqt_loss_history[-1])\n", + " print(\"Current fidelity to optimal state:\")\n", + " print(vqt_fidelity_history[-1])\n", + " print(\"Current estimated density matrix:\")\n", + " plt.figure()\n", + " 
seaborn.heatmap(abs(vqt_density_matrix_history[-1]))\n", + " plt.show()" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "3n3hXfHGcVRh", + "colab_type": "text" + }, + "source": [ + "With setup complete, we can now optimize our Heisenberg VQT." + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "OqFfhQtotacR", + "colab_type": "code", + "colab": {} + }, + "source": [ + "def vqt_train(epochs):\n", + " for epoch in range(epochs):\n", + " vqt_train_step()\n", + " print ('Epoch {} finished'.format(epoch + 1))\n", + "\n", + "vqt_train(100)" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Yigvs92XtcLQ", + "colab_type": "text" + }, + "source": [ + "We plot our metrics and visualize the motion of the parameters during training:" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "C2dopegQtmQA", + "colab_type": "code", + "colab": {} + }, + "source": [ + "plt.plot(vqt_loss_history)\n", + "plt.xlabel('Epoch #')\n", + "plt.ylabel('Loss [free energy]')" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "aQuPV9l6Lh7I", + "colab_type": "code", + "colab": {} + }, + "source": [ + "plt.plot(vqt_fidelity_history)\n", + "plt.xlabel('Epoch #')\n", + "plt.ylabel('Fidelity with exact state')" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vI1lbaENVZXq", + "colab_type": "text" + }, + "source": [ + "## Classically correlated latent state" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "4ojAo5UwVQWK", + "colab_type": "text" + }, + "source": [ + "### Boltzmann machine EBM" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "WrYekcBOOToj", + "colab_type": "text" + }, + "source": [ + "The Bernoulli distribution is only able to inject entropy into our density matrix. To encode classical correlations, we need to move beyond a factorized latent state. This can be accomplished by allowing the weights of our Boltzmann machine to be non-zero.\n", + "\n", + "Now that there are correlations, sampling from the model becomes non-trivial. The probability of bitstring $x$ is:\n", + "\n", + "$P(x) = \\frac{\\exp(-E(x))}{\\sum_{y\\in\\{-1, 1\\}^n} \\exp(-E(y))}$\n", + "\n", + "In general this function is intractable to compute directly; however, we can still obtain samples from the distribution efficiently. Markov chain Monte Carlo (MCMC) is one family of procedures for this efficient sampling. 
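Note that MCMC yields fair samples only asymptotically, and mixing can be slow for strongly correlated models; for the small systems studied here it works well. 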
Here, we use the simplest example of MCMC, the [Metropolis-Hastings](https://en.wikipedia.org/wiki/Metropolis%E2%80%93Hastings_algorithm) algorithm:\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "-cFS7yyxR6dY", + "colab_type": "code", + "colab": {} + }, + "source": [ + "def make_proposal(y):\n", + " \"\"\"Flip spins in y to generate a new sample.\"\"\"\n", + " coin = tfp.distributions.Bernoulli(probs=[0.75]*len(y))\n", + " samples = coin.sample(1).numpy()[0]\n", + " x = []\n", + " for s_i, y_i in zip(samples, y):\n", + " if s_i:\n", + " x.append(y_i)\n", + " else:\n", + " if y_i == 1:\n", + " x.append(-1)\n", + " else:\n", + " x.append(1)\n", + " return x\n", + "\n", + "def sample_boltzmann(burn_in, num_samples, skip, initial_state, biases, weights):\n", + " \"\"\"Walk towards and sample from regions of high probability.\"\"\"\n", + " current_state = initial_state\n", + " all_samples = []\n", + " for i in range(burn_in + skip*num_samples):\n", + " proposal = make_proposal(current_state)\n", + " proposal_energy = ebm_energy(proposal, biases, weights)\n", + " current_energy = ebm_energy(current_state, biases, weights)\n", + " acceptance = min(np.exp(-proposal_energy)/np.exp(-current_energy), 1)\n", + " threshold = random.random()\n", + " if threshold <= acceptance:\n", + " current_state = proposal\n", + " if i >= burn_in:\n", + " if (i - burn_in)%skip == 0:\n", + " all_samples.append(current_state)\n", + " return all_samples" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "mTfa35s2PLbd", + "colab_type": "text" + }, + "source": [ + "Since there are now correlations between the bits, the partition function and entropy can no longer be computed in a scalable way. However, for the small system size considered here, these quantities can be illustrative." 
+ ] + }, + { + "cell_type": "code", + "metadata": { + "id": "1tguzlInPH6r", + "colab_type": "code", + "colab": {} + }, + "source": [ + "def boltzmann_partition(biases, weights):\n", + " partition_value = 0\n", + " for spin_config in itertools.product([-1, 1], repeat=biases.shape[0]):\n", + " partition_value += np.exp(-ebm_energy(spin_config, biases, weights))\n", + " return partition_value\n", + "\n", + "def boltzmann_entropy(biases, weights):\n", + " Z = boltzmann_partition(biases, weights)\n", + " Z_log = np.log(Z)\n", + " unnormalized = 0\n", + " for spin_config in itertools.product([-1, 1], repeat=biases.shape[0]):\n", + " this_energy = ebm_energy(spin_config, biases, weights)\n", + " unnormalized += np.exp(-this_energy)*(-this_energy - Z_log)\n", + " return -unnormalized/Z" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "05Ns_rcIOAvR", + "colab_type": "text" + }, + "source": [ + "Finally we define a function for converting the classical Boltzmann machine into an Ising model whose expectation values can be simulated in the TFQ ops:" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "bmsidwSwVT27", + "colab_type": "code", + "colab": {} + }, + "source": [ + "def boltzmann_ebm_to_ising(qubits, biases, weights, bare=False):\n", + " pauli_s_list = []\n", + " for i, w_row in enumerate(weights.numpy()):\n", + " for j, wij in enumerate(w_row):\n", + " init_list = [cirq.Z(q) for qi, q in enumerate(qubits) if qi == i or qi == j]\n", + " if bare:\n", + " coeff = 1.0\n", + " else:\n", + " coeff = wij\n", + " pauli_s_list.append(cirq.PauliString(coeff, init_list))\n", + " for i, bi in enumerate(biases.numpy()):\n", + " if bare:\n", + " coeff = 1.0\n", + " else:\n", + " coeff = bi\n", + " pauli_s_list.append(cirq.PauliString(coeff, cirq.Z(qubits[i])))\n", + " if bare:\n", + " return pauli_s_list\n", + " return cirq.PauliSum.from_pauli_strings(pauli_s_list)" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pWgYpkp9O3Et", + "colab_type": "text" + }, + "source": [ + "### VQT" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "gr2R5-t0UJvO", + "colab_type": "code", + "colab": {} + }, + "source": [ + "# Initialize our model variables\n", + "vqt_model_params = tf.Variable(\n", + " tf.random.uniform(minval=-0.1, maxval=0.1, shape=[len(model_symbols)],\n", + " dtype=tf.float32), dtype=tf.float32)\n", + "\n", + "# Define the learning hyperparameters\n", + "burn_in = 100\n", + "skip = 7\n", + "optimizer = tf.keras.optimizers.Adam(learning_rate=0.025)\n", + "\n", + "# Keep track of metrics during training\n", + "vqt_loss_history = []\n", + "vqt_fidelity_history = []\n", + "vqt_model_params_history = []\n", + "vqt_weights_history = []\n", + "vqt_bias_history = []\n", + "vqt_density_matrix_history = []\n", + "\n", + "# Initialize our EBM variables\n", + "vqt_weights = get_initialized_ebm_weights(len(all_qubits_flat))\n", + "vqt_biases = get_initialized_ebm_biases(len(all_qubits_flat))\n", + "\n", + "# The innermost training step, where gradients are taken and applied\n", + "def vqt_train_step():\n", + " # Sample from our EBM\n", + " spin_config_list = sample_boltzmann(burn_in, num_samples, skip,\n", + " vqt_train_step.initial_state,\n", + " vqt_biases, vqt_weights)\n", + " vqt_train_step.initial_state = spin_config_list[-1]\n", + " bitstring_list = [spin_config_to_bitstring(s) for s in spin_config_list]\n", + " bitstring_tensor = tf.convert_to_tensor(bitstring_list, 
dtype=tf.float32)\n",
+ "\n",
+ "  # Use the samples to find gradient of the loss w.r.t. model parameters.\n",
+ "  with tf.GradientTape() as tape:\n",
+ "    tiled_vqt_model_params = tf.tile([vqt_model_params], [num_samples, 1])\n",
+ "    sampled_expectations = expectation(\n",
+ "        tiled_visible_state,\n",
+ "        vqt_symbol_names,\n",
+ "        tf.concat([bitstring_tensor, tiled_vqt_model_params], 1),\n",
+ "        tiled_H)\n",
+ "    energy_losses = beta*sampled_expectations\n",
+ "    energy_losses_avg = tf.reduce_mean(energy_losses)\n",
+ "  vqt_model_gradients = tape.gradient(energy_losses_avg, [vqt_model_params])\n",
+ "\n",
+ "  # Build the classical model gradients\n",
+ "  weighted_biases_grad = get_vqt_weighted_biases_grad_product(\n",
+ "      energy_losses, spin_config_list, vqt_biases, vqt_weights)\n",
+ "  factored_biases_grad = get_vqt_factored_biases_grad_product(\n",
+ "      energy_losses, spin_config_list, vqt_biases, vqt_weights)\n",
+ "  biases_grad = tf.subtract(weighted_biases_grad, factored_biases_grad)\n",
+ "  # Note the argument order: biases first, then weights.\n",
+ "  weighted_weights_grad = get_vqt_weighted_weights_grad_product(\n",
+ "      energy_losses, spin_config_list, vqt_biases, vqt_weights)\n",
+ "  factored_weights_grad = get_vqt_factored_weights_grad_product(\n",
+ "      energy_losses, spin_config_list, vqt_biases, vqt_weights)\n",
+ "  weights_grad = tf.subtract(weighted_weights_grad, factored_weights_grad)\n",
+ "\n",
+ "  # Apply the gradients\n",
+ "  optimizer.apply_gradients(\n",
+ "      zip([vqt_model_gradients[0], weights_grad, biases_grad],\n",
+ "          [vqt_model_params, vqt_weights, vqt_biases]))\n",
+ "\n",
+ "  # Sample pure states to build the current estimate of the density matrix\n",
+ "  many_states = tfq.layers.State()(\n",
+ "      tiled_visible_state,\n",
+ "      symbol_names=vqt_symbol_names,\n",
+ "      symbol_values=tf.concat([bitstring_tensor, tiled_vqt_model_params], 1)\n",
+ "  )\n",
+ "  vqt_density_matrix_history.append(pure_state_list_to_density_matrix(many_states))\n",
+ "\n",
+ "  # Record the history\n",
+ "  vqt_loss_history.append(\n",
+ "      (energy_losses_avg - boltzmann_entropy(vqt_biases, vqt_weights)).numpy())\n",
+ "  vqt_fidelity_history.append(\n",
+ "      fidelity(vqt_density_matrix_history[-1], exact_thermal_state))\n",
+ "  vqt_model_params_history.append(vqt_model_params.numpy())\n",
+ "  vqt_weights_history.append(vqt_weights.numpy())\n",
+ "  vqt_bias_history.append(vqt_biases.numpy())\n",
+ "\n",
+ "  print(\"Current loss:\")\n",
+ "  print(vqt_loss_history[-1])\n",
+ "  print(\"Current fidelity to optimal state:\")\n",
+ "  print(vqt_fidelity_history[-1])\n",
+ "  print(\"Current estimated density matrix:\")\n",
+ "  plt.figure()\n",
+ "  seaborn.heatmap(abs(vqt_density_matrix_history[-1]))\n",
+ "  plt.show()\n",
+ "\n",
+ "vqt_train_step.initial_state = [1]*len(bit_symbols)"
+ ],
+ "execution_count": 0,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "2NxcXXKvWA03",
+ "colab_type": "code",
+ "colab": {}
+ },
+ "source": [
+ "def vqt_train(epochs):\n",
+ "  for epoch in range(epochs):\n",
+ "    vqt_train_step()\n",
+ "    print('Epoch {} finished'.format(epoch + 1))\n",
+ "\n",
+ "vqt_train(100)"
+ ],
+ "execution_count": 0,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "colab_type": "code",
+ "id": "B0N0LhGnZtmZ",
+ "colab": {}
+ },
+ "source": [
+ "plt.plot(vqt_loss_history)\n",
+ "plt.xlabel('Epoch #')\n",
+ "plt.ylabel('Loss [free energy]')"
+ ],
+ "execution_count": 0,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "colab_type": "code",
+ "id": "0zVqoEJPZtmk",
+ "colab": {}
+ },
+ "source": [
+ 
"plt.plot(vqt_fidelity_history)\n", + "plt.xlabel('Epoch #')\n", + "plt.ylabel('Fidelity with exact state')" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "xUCZgEaa7_q5", + "colab_type": "code", + "colab": {} + }, + "source": [ + "" + ], + "execution_count": 0, + "outputs": [] + } + ] +} \ No newline at end of file