Reading group on Deep Learning

LEAR - XRCE, 23 Nov. 2012

Notes on this document:

Outline

Pointers

Reading material

Websites

Code

Refresher on Neural Networks

Central idea

Preliminary: Projection Pursuit Regression (PPR)

$$f(X) = \sum_{m=1}^{M} g_m(V_m), \qquad V_m = \omega_m^T X$$

where each $g_m$ is a ridge function: it varies only along the direction defined by the unit vector $\omega_m$, as the two examples below illustrate.

In [1]:
# Plot of two ridge function examples


from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm
import matplotlib.pyplot as plt
import numpy as np


def g_left(X1, X2):
    V = (X1 + X2) / np.sqrt(2)
    return 1.0 / (1.0 + np.exp(-5 * (V - 0.5)))


def g_right(X1, X2):
    V = X1
    return (V + 0.1) * np.sin(10 / (V / 3.0 + 0.1))


def plot_surface_3d(X1, X2, func, fig=None, subplot=(1, 1, 1)):
    if fig is None:
        fig = plt.figure(figsize=(12, 5))
    ax = fig.add_subplot(*subplot, projection='3d')
    X1, X2 = np.meshgrid(X1, X2)
    Z = func(X1, X2)
    ax.plot_surface(X1, X2, Z,
                    rstride=1, cstride=1, cmap=cm.coolwarm,
                    linewidth=0, antialiased=False)


fig = plt.figure(figsize=(12, 5))
X1, X2 = np.linspace(-1.5, 1.5, 150), np.linspace(-0.5, 1, 150)
plot_surface_3d(X1, X2, g_left, fig, subplot=(1, 2, 1))
X1, X2 = np.linspace(-0.05, 0.05, 300), np.linspace(0, 1, 100)
plot_surface_3d(X1, X2, g_right, fig, subplot=(1, 2, 2))

Neural Networks (NN)

The model

In [2]:
# How to plot a network diagram of a NN


import matplotlib.pyplot as plt
import networkx as nx


class NeuralNetwork(object):
    """ A simple neural network class for visualization purposes
    """
    def __init__(self, n_nodes_per_layer):
        """ Build the network layer by layer
        """
        self.n_nodes_per_layer = n_nodes_per_layer
        self.graph = nx.DiGraph()
        self.nodes_pos = {}
        self.nodes_label = {}
        self.input_units = []
        self.hidden_units = []
        self.output_units = []

        # add the units per layer
        for layer_i, layer_size in enumerate(n_nodes_per_layer):

            # add the nodes for this layer
            for node_i in range(layer_size):

                node = "%d_%d" % (layer_i, node_i)  # simple encoding of a node
                self.graph.add_node(node)
                self.nodes_pos[node] = (layer_i, layer_size / 2.0 - node_i)

                if layer_i == 0:
                    # label for input layer
                    self.input_units.append(node)
                    self.nodes_label[node] = r"$X_%d$" % node_i
                elif layer_i == len(n_nodes_per_layer) - 1:
                    # label for output layer
                    self.output_units.append(node)
                    self.nodes_label[node] = r"$Y_%d$" % node_i
                else:
                    # hidden layer
                    self.hidden_units.append(node)
                    self.nodes_label[node] = r"$Z_{%d, %d}$" % (layer_i, node_i)

            # add the edges: full connectivity between layer_i - 1 and layer_i
            if layer_i > 0:
                prev_layer_i = layer_i - 1
                prev_layer_size = n_nodes_per_layer[prev_layer_i]
                self.graph.add_edges_from([
                    ("%d_%d" % (prev_layer_i, l), "%d_%d" % (layer_i, k))
                    for k in range(layer_size) for l in range(prev_layer_size)
                ])

    def draw(self, ax=None):
        """ Draw the neural network
        """
        if ax is None:
            ax = plt.figure(figsize=(10, 6)).add_subplot(1, 1, 1)
        nx.draw_networkx_edges(self.graph, pos=self.nodes_pos, alpha=0.7, ax=ax)
        nx.draw_networkx_nodes(self.graph, nodelist=self.input_units,
                               pos=self.nodes_pos, ax=ax,
                               node_color='#66FFFF', node_size=700)
        nx.draw_networkx_nodes(self.graph, nodelist=self.hidden_units,
                               pos=self.nodes_pos, ax=ax,
                               node_color='#CCCCCC', node_size=900)
        nx.draw_networkx_nodes(self.graph, nodelist=self.output_units,
                               pos=self.nodes_pos, ax=ax,
                               node_color='#FFFF99', node_size=700)
        nx.draw_networkx_labels(self.graph, labels=self.nodes_label,
                                pos=self.nodes_pos, font_size=14, ax=ax)
        ax.axis('off')


# classical 3-layer neural network example
n_layers = 3  # input layer | hidden layer | output layer
n_input_dims = 5
n_hidden_units = 3
n_output_dims = 2
n_nodes_per_layer = [n_input_dims, n_hidden_units, n_output_dims]
nn1 = NeuralNetwork(n_nodes_per_layer)

# another example
nn2 = NeuralNetwork([10, 5, 10])

# show the graphical representations
fig = plt.figure(figsize=(12, 8))
nn1.draw(fig.add_subplot(1, 2, 1))
nn2.draw(fig.add_subplot(1, 2, 2))

Formal definition of NN (assuming full connectivity between layers):
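One common formulation with a single hidden layer, in the notation of Hastie et al. (matching the $X$, $Z$, $Y$ node labels in the diagrams above):

$$Z_m = \sigma(\alpha_{0m} + \alpha_m^T X), \quad m = 1, \dots, M$$

$$T_k = \beta_{0k} + \beta_k^T Z, \quad k = 1, \dots, K$$

$$f_k(X) = g_k(T), \quad k = 1, \dots, K$$

where $\sigma$ is the activation function (typically the sigmoid $\sigma(v) = 1/(1+e^{-v})$) and $g_k$ is the output transformation: the identity for regression, the softmax for classification.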

The Back-Propagation algorithm

(Again assuming one hidden layer for brevity)
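A minimal NumPy sketch of one back-propagation step, assuming squared-error loss, sigmoid hidden units, a linear output layer, and no bias terms (the function and variable names are illustrative, with alpha/beta following the Hastie et al. notation above):

# One back-propagation step for a single-hidden-layer network
# (minimal sketch: squared-error loss, sigmoid hidden units,
#  linear outputs, no bias terms)

import numpy as np


def sigmoid(a):
    return 1.0 / (1.0 + np.exp(-a))


def backprop_step(x, y, alpha, beta, learning_rate=0.1):
    # forward pass
    z = sigmoid(alpha.dot(x))            # hidden activations Z, shape (M,)
    f = beta.dot(z)                      # outputs f(X), shape (K,)

    # backward pass: output error, then back-propagate to the hidden layer
    delta = f - y                        # dR/dT for R = 0.5 * ||f - y||^2
    s = z * (1 - z) * beta.T.dot(delta)  # error at the hidden layer

    # gradient-descent updates
    beta -= learning_rate * np.outer(delta, z)
    alpha -= learning_rate * np.outer(s, x)
    return alpha, beta


# toy usage with the dimensions of nn1 above: 5 inputs, 3 hidden, 2 outputs
rng = np.random.RandomState(0)
alpha, beta = 0.1 * rng.randn(3, 5), 0.1 * rng.randn(2, 3)
x, y = rng.randn(5), rng.randn(2)
alpha, beta = backprop_step(x, y, alpha, beta)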

Remarks

Deep Learning in practice

What is Deep Learning?

Examples

Why go deep?

How to train deep models?

Overview of Theano

Theano is a Python library that allows you to define, optimize, and evaluate mathematical expressions involving multi-dimensional arrays efficiently. Theano features:

- tight integration with NumPy
- transparent use of a GPU
- efficient symbolic differentiation
- speed and stability optimizations
- dynamic C code generation
- extensive unit-testing and self-verification

In [3]:
# Sneak peek

import theano
from theano import tensor

# declare two symbolic floating-point scalars
a = tensor.dscalar()
b = tensor.dscalar()

# create a simple expression
c = a + b

# convert the expression into a callable object that takes (a,b)
# values as input and computes a value for c
f = theano.function([a, b], c)

# bind 1.5 to 'a', 2.5 to 'b', and evaluate 'c'
assert 4.0 == f(1.5, 2.5)
In [4]:
# Logistic Regression simple example

import numpy
import theano
import theano.tensor as T
rng = numpy.random

N = 400
feats = 784
D = (rng.randn(N, feats), rng.randint(size=N, low=0, high=2))
training_steps = 10000

# Declare Theano symbolic variables
x = T.matrix("x")
y = T.vector("y")
w = theano.shared(rng.randn(feats), name="w")
b = theano.shared(0., name="b")
#print "Initial model:"
#print w.get_value(), b.get_value()

# Construct Theano expression graph
p_1 = 1 / (1 + T.exp(-T.dot(x, w) - b))       # Probability that target = 1
prediction = p_1 > 0.5                        # The prediction thresholded
xent = -y * T.log(p_1) - (1-y) * T.log(1-p_1) # Cross-entropy loss function
cost = xent.mean() + 0.01 * (w ** 2).sum()    # The cost to minimize
gw, gb = T.grad(cost, [w, b])                 # Compute the gradient of the cost

# Compile
train = theano.function(
          inputs=[x, y],
          outputs=[prediction, xent],
          updates={w: w - 0.1 * gw, b: b - 0.1 * gb})
predict = theano.function(inputs=[x], outputs=prediction)

# Train
for i in range(training_steps):
    pred, err = train(D[0], D[1])

# To display the model, uncomment below:
#print "Final model:"
#print w.get_value(), b.get_value()
#print "target values for D:", D[1]
#print "prediction on D:", predict(D[0])

Deep Learning in Theano

Many implementations are already available and documented in the Deep Learning Tutorials (cf. the code on GitHub).

Supervised learning

$$P(Y=i \mid x, W, b) = \mathrm{softmax}_i(W x + b) = \frac{e^{W_i x + b_i}}{\sum_j e^{W_j x + b_j}}$$

$$y_{\mathrm{pred}} = \operatorname{argmax}_i P(Y=i \mid x, W, b)$$
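These formulas translate directly into Theano expressions; below is a minimal sketch in the spirit of the tutorials' logistic-regression class (the sizes n_in, n_out and all variable names are illustrative):

# Softmax classifier expressions in Theano (minimal sketch following
# the formulas above; sizes and variable names are illustrative)

import numpy
import theano
import theano.tensor as T

n_in, n_out = 784, 10

x = T.matrix('x')    # minibatch of inputs, one example per row
y = T.ivector('y')   # integer class labels

W = theano.shared(numpy.zeros((n_in, n_out)), name='W')
b = theano.shared(numpy.zeros(n_out), name='b')

# P(Y=i|x, W, b) = softmax_i(W x + b)
p_y_given_x = T.nnet.softmax(T.dot(x, W) + b)

# y_pred = argmax_i P(Y=i|x, W, b)
y_pred = T.argmax(p_y_given_x, axis=1)

# negative log-likelihood of the true labels, the usual training cost
nll = -T.mean(T.log(p_y_given_x)[T.arange(y.shape[0]), y])

predict = theano.function([x], y_pred)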

Unsupervised learning
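The tutorials also cover unsupervised building blocks such as auto-encoders and restricted Boltzmann machines. As a flavor, here is a minimal auto-encoder reconstruction cost in Theano (a sketch assuming tied weights and a cross-entropy cost; the sizes and names are illustrative):

# Auto-encoder cost in Theano (minimal sketch: tied weights W' = W^T,
# cross-entropy reconstruction cost; sizes and names are illustrative)

import numpy
import theano
import theano.tensor as T

n_visible, n_hidden = 784, 500
rng = numpy.random.RandomState(123)

x = T.matrix('x')  # minibatch of inputs with values in [0, 1]

W = theano.shared(numpy.asarray(
    rng.uniform(low=-0.1, high=0.1, size=(n_visible, n_hidden))), name='W')
b_h = theano.shared(numpy.zeros(n_hidden), name='b_h')
b_v = theano.shared(numpy.zeros(n_visible), name='b_v')

h = T.nnet.sigmoid(T.dot(x, W) + b_h)    # encoder: hidden representation
z = T.nnet.sigmoid(T.dot(h, W.T) + b_v)  # decoder: reconstruction (tied weights)

# mean cross-entropy between the input and its reconstruction
cost = T.mean(-T.sum(x * T.log(z) + (1 - x) * T.log(1 - z), axis=1))

# gradients for a gradient-descent training loop
grads = T.grad(cost, [W, b_h, b_v])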

Overview of EBLearn

EBLearn usage