# Data Science from Scratch: First Principles with Python (2015)

### Chapter 18. Neural Networks

I like nonsense; it wakes up the brain cells.

Dr. Seuss

# Perceptrons

def step_function(x):
    """Threshold activation: 1 when x is non-negative, otherwise 0."""
    if x >= 0:
        return 1
    return 0

def perceptron_output(weights, bias, x):
    """Return 1 if the perceptron 'fires' on input x, 0 if it does not.

    The perceptron fires when the weighted sum dot(weights, x) plus the
    bias passes the step_function threshold.
    """
    return step_function(dot(weights, x) + bias)

`dot(weights, x) + bias == 0`

# AND gate: with 0/1 inputs, 2*x1 + 2*x2 - 3 is non-negative only when
# both inputs are 1
weights = [2, 2]
bias = -3

# OR gate: 2*x1 + 2*x2 - 1 is non-negative as soon as either input is 1
weights = [2, 2]
bias = -1

# NOT gate: a single input; -2*x + 1 is non-negative only when x is 0
weights = [-2]
bias = 1

# for 0/1 inputs, AND is the smaller input and OR is the larger one
and_gate = min
or_gate = max


def xor_gate(x, y):
    """Return 0 when the two inputs are equal, 1 when they differ."""
    if x == y:
        return 0
    return 1

# Feed-Forward Neural Networks

def sigmoid(t):
    """Logistic function 1 / (1 + e**-t); the result lies between 0 and 1.

    For very negative t, math.exp(-t) exceeds the float range and raises
    OverflowError. The true value there is indistinguishable from 0, so
    0.0 is returned instead of letting the error escape. Every input that
    did not overflow before yields exactly the same result as before.
    """
    try:
        return 1 / (1 + math.exp(-t))
    except OverflowError:
        # exp(-t) is astronomically large, so 1 / (1 + exp(-t)) rounds to 0
        return 0.0

###### NOTE

def neuron_output(weights, inputs):
    """Return the sigmoid of the weighted sum dot(weights, inputs)."""
    weighted_sum = dot(weights, inputs)
    return sigmoid(weighted_sum)

def feed_forward(neural_network, input_vector):
    """Forward-propagate input_vector through neural_network.

    neural_network is a list of layers, each layer a list of neurons, each
    neuron a list of weights whose last entry multiplies a constant bias
    input of 1. Returns a list with one entry per layer, holding that
    layer's neuron outputs; the final entry is the network's output.
    """
    layer_outputs = []
    current_input = input_vector

    # process one layer at a time
    for layer in neural_network:
        biased_input = current_input + [1]          # add a bias input
        activations = []
        for neuron in layer:                        # for each neuron
            activations.append(neuron_output(neuron, biased_input))
        layer_outputs.append(activations)           # and remember it

        # this layer's output is the next layer's input
        current_input = activations

    return layer_outputs

# each neuron is [weight for input 1, weight for input 2, bias weight]
xor_network = [
    # hidden layer
    [
        [20, 20, -30],      # 'and' neuron
        [20, 20, -10],      # 'or' neuron
    ],
    # output layer
    [
        [-60, 60, -30],     # '2nd input but not 1st input' neuron
    ],
]

# Print the XOR network's output for every combination of 0/1 inputs.
for x in [0, 1]:
    for y in [0, 1]:
        # feed_forward produces the outputs of every neuron
        # feed_forward[-1] is the outputs of the output-layer neurons
        # (a bare `x, y, ...` tuple expression would be evaluated and thrown
        # away, so the values are printed explicitly)
        print("%d %d %s" % (x, y, feed_forward(xor_network, [x, y])[-1]))

# 0 0 [9.38314668300676e-14]
# 0 1 [0.9999999999999059]
# 1 0 [0.9999999999999059]
# 1 1 [9.383146683006828e-14]

1. Run `feed_forward` on an input vector to produce the outputs of all the neurons in the network.

2. This results in an error for each output neuron — the difference between its output and its target.

3. Compute the gradient of this error as a function of the neuron’s weights, and adjust its weights in the direction that most decreases the error.

4. “Propagate” these output errors backward to infer errors for the hidden layer.

5. Compute the gradients of these errors and adjust the hidden layer’s weights in the same manner.

def backpropagate(network, input_vector, targets):
    """Adjust the weights of `network` in place for one training example.

    network      -- [hidden_layer, output_layer]; feed_forward's result is
                    unpacked into exactly two layers, so no other depth works
    input_vector -- the list of input values for this example
    targets      -- the desired output-layer values, one per output neuron

    Returns None; the neurons' weight lists inside `network` are mutated.
    The step size is implicitly 1 (deltas are subtracted unscaled).
    """
    hidden_outputs, outputs = feed_forward(network, input_vector)

    # the output * (1 - output) is from the derivative of sigmoid
    output_deltas = [output * (1 - output) * (output - target)
                     for output, target in zip(outputs, targets)]

    # adjust weights for output layer, one neuron at a time
    for i, output_neuron in enumerate(network[-1]):
        # focus on the ith output layer neuron
        for j, hidden_output in enumerate(hidden_outputs + [1]):
            # adjust the jth weight based on both
            # this neuron's delta and its jth input
            output_neuron[j] -= output_deltas[i] * hidden_output

    # back-propagate errors to hidden layer, reading the output weights from
    # the network that was passed in rather than from a module-level global;
    # note these are the output weights as already adjusted above
    hidden_deltas = [hidden_output * (1 - hidden_output) *
                     dot(output_deltas, [n[i] for n in network[-1]])
                     for i, hidden_output in enumerate(hidden_outputs)]

    # adjust weights for hidden layer, one neuron at a time
    for i, hidden_neuron in enumerate(network[0]):
        # the trailing 1 is the bias input
        for j, input_value in enumerate(input_vector + [1]):
            hidden_neuron[j] -= hidden_deltas[i] * input_value

# Example: Defeating a CAPTCHA

@@@@@ ..@.. @@@@@ @@@@@ @...@ @@@@@ @@@@@ @@@@@ @@@@@ @@@@@

@...@ ..@.. ....@ ....@ @...@ @.... @.... ....@ @...@ @...@

@...@ ..@.. @@@@@ @@@@@ @@@@@ @@@@@ @@@@@ ....@ @@@@@ @@@@@

@...@ ..@.. @.... ....@ ....@ ....@ @...@ ....@ @...@ ....@

@@@@@ ..@.. @@@@@ @@@@@ ....@ @@@@@ @@@@@ ....@ @@@@@ @@@@@

# the digit 0 as a flattened 5x5 image, one row per line (1 = '@', 0 = '.')
zero_digit = [
    1, 1, 1, 1, 1,      # @@@@@
    1, 0, 0, 0, 1,      # @...@
    1, 0, 0, 0, 1,      # @...@
    1, 0, 0, 0, 1,      # @...@
    1, 1, 1, 1, 1,      # @@@@@
]

`[0, 0, 0, 0, 1, 0, 0, 0, 0, 0]`

# targets[j] is the desired output for digit j: a length-10 vector with a 1
# in position j and 0 everywhere else
targets = [[int(i == j) for i in range(10)]
           for j in range(10)]

random.seed(0)      # to get repeatable results

input_size = 25     # each input is a vector of length 25
num_hidden = 5      # we'll have 5 neurons in the hidden layer
output_size = 10    # we need 10 outputs for each input

# each hidden neuron has one weight per input, plus a bias weight
hidden_layer = [
    [random.random() for __ in range(input_size + 1)]
    for __ in range(num_hidden)
]

# each output neuron has one weight per hidden neuron, plus a bias weight
output_layer = [
    [random.random() for __ in range(num_hidden + 1)]
    for __ in range(output_size)
]

# the network starts out with random weights
network = [hidden_layer, output_layer]

# 10,000 iterations seems enough to converge
for __ in range(10000):
    # one pass over every (input, target) training pair
    for input_vector, target_vector in zip(inputs, targets):
        backpropagate(network, input_vector, target_vector)

def predict(input):
    """Return the output-layer values of `network` for the given input."""
    all_layer_outputs = feed_forward(network, input)
    return all_layer_outputs[-1]

# the training example for the digit 7
predict(inputs[7])
# [0.026, 0.0, 0.0, 0.018, 0.001, 0.0, 0.0, 0.967, 0.0, 0.0]

# a differently drawn 3
predict([0, 1, 1, 1, 0,     # .@@@.
         0, 0, 0, 1, 1,     # ...@@
         0, 0, 1, 1, 0,     # ..@@.
         0, 0, 0, 1, 1,     # ...@@
         0, 1, 1, 1, 0])    # .@@@.
# [0.0, 0.0, 0.0, 0.92, 0.0, 0.0, 0.0, 0.01, 0.0, 0.12]

# a differently drawn 8
predict([0, 1, 1, 1, 0,     # .@@@.
         1, 0, 0, 1, 1,     # @..@@
         0, 1, 1, 1, 0,     # .@@@.
         1, 0, 0, 1, 1,     # @..@@
         0, 1, 1, 1, 0])    # .@@@.
# [0.0, 0.0, 0.0, 0.0, 0.0, 0.55, 0.0, 0.0, 0.93, 1.0]

import matplotlib

weights = network[0][0]                 # first neuron in hidden layer

# darkness only depends on absolute value; build a real list rather than a
# lazy `map` object so that it can be sliced below
abs_weights = [abs(weight) for weight in weights]

# turn the first 25 weights into a 5x5 grid:
# [abs_weights[0:5], ..., abs_weights[20:25]]
# (any entry past index 24 is not plotted)
grid = [abs_weights[row:(row + 5)]
        for row in range(0, 25, 5)]

ax = plt.gca()                          # to use hatching, we'll need the axis

ax.imshow(grid,                         # here same as plt.imshow
          cmap=matplotlib.cm.binary,    # use white-black color scale
          interpolation='none')         # plot blocks as blocks

# plot blocks as blocks

def patch(x, y, hatch, color):
    """Build an unfilled 1x1 matplotlib Rectangle whose lower-left corner
    is (x - 0.5, y - 0.5), with the given hatch pattern and color."""
    lower_left = (x - 0.5, y - 0.5)
    return matplotlib.patches.Rectangle(lower_left, 1, 1,
                                        hatch=hatch, fill=False, color=color)

# cross-hatch the negative weights
for i in range(5):                      # row
    for j in range(5):                  # column
        # row i, column j = weights[5*i + j]
        if weights[5*i + j] >= 0:
            continue
        # add black and white hatches, so visible whether dark or light
        ax.add_patch(patch(j, i, '/', "white"))
        ax.add_patch(patch(j, i, '\\', "black"))

plt.show()

# Probe the first hidden neuron (feed_forward(...)[0][0]) with three
# hand-made 5x5 inputs.

# a 1 in the first column of every row
left_column_only = [1, 0, 0, 0, 0] * 5
feed_forward(network, left_column_only)[0][0]       # 1.0

# two blank rows, then 0 1 1 1 0, then two more blank rows
blank_row = [0, 0, 0, 0, 0]
center_middle_row = blank_row * 2 + [0, 1, 1, 1, 0] + blank_row * 2
feed_forward(network, center_middle_row)[0][0]      # 0.95

# a 1 in the last column of every row
right_column_only = [0, 0, 0, 0, 1] * 5
feed_forward(network, right_column_only)[0][0]      # 0.0

# a hand-drawn 3 as a flattened 5x5 image
my_three = [
    0, 1, 1, 1, 0,      # .@@@.
    0, 0, 0, 1, 1,      # ...@@
    0, 0, 1, 1, 0,      # ..@@.
    0, 0, 0, 1, 1,      # ...@@
    0, 1, 1, 1, 0,      # .@@@.
]

# the network has two layers, so feed_forward returns exactly two lists
hidden, output = feed_forward(network, my_three)

0.121080 # from network[0][0], probably dinged by (1, 4)

0.999979 # from network[0][1], big contributions from (0, 2) and (2, 2)

0.999999 # from network[0][2], positive everywhere except (3, 4)

0.999992 # from network[0][3], again big contributions from (0, 2) and (2, 2)

0.000000 # from network[0][4], negative or zero everywhere except center row

-11.61 # weight for hidden[0]

-2.17 # weight for hidden[1]

9.31 # weight for hidden[2]

-1.38 # weight for hidden[3]

-11.47 # weight for hidden[4]

-1.92 # weight for bias input

`sigmoid(.121 * -11.61 + 1 * -2.17 + 1 * 9.31 - 1.38 * 1 - 0 * 11.47 - 1.92)`

- Coursera has a free course on Neural Networks for Machine Learning. As I write this it was last run in 2012, but the course materials are still available.

- Michael Nielsen is writing a free online book on Neural Networks and Deep Learning. By the time you read this it might be finished.

- PyBrain is a pretty simple Python neural network library.

- Pylearn2 is a much more advanced (and much harder to use) neural network library.