Building a Simple Neural Network

Build a simple neural network (one hidden layer) by hand in Python, using only NumPy.
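For orientation, the model below is a standard one-hidden-layer network with tanh hidden units and a sigmoid output. Written to match the variable names in the code, the forward pass and cross-entropy cost are

$$Z_1 = W_1 X + b_1, \qquad A_1 = \tanh(Z_1)$$
$$Z_2 = W_2 A_1 + b_2, \qquad A_2 = \sigma(Z_2) = \frac{1}{1 + e^{-Z_2}}$$
$$J = -\frac{1}{m} \sum_{i=1}^{m} \left[ y^{(i)} \log a_2^{(i)} + \left(1 - y^{(i)}\right) \log\left(1 - a_2^{(i)}\right) \right]$$

and the gradients used in back-propagation (with $\circ$ denoting element-wise multiplication, which comes from $\tanh'(z) = 1 - \tanh^2(z)$) are

$$dZ_2 = A_2 - Y, \quad dW_2 = \tfrac{1}{m}\, dZ_2 A_1^\top, \quad db_2 = \tfrac{1}{m} \sum dZ_2$$
$$dZ_1 = W_2^\top dZ_2 \circ \left(1 - A_1^2\right), \quad dW_1 = \tfrac{1}{m}\, dZ_1 X^\top, \quad db_1 = \tfrac{1}{m} \sum dZ_1$$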

Python code

import numpy as np


# Load the planar "flower" dataset: two classes arranged in interleaved petals
def load_planar_dataset():
    np.random.seed(1)
    m = 400  # number of examples
    N = int(m/2)  # number of points per class
    D = 2  # dimensionality
    X = np.zeros((m, D))  # data matrix where each row is a single example
    Y = np.zeros((m, 1), dtype='uint8')  # labels vector (0 for red, 1 for blue)
    a = 4  # maximum radius of the flower
    for j in range(2):
        ix = range(N*j, N*(j+1))
        t = np.linspace(j*3.12, (j+1)*3.12, N) + np.random.randn(N)*0.2  # theta (3.12 ~ pi)
        r = a*np.sin(4*t) + np.random.randn(N)*0.2  # radius
        X[ix] = np.c_[r*np.sin(t), r*np.cos(t)]
        Y[ix] = j
    X = X.T
    Y = Y.T
    return X, Y


# Sigmoid activation
def sigmoid(z):
    return 1. / (1. + np.exp(-z))


# Simple neural network model (one hidden layer)
class SimpleNeuralNetwork():
    def __init__(self, input_size, hidden_layer_size):
        self.parameters = self.__parameter_initializer(input_size, hidden_layer_size)

    # Parameter initialization: small random weights, zero biases
    def __parameter_initializer(self, n_x, n_h):
        W1, b1 = np.random.randn(n_h, n_x)*0.01, np.zeros((n_h, 1))
        W2, b2 = np.random.randn(1, n_h)*0.01, np.zeros((1, 1))
        return {'W1': W1, 'b1': b1, 'W2': W2, 'b2': b2}

    # Forward propagation
    def __forward_propagation(self, X):
        W1, b1 = self.parameters['W1'], self.parameters['b1']
        W2, b2 = self.parameters['W2'], self.parameters['b2']
        Z1 = np.dot(W1, X) + b1
        A1 = np.tanh(Z1)
        Z2 = np.dot(W2, A1) + b2
        A2 = sigmoid(Z2)
        cache = {'X': X, 'Z1': Z1, 'A1': A1, 'Z2': Z2, 'A2': A2}
        return A2, cache

    # Cross-entropy cost
    def __compute_cost(self, A2, Y):
        m = A2.shape[1]
        cost = -np.sum(Y*np.log(A2) + (1-Y)*np.log(1-A2)) / m
        return cost

    # Cost function: forward pass followed by the cost computation
    def cost_function(self, X, Y):
        A2, cache = self.__forward_propagation(X)
        cost = self.__compute_cost(A2, Y)
        return cost

    # Backward propagation: compute gradients
    def __backward_propagation(self, cache, Y):
        A1, A2 = cache['A1'], cache['A2']
        W2 = self.parameters['W2']
        X = cache['X']
        m = X.shape[1]
        dZ2 = A2 - Y
        dW2 = np.dot(dZ2, A1.T) / m
        db2 = np.sum(dZ2, axis=1, keepdims=True) / m
        dZ1 = np.dot(W2.T, dZ2) * (1 - np.power(A1, 2))  # tanh'(Z1) = 1 - A1^2
        dW1 = np.dot(dZ1, X.T) / m
        db1 = np.sum(dZ1, axis=1, keepdims=True) / m
        grads = {'dW1': dW1, 'db1': db1, 'dW2': dW2, 'db2': db2}
        return grads

    # Gradient-descent parameter update
    def __update_parameters(self, grads, learning_rate):
        self.parameters['W1'] -= learning_rate * grads['dW1']
        self.parameters['b1'] -= learning_rate * grads['db1']
        self.parameters['W2'] -= learning_rate * grads['dW2']
        self.parameters['b2'] -= learning_rate * grads['db2']

    # Fit with batch gradient descent
    def fit(self, X, Y, num_iterations, learning_rate, print_cost=False, print_num=100):
        for i in range(num_iterations):
            A2, cache = self.__forward_propagation(X)  # forward propagation
            cost = self.__compute_cost(A2, Y)  # compute cost (reuses the forward pass above)
            grads = self.__backward_propagation(cache, Y)  # backward propagation
            self.__update_parameters(grads, learning_rate)  # update parameters
            if i % print_num == 0 and print_cost:
                print("Cost after iteration %i: %f" % (i, cost))
        return self

    # Predicted probabilities
    def predict_prob(self, X):
        A2, _ = self.__forward_propagation(X)
        return A2

    # Predicted labels (0 or 1)
    def predict(self, X, threshold=0.5):
        pred_prob = self.predict_prob(X)
        Y_prediction = (pred_prob > threshold).astype(int)
        return Y_prediction

    # Accuracy
    def accuracy_score(self, X, Y):
        pred = self.predict(X)
        return len(Y[pred == Y]) / Y.shape[1]


# Load the data
np.random.seed(1)
X, Y = load_planar_dataset()

# Hyperparameters
np.random.seed(3)
num_iter = 10001
learning_rate = 1.2
input_size = X.shape[0]
hidden_layer_size = 4

# Train the neural network model
clf = SimpleNeuralNetwork(input_size=input_size, hidden_layer_size=hidden_layer_size).fit(X, Y, num_iter, learning_rate, True, 1000)
train_acc = clf.accuracy_score(X, Y)
print('training accuracy: {}%'.format(train_acc*100))

# Results for different numbers of hidden units
for hidden_layer_size in [1, 2, 3, 4, 5, 20, 50]:
    clf = SimpleNeuralNetwork(input_size=input_size, hidden_layer_size=hidden_layer_size).fit(X, Y, num_iter, learning_rate, False)
    print('{} hidden units, cost: {}, accuracy: {:.2%}'.format(hidden_layer_size, clf.cost_function(X, Y), clf.accuracy_score(X, Y)))
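Hand-written backward passes are easy to get subtly wrong, so a centered finite-difference check is worth running once. Below is a minimal sketch: numerical_grad_check is a helper written for this check (not part of the model), and the name-mangled _SimpleNeuralNetwork__... attributes are how Python exposes the private methods defined above.

# Compare analytic gradients against centered finite differences
def numerical_grad_check(net, X, Y, eps=1e-7):
    _, cache = net._SimpleNeuralNetwork__forward_propagation(X)
    grads = net._SimpleNeuralNetwork__backward_propagation(cache, Y)
    for key in ['W1', 'b1', 'W2', 'b2']:
        param = net.parameters[key]
        old = param[0, 0]  # check one representative entry per parameter
        param[0, 0] = old + eps
        cost_plus = net.cost_function(X, Y)
        param[0, 0] = old - eps
        cost_minus = net.cost_function(X, Y)
        param[0, 0] = old  # restore the original value
        numeric = (cost_plus - cost_minus) / (2 * eps)
        analytic = grads['d' + key][0, 0]
        print('{}: analytic={:.10f}, numeric={:.10f}'.format(key, analytic, numeric))

# Run the check on a freshly initialized network; the two columns should agree
# to many decimal places if the backward pass is correct
numerical_grad_check(SimpleNeuralNetwork(input_size=2, hidden_layer_size=4), X, Y)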

Training results

Cost after iteration 0: 0.693162
Cost after iteration 1000: 0.258625
Cost after iteration 2000: 0.239334
Cost after iteration 3000: 0.230802
Cost after iteration 4000: 0.225528
Cost after iteration 5000: 0.221845
Cost after iteration 6000: 0.219094
Cost after iteration 7000: 0.220620
Cost after iteration 8000: 0.219398
Cost after iteration 9000: 0.218482
Cost after iteration 10000: 0.217738
training accuracy: 90.5%
1 hidden units, cost: 0.6315586841940574, accuracy: 67.50%
2 hidden units, cost: 0.5727605029758619, accuracy: 67.25%
3 hidden units, cost: 0.2521706044982674, accuracy: 91.00%
4 hidden units, cost: 0.24709879496077952, accuracy: 91.25%
5 hidden units, cost: 0.2471140523262158, accuracy: 91.25%
20 hidden units, cost: 0.1580494894558535, accuracy: 91.25%
50 hidden units, cost: 0.16379020903733227, accuracy: 91.00%
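Since the planar dataset is two-dimensional, the learned decision boundary can be visualized directly, which makes the effect of the hidden-layer size easy to see. Below is a minimal sketch, assuming matplotlib is available; plot_decision_boundary is an illustrative helper, not part of the model above.

import matplotlib.pyplot as plt

# Plot the classifier's decision regions together with the training points
def plot_decision_boundary(model, X, Y):
    x_min, x_max = X[0, :].min() - 1, X[0, :].max() + 1
    y_min, y_max = X[1, :].min() - 1, X[1, :].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.01), np.arange(y_min, y_max, 0.01))
    Z = model(np.c_[xx.ravel(), yy.ravel()].T)  # classify every grid point
    Z = Z.reshape(xx.shape)
    plt.contourf(xx, yy, Z, cmap=plt.cm.Spectral)
    plt.scatter(X[0, :], X[1, :], c=Y.ravel(), cmap=plt.cm.Spectral)
    plt.xlabel('x1')
    plt.ylabel('x2')
    plt.show()

# clf here is the most recently trained model from the script above (50 hidden
# units); retrain with hidden_layer_size=4 first to see the boundary of the main run
plot_decision_boundary(lambda x: clf.predict(x), X, Y)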