Building a Simple Neural Network

Build a simple neural network (one hidden layer) by hand in Python, using only NumPy.
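For orientation, the model below is a standard one-hidden-layer network with tanh hidden units and a sigmoid output. Written to match the variable names in the code, the forward pass and cross-entropy cost are

$$Z_1 = W_1 X + b_1, \qquad A_1 = \tanh(Z_1)$$
$$Z_2 = W_2 A_1 + b_2, \qquad A_2 = \sigma(Z_2) = \frac{1}{1 + e^{-Z_2}}$$
$$J = -\frac{1}{m} \sum_{i=1}^{m} \left[ y^{(i)} \log a_2^{(i)} + \left(1 - y^{(i)}\right) \log\left(1 - a_2^{(i)}\right) \right]$$

and the gradients used in back-propagation (with $\circ$ denoting element-wise multiplication, which comes from $\tanh'(z) = 1 - \tanh^2(z)$) are

$$dZ_2 = A_2 - Y, \quad dW_2 = \tfrac{1}{m}\, dZ_2 A_1^\top, \quad db_2 = \tfrac{1}{m} \sum dZ_2$$
$$dZ_1 = W_2^\top dZ_2 \circ \left(1 - A_1^2\right), \quad dW_1 = \tfrac{1}{m}\, dZ_1 X^\top, \quad db_1 = \tfrac{1}{m} \sum dZ_1$$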

Python code

import numpy as np


# Load the planar "flower" dataset: two classes arranged in interleaved petals
def load_planar_dataset():
    np.random.seed(1)
    m = 400  # number of examples
    N = int(m/2)  # number of points per class
    D = 2  # dimensionality
    X = np.zeros((m, D))  # data matrix where each row is a single example
    Y = np.zeros((m, 1), dtype='uint8')  # labels vector (0 for red, 1 for blue)
    a = 4  # maximum radius of the flower
    for j in range(2):
        ix = range(N*j, N*(j+1))
        t = np.linspace(j*3.12, (j+1)*3.12, N) + np.random.randn(N)*0.2  # theta (3.12 ~ pi)
        r = a*np.sin(4*t) + np.random.randn(N)*0.2  # radius
        X[ix] = np.c_[r*np.sin(t), r*np.cos(t)]
        Y[ix] = j
    X = X.T
    Y = Y.T
    return X, Y


# Sigmoid activation
def sigmoid(z):
    return 1. / (1. + np.exp(-z))


# Simple neural network model (one hidden layer)
class SimpleNeuralNetwork():
    def __init__(self, input_size, hidden_layer_size):
        self.parameters = self.__parameter_initializer(input_size, hidden_layer_size)

    # Parameter initialization: small random weights, zero biases
    def __parameter_initializer(self, n_x, n_h):
        W1, b1 = np.random.randn(n_h, n_x)*0.01, np.zeros((n_h, 1))
        W2, b2 = np.random.randn(1, n_h)*0.01, np.zeros((1, 1))
        return {'W1': W1, 'b1': b1, 'W2': W2, 'b2': b2}

    # Forward propagation
    def __forward_propagation(self, X):
        W1, b1 = self.parameters['W1'], self.parameters['b1']
        W2, b2 = self.parameters['W2'], self.parameters['b2']
        Z1 = np.dot(W1, X) + b1
        A1 = np.tanh(Z1)
        Z2 = np.dot(W2, A1) + b2
        A2 = sigmoid(Z2)
        cache = {'X': X, 'Z1': Z1, 'A1': A1, 'Z2': Z2, 'A2': A2}
        return A2, cache

    # Cross-entropy cost
    def __compute_cost(self, A2, Y):
        m = A2.shape[1]
        cost = -np.sum(Y*np.log(A2) + (1-Y)*np.log(1-A2)) / m
        return cost

    # Cost function: forward pass followed by the cost computation
    def cost_function(self, X, Y):
        A2, cache = self.__forward_propagation(X)
        cost = self.__compute_cost(A2, Y)
        return cost

    # Backward propagation: compute gradients
    def __backward_propagation(self, cache, Y):
        A1, A2 = cache['A1'], cache['A2']
        W2 = self.parameters['W2']
        X = cache['X']
        m = X.shape[1]
        dZ2 = A2 - Y
        dW2 = np.dot(dZ2, A1.T) / m
        db2 = np.sum(dZ2, axis=1, keepdims=True) / m
        dZ1 = np.dot(W2.T, dZ2) * (1 - np.power(A1, 2))  # tanh'(Z1) = 1 - A1^2
        dW1 = np.dot(dZ1, X.T) / m
        db1 = np.sum(dZ1, axis=1, keepdims=True) / m
        grads = {'dW1': dW1, 'db1': db1, 'dW2': dW2, 'db2': db2}
        return grads

    # Gradient-descent parameter update
    def __update_parameters(self, grads, learning_rate):
        self.parameters['W1'] -= learning_rate * grads['dW1']
        self.parameters['b1'] -= learning_rate * grads['db1']
        self.parameters['W2'] -= learning_rate * grads['dW2']
        self.parameters['b2'] -= learning_rate * grads['db2']

    # Fit with batch gradient descent
    def fit(self, X, Y, num_iterations, learning_rate, print_cost=False, print_num=100):
        for i in range(num_iterations):
            A2, cache = self.__forward_propagation(X)  # forward propagation
            cost = self.__compute_cost(A2, Y)  # compute cost (reuses the forward pass above)
            grads = self.__backward_propagation(cache, Y)  # backward propagation
            self.__update_parameters(grads, learning_rate)  # update parameters
            if i % print_num == 0 and print_cost:
                print("Cost after iteration %i: %f" % (i, cost))
        return self

    # Predicted probabilities
    def predict_prob(self, X):
        A2, _ = self.__forward_propagation(X)
        return A2

    # Predicted labels (0 or 1)
    def predict(self, X, threshold=0.5):
        pred_prob = self.predict_prob(X)
        Y_prediction = (pred_prob > threshold).astype(int)
        return Y_prediction

    # Accuracy
    def accuracy_score(self, X, Y):
        pred = self.predict(X)
        return len(Y[pred == Y]) / Y.shape[1]


# Load the data
np.random.seed(1)
X, Y = load_planar_dataset()

# Hyperparameters
np.random.seed(3)
num_iter = 10001
learning_rate = 1.2
input_size = X.shape[0]
hidden_layer_size = 4

# Train the neural network model
clf = SimpleNeuralNetwork(input_size=input_size, hidden_layer_size=hidden_layer_size).fit(X, Y, num_iter, learning_rate, True, 1000)
train_acc = clf.accuracy_score(X, Y)
print('training accuracy: {}%'.format(train_acc*100))

# Results for different numbers of hidden units
for hidden_layer_size in [1, 2, 3, 4, 5, 20, 50]:
    clf = SimpleNeuralNetwork(input_size=input_size, hidden_layer_size=hidden_layer_size).fit(X, Y, num_iter, learning_rate, False)
    print('{} hidden units, cost: {}, accuracy: {:.2%}'.format(hidden_layer_size, clf.cost_function(X, Y), clf.accuracy_score(X, Y)))
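Hand-written backward passes are easy to get subtly wrong, so a centered finite-difference check is worth running once. Below is a minimal sketch: numerical_grad_check is a helper written for this check (not part of the model), and the name-mangled _SimpleNeuralNetwork__... attributes are how Python exposes the private methods defined above.

# Compare analytic gradients against centered finite differences
def numerical_grad_check(net, X, Y, eps=1e-7):
    _, cache = net._SimpleNeuralNetwork__forward_propagation(X)
    grads = net._SimpleNeuralNetwork__backward_propagation(cache, Y)
    for key in ['W1', 'b1', 'W2', 'b2']:
        param = net.parameters[key]
        old = param[0, 0]  # check one representative entry per parameter
        param[0, 0] = old + eps
        cost_plus = net.cost_function(X, Y)
        param[0, 0] = old - eps
        cost_minus = net.cost_function(X, Y)
        param[0, 0] = old  # restore the original value
        numeric = (cost_plus - cost_minus) / (2 * eps)
        analytic = grads['d' + key][0, 0]
        print('{}: analytic={:.10f}, numeric={:.10f}'.format(key, analytic, numeric))

# Run the check on a freshly initialized network; the two columns should agree
# to many decimal places if the backward pass is correct
numerical_grad_check(SimpleNeuralNetwork(input_size=2, hidden_layer_size=4), X, Y)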

Training results

Cost after iteration 0: 0.693162
Cost after iteration 1000: 0.258625
Cost after iteration 2000: 0.239334
Cost after iteration 3000: 0.230802
Cost after iteration 4000: 0.225528
Cost after iteration 5000: 0.221845
Cost after iteration 6000: 0.219094
Cost after iteration 7000: 0.220620
Cost after iteration 8000: 0.219398
Cost after iteration 9000: 0.218482
Cost after iteration 10000: 0.217738
training accuracy: 90.5%
1 hidden units, cost: 0.6315586841940574, accuracy: 67.50%
2 hidden units, cost: 0.5727605029758619, accuracy: 67.25%
3 hidden units, cost: 0.2521706044982674, accuracy: 91.00%
4 hidden units, cost: 0.24709879496077952, accuracy: 91.25%
5 hidden units, cost: 0.2471140523262158, accuracy: 91.25%
20 hidden units, cost: 0.1580494894558535, accuracy: 91.25%
50 hidden units, cost: 0.16379020903733227, accuracy: 91.00%
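Since the planar dataset is two-dimensional, the learned decision boundary can be visualized directly, which makes the effect of the hidden-layer size easy to see. Below is a minimal sketch, assuming matplotlib is available; plot_decision_boundary is an illustrative helper, not part of the model above.

import matplotlib.pyplot as plt

# Plot the classifier's decision regions together with the training points
def plot_decision_boundary(model, X, Y):
    x_min, x_max = X[0, :].min() - 1, X[0, :].max() + 1
    y_min, y_max = X[1, :].min() - 1, X[1, :].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.01), np.arange(y_min, y_max, 0.01))
    Z = model(np.c_[xx.ravel(), yy.ravel()].T)  # classify every grid point
    Z = Z.reshape(xx.shape)
    plt.contourf(xx, yy, Z, cmap=plt.cm.Spectral)
    plt.scatter(X[0, :], X[1, :], c=Y.ravel(), cmap=plt.cm.Spectral)
    plt.xlabel('x1')
    plt.ylabel('x2')
    plt.show()

# clf here is the most recently trained model from the script above (50 hidden
# units); retrain with hidden_layer_size=4 first to see the boundary of the main run
plot_decision_boundary(lambda x: clf.predict(x), X, Y)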