Logistic regression is a supervised machine learning algorithm for binary (0 or 1) classification. It estimates the probability that an input belongs to the positive class, and training aims to minimize the error between the predicted and true values.
Logistic regression estimates a probability: given $x$, $\hat{y} = P(y = 1 \mid x)$, where $0 \le \hat{y} \le 1$.
Parameter setup
Input feature vector: $x \in \mathbb{R}^{n_x}$, where $n_x$ is the number of features
Label: $y \in \{0, 1\}$
Weights: $w \in \mathbb{R}^{n_x}$, where $n_x$ is the number of features
Bias: $b \in \mathbb{R}$
Output: $\hat{y} = \sigma(w^T x + b)$
Sigmoid function: $s = \sigma(w^T x + b) = \sigma(z) = \frac{1}{1 + e^{-z}}$
Sigmoid function plot: (figure omitted; the curve rises smoothly from 0 to 1 and crosses 0.5 at $z = 0$)
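As a quick numeric check on the definition above, here is a minimal sketch (the `sigmoid` helper mirrors the one defined in the full example below):

```python
import numpy as np

def sigmoid(z):
    # sigma(z) = 1 / (1 + exp(-z)), applied elementwise
    return 1. / (1. + np.exp(-z))

print(sigmoid(0))                          # 0.5: the curve crosses 0.5 at z = 0
print(sigmoid(np.array([-10., 0., 10.])))  # ~[4.5e-05, 0.5, 0.99995]: saturates toward 0 and 1
```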
Cost function
The loss measures how well a single prediction matches its label; the cost averages the loss over all $m$ training examples.
Loss function: $L(\hat{y}^{(i)}, y^{(i)}) = -\left( y^{(i)} \log \hat{y}^{(i)} + (1 - y^{(i)}) \log(1 - \hat{y}^{(i)}) \right)$
Cost function: $J(w, b) = \frac{1}{m} \sum_{i=1}^{m} L(\hat{y}^{(i)}, y^{(i)}) = -\frac{1}{m} \sum_{i=1}^{m} \left[ y^{(i)} \log \hat{y}^{(i)} + (1 - y^{(i)}) \log(1 - \hat{y}^{(i)}) \right]$
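Gradient descent needs the partial derivatives of $J$. The backward-propagation step in the code below uses the standard logistic-regression gradients, which (with $A = \sigma(w^T X + b)$ computed over all $m$ examples at once) work out to:

$$\frac{\partial J}{\partial w} = \frac{1}{m} X (A - Y)^T, \qquad \frac{\partial J}{\partial b} = \frac{1}{m} \sum_{i=1}^{m} \left( a^{(i)} - y^{(i)} \right)$$

These follow from $\frac{\partial L}{\partial z^{(i)}} = a^{(i)} - y^{(i)}$, a convenient simplification that comes from pairing the sigmoid with the cross-entropy loss.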
Cat recognition example
The following NumPy implementation trains a logistic-regression classifier on the cat/non-cat dataset:
```python
import h5py
import numpy as np


# Load the dataset
def load_dataset():
    train_dataset = h5py.File('datasets/train_catvnoncat.h5', "r")
    train_set_x_orig = np.array(train_dataset["train_set_x"][:])  # train set features
    train_set_y_orig = np.array(train_dataset["train_set_y"][:])  # train set labels
    test_dataset = h5py.File('datasets/test_catvnoncat.h5', "r")
    test_set_x_orig = np.array(test_dataset["test_set_x"][:])  # test set features
    test_set_y_orig = np.array(test_dataset["test_set_y"][:])  # test set labels
    classes = np.array(test_dataset["list_classes"][:])  # the list of classes
    train_set_y_orig = train_set_y_orig.reshape((1, train_set_y_orig.shape[0]))
    test_set_y_orig = test_set_y_orig.reshape((1, test_set_y_orig.shape[0]))
    return train_set_x_orig, train_set_y_orig, test_set_x_orig, test_set_y_orig, classes


# Sigmoid function
def sigmoid(z):
    return 1. / (1. + np.exp(-z))


# Logistic regression model
class LogisticRegression():
    def __init__(self):
        pass

    # Initialize parameters to zeros
    def __parameters_initializer(self, input_size):
        w = np.zeros((input_size, 1), dtype=float)
        b = 0.0
        return w, b

    # Forward propagation: A = sigmoid(w.T X + b)
    def __forward_propagation(self, X):
        A = sigmoid(np.dot(self.w.T, X) + self.b)
        return A

    # Cross-entropy cost over m examples
    def __compute_cost(self, A, Y):
        m = A.shape[1]
        cost = -np.sum(Y * np.log(A) + (1 - Y) * np.log(1 - A)) / m
        return cost

    # Cost function
    def cost_function(self, X, Y):
        A = self.__forward_propagation(X)
        cost = self.__compute_cost(A, Y)
        return cost

    # Backward propagation: compute the gradients
    def __backward_propagation(self, A, X, Y):
        m = X.shape[1]
        dw = np.dot(X, (A - Y).T) / m
        db = np.sum(A - Y) / m
        grads = {"dw": dw, "db": db}
        return grads

    # Gradient-descent parameter update
    def __update_parameters(self, grads, learning_rate):
        self.w -= learning_rate * grads['dw']
        self.b -= learning_rate * grads['db']

    # Fit the model
    def fit(self, X, Y, num_iterations, learning_rate, print_cost=False, print_num=100):
        self.w, self.b = self.__parameters_initializer(X.shape[0])
        for i in range(num_iterations):
            A = self.__forward_propagation(X)               # forward propagation
            cost = self.__compute_cost(A, Y)                # compute the cost
            grads = self.__backward_propagation(A, X, Y)    # backward propagation for gradients
            self.__update_parameters(grads, learning_rate)  # update parameters
            if i % print_num == 0 and print_cost:
                print("Cost after iteration {}: {:.6f}".format(i, cost))
        return self

    # Predicted probabilities
    def predict_prob(self, X):
        A = self.__forward_propagation(X)
        return A

    # Predicted labels (0 or 1)
    def predict(self, X, threshold=0.5):
        pred_prob = self.predict_prob(X)
        threshold_func = np.vectorize(lambda x: 1 if x > threshold else 0)
        Y_prediction = threshold_func(pred_prob)
        return Y_prediction

    # Accuracy
    def accuracy_score(self, X, Y):
        pred = self.predict(X)
        return len(Y[pred == Y]) / Y.shape[1]


# Load the data (cat/non-cat)
train_set_x_orig, train_set_y, test_set_x_orig, test_set_y, classes = load_dataset()
m_train = train_set_x_orig.shape[0]
m_test = test_set_x_orig.shape[0]
num_px = train_set_x_orig.shape[1]
print("Number of training examples: m_train = " + str(m_train))
print("Number of testing examples: m_test = " + str(m_test))
print("Height/Width of each image: num_px = " + str(num_px))
print("Each image is of size: (" + str(num_px) + ", " + str(num_px) + ", 3)")
print("train_set_x shape: " + str(train_set_x_orig.shape))
print("train_set_y shape: " + str(train_set_y.shape))
print("test_set_x shape: " + str(test_set_x_orig.shape))
print("test_set_y shape: " + str(test_set_y.shape))

# Reshape and normalize the training and test sets
train_set_x_flatten = train_set_x_orig.reshape(train_set_x_orig.shape[0], -1).T
test_set_x_flatten = test_set_x_orig.reshape(test_set_x_orig.shape[0], -1).T
print("train_set_x_flatten shape: " + str(train_set_x_flatten.shape))
print("train_set_y shape: " + str(train_set_y.shape))
print("test_set_x_flatten shape: " + str(test_set_x_flatten.shape))
print("test_set_y shape: " + str(test_set_y.shape))
print("sanity check after reshaping: " + str(train_set_x_flatten[0:5, 0]))
X_train = train_set_x_flatten / 255
y_train = train_set_y
X_test = test_set_x_flatten / 255
y_test = test_set_y

# Train the model
num_iter = 2001        # number of iterations
learning_rate = 0.005  # learning rate
clf = LogisticRegression().fit(X_train, y_train, num_iter, learning_rate, True, 500)
train_acc = clf.accuracy_score(X_train, y_train)
print('training acc: {}'.format(train_acc))
test_acc = clf.accuracy_score(X_test, y_test)
print('testing acc: {}'.format(test_acc))
```
- Output
```
Cost after iteration 0: 0.693147
Cost after iteration 500: 0.303273
Cost after iteration 1000: 0.214820
Cost after iteration 1500: 0.166521
Cost after iteration 2000: 0.135608
training acc: 0.9904306220095693
testing acc: 0.7
```
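One practical caveat with the implementation above: if the activations in `A` saturate to exactly 0 or 1 (possible in floating point for large $|z|$), `np.log` returns `-inf` and the cost becomes `nan`. A hedged variant of the cost computation (a sketch, not part of the original code; the `eps` clipping constant is an arbitrary choice):

```python
import numpy as np

def compute_cost_stable(A, Y, eps=1e-12):
    # Clip activations away from exactly 0 and 1 before taking logs,
    # so the cross-entropy cost stays finite.
    m = A.shape[1]
    A = np.clip(A, eps, 1 - eps)
    return -np.sum(Y * np.log(A) + (1 - Y) * np.log(1 - A)) / m
```

The same guard could be dropped into `__compute_cost` without changing its interface.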