
Deep Learning 1: Up to Backpropagation (2025-06-16)

TomNJP

July 19, 2025

Transcript

  1. Perceptron (AND gate) 1/2

     Inputs $x_1, x_2$, weights $w_1, w_2$, threshold $\theta$, output $y$:

     $$y = \begin{cases} 0 & (w_1 x_1 + w_2 x_2 \le \theta) \\ 1 & (w_1 x_1 + w_2 x_2 > \theta) \end{cases}$$

     Set $\theta = -b$ and call $b$ the bias. Then

     $$y = \begin{cases} 0 & (w_1 x_1 + w_2 x_2 + b \le 0) \\ 1 & (w_1 x_1 + w_2 x_2 + b > 0) \end{cases}$$

  2. Perceptron (AND gate) 2/2

     The NAND and OR gates were implemented in the same way (see the sketch below).

     ```python
     import numpy as np

     def AND(x1, x2):
         x = np.array([x1, x2])
         w = np.array([0.5, 0.5])
         b = -0.7
         tmp = np.sum(x * w) + b
         if tmp <= 0:
             return 0
         else:
             return 1

     print(AND(0, 0))  # 0
     print(AND(0, 1))  # 0
     print(AND(1, 0))  # 0
     print(AND(1, 1))  # 1
     ```

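     As a rough illustration of "implemented in the same way", the NAND and OR gates below reuse the weight-and-bias form above; the particular weights and biases are assumptions for this sketch, not values from the deck.

     ```python
     def NAND(x1, x2):
         # AND with the signs of the weights and bias flipped
         x = np.array([x1, x2])
         w = np.array([-0.5, -0.5])
         b = 0.7
         return 0 if np.sum(x * w) + b <= 0 else 1

     def OR(x1, x2):
         # a smaller bias lets a single active input cross the threshold
         x = np.array([x1, x2])
         w = np.array([0.5, 0.5])
         b = -0.2
         return 0 if np.sum(x * w) + b <= 0 else 1

     print([NAND(0, 0), NAND(0, 1), NAND(1, 0), NAND(1, 1)])  # [1, 1, 1, 0]
     print([OR(0, 0), OR(0, 1), OR(1, 0), OR(1, 1)])          # [0, 1, 1, 1]
     ```
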
  3. Multilayer perceptron (XOR gate)

     ```python
     def XOR(x1, x2):
         s1 = NAND(x1, x2)
         s2 = OR(x1, x2)
         y = AND(s1, s2)
         return y

     print(XOR(0, 0))  # 0
     print(XOR(0, 1))  # 1
     print(XOR(1, 0))  # 1
     print(XOR(1, 1))  # 0
     ```

  4. Sigmoid function

     ```python
     import matplotlib.pyplot as plt

     def sigmoid(x):
         return 1 / (1 + np.exp(-x))

     x = np.arange(-5.0, 5.0, 0.1)
     y = sigmoid(x)
     plt.plot(x, y)
     plt.ylim(-0.1, 1.1)
     plt.show()
     ```

  5. ReLU function

     ```python
     def relu(x):
         return np.maximum(0, x)

     x = np.arange(-5.0, 5.0, 0.1)
     y = relu(x)
     plt.plot(x, y)
     plt.ylim(-1.1, 5.6)
     plt.show()
     ```

  6. Neural network (no learning ability)

     ```python
     def init_network():
         network = {}
         network['W1'] = np.array([[0.1, 0.3, 0.5], [0.2, 0.4, 0.6]])
         network['b1'] = np.array([0.1, 0.2, 0.3])
         network['W2'] = np.array([[0.1, 0.3], [0.5, 0.2], [0.4, 0.6]])
         network['b2'] = np.array([0.2, 0.3])
         network['W3'] = np.array([[0.5, 0.2], [0.4, 0.6]])
         network['b3'] = np.array([0.1, 0.3])
         return network

     def forward(network, x):
         W1, W2, W3 = network['W1'], network['W2'], network['W3']
         b1, b2, b3 = network['b1'], network['b2'], network['b3']
         a1 = np.dot(x, W1) + b1
         z1 = sigmoid(a1)
         a2 = np.dot(z1, W2) + b2
         z2 = sigmoid(a2)
         a3 = np.dot(z2, W3) + b3
         y = sigmoid(a3)
         return y
     ```

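     A quick usage check of the forward pass above; this call is not in the deck, and the input values are illustrative (it assumes the sigmoid from slide 4 and numpy imported as np).

     ```python
     network = init_network()
     x = np.array([1.0, 0.5])   # one sample with two features
     y = forward(network, x)
     print(y.shape)             # (2,)
     print(y)                   # two values in (0, 1), since the last layer here is also a sigmoid
     ```
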
  7. Softmax function

     A function for computing probabilities:

     $$y_k = \frac{\exp(a_k)}{\sum_{i=1}^{n} \exp(a_i)} = \frac{C \exp(a_k)}{C \sum_{i=1}^{n} \exp(a_i)} = \frac{\exp(a_k + \log C)}{\sum_{i=1}^{n} \exp(a_i + \log C)} = \frac{\exp(a_k + C')}{\sum_{i=1}^{n} \exp(a_i + C')}$$

     ```python
     def softmax(a):
         c = np.max(a)
         exp_a = np.exp(a - c)   # subtract the maximum for numerical stability
         sum_exp_a = np.sum(exp_a)
         y = exp_a / sum_exp_a
         return y

     a = np.array([2.2, 0.4, 0.7])
     y = softmax(a)
     print(y)  # [0.72023846 0.11905462 0.16070692]
     ```

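     A small numerical check of the identity above: shifting every input by a constant leaves the output unchanged, which is why the implementation can safely subtract np.max(a). This check is not in the deck, and the offset of 1000 is arbitrary.

     ```python
     a = np.array([2.2, 0.4, 0.7])
     print(softmax(a))           # [0.72023846 0.11905462 0.16070692]
     print(softmax(a + 1000.0))  # identical probabilities despite the huge offset
     print(np.exp(a + 1000.0))   # the naive exponentials overflow to inf without the shift
     ```
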
  8. Cross-entropy error

     With the one-hot representation $t = [t_1, t_2, \ldots, t_C]^T$:

     $$E = -\sum_{k=1}^{C} t_k \log(y_k + \epsilon), \qquad \epsilon = 10^{-7}$$

     In the implementation, $\epsilon$ ($10^{-7}$) is added to avoid $\log(0)$.

     ```python
     def cross_entropy_error(y, t):
         batch_size = y.shape[0]
         return -np.sum(t * np.log(y + 1e-7)) / batch_size
     ```

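     A worked example of the batched version above, with made-up predictions for two samples (the values are chosen only for illustration):

     ```python
     y = np.array([[0.7, 0.2, 0.1],   # sample 1: correct class is 0
                   [0.1, 0.8, 0.1]])  # sample 2: correct class is 2
     t = np.array([[1, 0, 0],
                   [0, 0, 1]])
     # E = -(log(0.7) + log(0.1)) / 2 ≈ (0.357 + 2.303) / 2 ≈ 1.33
     print(cross_entropy_error(y, t))
     ```
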
  9. Schematic diagram

     Input: x ∈ ℝ^784 (a flattened 28×28 image)

     Layer 1: Affine1 with W₁ ∈ ℝ^(784×50), b₁ ∈ ℝ^50, a₁ = x·W₁ + b₁ ∈ ℝ^50; ReLU1, z₁ = ReLU(a₁) ∈ ℝ^50

     Layer 2: Affine2 with W₂ ∈ ℝ^(50×10), b₂ ∈ ℝ^10, a₂ = z₁·W₂ + b₂ ∈ ℝ^10

     SoftmaxWithLoss: y = softmax(a₂) ∈ ℝ^10, L = cross_entropy(y, t) (training only)

     Output: y ∈ ℝ^10

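     A minimal shape check of the diagram above, using random arrays in place of real MNIST data (the batch size of 100 is an assumption for this sketch):

     ```python
     x = np.random.rand(100, 784)                    # batch of 100 flattened 28x28 images
     W1, b1 = np.random.randn(784, 50), np.zeros(50)
     W2, b2 = np.random.randn(50, 10), np.zeros(10)
     a1 = x @ W1 + b1
     z1 = np.maximum(0, a1)                          # ReLU
     a2 = z1 @ W2 + b2
     print(a1.shape, z1.shape, a2.shape)             # (100, 50) (100, 50) (100, 10)
     ```
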
  10. Individual components

      ```python
      import numpy as np
      from collections import OrderedDict
      from tensorflow.keras.datasets import mnist

      def softmax(x):
          # batched, numerically stable softmax
          x = x - np.max(x, axis=1, keepdims=True)
          exp_x = np.exp(x)
          return exp_x / np.sum(exp_x, axis=1, keepdims=True)

      def cross_entropy_error(y, t):
          batch_size = y.shape[0]
          return -np.sum(t * np.log(y + 1e-7)) / batch_size

      class Affine:
          def __init__(self, W, b):
              self.W = W
              self.b = b
              self.x = None
              self.dW = None
              self.db = None

          def forward(self, x):
              self.x = x
              return np.dot(x, self.W) + self.b
      ```

  11. (continued)

      ```python
          # Affine, continued from the previous slide
          def backward(self, dout):
              dx = np.dot(dout, self.W.T)
              self.dW = np.dot(self.x.T, dout)
              self.db = np.sum(dout, axis=0)
              return dx

      class ReLU:
          def __init__(self):
              self.mask = None

          def forward(self, x):
              self.mask = (x <= 0)
              out = x.copy()
              out[self.mask] = 0
              return out

          def backward(self, dout):
              dout[self.mask] = 0
              return dout

      class SoftmaxWithLoss:
          def __init__(self):
              self.y = None
              self.t = None
              self.loss = None
      ```

  12. (continued)

      ```python
          # SoftmaxWithLoss, continued from the previous slide
          def forward(self, x, t):
              self.y = softmax(x)
              self.t = t
              self.loss = cross_entropy_error(self.y, self.t)
              return self.loss

          def backward(self, dout=1):
              batch_size = self.y.shape[0]
              dx = (self.y - self.t) / batch_size
              return dx * dout

      def to_onehot(labels, num_cls=10):
          y = np.zeros((labels.size, num_cls), dtype=np.float32)
          y[np.arange(labels.size), labels] = 1.0
          return y
      ```

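      A quick check of to_onehot defined above (the label values are illustrative, not from the deck):

      ```python
      labels = np.array([3, 0, 9])
      print(to_onehot(labels))
      # [[0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]
      #  [1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
      #  [0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]]
      ```
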
  13. Network part

      ```python
      class TwoLayerNet:
          def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
              self.params = {}
              self.params['W1'] = weight_init_std * np.random.randn(input_size, hidden_size)
              self.params['b1'] = np.zeros(hidden_size)
              self.params['W2'] = weight_init_std * np.random.randn(hidden_size, output_size)
              self.params['b2'] = np.zeros(output_size)

              # the layers hold references to the parameter arrays above
              self.layers = OrderedDict()
              self.layers['Affine1'] = Affine(self.params['W1'], self.params['b1'])
              self.layers['ReLU1'] = ReLU()
              self.layers['Affine2'] = Affine(self.params['W2'], self.params['b2'])
              self.lastLayer = SoftmaxWithLoss()

          def predict(self, x):
              for layer in self.layers.values():
                  x = layer.forward(x)
              return x

          def loss(self, x, t):
              scores = self.predict(x)
              return self.lastLayer.forward(scores, t)
      ```

  14. (continued)

      ```python
          # TwoLayerNet, continued from the previous slide
          def accuracy(self, x, t):
              scores = self.predict(x)
              y_pred = np.argmax(scores, axis=1)
              t_labels = np.argmax(t, axis=1)
              return np.sum(y_pred == t_labels) / float(x.shape[0])

          def gradient(self, x, t):
              self.loss(x, t)                    # forward pass

              dout = self.lastLayer.backward()   # backward pass, layer by layer
              layers = list(self.layers.values())
              layers.reverse()
              for layer in layers:
                  dout = layer.backward(dout)

              grads = {}
              grads['W1'] = self.layers['Affine1'].dW
              grads['b1'] = self.layers['Affine1'].db
              grads['W2'] = self.layers['Affine2'].dW
              grads['b2'] = self.layers['Affine2'].db
              return grads
      ```

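      A common sanity check for the backpropagated gradients above is to compare them with numerical gradients. This sketch is not part of the deck; the numerical_gradient helper is written here only for illustration.

      ```python
      def numerical_gradient(f, x, h=1e-4):
          # central differences, perturbing one element of x at a time
          grad = np.zeros_like(x)
          it = np.nditer(x, flags=['multi_index'])
          while not it.finished:
              idx = it.multi_index
              orig = x[idx]
              x[idx] = orig + h
              fxh1 = f()
              x[idx] = orig - h
              fxh2 = f()
              grad[idx] = (fxh1 - fxh2) / (2 * h)
              x[idx] = orig
              it.iternext()
          return grad

      net = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)
      x = np.random.rand(3, 784)
      t = to_onehot(np.array([1, 4, 7]))
      grads_bp = net.gradient(x, t)
      # the Affine layers reference net.params, so perturbing params['b2'] in place
      # is visible to net.loss
      grads_num = numerical_gradient(lambda: net.loss(x, t), net.params['b2'])
      print(np.max(np.abs(grads_bp['b2'] - grads_num)))  # should be very small (roughly 1e-7 or less)
      ```
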
  15. Training part

      ```python
      (x_train, t_train), (x_test, t_test) = mnist.load_data()
      x_train = x_train.reshape(-1, 28*28).astype('float32') / 255.0
      x_test = x_test.reshape(-1, 28*28).astype('float32') / 255.0
      t_train_oh = to_onehot(t_train)
      t_test_oh = to_onehot(t_test)

      network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

      iters_num = 10000
      train_size = x_train.shape[0]
      batch_size = 100
      learning_rate = 0.1
      iter_per_epoch = max(train_size // batch_size, 1)

      train_loss_list = []
      train_acc_list = []
      test_acc_list = []
      ```

  16. (continued)

      ```python
      for i in range(iters_num):
          # sample a random mini-batch
          batch_mask = np.random.choice(train_size, batch_size)
          x_batch = x_train[batch_mask]
          t_batch = t_train_oh[batch_mask]

          grad = network.gradient(x_batch, t_batch)

          # SGD update; the Affine layers share these arrays, so the in-place
          # update is seen by the layers as well
          for key in ('W1', 'b1', 'W2', 'b2'):
              network.params[key] -= learning_rate * grad[key]

          loss = network.loss(x_batch, t_batch)
          train_loss_list.append(loss)

          if i % iter_per_epoch == 0:
              print(loss)
              train_acc = network.accuracy(x_train, t_train_oh)
              test_acc = network.accuracy(x_test, t_test_oh)
              train_acc_list.append(train_acc)
              test_acc_list.append(test_acc)
              print(f"iter {i:5d} / {iters_num} | train acc: {train_acc:.4f} test acc: {test_acc:.4f}")
      ```

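      The loss and accuracy histories collected above are never plotted in the deck; one possible follow-up, assuming matplotlib is available, is a quick sketch like this:

      ```python
      import matplotlib.pyplot as plt

      fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 4))
      ax1.plot(train_loss_list)
      ax1.set_xlabel('iteration')
      ax1.set_ylabel('training loss')
      ax2.plot(train_acc_list, label='train acc')
      ax2.plot(test_acc_list, label='test acc')
      ax2.set_xlabel('epoch')
      ax2.set_ylabel('accuracy')
      ax2.legend()
      plt.tight_layout()
      plt.show()
      ```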