Created
May 25, 2022 09:16
-
-
Save Minecraftian14/adcd2d92a5e61e6ba02db0d9422cc143 to your computer and use it in GitHub Desktop.
Target -> given a digit encoded in binary (4 bits) -> produce the on/off instructions for each LED segment of a seven-segment display.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import matplotlib.pyplot as pyt | |
# Training inputs: the digits 0-9 as 4-bit binary, most significant bit first.
# (Earlier experiments with a scaled scalar encoding were removed as dead code.)
x = np.array([
    [0, 0, 0, 0],  # 0
    [0, 0, 0, 1],  # 1
    [0, 0, 1, 0],  # 2
    [0, 0, 1, 1],  # 3
    [0, 1, 0, 0],  # 4
    [0, 1, 0, 1],  # 5
    [0, 1, 1, 0],  # 6
    [0, 1, 1, 1],  # 7
    [1, 0, 0, 0],  # 8
    [1, 0, 0, 1],  # 9
])

# Training targets: for each digit, the on/off state of the 7 LED segments
# of a seven-segment display.  The segment ordering is the author's own
# convention (it is consistent across rows, e.g. row 8 lights everything).
y = np.array([
    [1, 1, 1, 0, 1, 1, 0],  # 0
    [0, 0, 1, 0, 0, 1, 0],  # 1
    [1, 0, 1, 1, 1, 1, 0],  # 2
    [1, 0, 1, 1, 0, 1, 1],  # 3
    [0, 1, 1, 1, 0, 1, 0],  # 4
    [1, 1, 0, 1, 0, 1, 1],  # 5
    [1, 1, 0, 1, 1, 1, 1],  # 6
    [1, 1, 0, 0, 1, 0, 0],  # 7
    [1, 1, 1, 1, 1, 1, 1],  # 8
    [1, 1, 1, 1, 0, 1, 0],  # 9
])
class Layer:
    """A fully connected linear layer trained with plain gradient steps.

    Note: the update uses `+=`, i.e. it *ascends* the incoming signal --
    the surrounding script feeds a log-likelihood quantity that it drives
    toward zero.  (This class is currently unused: LayerChain builds
    AdamLayer instances.)
    """

    def __init__(self, s_in, s_out, lr=0.001, lm=0.0001):
        # Small random weights; dividing by s_out keeps initial outputs modest.
        self.weights = np.random.randn(s_in, s_out) / s_out
        self.bias = np.zeros((1, s_out))
        self.lr = lr  # learning rate
        self.lm = lm  # L2-regularization strength

    def forward(self, a_in):
        """Affine transform; caches the input batch for backward()."""
        self.a_in = a_in
        return a_in @ self.weights + self.bias

    def backward(self, da_out):
        """Apply one update step and return the gradient w.r.t. the input.

        Fixes vs. the original:
        * the L2 penalty uses the true squared norm sum(W**2) -- the old
          np.sum(W @ W.T) also summed cross-terms between weight rows;
        * `da_out + R` no longer mutates the caller's array in place;
        * da_in is computed from the weights *before* they are updated,
          so the upstream gradient matches the forward pass actually run.
        """
        R = self.lm * np.sum(self.weights ** 2) / (2 * da_out.shape[0])
        da_out = da_out + R
        da_in = da_out @ self.weights.T  # pre-update weights
        self.weights += self.lr * (self.a_in.T @ da_out)
        self.bias += self.lr * np.sum(da_out, axis=0, keepdims=True)
        return da_in
class AdamLayer:
    """Fully connected layer with a ReLU activation, updated with Adam-style
    first/second moment estimates (no bias correction, as in the original)."""

    def __init__(self, s_in, s_out, lr=0.001, lm=0):
        # Xavier/Glorot-style initialization scaled by fan-in + fan-out.
        self.weights = np.random.randn(s_in, s_out) * np.sqrt(2 / (s_in + s_out))
        self.bias = np.zeros((1, s_out))
        self.lr = lr
        self.lm = lm  # kept for interface compatibility; not used below
        # Moment accumulators for weights (vdw/sdw) and bias (vdb/sdb).
        self.vdw = 0
        self.sdw = 0
        self.vdb = 0
        self.sdb = 0
        self.b1 = 0.9   # first-moment decay
        self.b2 = 0.999  # second-moment decay

    def forward(self, a_in):
        """Affine transform followed by ReLU; caches input and activation."""
        self.a_in = a_in
        a_out = a_in @ self.weights + self.bias
        a_out = a_out * (a_out > 0)  # ReLU
        self.a_out = a_out  # cached for the backward pass
        return a_out

    def backward(self, da_out):
        """Apply one Adam-style update and return the input gradient.

        Fixes vs. the original:
        * the ReLU derivative was `np.where(da_out > 0, 1, 0.01)` -- keyed
          on the *gradient's* sign and discarding its magnitude; the correct
          derivative of the ReLU used in forward() is a 0/1 mask on the
          cached activation, applied multiplicatively;
        * da_in is computed before the weights are updated;
        * the stray debug print of the learning rate was removed.
        """
        da_out = da_out * (self.a_out > 0)
        dw = self.a_in.T @ da_out
        self.vdw = self.b1 * self.vdw + (1 - self.b1) * dw
        self.sdw = self.b2 * self.sdw + (1 - self.b2) * (dw * dw)
        db = np.sum(da_out, axis=0, keepdims=True)
        self.vdb = self.b1 * self.vdb + (1 - self.b1) * db
        self.sdb = self.b2 * self.sdb + (1 - self.b2) * (db * db)
        da_in = da_out @ self.weights.T  # pre-update weights
        # `+` ascent kept on purpose: the script drives the log-likelihood up.
        self.weights = self.weights + self.lr * self.vdw / np.sqrt(self.sdw + 1e-8)
        self.bias = self.bias + self.lr * self.vdb / np.sqrt(self.sdb + 1e-8)
        return da_in
class LayerChain:
    """A simple sequential container of AdamLayer instances."""

    def __init__(self, sizes):
        """Build len(sizes)-1 layers; sizes[i] units feed sizes[i+1].

        All layers share the same learning rate.  (The stray debug print
        of `sizes` was removed.)
        """
        self.layers = []
        lr = 0.00001
        for i in range(1, len(sizes)):
            self.layers.append(AdamLayer(sizes[i - 1], sizes[i], lr=lr))

    def forward(self, a_in):
        """Feed the batch through every layer in order."""
        for layer in self.layers:
            a_in = layer.forward(a_in)
        return a_in

    def backward(self, da_out):
        """Back-propagate through the layers in reverse order.

        Now returns the gradient w.r.t. the network input (backward
        compatible: the original returned None, which no caller used).
        """
        for layer in reversed(self.layers):
            da_out = layer.backward(da_out)
        return da_out
# net = LayerChain([2, 8, 6, 4, 2])
# Network: 4 input bits -> eight hidden layers of 40 units -> 7 outputs.
net = LayerChain([x.shape[1], 40, 40, 40, 40, 40, 40, 40, 40, y.shape[1]])
mn = []  # history of mean absolute error |net(x) - y| per iteration
ls = []  # history of the scalar cost J per iteration
for i in range(20000):
    # Hard-coded learning-rate decay at iteration 2000.
    if (i == 2000):
        for layer in net.layers:
            layer.lr *= 0.1
    o = net.forward(x)
    o = 1 / (1 + np.exp(-o))  # squash raw outputs into (0, 1) with a sigmoid
    # ddy = net.forward(x)
    # ddy[ddy < 0.5] = 0
    # ddy[ddy >= 0.5] = 1
    # o=ddy
    # Per-element log-likelihood of the targets under o (each term <= 0);
    # J is its absolute mean, so J -> 0 as predictions approach the targets.
    L = (y * np.log(o) + (1 - y) * np.log(1 - o))
    # L = y-o
    # L = L * L
    J = np.abs(np.mean(L))
    # If the cost rose since the last iteration, shrink every layer's
    # learning rate; give up once it has collapsed below 1e-12.
    if i > 0 and J - ls[len(ls) - 1] > 1e-8:
        print("cost rose at", i)
        for layer in net.layers:
            layer.lr *= 0.1
        if (net.layers[0].lr < 1e-12):
            print("lol")
            break
    ls.append(J)
    # Track mean absolute error of the *raw* (pre-sigmoid) outputs vs. y.
    mn.append(np.mean(np.abs(net.forward(x) - y)))
    # NOTE(review): this is not the analytic gradient of L w.r.t. the
    # outputs -- it broadcasts each sample's mean of L across all 7 output
    # columns, then pushes that through a sigmoid and its derivative.
    # Looks like a deliberate experiment; confirm intent before reusing.
    dy = np.mean(L, axis=1, keepdims=True) @ np.ones((1, y.shape[1]))
    # R = np.mean([np.mean(l.weights.T @ l.weights) for l in net.layers])
    # dy += 100*R
    dy = 1 / (1 + np.exp(-dy))
    dy = dy * (1 - dy)  # sigmoid derivative s * (1 - s)
    net.backward(dy)
# Left panel: mean-absolute-error history; right panel: cost history.
pyt.subplot(1, 2, 1)
pyt.plot(mn)
pyt.subplot(1, 2, 2)
pyt.plot(ls)
pyt.show()
# Threshold the raw network outputs at 0.5 to get the final 0/1 segment map.
ddy = net.forward(x)
ddy[ddy < 0.5] = 0
ddy[ddy >= 0.5] = 1
print(ddy)
print(mn[len(mn) - 1])  # final mean absolute error
print(ls[len(ls) - 1])  # final cost
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment