scrivo riguardo un problema con una rete neurale back-propagation in python il cui codice è stato trovato in rete.
Mi succede che non impara i pattern ad essa presentati.
Invio gli allegati delle immagini usate come ingressi da presentare ai 1024 neuroni di input ed il relativo codice in python della
rete.
Essa è composta da 1024 neuroni di input, 42 neuroni hidden e 7 neuroni di output.
Riporto di seguito i risultati dell'addestramento della rete, presentandole le imppagini strada4.jpeg, strada3.jpeg, strada2.jpeg
e strada1.jpeg.
- Code: Select all
Combined error 0.499998982028
Combined error 0.499999700088
Combined error 0.499999699588
Combined error 0.499999699085
Combined error 0.499999698581
Combined error 0.499999698075
Combined error 0.499999697568
Combined error 0.499999697059
Combined error 0.499999696547
Combined error 0.499999696034
Combined error 0.49999969552
Combined error 0.499999695003
Combined error 0.499999694485
Combined error 0.499999693964
Combined error 0.499999693442
Combined error 0.499999692918
Combined error 0.499999692392
Combined error 0.499999691864
Combined error 0.499999691334
Combined error 0.499999690803
Inputs: [ 0. 0. 0. ..., 0. 0. 0.] --> [-0.9999999874304197, 0.9999999994018853, 0.9999999999813893, 0.9999996945158525, -0.9999997980149685, 0.9999999999919921, 0.999999690271247] Target [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1]
Inputs: [ 0. 0. 0. ..., 0. 0. 0.] --> [-0.9999999874304197, 0.9999999994018853, 0.9999999999813893, 0.9999996945158525, -0.9999997980149685, 0.9999999999919921, 0.999999690271247] Target [0.0, 0.0, 0.0, 0.0, 0.0, 1, 0.0]
rete back-propagation BP.py:
- Code: Select all
import math
import random
import string
import numpy as np
import cv2 as cv
from numpy import *
class NN:
def __init__(self, NI, NH, NO):
# number of nodes in layers
self.ni = NI + 1 # +1 for bias
self.nh = NH
self.no = NO
# initialize node-activations
self.ai, self.ah, self.ao = [],[], []
self.ai = [1.0]*self.ni
self.ah = [1.0]*self.nh
self.ao = [1.0]*self.no
# create node weight matrices
self.wi = makeMatrix (self.ni, self.nh)
self.wo = makeMatrix (self.nh, self.no)
# initialize node weights to random vals
randomizeMatrix ( self.wi, -0.2, 0.2 )
randomizeMatrix ( self.wo, -2.0, 2.0 )
# create last change in weights matrices for momentum
self.ci = makeMatrix (self.ni, self.nh)
self.co = makeMatrix (self.nh, self.no)
def runNN (self, inputs):
if len(inputs) != self.ni-1:
print 'incorrect number of inputs'
for i in range(self.ni-1):
self.ai[i] = inputs[i]
for j in range(self.nh):
sum = 0.0
for i in range(self.ni):
sum +=( self.ai[i] * self.wi[i][j] )
self.ah[j] = sigmoid (sum)
for k in range(self.no):
sum = 0.0
for j in range(self.nh):
sum +=( self.ah[j] * self.wo[j][k] )
self.ao[k] = sigmoid (sum)
return self.ao
def backPropagate (self, targets, N, M):
# http://www.youtube.com/watch?v=aVId8KMsdUU&feature=BFa&list=LLldMCkmXl4j9_v0HeKdNcRA
# calc output deltas
# we want to find the instantaneous rate of change of ( error with respect to weight from node j to node k)
# output_delta is defined as an attribute of each ouput node. It is not the final rate we need.
# To get the final rate we must multiply the delta by the activation of the hidden layer node in question.
# This multiplication is done according to the chain rule as we are taking the derivative of the activation function
# of the ouput node.
# dE/dw[j][k] = (t[k] - ao[k]) * s'( SUM( w[j][k]*ah[j] ) ) * ah[j]
output_deltas = [0.0] * self.no
for k in range(self.no):
error = targets[k] - self.ao[k]
output_deltas[k] = error * dsigmoid(self.ao[k])
# update output weights
for j in range(self.nh):
for k in range(self.no):
# output_deltas[k] * self.ah[j] is the full derivative of dError/dweight[j][k]
change = output_deltas[k] * self.ah[j]
self.wo[j][k] += N*change + M*self.co[j][k]
self.co[j][k] = change
# calc hidden deltas
hidden_deltas = [0.0] * self.nh
for j in range(self.nh):
error = 0.0
for k in range(self.no):
error += output_deltas[k] * self.wo[j][k]
hidden_deltas[j] = error * dsigmoid(self.ah[j])
#update input weights
for i in range (self.ni):
for j in range (self.nh):
change = hidden_deltas[j] * self.ai[i]
#print 'activation',self.ai[i],'synapse',i,j,'change',change
self.wi[i][j] += N*change + M*self.ci[i][j]
self.ci[i][j] = change
# calc combined error
# 1/2 for differential convenience & **2 for modulus
error = 0.0
for k in range(len(targets)):
error = 0.5 * (targets[k]-self.ao[k])**2
return error
def weights(self):
print 'Input weights:'
for i in range(self.ni):
print self.wi[i]
print
print 'Output weights:'
for j in range(self.nh):
print self.wo[j]
print ''
def test(self, patterns):
for p in patterns:
inputs = p[0]
print 'Inputs:', p[0], '-->', self.runNN(inputs), '\tTarget', p[1]
def train (self, patterns, max_iterations = 1000, N=0.5, M=0.1):
for i in range(max_iterations):
for p in patterns:
inputs = p[0]
targets = p[1]
self.runNN(inputs)
error = self.backPropagate(targets, N, M)
if i % 50 == 0:
print 'Combined error', error
self.test(patterns)
def sigmoid (x):
return math.tanh(x)
# the derivative of the sigmoid function in terms of output
# proof here:
# http://www.math10.com/en/algebra/hyperbolic-functions/hyperbolic-functions.html
def dsigmoid (y):
return 1 - y**2
def makeMatrix ( I, J, fill=0.0):
m = []
for i in range(I):
m.append([fill]*J)
return m
def randomizeMatrix ( matrix, a, b):
for i in range ( len (matrix) ):
for j in range ( len (matrix[0]) ):
matrix[i][j] = random.uniform(a,b)
def main ():
inx = 0
a = zeros((1024))
b = zeros((1024))
c = zeros((1024))
d = zeros((1024))
#interfaccia opencv per rilevare le foto
#dimensioni in pixel dell' immagine
Y=32
X=32
#leggi immagine originale
img1 = cv.imread('strada4.png')
#ingrandisci immagine originale a 500x500 pixel
dst = cv.resize(img1, (500,500))
#ridimensiona a 100x100 pixel
dst = cv.resize(dst, (X,Y))
#converti immagine in scala di grigi
grayimg = cv.cvtColor(dst, cv.COLOR_BGR2GRAY)
#threshold
th1 = cv.adaptiveThreshold(grayimg,255,cv.ADAPTIVE_THRESH_GAUSSIAN_C,\
cv.THRESH_BINARY,11,2)
#operatore canny per trovare i bordi
#edges = cv.Canny(grayimg,X,Y)
#mostra immagine con threshholding
cv.imshow('Strada filtrata', th1)
#leggi immagine originale
img1 = cv.imread('strada3.png')
#ingrandisci immagine originale a 500x500 pixel
dst = cv.resize(img1, (500,500))
#ridimensiona a 100x100 pixel
dst = cv.resize(dst, (X,Y))
#converti immagine in scala di grigi
grayimg = cv.cvtColor(dst, cv.COLOR_BGR2GRAY)
#threshold
th2 = cv.adaptiveThreshold(grayimg,255,cv.ADAPTIVE_THRESH_GAUSSIAN_C,\
cv.THRESH_BINARY,11,2)
#operatore canny per trovare i bordi
#edges = cv.Canny(grayimg,X,Y)
#mostra immagine con threshholding
cv.imshow('Strada filtrata', th2)
#leggi immagine originale
img1 = cv.imread('strada2.png')
#ingrandisci immagine originale a 500x500 pixel
dst = cv.resize(img1, (500,500))
#ridimensiona a 100x100 pixel
dst = cv.resize(dst, (X,Y))
#converti immagine in scala di grigi
grayimg = cv.cvtColor(dst, cv.COLOR_BGR2GRAY)
#threshold
th3 = cv.adaptiveThreshold(grayimg,255,cv.ADAPTIVE_THRESH_GAUSSIAN_C,\
cv.THRESH_BINARY,11,2)
#operatore canny per trovare i bordi
#edges = cv.Canny(grayimg,X,Y)
#mostra immagine con threshholding
cv.imshow('Strada filtrata', th3)
#leggi immagine originale
img1 = cv.imread('strada1.png')
#ingrandisci immagine originale a 500x500 pixel
dst = cv.resize(img1, (500,500))
#ridimensiona a 100x100 pixel
dst = cv.resize(dst, (X,Y))
#converti immagine in scala di grigi
grayimg = cv.cvtColor(dst, cv.COLOR_BGR2GRAY)
#threshold
th4 = cv.adaptiveThreshold(grayimg,255,cv.ADAPTIVE_THRESH_GAUSSIAN_C,\
cv.THRESH_BINARY,11,2)
#operatore canny per trovare i bordi
#edges = cv.Canny(grayimg,X,Y)
#mostra immagine con threshholding
th1 = th1/1000.0
th2 = th2/1000.0
th3 = th3/1000.0
th4 = th4/1000.0
for y in range(32):
for x in range(32):
th1[x][y] = a[inx]
inx = inx + 1
inx = 0
for y in range(32):
for x in range(32):
th2[x][y] = b[inx]
inx = inx + 1
inx = 0
for y in range(32):
for x in range(32):
th3[x][y] = c[inx]
inx = inx + 1
inx = 0
for y in range(32):
for x in range(32):
th4[x][y] = d[inx]
inx = inx + 1
pat = [
[a, [0.0,0.0,0.0,0.0,0.0,0.0,1]],
[b, [0.0,0.0,0.0,0.0,0.0,1,0.0]]
#[c, [0.0,0.0,0.0,0.0,1,0.0,0.0]],
#[d, [0.0,0.0,0.0,1,0.0,0.0,0.0]]
]
myNN = NN (1024, 42, 7)
myNN.train(pat)
print myNN.runNN(c)
if __name__ == "__main__":
main()
Come si vede sopra "Inputs" sono gli ingressi presentati dopo la --> ci sono i risultati ottenuti dell'esecuzione della rete, mentre
con "Target" vediamo le uscite volute.Come si puo' notare non apprende correttamente i pattern.