Sei sulla pagina 1 di 3

import math

from math import log


# Training set. Row 0 holds the column names; every following row is one
# example. The functions below read column 3 ('profit') as the class label.
S = [
    ('age', 'competition', 'type', 'profit'),
    ('old', 'yes', 'swr', 'down'),
    ('old', 'no', 'swr', 'down'),
    ('old', 'no', 'hwr', 'down'),
    ('mid', 'yes', 'swr', 'down'),
    ('mid', 'yes', 'hwr', 'down'),
    ('mid', 'no', 'hwr', 'up'),
    ('mid', 'no', 'swr', 'up'),
    ('new', 'yes', 'swr', 'up'),
    ('new', 'no', 'hwr', 'up'),
    ('new', 'no', 'swr', 'up'),
]
class Tree(object):
    """A node of the decision tree; all fields start out as None."""

    def __init__(self):
        # List of child Tree nodes; stays None for a leaf.
        self.children = None
        # Attribute value labelling the branch that leads to this node.
        self.leg = None
        # Column name for an inner node, class label for a leaf.
        self.data = None
def entropy(S):
    """Return the base-2 entropy of the class column of a dataset.

    S is a sequence of rows whose first row is a header (skipped) and
    whose column 3 holds the class label. A dataset with no data rows
    yields 0.
    """
    total = len(S) - 1  # number of data rows, header excluded
    dicti = {}
    for row in S[1:]:
        dicti[row[3]] = dicti.get(row[3], 0) + 1
    result = 0
    for count in dicti.values():
        # float() keeps the division fractional under Python 2 as well.
        share = count / float(total)
        result = result - share * log(share, 2)
    return result
def gain(attrib_pos, data=None):
    """Count class labels for every value of one attribute column.

    attrib_pos -- index of the attribute column to group by.
    data       -- dataset to count over (header row first, class label in
                  column 3). When omitted, the module-level S is used,
                  which is what this function did before the parameter
                  existed.

    Returns a dict {attribute value: {class label: number of rows}}.
    """
    rows = S if data is None else data
    dictiat = {}
    for row in rows[1:]:
        per_class = dictiat.setdefault(row[attrib_pos], {})
        per_class[row[3]] = per_class.get(row[3], 0) + 1
    return dictiat


def getnrcls(S):
    """Return the distinct class labels (column 3) found in S's data rows.

    The result is a real list, so it can be indexed on Python 3 too, where
    dict.keys() is only a view.
    """
    dicu = {}
    for row in S[1:]:
        dicu.setdefault(row[3], 0)
    return list(dicu)


def gain_calc(attrib_pos, S):
    """Return the information gain of splitting S on column attrib_pos.

    The gain is entropy(S) minus the entropy of each attribute value's
    rows, weighted by the share of rows carrying that value. The counts
    are taken from the S passed in, so the function is correct for
    subsets as well as for the full dataset.
    """
    dic = gain(attrib_pos, S)
    result = entropy(S)
    row_count = len(S) - 1  # data rows, header excluded
    for class_counts in dic.values():
        toate = sum(class_counts.values())
        # Entropy of the rows sharing this attribute value. Values with a
        # single class contribute exactly zero, so no special case needed.
        ceva = 0
        for count in class_counts.values():
            share = count / float(toate)
            ceva = ceva - share * log(share, 2)
        result = result - ceva * toate / row_count
    return result
# Lookup table from an attribute value to the index of the column of S in
# which that value appears.
dicc = {
    'new': 0,
    'old': 0,
    'mid': 0,
    'yes': 1,
    'no': 1,
    'swr': 2,
    'hwr': 2,
}
def id3(S, dicc):
    """Build the decision tree for dataset S and return its root node.

    S    -- dataset: header row first, then one tuple per example.
    dicc -- maps an attribute value to the index of its column in S.

    The root is labelled with the name of whichever of columns 0 and 1
    has the larger gain_calc value (column 0 wins a tie). It receives
    three children for the branches 'new', 'mid' and 'old', each of which
    is completed by continueid3.
    """
    # NOTE(review): only columns 0 and 1 are candidates here; column 2 is
    # never considered for the root -- confirm this is intended.
    # max() returns the first maximal item, matching the original strict
    # '<' comparison, and evaluates each gain only once.
    pos = max(range(0, 2), key=lambda column: gain_calc(column, S))

    id3tree = Tree()
    id3tree.data = S[0][pos]
    # NOTE(review): the branch labels are fixed to the values of column 0,
    # so the tree is only meaningful when column 0 wins the comparison.
    id3tree.children = []
    for leg in ('new', 'mid', 'old'):
        child = Tree()
        child.leg = leg
        id3tree.children.append(child)
    for child in id3tree.children:
        continueid3(S, child, child.leg, dicc)
    return id3tree
def continueid3(S, id3tree, leg, dicc):
    """Complete the node id3tree for the rows of S on branch `leg`.

    S       -- dataset seen by the parent node (header row first).
    id3tree -- Tree node to fill in; it is modified in place.
    leg     -- attribute value selecting the rows that belong to the node.
    dicc    -- maps an attribute value to the index of its column in S.

    If the selected rows all share one class, the node becomes a leaf
    holding that class. Otherwise it is labelled with a column name,
    given 'yes' and 'no' children, and each child is completed
    recursively. Nothing is returned.
    """
    # Keep the header so the subset has the same layout as S.
    newS = [S[0]] + [row for row in S[1:] if row[dicc[leg]] == leg]

    # list() makes the result indexable whether getnrcls returns a list
    # or a dict view.
    classes = list(getnrcls(newS))
    if not classes:
        # No row carries this value: leave the node empty. Without this
        # guard an empty subset has no terminating case.
        return
    if len(classes) == 1:
        id3tree.data = classes[0]
        return

    maxx = -10000
    # NOTE(review): range(1, 2) makes column 1 the only candidate, and the
    # child labels below are that column's values -- confirm whether
    # column 2 was meant to be considered as well.
    for i in range(1, 2):
        candidate = gain_calc(i, newS)
        # Parenthesised single-argument form prints the same on Python 2
        # and Python 3.
        print(candidate)
        if maxx < candidate:
            maxx = candidate
            pos = i

    id3tree.data = newS[0][pos]
    id3tree.children = []
    for child_leg in ('yes', 'no'):
        child = Tree()
        child.leg = child_leg
        id3tree.children.append(child)
    for child in id3tree.children:
        continueid3(newS, child, child.leg, dicc)
# Build the tree for the training set and dump its first two levels.
# print(x) with a single argument gives the same output on Python 2 and 3.
copac = id3(S, dicc)
print(copac.data)
for child in copac.children:
    print(child.leg)
    print(child.data)

# Second level: the branches below the root's middle child.
for grandchild in copac.children[1].children:
    print(grandchild.leg)
    print(grandchild.data)
# Accumulator with one initially empty list per class label.
dike = {'up': [], 'down': []}
def afis(copac, dic, currentval):
    """Collect the root-to-leaf paths of a tree, grouped by leaf label.

    copac      -- node to walk; it must expose children, leg and data.
    dic        -- dict {leaf label: list of path strings}; it is updated
                  in place and also returned.
    currentval -- text describing the path from the root to this node.

    For a leaf, the path text plus the leaf's branch value is appended to
    the list stored under the leaf's data. For an inner node the path is
    extended with 'data ' (no incoming branch) or 'leg:data ' and the
    walk continues into every child.
    """
    if copac.children is None:
        # setdefault lets a label that is not yet in dic be recorded
        # instead of raising KeyError.
        dic.setdefault(copac.data, []).append(currentval + copac.leg)
        return dic

    if copac.leg is None:
        currentval = currentval + copac.data + ' '
    else:
        currentval = currentval + copac.leg + ':' + copac.data + ' '
    for child in copac.children:
        dic = afis(child, dic, currentval)
    return dic
# Print every root-to-leaf path grouped by class label; the parenthesised
# form works on both Python 2 and Python 3.
print(afis(copac, dike, ''))

Potrebbero piacerti anche