ML Lab

PROGRAM 1: FIND-S
import csv
def find_s(rows, positive_labels=("true", "yes")):
    """Return the maximally specific hypothesis produced by Find-S.

    Args:
        rows: Iterable of training examples. Each example is a sequence of
            strings whose last item is the class label and whose other
            items are attribute values.
        positive_labels: Lower-case label spellings that mark a positive
            example. The comparison is case-insensitive, so ``'TRUE'`` and
            ``'True'`` are both accepted.

    Returns:
        A one-element list holding the hypothesis (a list with one entry
        per attribute). ``'0'`` means "no positive example seen yet" and
        ``'?'`` means "any value is acceptable".
    """
    hypothesis = None
    for row in rows:
        if not row:
            continue  # csv.reader yields [] for blank lines
        # Cells such as ' Sunny' carry stray spaces that would otherwise
        # make equal values compare as different.
        *attributes, label = [cell.strip() for cell in row]
        if hypothesis is None:
            # Size the hypothesis from the data instead of hard-coding 6.
            hypothesis = ['0'] * len(attributes)
        if label.lower() not in positive_labels:
            continue  # Find-S ignores negative examples
        for j, value in enumerate(attributes):
            if hypothesis[j] == '0':
                hypothesis[j] = value   # first positive example: adopt its value
            elif hypothesis[j] != value:
                hypothesis[j] = '?'     # conflicting values: generalise
    return [hypothesis if hypothesis is not None else []]


if __name__ == "__main__":
    with open('findsdataset.csv', 'r', newline='') as f:
        your_list = list(csv.reader(f))
    for i in your_list:
        print(i)
    h = find_s(your_list)
    print("A Maximally Specific hypothesis is")
    print(h)
[' Sunny', 'Warm', 'Normal', 'Strong', ' Warm', 'Same', 'TRUE']

[' Sunny', 'Warm', 'High', 'Strong', 'Warm', 'Same', 'TRUE']
[' Rainy', 'Cold', 'High', 'Strong', 'Warm', 'Change', 'FALSE']
[' Sunny', 'Warm', 'High', 'Strong', 'Cool', 'Change', 'TRUE']
['Rainy', 'Cold', 'High', 'Strong', 'Cool', 'Change', 'TRUE']
A Maximally Specific hypothesis is
[['0', '0', '0', '0', '0', '0']]
PROGRAM 2: CANDIDATE ELIMINATION
import numpy as np
import pandas as pd
# Load the training examples; read_csv already returns a DataFrame.
data = pd.read_csv('CE.csv')
print('The Dataset is: \n')
print(data)

# Every column except the last one is an attribute value.
concepts = data.iloc[:, :-1].to_numpy(copy=True)
print('\n The Concepts are: \n',concepts)

# The last column is the class label of each example.
target = data.iloc[:, -1].to_numpy(copy=True)
print('\nThe target is: \n',target)
def learn(concepts, target):
    """Compute the specific and general boundaries by candidate elimination.

    This is the simplified, diagonal form of the algorithm: the general
    boundary is tracked as one candidate row per attribute, and only the
    diagonal cell of each row is ever constrained.

    Args:
        concepts: Sequence of examples, each a sequence of attribute values
            supporting ``.copy()`` (list or NumPy row). The first example
            seeds the specific hypothesis regardless of its label.
        target: Sequence of labels aligned with ``concepts``; ``"Yes"``
            marks a positive example and ``"No"`` a negative one.

    Returns:
        ``(specific_h, general_h)``. ``specific_h`` is a copy of the first
        example with disagreeing attributes replaced by ``'?'``.
        ``general_h`` is a list of the candidate rows that still carry a
        constraint; rows made only of ``'?'`` are dropped.
    """
    specific_h = concepts[0].copy()
    n_attrs = len(specific_h)
    general_h = [["?"] * n_attrs for _ in range(n_attrs)]

    for example, label in zip(concepts, target):
        if label == "Yes":
            # A positive example relaxes every attribute it disagrees on.
            for x in range(n_attrs):
                if example[x] != specific_h[x]:
                    specific_h[x] = '?'
                    general_h[x][x] = '?'
        elif label == "No":
            # A negative example constrains the attributes on which it
            # differs from the current specific hypothesis.
            for x in range(n_attrs):
                if example[x] != specific_h[x]:
                    general_h[x][x] = specific_h[x]
                else:
                    general_h[x][x] = '?'

    # Drop unconstrained rows. The wildcard row is sized from the data so
    # this works for any number of attributes, not only six.
    wildcard = ["?"] * n_attrs
    general_h = [row for row in general_h if row != wildcard]
    return specific_h, general_h
# Run candidate elimination on the loaded examples and print both boundaries.
s_final, g_final = learn(concepts, target)

print("\n\nFinal S:", s_final)
print("\n\nFinal G:")
# One general-boundary hypothesis per line.
for i in g_final:
    print(i)
Sky Airtemp Humidity Wind Water Forecast EnjoySport
0 Sunny Warm Normal Strong Warm Same Yes
1 Sunny Warm High Strong Warm Same Yes
2 Rainy Cold High Strong Warm Change No
3 Sunny Warm High Strong Cool Change Yes
The Concepts are:

[['Sunny' 'Warm' 'Normal' 'Strong' 'Warm' 'Same']
['Sunny' 'Warm' 'High' 'Strong' 'Warm' 'Same']
['Rainy' 'Cold' 'High' 'Strong' 'Warm' 'Change']
['Sunny' 'Warm' 'High' 'Strong' 'Cool' 'Change']]
The target is:

['Yes' 'Yes' 'No' 'Yes']
Final S: ['Sunny' 'Warm' '?' 'Strong' '?' '?']
Final G:
['Sunny', '?', '?', '?', '?', '?']
['?', 'Warm', '?', '?', '?', '?']
PROGRAM 3: ID3 ALGORITHM
import pandas as pd
import numpy as np
from pprint import pprint
# Training examples for the decision tree, read once when the script starts.
dataset = pd.read_csv('id3dataset.csv')
# Names of the candidate splitting attributes.
f=['Age','competition','type']
def entropy(target_col):
    """Return the Shannon entropy, in bits, of the values in *target_col*.

    Args:
        target_col: Array-like of class labels.

    Returns:
        ``-sum(p * log2(p))`` over the relative frequency ``p`` of each
        distinct label. It is ``0.0`` for an empty or single-class input.
    """
    _, counts = np.unique(target_col, return_counts=True)
    # Compute the relative frequencies once rather than once per label.
    probabilities = counts / counts.sum()
    return -np.sum(probabilities * np.log2(probabilities))
def InfoGain(data,sname,tn="EnjoySport"):
    """Return the information gain from splitting *data* on column *sname*.

    Args:
        data: pandas DataFrame holding the examples.
        sname: Name of the attribute column to split on.
        tn: Name of the target (class) column.

    Returns:
        Entropy of the target column minus the weighted average entropy of
        the target within each distinct value of ``sname``.
    """
    total_entropy = entropy(data[tn])
    vals, counts = np.unique(data[sname], return_counts=True)
    weights = counts / counts.sum()
    # Select each subset by boolean mask. The earlier where().dropna() form
    # also dropped rows with a NaN in any unrelated column, so the subsets
    # could disagree with the counts used as weights.
    weighted_entropy = np.sum([
        weight * entropy(data.loc[data[sname] == val, tn])
        for val, weight in zip(vals, weights)
    ])
    Information_Gain = total_entropy - weighted_entropy
    return Information_Gain
def ID3(data,originaldata,f,tname="profit",pnode = None):
    """Build a decision tree with the ID3 algorithm.

    Args:
        data: pandas DataFrame with the examples that reach this node.
        originaldata: The full training DataFrame. Its majority class is
            the fallback when ``data`` is empty.
        f: List of attribute column names still available for splitting.
        tname: Name of the target (class) column.
        pnode: Majority class of the parent node. It is returned when no
            attributes are left to split on.

    Returns:
        A class label for a leaf, or a nested dict of the form
        ``{attribute: {value: subtree_or_label, ...}}``.
    """
    if len(data) == 0:
        # No examples reach this node: fall back to the overall majority
        # class instead of failing on an empty np.unique result.
        labels, counts = np.unique(originaldata[tname], return_counts=True)
        return labels[np.argmax(counts)]

    labels, counts = np.unique(data[tname], return_counts=True)
    if len(labels) <= 1:
        return labels[0]        # pure node
    if len(f) == 0:
        return pnode            # nothing left to split on

    # Majority class at this node; children receive it as their fallback.
    pnode = labels[np.argmax(counts)]

    item_values = [InfoGain(data, feature, tname) for feature in f]
    bf = f[np.argmax(item_values)]
    tree = {bf: {}}
    remaining = [feature for feature in f if feature != bf]

    for value in np.unique(data[bf]):
        sub_data = data[data[bf] == value]
        # Pass originaldata through rather than reading the module-level
        # `dataset`, so the function does not depend on a global.
        tree[bf][value] = ID3(sub_data, originaldata, remaining, tname, pnode)
    return tree
def predict(query,tree,default = 1):
    """Classify *query* by walking the nested-dict decision *tree*.

    Args:
        query: Dict mapping attribute names to the example's values.
        tree: Nested dict of the form ``{attribute: {value: subtree_or_label}}``.
        default: Value returned when the tree has no branch for the query.

    Returns:
        The label stored at the leaf that is reached. ``default`` is
        returned if the query's value has no branch at some node, or if
        none of the query's attributes appears at the current node.
    """
    for key in query:
        if key in tree:
            try:
                result = tree[key][query[key]]
            except KeyError:
                # Attribute value unseen during training at this node.
                return default
            if isinstance(result, dict):
                # Forward `default` so deeper misses use the same fallback.
                return predict(query, result, default)
            return result
    return default
# Train on (at most) the first 13 rows of the dataset.
tdata = dataset.iloc[:13]
print(tdata)
# Candidate attributes, target column and initial parent class for ID3.
f=['Age','competition','type']
tname="profit"
pnode=None
tree=ID3(tdata,tdata,f,tname,pnode)
pprint(tree)
# One {attribute: value} dict per row, with the last column left out.
query=dataset.iloc[:,:-1].to_dict(orient="records")
# Classify row 9; 1.0 is the fallback passed to predict as `default`.
result=predict(query[9],tree,1.0)
print(result)
Age competition type profit

0 old yes software down
1 old no software down
2 old no hardware down
3 mid yes software down
4 mid yes hardware down
5 mid no hardware up
6 mid no software up
7 new yes software up
8 new no haardware up
9 new no software up
{'Age': {'mid': {'competition': {'no': 'up', 'yes': 'down'}},

'new': 'up',
'old': 'down'}}
Up
PROGRAM 4: BACK PROPAGATION
import numpy as np
# Three training samples with two input features each.
X = np.array(([2, 9], [1, 5], [3, 6]), dtype=float)
# One target value per sample.
y = np.array(([92], [86], [89]), dtype=float)
X = X/np.amax(X,axis=0) # divide each column by its own maximum (axis=0 takes the max down the rows)
y = y/100 # scale the targets by 100 so they lie between 0 and 1
#Sigmoid Function
def sigmoid (x):
    """Return the logistic sigmoid ``1 / (1 + e**-x)`` of *x*.

    Accepts a scalar or a NumPy array; arrays are processed element-wise.
    """
    return 1/(1 + np.exp(-x))
#Derivative of Sigmoid Function
def derivatives_sigmoid(x):
    """Return ``x * (1 - x)``.

    This equals the sigmoid's derivative only when *x* is already a sigmoid
    output (an activation), not the pre-activation input.
    """
    return x * (1 - x)
epoch=7000 #Setting training iterations
lr=0.1 #Setting learning rate
inputlayer_neurons = 2 #number of features in data set
hiddenlayer_neurons = 3 #number of hidden layers neurons
output_neurons = 1 #number of neurons at output layer
#weight and bias initialization: np.random.uniform draws each entry from [0, 1)
wh=np.random.uniform(size=(inputlayer_neurons,hiddenlayer_neurons))
bh=np.random.uniform(size=(1,hiddenlayer_neurons))
wout=np.random.uniform(size=(hiddenlayer_neurons,output_neurons))
bout=np.random.uniform(size=(1,output_neurons))
for i in range(epoch):
    #Forward Propagation
    Ij=np.dot(X,wh)+bh
    Oj=sigmoid(Ij)
    Ik=np.dot(Oj,wout)+bout
    Ok=sigmoid(Ik)
    #Backpropagation
    Ek=(y-Ok)*derivatives_sigmoid(Ok)
    # The hidden-layer error must use wout as it was during the forward pass,
    # so it is computed before wout is updated.
    Ej=derivatives_sigmoid(Oj)*(np.dot(Ek,wout.T))
    wout+=np.dot(Oj.T,Ek)*lr
    # Biases are trained too: their gradient is the error summed over samples.
    bout+=np.sum(Ek,axis=0,keepdims=True)*lr
    wh+=np.dot(X.T,Ej)*lr
    bh+=np.sum(Ej,axis=0,keepdims=True)*lr
print("Input: \n" + str(X))
print("Actual Output: \n" + str(y))
print("Predicted Output: \n" ,Ok)
Input:
[[0.66666667 1. ]
[0.33333333 0.55555556]
[1. 0.66666667]]
Actual Output:
[[0.92]
[0.86]
[0.89]]
Predicted Output:
[[0.89484562]
[0.87912054]
[0.8958235 ]]
PROGRAM 6: NAÏVE BAYESIAN CLASSIFIER
import pandas as pd
from sklearn import metrics
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB

# Load the labelled messages; `names=` supplies the column names, so the
# first line of the file is read as data.
msg=pd.read_csv('naivetext1.csv',names=['message','label'])
print('The dimensions of the dataset',msg.shape)
# Encode the textual class labels as integers.
msg['labelnum']=msg.label.map({'pos':1,'neg':0})
X=msg.message
y=msg.labelnum
print(X)
print(y)

#splitting the dataset into train and test data
xtrain,xtest,ytrain,ytest=train_test_split(X,y)
print(xtest.shape)
print(xtrain.shape)
print(ytest.shape)
print(ytrain.shape)

#output of count vectoriser is a sparse matrix
count_vect = CountVectorizer()
# Fit the vocabulary on the training messages only, then reuse it for the
# test messages.
xtrain_dtm = count_vect.fit_transform(xtrain)
xtest_dtm=count_vect.transform(xtest)
# get_feature_names() was removed from scikit-learn; get_feature_names_out()
# is its replacement.
feature_names = count_vect.get_feature_names_out()
print(feature_names)
df=pd.DataFrame(xtrain_dtm.toarray(),columns=feature_names)
print(df)#tabular representation

# Training Naive Bayes (NB) classifier on training data.
clf = MultinomialNB().fit(xtrain_dtm,ytrain)
predicted = clf.predict(xtest_dtm)

#printing accuracy metrics
print('Accuracy metrics')
print('Accuracy of the classifer is',metrics.accuracy_score(ytest,predicted))
print('Confusion matrix')
print(metrics.confusion_matrix(ytest,predicted))
print('Recall and Precison ')
print(metrics.recall_score(ytest,predicted))
print(metrics.precision_score(ytest,predicted))
[13 rows x 44 columns]

Accuracy metrics
Accuracy of the classifer is 1.0
Confusion matrix
[[2 0]
[0 3]]
Recall and Precison
1.0
1.0
PROGRAM 7: BAYESIAN NETWORK
import bayespy as bp
import numpy as np
import csv
from colorama import init
init()
# Lookup tables that map each categorical value to the integer code used as
# the category index in the network below.

# Age group
ageEnum = {'SuperSeniorCitizen':0, 'SeniorCitizen':1, 'MiddleAged':2, 'Youth':3, 'Teen':4}
# Gender
genderEnum = {'Male':0, 'Female':1}
# Family history of heart disease
familyHistoryEnum = {'Yes':0, 'No':1}
# Diet (calorie intake)
dietEnum = {'High':0, 'Medium':1, 'Low':2}
# Lifestyle
# NOTE(review): 'Sedetary' is presumably spelled this way in the CSV too - confirm before correcting.
lifeStyleEnum = {'Athlete':0, 'Active':1, 'Moderate':2, 'Sedetary':3}
# Cholesterol level
cholesterolEnum = {'High':0, 'BorderLine':1, 'Normal':2}
# Heart disease (the target variable)
heartDiseaseEnum = {'Yes':0, 'No':1}
# Read heart_disease_data.csv; newline='' is what the csv module recommends
# for files handed to csv.reader.
with open('heart_disease_data.csv', newline='') as csvfile:
    lines = csv.reader(csvfile)
    dataset = list(lines)

# Encode every row as integer category codes, one column per variable, in
# the order: age, gender, family history, diet, lifestyle, cholesterol,
# heart disease.
data = []
for x in dataset:
    data.append([
        ageEnum[x[0]],
        genderEnum[x[1]],
        familyHistoryEnum[x[2]],
        dietEnum[x[3]],
        lifeStyleEnum[x[4]],
        cholesterolEnum[x[5]],
        heartDiseaseEnum[x[6]],
    ])

# Training data as an (N, 7) integer array.
data = np.array(data)
N = len(data)
# Input data column assignment.
# Each input variable gets a Dirichlet prior sized to its number of
# categories and a Categorical node with one plate per training row, which
# is then fixed to the corresponding data column.

# Column 0: age (5 categories)
p_age = bp.nodes.Dirichlet(1.0*np.ones(5))
age = bp.nodes.Categorical(p_age, plates=(N,))
age.observe(data[:,0])
# Column 1: gender (2 categories)
p_gender = bp.nodes.Dirichlet(1.0*np.ones(2))
gender = bp.nodes.Categorical(p_gender, plates=(N,))
gender.observe(data[:,1])
# Column 2: family history (2 categories)
p_familyhistory = bp.nodes.Dirichlet(1.0*np.ones(2))
familyhistory = bp.nodes.Categorical(p_familyhistory, plates=(N,))
familyhistory.observe(data[:,2])
# Column 3: diet (3 categories)
p_diet = bp.nodes.Dirichlet(1.0*np.ones(3))
diet = bp.nodes.Categorical(p_diet, plates=(N,))
diet.observe(data[:,3])
# Column 4: lifestyle (4 categories)
p_lifestyle = bp.nodes.Dirichlet(1.0*np.ones(4))
lifestyle = bp.nodes.Categorical(p_lifestyle, plates=(N,))
lifestyle.observe(data[:,4])
# Column 5: cholesterol (3 categories)
p_cholesterol = bp.nodes.Dirichlet(1.0*np.ones(3))
cholesterol = bp.nodes.Categorical(p_cholesterol, plates=(N,))
cholesterol.observe(data[:,5])
# Heart-disease prior: the plates (5, 2, 2, 3, 4, 3) match the category
# counts of the six input variables, in the order they are listed below.
p_heartdisease = bp.nodes.Dirichlet(np.ones(2), plates=(5, 2, 2, 3, 4, 3))

# Column 6: heart disease, mixed over the six input variables.
heartdisease = bp.nodes.MultiMixture([age, gender, familyhistory, diet, lifestyle, cholesterol], bp.nodes.Categorical,
p_heartdisease)
heartdisease.observe(data[:,6])
# NOTE(review): presumably this fits p_heartdisease to the observed data - confirm against the bayespy docs.
p_heartdisease.update()
# Interactive query loop: read one integer code per input variable, print
# the 'Yes' entry of the resulting heart-disease node's first moment, and
# repeat until the user enters a value other than 0.
m=0
while m == 0:
    print("\n")
    # The prompts run in list order, one input() call per variable.
    res = bp.nodes.MultiMixture([
        int(input('Enter Age: ' + str(ageEnum))),
        int(input('Enter Gender: ' + str(genderEnum))),
        int(input('Enter FamilyHistory: ' + str(familyHistoryEnum))),
        int(input('Enter dietEnum: ' + str(dietEnum))),
        int(input('Enter LifeStyle: ' + str(lifeStyleEnum))),
        int(input('Enter Cholesterol: ' + str(cholesterolEnum))),
    ], bp.nodes.Categorical, p_heartdisease).get_moments()[0][heartDiseaseEnum['Yes']]
    print("Probability(HeartDisease) = " + str(res))
    m = int(input("Enter for Continue:0, Exit :1 "))
PROGRAM 8: KMEANS AND EM
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.mixture import GaussianMixture
from sklearn.cluster import KMeans
# Load the two-column dataset and report its size.
data = pd.read_csv('xclara.csv')
print("Input Data and Shape")
print(data.shape)
data.head()  # the result is discarded when run as a script

# Pull both feature columns out and pair them up as an (n, 2) array.
f1 = data['V1'].values
f2 = data['V2'].values
X = np.column_stack((f1, f2))
print('Graph for whole dataset')
plt.scatter(f1, f2, c='black', s=7)
plt.show()

# K-Means with three clusters: fit first, then assign each point a cluster.
kmeans = KMeans(n_clusters=3, random_state=0)
kmeans.fit(X)
labels = kmeans.predict(X)
centroids = kmeans.cluster_centers_
plt.scatter(X[:, 0], X[:, 1], c=labels, s=40, cmap='viridis')
print('Graph using Kmeans Algorithm')
plt.scatter(centroids[:, 0], centroids[:, 1], marker='*', s=200, c='#050505')
plt.show()

# Gaussian mixture with three components.
gmm = GaussianMixture(n_components=3)
gmm.fit(X)
labels = gmm.predict(X)
# Marker size grows with the highest component probability of each point.
probs = gmm.predict_proba(X)
size = 10 * probs.max(axis=1) ** 3
print('Graph using EM Algorithm')
plt.scatter(X[:, 0], X[:, 1], c=labels, s=size, cmap='viridis')
plt.show()
PROGRAM 9: K NEAREST ALGORITHM
from sklearn.model_selection import train_test_split

from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, confusion_matrix
from sklearn import datasets
# Load the iris measurements and their class labels.
iris = datasets.load_iris()
iris_data, iris_labels = iris.data, iris.target

# Hold out 30% of the samples for testing.
xtrain, xtest, ytrain, ytest = train_test_split(
    iris_data, iris_labels, test_size=0.30
)

# Fit a 5-nearest-neighbour classifier and score it on the held-out samples.
classifier = KNeighborsClassifier(n_neighbors=5)
classifier.fit(xtrain, ytrain)
ypred = classifier.predict(xtest)
print(confusion_matrix(ytest, ypred))
print(classification_report(ytest, ypred))
[[11 0 0]
[ 0 13 1]
[ 0 1 19]]
precision recall f1-score support
0 1.00 1.00 1.00 11

1 0.93 0.93 0.93 14
2 0.95 0.95 0.95 20
avg / total 0.96 0.96 0.96 45

PROGRAM 10: LOCALLY WEIGHTED REGRESSION
import matplotlib.pyplot as plt

import pandas as pd
import numpy as np
def kernel(point,xmat, k):
    """Return the diagonal Gaussian weight matrix of *xmat* around *point*.

    Args:
        point: Query row, as a 1 x n matrix or a length-n sequence.
        xmat: m x n design matrix (matrix, array or nested sequence).
        k: Bandwidth; larger values weight distant rows more heavily.

    Returns:
        An m x m ``np.matrix`` whose j-th diagonal entry is
        ``exp(-||point - xmat[j]||**2 / (2 * k**2))`` and whose other
        entries are zero.
    """
    # Use the xmat argument, not a module-level X, so the function works on
    # whatever matrix the caller passes.
    rows = np.asarray(xmat, dtype=float)
    query = np.asarray(point, dtype=float).reshape(1, -1)
    squared_dist = np.sum((rows - query) ** 2, axis=1)
    # np.asmatrix replaces np.mat, which was removed in NumPy 2.0.
    return np.asmatrix(np.diag(np.exp(squared_dist / (-2.0 * k ** 2))))
def localWeight(point,xmat,ymat,k):
    """Return the locally weighted least-squares coefficients at *point*.

    Args:
        point: Query row (1 x n).
        xmat: m x n design matrix.
        ymat: 1 x m matrix of target values.
        k: Kernel bandwidth passed on to ``kernel``.

    Returns:
        The n x 1 matrix ``(X^T W X)^-1 X^T W y`` with ``W = kernel(point, xmat, k)``.
    """
    # Work on the xmat argument rather than a module-level X.
    xmat = np.asmatrix(xmat)
    wei = kernel(point,xmat,k)
    W = (xmat.T*(wei*xmat)).I*(xmat.T*(wei*ymat.T))
    return W
def localWeightRegression(xmat,ymat,k):
    """Predict every row of *xmat* with locally weighted regression.

    Args:
        xmat: m x n design matrix.
        ymat: 1 x m matrix of target values.
        k: Kernel bandwidth.

    Returns:
        A length-m NumPy array; entry i is the prediction for ``xmat[i]``
        using coefficients fitted around that same row.
    """
    m = np.shape(xmat)[0]
    ypred = np.zeros(m)
    for i in range(m):
        # The product has a single element; .item() extracts it as a scalar
        # before it is stored in the float array.
        ypred[i] = (xmat[i]*localWeight(xmat[i],xmat,ymat,k)).item()
    return ypred
# Load the data set and pull out the two columns of interest.
data = pd.read_csv('data10.csv')
bill = np.array(data.total_bill)
tip = np.array(data.tip)

# np.asmatrix replaces np.mat, which was removed in NumPy 2.0.
mbill = np.asmatrix(bill)
mtip = np.asmatrix(tip)
m= np.shape(mbill)[1]
one = np.asmatrix(np.ones(m))
# Design matrix: a column of ones (intercept) next to the bill column.
X= np.hstack((one.T,mbill.T))

# Fit with bandwidth k = 2.
ypred = localWeightRegression(X,mtip,2)

# Order the rows by bill amount so the fitted curve is drawn left to right.
SortIndex = X[:,1].argsort(0)
xsort = X[SortIndex][:,0]

fig = plt.figure()
ax = fig.add_subplot(1,1,1)
ax.scatter(bill,tip, color='green')
ax.plot(xsort[:,1],ypred[SortIndex], color = 'red', linewidth=5)
plt.xlabel('Total bill')
plt.ylabel('Tip')
plt.show()

ML Lab

Caricato da

Informazioni sul documento

Titolo originale

Copyright

Formati disponibili

Condividi questo documento

Condividi o incorpora il documento

Opzioni di condivisione

Hai trovato utile questo documento?

Questo contenuto è inappropriato?

Copyright:

Formati disponibili

ML Lab

Caricato da

Copyright:

Formati disponibili

PROGRAM 1: FINDS

h = [['0', '0', '0', '0', '0', '0']]

[' Sunny', 'Warm', 'Normal', 'Strong', ' Warm', 'Same', 'TRUE']

print('\n The Concepts are: \n',concepts)

def learn(concepts, target):

s_final, g_final = learn(concepts, target)

The Concepts are:

The target is:

Final S: ['Sunny' 'Warm' '?' 'Strong' '?' '?']

def ID3(data,originaldata,f,tname="profit",pnode = None):

for value in np.unique(data[bf]):

def predict(query,tree,default = 1):

Age competition type profit

{'Age': {'mid': {'competition': {'no': 'up', 'yes': 'down'}},

#printing accuracy metrics

[13 rows x 44 columns]

# Define Parameter Enum values

# Input data column assignment

p_heartdisease = bp.nodes.Dirichlet(np.ones(2), plates=(5, 2, 2, 3, 4, 3))

# Importing the dataset

# Getting the values and plotting it

kmeans = KMeans(3, random_state=0)

from sklearn.model_selection import train_test_split

0 1.00 1.00 1.00 11

avg / total 0.96 0.96 0.96 45

import matplotlib.pyplot as plt

def kernel(point,xmat, k):

Potrebbero piacerti anche