
# Example of Naive Bayes implemented from Scratch in Python

import csv
import random
import math

def loadCsv(filename):
    # Load the CSV file and convert every attribute to a float
    lines = csv.reader(open(filename, "r"))
    dataset = list(lines)
    for i in range(len(dataset)):
        dataset[i] = [float(x) for x in dataset[i]]
    return dataset

def splitDataset(dataset, splitRatio):
    # Randomly split the dataset into a training set and a test set
    trainSize = int(len(dataset) * splitRatio)
    trainSet = []
    copy = list(dataset)
    while len(trainSet) < trainSize:
        index = random.randrange(len(copy))
        trainSet.append(copy.pop(index))
    return [trainSet, copy]

def separateByClass(dataset):
    # Group the training vectors by their class value (last attribute)
    separated = {}
    for i in range(len(dataset)):
        vector = dataset[i]
        if (vector[-1] not in separated):
            separated[vector[-1]] = []
        separated[vector[-1]].append(vector)
    return separated

def mean(numbers):
    return sum(numbers)/float(len(numbers))

def stdev(numbers):
    avg = mean(numbers)
    variance = sum([pow(x-avg,2) for x in numbers])/float(len(numbers)-1)
    return math.sqrt(variance)

def summarize(dataset):
    # Compute (mean, stdev) for every attribute column, dropping the class column
    summaries = [(mean(attribute), stdev(attribute)) for attribute in zip(*dataset)]
    del summaries[-1]
    return summaries

def summarizeByClass(dataset):
    separated = separateByClass(dataset)
    summaries = {}
    for classValue, instances in separated.items():
        summaries[classValue] = summarize(instances)
    return summaries

def calculateProbability(x, mean, stdev):
    # Gaussian probability density function for attribute value x
    exponent = math.exp(-(math.pow(x-mean,2)/(2*math.pow(stdev,2))))
    return (1 / (math.sqrt(2*math.pi) * stdev)) * exponent

def calculateClassProbabilities(summaries, inputVector):
    # Multiply the per-attribute Gaussian likelihoods for each class
    # (this implementation does not include the class prior)
    probabilities = {}
    for classValue, classSummaries in summaries.items():
        probabilities[classValue] = 1
        for i in range(len(classSummaries)):
            mean, stdev = classSummaries[i]
            x = inputVector[i]
            probabilities[classValue] *= calculateProbability(x, mean, stdev)
    return probabilities

def predict(summaries, inputVector):
    # Return the class with the largest probability for this input vector
    probabilities = calculateClassProbabilities(summaries, inputVector)
    bestLabel, bestProb = None, -1
    for classValue, probability in probabilities.items():
        if bestLabel is None or probability > bestProb:
            bestProb = probability
            bestLabel = classValue
    return bestLabel

def getPredictions(summaries, testSet):
    predictions = []
    for i in range(len(testSet)):
        result = predict(summaries, testSet[i])
        predictions.append(result)
    return predictions

def getAccuracy(testSet, predictions):
    # Compare predictions against the actual class labels
    correct = 0
    truepos = 0
    trueneg = 0
    pos = 0
    neg = 0
    for i in range(len(testSet)):
        print(testSet[i][-1], " ", predictions[i])
        if testSet[i][-1] == predictions[i]:
            correct += 1
        if testSet[i][-1] == 1 and predictions[i] == 1:
            truepos += 1
        if testSet[i][-1] == 0 and predictions[i] == 0:
            trueneg += 1
        if testSet[i][-1] == 1:
            pos += 1
        else:
            neg += 1
    acc = correct/float(len(testSet)) * 100.0
    # precision = TP / (TP + FP); false positives are the actual negatives
    # that were not predicted as negative (neg - trueneg)
    preci = truepos/float(truepos + (neg - trueneg)) * 100.0
    # recall = TP / (TP + FN); false negatives are the actual positives
    # that were not predicted as positive (pos - truepos)
    recal = truepos/float(truepos + (pos - truepos)) * 100.0
    return (acc, preci, recal)

def main():
    filename = 'pima-indians-diabetes.csv'
    splitRatio = 0.67
    dataset = loadCsv(filename)
    # divide the data into training and test sets
    trainingSet, testSet = splitDataset(dataset, splitRatio)
    #trainingSet = dataset # passing the entire dataset as training data
    #testSet = [[8.0,183.0,64.0,0.0,0.0,23.3,0.672,32.0]]
    print('Split {0} rows into train={1} and test={2} rows'.format(len(dataset), len(trainingSet), len(testSet)))
    # prepare model
    summaries = summarizeByClass(trainingSet)
    # test model
    predictions = getPredictions(summaries, testSet)
    accuracy, precision, recall = getAccuracy(testSet, predictions)
    print('Accuracy: {0}%, precision: {1}%, recall: {2}%'.format(accuracy, precision, recall))

main()
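For a quick cross-check of the scratch implementation, the same data can also be classified with scikit-learn's GaussianNB. This is only a minimal sketch, assuming numpy and scikit-learn are installed and the same 'pima-indians-diabetes.csv' file is available; it is not part of the original program, and because the split is random the accuracies will differ slightly between runs.

# Optional cross-check (assumes numpy and scikit-learn are installed)
import numpy as np
from sklearn.naive_bayes import GaussianNB

dataset = loadCsv('pima-indians-diabetes.csv')
trainingSet, testSet = splitDataset(dataset, 0.67)
trainX = np.array([row[:-1] for row in trainingSet])
trainY = np.array([row[-1] for row in trainingSet])
testX = np.array([row[:-1] for row in testSet])
testY = np.array([row[-1] for row in testSet])
model = GaussianNB().fit(trainX, trainY)
print('sklearn GaussianNB accuracy: {0}%'.format(model.score(testX, testY) * 100.0))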
# kmeans2: K-Means and GMM (EM) clustering on the Iris dataset

import matplotlib.pyplot as plt


from sklearn import datasets
from sklearn.cluster import KMeans
import pandas as pd
import numpy as np
# import some data to play with
iris = datasets.load_iris()
X = pd.DataFrame(iris.data)
X.columns = ['Sepal_Length','Sepal_Width','Petal_Length','Petal_Width']
y = pd.DataFrame(iris.target)
y.columns = ['Targets']
# Build the K Means Model
model = KMeans(n_clusters=3)
model.fit(X) # model.labels_ gives the cluster number each sample belongs to
# Visualise the clustering results
plt.figure(figsize=(14,14))
colormap = np.array(['red', 'lime', 'black'])
# Plot the Original Classifications using Petal features
plt.subplot(2, 2, 1)
plt.scatter(X.Petal_Length, X.Petal_Width, c=colormap[y.Targets], s=40)
plt.title('Real Clusters')
plt.xlabel('Petal Length')
plt.ylabel('Petal Width')
# Plot the Models Classifications
plt.subplot(2, 2, 2)
plt.scatter(X.Petal_Length, X.Petal_Width, c=colormap[model.labels_], s=40)
plt.title('K-Means Clustering')
plt.xlabel('Petal Length')
plt.ylabel('Petal Width')
# General EM for GMM
from sklearn import preprocessing
# transform your data such that its distribution will have a
# mean value 0 and standard deviation of 1.
scaler = preprocessing.StandardScaler()
scaler.fit(X)
xsa = scaler.transform(X)
xs = pd.DataFrame(xsa, columns = X.columns)
from sklearn.mixture import GaussianMixture
gmm = GaussianMixture(n_components=3)
gmm.fit(xs)
gmm_y = gmm.predict(xs)
plt.subplot(2, 2, 3)
plt.scatter(X.Petal_Length, X.Petal_Width, c=colormap[gmm_y], s=40)
plt.title('GMM Clustering')
plt.xlabel('Petal Length')
plt.ylabel('Petal Width')
print('Observation: The GMM using EM algorithm based clustering matched the true labels more closely than the Kmeans.')
plt.show()
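The printed observation can also be checked numerically. A minimal sketch, assuming the variables defined above and scikit-learn's adjusted_rand_score, that scores both clusterings against the true Iris labels (higher is better):

# Quantifying the observation above with the adjusted Rand index (sketch)
from sklearn.metrics import adjusted_rand_score
print('Adjusted Rand index, K-Means:', adjusted_rand_score(iris.target, model.labels_))
print('Adjusted Rand index, GMM    :', adjusted_rand_score(iris.target, gmm_y))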
# lwr2: Locally Weighted Regression on the tips dataset

import matplotlib.pyplot as plt


import pandas as pd
import numpy as np1

def kernel(point, xmat, k):
    # Gaussian kernel: weight every training point by its distance from 'point'
    m,n = np1.shape(xmat)
    weights = np1.mat(np1.eye((m)))
    for j in range(m):
        diff = point - xmat[j]
        weights[j,j] = np1.exp(diff*diff.T/(-2.0*k**2))
    return weights

def localWeight(point, xmat, ymat, k):
    # Solve the weighted least-squares problem for a single query point
    wei = kernel(point, xmat, k)
    W = (xmat.T*(wei*xmat)).I*(xmat.T*(wei*ymat.T))
    return W

def localWeightRegression(xmat, ymat, k):
    # Fit a separate local model at every training point
    m,n = np1.shape(xmat)
    ypred = np1.zeros(m)
    for i in range(m):
        ypred[i] = xmat[i]*localWeight(xmat[i], xmat, ymat, k)
    return ypred

def graphPlot(X, ypred):
    sortindex = X[:,1].argsort(0)   # indices that sort the bill values
    xsort = X[sortindex][:,0]
    fig = plt.figure()
    ax = fig.add_subplot(1,1,1)
    ax.scatter(bill, tip, color='green')
    ax.plot(xsort[:,1], ypred[sortindex], color='red', linewidth=5)
    plt.xlabel('Total bill')
    plt.ylabel('Tip')
    plt.show()
# load data points
data = pd.read_csv('data10_tips.csv')
bill = np1.array(data.total_bill)
tip = np1.array(data.tip)
# prepend a column of ones (the bias term) to the bill values
mbill = np1.mat(bill)
mtip = np1.mat(tip)
m = np1.shape(mbill)[1]
one = np1.mat(np1.ones(m))
X = np1.hstack((one.T, mbill.T))
# set the kernel bandwidth k here
ypred = localWeightRegression(X, mtip, 9)   # value of k is 9
SortIndex = X[:,1].argsort(0)
xsort = X[SortIndex][:,0]
graphPlot(X,ypred)
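The shape of the fitted curve depends strongly on the bandwidth k. As a minimal sketch, reusing X, mtip and graphPlot from above (the k values below are only illustrative), the fit can be drawn for a few bandwidths:

# Illustrative bandwidth comparison, reusing the functions and data above
for k in [1, 3, 9]:
    graphPlot(X, localWeightRegression(X, mtip, k))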
# Knn: K-Nearest Neighbours classification on the Iris dataset

# import the required packages


from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn import datasets
# Load dataset
iris=datasets.load_iris()
print("Iris Data set loaded...")
# Split the data into train and test samples
x_train, x_test, y_train, y_test = train_test_split(iris.data, iris.target, test_size=0.1)
print("Dataset is split into training and testing...")
print("Size of training data and its label", x_train.shape, y_train.shape)
print("Size of testing data and its label", x_test.shape, y_test.shape)
# Prints Label no. and their names
for i in range(len(iris.target_names)):
    print("Label", i, "-", str(iris.target_names[i]))
# Create object of KNN classifier
classifier = KNeighborsClassifier(n_neighbors=25)
# Perform Training
classifier.fit(x_train, y_train)
# Perform testing
y_pred=classifier.predict(x_test)
# Display the results
print("Results of Classification using K-nn with K=1 ")
for r in range(0,len(x_test)):
print(" Sample:", str(x_test[r]), " Actual-label:", str(y_test[r]), "
Predicted-label:",
str(y_pred[r]))
print("Classification Accuracy :" , classifier.score(x_test,y_test));
#from sklearn.metrics import classification_report, confusion_matrix
#print('Confusion Matrix')
#print(confusion_matrix(y_test,y_pred))
#print('Accuracy Metrics')
#print(classification_report(y_test,y_pred))
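With only 10% of the data held out for testing, the reported accuracy varies a lot from run to run. A minimal sketch, assuming scikit-learn's cross_val_score and using illustrative candidate values of K, for comparing settings of n_neighbors by 5-fold cross-validation:

# Illustrative choice of K by 5-fold cross-validation on the full Iris data
from sklearn.model_selection import cross_val_score
for k in [1, 5, 15, 25]:
    scores = cross_val_score(KNeighborsClassifier(n_neighbors=k), iris.data, iris.target, cv=5)
    print("K =", k, "mean CV accuracy =", scores.mean())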

Artificial Neural Networks (Chapter 4)

1. What is an Artificial Neural Network?

2. What are the types of problems to which Artificial Neural Networks can be applied?

3. Write a note on Representational Power of Perceptron

4. What is a linearly inseparable problem? Design a two-layer network of perceptrons to implement a) X OR Y b) X AND Y.

5. Explain the concept of a Perceptron with a neat diagram.

6. Discuss the Perceptron training rule.

7. Define Delta Rule.

8. Under what conditions does the perceptron rule fail, making it necessary to apply the delta rule?

9. What do you mean by Gradient Descent?

10. Derive the Gradient Descent Rule.

11. What are the conditions in which Gradient Descent is applied?

12. What are the difficulties in applying Gradient Descent?

13. Explain the importance of Stochastic Gradient Descent

14. Differentiate between Gradient Descent and Stochastic Gradient Descent

15. Differentiate between Gradient Descent and Perceptron training rule.

16. Derive the Backpropagation rule, considering the training rule for output unit weights and the training rule for hidden unit weights.

17. Write the algorithm for Backpropagation.

18. Explain how to learn Multilayer Networks using Backpropagation Algorithm.

19. What is Squashing Function?

20. Briefly explain the following with respect to Backpropagation

a) Convergence and Local Minima of MLP

b) Representational Power of Feedforward Networks

c) Generalization, Overfitting, and Stopping Criterion

Decision Tree Learning (Chapter 3)


1. Explain the following with examples:

a. Decision Tree,

b. Decision Tree Learning

c. Decision Tree Representation.

2. What are the appropriate problems for Decision Tree learning? OR

What are the characteristics of the problems suited for decision tree learning?

3. Explain the concepts of entropy and information gain.

4. Describe the ID3 algorithm for decision tree learning with an example. OR

What is the procedure for building a Decision Tree using ID3 with Gain and Entropy? Illustrate with an example. OR

What do you mean by Gain and Entropy? How are they used to build the Decision Tree in the ID3 algorithm? Illustrate using an example.

5. Give Decision trees to represent the Boolean Functions:


a. A &amp;&amp; - B

b. A V [ B &amp;&amp; C]

c. A XOR B

d. [A&amp;&amp;B] V [C&amp;&amp;D]

6. Consider the following set of training examples.

a. What is the entropy of this collection of training examples with respect to the target function classification?

b. What is the information gain of A2 relative to these training examples?

7. Discuss Hypothesis Space Search in Decision tree Learning.

8. Discuss Inductive Bias in Decision Tree Learning. Differentiate between the two types of biases. Why prefer short hypotheses?

9. What are the issues in decision tree learning? Explain briefly how they are overcome.

Discuss the following issues in detail:

a. Avoiding overfitting in Decision Trees

b. Incorporating continuous-valued attributes

c. Handling training examples with missing attribute values

d. Handling attributes with different costs
