
# -*- coding: utf-8 -*-

"""

@author: DFHC
"""

import numpy as np
import pandas as pd
import os
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.model_selection import KFold, cross_val_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression, Ridge, RidgeClassifier, RidgeClassifierCV
from sklearn.svm import SVC
from sklearn import svm
from sklearn.model_selection import train_test_split
import random
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV

os.chdir(r'C:\Users\57317\Documents\Econometría Financiera')
os.getcwd()

filename= "loansm.xlsx"
Data= pd.read_excel(filename,"loansm")

## Part 1 - QDA / KNN / decision tree

Data= Data.to_numpy()

Y= Data[:,0]
X= Data[:,1:]

kf= KFold(n_splits=10, shuffle=True)

# Manual 10-fold cross-validation for QDA
Score_QDA_1=[]
for train, test in kf.split(Data):
    x_train= X[train]
    y_train= Y[train]
    x_test= X[test]
    y_test= Y[test]
    clf= QuadraticDiscriminantAnalysis()
    clf.fit(x_train, y_train)
    y_pred= clf.predict(x_test)
    Score_QDA= clf.score(x_test, y_test)
    Score_QDA_1.append(Score_QDA)

Score_QDA= np.mean(Score_QDA_1)

# Same estimate using cross_val_score
QDA= QuadraticDiscriminantAnalysis()
QDA_Score= cross_val_score(QDA, X, Y, cv=KFold(n_splits=10))
QDA_Score= np.mean(QDA_Score)

# Confusion matrix and accuracy computed on the last fold only
matriz= confusion_matrix(y_test, y_pred)
print(matriz)
accuracy= accuracy_score(y_test, y_pred)
print(accuracy)
print(pd.crosstab(pd.Series(y_pred), pd.Series(y_test)))
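
# Optional refactor (not part of the original script): the same manual K-fold
# loop is repeated below for each classifier, so it could be wrapped in a small
# helper. This is only a sketch; the hypothetical name evaluate_model and its
# defaults are assumptions, and it relies on the imports already made above.
def evaluate_model(model, X, Y, n_splits=10):
    kf_helper= KFold(n_splits=n_splits, shuffle=True)
    scores= []
    for train, test in kf_helper.split(X):
        model.fit(X[train], Y[train])
        scores.append(model.score(X[test], Y[test]))
    return np.mean(scores)

# Example usage (equivalent to the QDA loop above):
# Score_QDA= evaluate_model(QuadraticDiscriminantAnalysis(), X, Y)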

kf= KFold(n_splits=10, shuffle=True)

# Manual 10-fold cross-validation for a decision tree
Score_tree_1=[]
for train, test in kf.split(Data):
    x_train= X[train]
    y_train= Y[train]
    x_test= X[test]
    y_test= Y[test]
    clf= DecisionTreeClassifier()
    clf.fit(x_train, y_train)
    y_pred= clf.predict(x_test)
    Score_tree= clf.score(x_test, y_test)
    Score_tree_1.append(Score_tree)

Score_tree= np.mean(Score_tree_1)

# Same estimate using cross_val_score
tree= DecisionTreeClassifier()
tree_Score= cross_val_score(tree, X, Y, cv=KFold(n_splits=10))
tree_Score= np.mean(tree_Score)

# Confusion matrix and accuracy on the last fold only
matriz= confusion_matrix(y_test, y_pred)
print(matriz)
accuracy= accuracy_score(y_test, y_pred)
print(accuracy)
print(pd.crosstab(pd.Series(y_pred), pd.Series(y_test)))

kf= KFold(n_splits=10, shuffle=True)

# Manual 10-fold cross-validation for KNN with k=1
Score_KNN_1=[]
for train, test in kf.split(Data):
    x_train= X[train]
    y_train= Y[train]
    x_test= X[test]
    y_test= Y[test]
    clf= KNeighborsClassifier(n_neighbors=1)
    clf.fit(x_train, y_train)
    y_pred= clf.predict(x_test)   # was y_predict; renamed so the metrics below use the KNN predictions
    Score_K= clf.score(x_test, y_test)
    Score_KNN_1.append(Score_K)

Score_KNN= np.mean(Score_KNN_1)

# Same estimate using cross_val_score
KNN= KNeighborsClassifier(n_neighbors=1)
KNN_Score= cross_val_score(KNN, X, Y, cv=KFold(n_splits=10))
KNN_Score= np.mean(KNN_Score)

# Confusion matrix and accuracy on the last fold only
matriz= confusion_matrix(y_test, y_pred)
print(matriz)
accuracy= accuracy_score(y_test, y_pred)
print(accuracy)
print(pd.crosstab(pd.Series(y_pred), pd.Series(y_test)))

## Part 3 - MR - logistic regression / decision tree / KNN

os.chdir(r'C:\Users\57317\Documents\Econometría Financiera')
os.getcwd()

filename= "loansm.xlsx"
Data= pd.read_excel(filename,"loansm3")

Data= Data.to_numpy()

Y= Data[:,0]
X= Data[:,1:]

kf= KFold(n_splits=10, shuffle=True)

# Manual 10-fold cross-validation for logistic regression
Score_LR_1=[]
for train, test in kf.split(Data):
    x_train= X[train]
    y_train= Y[train]
    x_test= X[test]
    y_test= Y[test]
    clf= LogisticRegression()   # raise max_iter if a ConvergenceWarning appears on unscaled features
    clf.fit(x_train, y_train)
    y_pred= clf.predict(x_test)
    Score_LR= clf.score(x_test, y_test)
    Score_LR_1.append(Score_LR)

Score_LR= np.mean(Score_LR_1)

# Same estimate using cross_val_score
LR= LogisticRegression()
LR_Score= cross_val_score(LR, X, Y, cv=KFold(n_splits=10))
LR_Score= np.mean(LR_Score)

# Confusion matrix and accuracy on the last fold only
matriz= confusion_matrix(y_test, y_pred)
print(matriz)
accuracy= accuracy_score(y_test, y_pred)
print(accuracy)
print(pd.crosstab(pd.Series(y_pred), pd.Series(y_test)))

kf= KFold(n_splits=10, shuffle=True)

# Manual 10-fold cross-validation for a decision tree
Score_tree_1=[]
for train, test in kf.split(Data):
    x_train= X[train]
    y_train= Y[train]
    x_test= X[test]
    y_test= Y[test]
    clf= DecisionTreeClassifier()
    clf.fit(x_train, y_train)
    y_pred= clf.predict(x_test)
    Score_tree= clf.score(x_test, y_test)
    Score_tree_1.append(Score_tree)

Score_tree= np.mean(Score_tree_1)

# Same estimate using cross_val_score
tree= DecisionTreeClassifier()
tree_Score= cross_val_score(tree, X, Y, cv=KFold(n_splits=10))
tree_Score= np.mean(tree_Score)

# Confusion matrix and accuracy on the last fold only
matriz= confusion_matrix(y_test, y_pred)
print(matriz)
accuracy= accuracy_score(y_test, y_pred)
print(accuracy)
print(pd.crosstab(pd.Series(y_pred), pd.Series(y_test)))

kf= KFold(n_splits=10, shuffle=True)

# Manual 10-fold cross-validation for KNN with k=1
Score_KNN_1=[]
for train, test in kf.split(Data):
    x_train= X[train]
    y_train= Y[train]
    x_test= X[test]
    y_test= Y[test]
    clf= KNeighborsClassifier(n_neighbors=1)
    clf.fit(x_train, y_train)
    y_pred= clf.predict(x_test)   # was y_predict; renamed so the metrics below use the KNN predictions
    Score_K= clf.score(x_test, y_test)
    Score_KNN_1.append(Score_K)

Score_KNN= np.mean(Score_KNN_1)

# Same estimate using cross_val_score
KNN= KNeighborsClassifier(n_neighbors=1)
KNN_Score= cross_val_score(KNN, X, Y, cv=KFold(n_splits=10))
KNN_Score= np.mean(KNN_Score)

# Confusion matrix and accuracy on the last fold only
matriz= confusion_matrix(y_test, y_pred)
print(matriz)
accuracy= accuracy_score(y_test, y_pred)
print(accuracy)
print(pd.crosstab(pd.Series(y_pred), pd.Series(y_test)))

kf= KFold(n_splits=10, shuffle=True)

# Manual 10-fold cross-validation for KNN with k=5
Score_KNN_1=[]
for train, test in kf.split(Data):
    x_train= X[train]
    y_train= Y[train]
    x_test= X[test]
    y_test= Y[test]
    clf= KNeighborsClassifier(n_neighbors=5)
    clf.fit(x_train, y_train)
    y_pred= clf.predict(x_test)   # was y_predict; renamed so the metrics below use these predictions
    Score_KNN= clf.score(x_test, y_test)
    Score_KNN_1.append(Score_KNN)

Score_KNN= np.mean(Score_KNN_1)

# Same estimate using cross_val_score
KNN= KNeighborsClassifier(n_neighbors=5)
KNN_Score= cross_val_score(KNN, X, Y, cv=KFold(n_splits=10))
KNN_Score= np.mean(KNN_Score)

# Confusion matrix and accuracy on the last fold only
matriz= confusion_matrix(y_test, y_pred)
print(matriz)
accuracy= accuracy_score(y_test, y_pred)
print(accuracy)
print(pd.crosstab(pd.Series(y_pred), pd.Series(y_test)))
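
# Optional extension (not in the original script): instead of comparing k=1 and
# k=5 by hand, GridSearchCV (already imported above) can search over k directly.
# A minimal sketch; the candidate grid below is an assumption.
param_grid_knn= {'n_neighbors': [1, 3, 5, 7, 9]}
grid_knn= GridSearchCV(KNeighborsClassifier(), param_grid_knn, cv=10)
grid_knn.fit(X, Y)
print(grid_knn.best_params_, grid_knn.best_score_)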

## Part 5

filename= "loansm.xlsx"
Data= pd.read_excel(filename,"loansm")

Data= Data.to_numpy()

Y= Data[:,0]
X= Data[:,1:]

# Note: RidgeClassifier no longer accepts normalize= in recent scikit-learn
# versions, so the features are standardized once with StandardScaler instead
# (a close, though not identical, substitute for the original normalize=True).
X= StandardScaler().fit_transform(X)

kf= KFold(n_splits=10, shuffle=True)   # was assigned to f, but the loop uses kf

# Manual 10-fold cross-validation for a ridge classifier
Score_rd_1=[]
for train, test in kf.split(Data):
    x_train= X[train]
    y_train= Y[train]
    x_test= X[test]
    y_test= Y[test]
    clf= RidgeClassifier()
    clf.fit(x_train, y_train)
    y_pred= clf.predict(x_test)
    Score_rd= clf.score(x_test, y_test)
    Score_rd_1.append(Score_rd)

Score_rd= np.mean(Score_rd_1)

# Grid search over alpha on the last fold's training data
alpha_space= np.logspace(-1, 1, 10)   # 10 candidate alphas between 0.1 and 10
param_grid= {'alpha': alpha_space}
grid= GridSearchCV(RidgeClassifier(), param_grid, cv=10)
grid.fit(x_train, y_train)
alpha= grid.best_params_["alpha"]

# Same estimate using cross_val_score
rd= RidgeClassifier()
rd_Score= cross_val_score(rd, X, Y, cv=KFold(n_splits=10))
rd_Score= np.mean(rd_Score)

# Confusion matrix and accuracy on the last fold only
matriz= confusion_matrix(y_test, y_pred)
print(matriz)
accuracy= accuracy_score(y_test, y_pred)
print(accuracy)
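
# Optional alternative (not in the original script): on current scikit-learn,
# scaling and the alpha search can be combined in one estimator. A minimal
# sketch; RidgeClassifierCV and StandardScaler are already imported at the top,
# Pipeline is imported here, and the alpha grid reuses alpha_space from above.
from sklearn.pipeline import Pipeline

ridge_pipe= Pipeline([
    ('scale', StandardScaler()),
    ('ridge', RidgeClassifierCV(alphas=alpha_space, cv=10)),
])
ridge_pipe.fit(X, Y)
print(ridge_pipe.named_steps['ridge'].alpha_)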
