Documenti di Didattica
Documenti di Professioni
Documenti di Cultura
Prostate Project
Part 1. Describe your data and the purpose of your analysis
Prostate Cancer: Stamey et al. (1989) studied potential predictors of
prostate-specific antigen (PSA) in patients. The response is lpsa (log of PSA). The
independent variables include X, log of cavol (lcavol), log of weight (lweight), age,
log of bph (lbph), svi, log of cp (lcp), gleason, and pgg45.
> dim(prostate)
[1] 97 10
> names(prostate)
> prostate[1:5,]
lpsa
1 -0.4307829
2 -0.1625189
3 -0.1625189
4 -0.1625189
5 0.3715636
1
Project 1
> pairs(lpsa~.,data=prostate)
> boxplot(prostate)
2
Project 1
> prostate.train<-
sample(1:nrow(prostate),round(nrow(prostate)/2),replace=FALSE)
> index.train<-sample(1:nrow(prostate),round(nrow(prostate)/2),replace=FALSE)
> prostate.train<-prostate[index.train,]
> prostate.validation<-prostate[-index.train,]
> model1<-
lm(lpsa~1+lcavol+lweight+age+lbph+svi+lcp+gleason+pgg45,data=prostate.train)
> summary(model1)
Call:
Residuals:
Coefficients:
3
Project 1
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Best Subsets
> leaps<-
regsubsets(lpsa~1+lcavol+lweight+age+lbph+svi+lcp+gleason+pgg45,data=prostate.train,nbest=1)
> names(leaps)
[1] "np" "nrbar" "d" "rbar" "thetab" "first" "last" "vorder" "tol"
"rss" "bound"
> summary(leaps)
4
Project 1
2 ( 1 ) "*" " " " " " " "*" " " " " " "
3 ( 1 ) "*" "*" " " " " "*" " " " " " "
4 ( 1 ) "*" "*" "*" " " "*" " " " " " "
5 ( 1 ) "*" "*" "*" "*" "*" " " " " " "
> leaps$rss
> plot(leaps$rss)
5
Project 1
> coef(leaps,6)
> library(car)
> subsets(leaps,statistic="rss")
6
Project 1
> subsets(leaps,statistic="cp")
7
Project 1
> subsets(leaps,statistic="bic")
8
Project 1
9
Project 1
> model2<-lm(lpsa~1+lcavol+lweight+age+svi,data=prostate.train)
> summary(model2)
Call:
Residuals:
Coefficients:
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Stepwise Regression
> model<-
lm(lpsa~1+lcavol+lweight+age+lbph+svi+lcp+gleason+pgg45,data=prostate.train)
> steps<-stepAIC(model,direction="both")
Start: AIC=-26.05
10
Project 1
pgg45
Step: AIC=-27.97
11
Project 1
Step: AIC=-29.64
Step: AIC=-31.34
12
Project 1
> model3<-lm(lpsa~1+lcavol+lweight+age+lbph+svi,data=prostate.train)
> summary(model3)
Call:
data = prostate.train)
Residuals:
Coefficients:
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
13
Project 1
Ridge Regression
> model.ridge<-
lm.ridge(lpsa~1+lcavol+lweight+age+lbph+svi+lcp+gleason+pgg45,data=prostate.train,na.action="na.omit",lambda=seq(1,100,.02))
> plot(model.ridge$lambda,model.ridge$GCV)
> model.ridge$lambda[model.ridge$GCV==min(model.ridge$GCV)]
[1] 4.44
> model.ridge<-
lm.ridge(lpsa~1+lcavol+lweight+age+lbph+svi+lcp+gleason+pgg45,data=prostate.train,na.action="na.omit",lambda=4.44)
14
Project 1
> model.ridge
> model.ridge2<-
lm(lpsa~1+lcavol+lweight+age+lbph+svi+lcp+gleason,data=prostate.train)
> model.ridge2
Call:
lm(formula = lpsa ~ 1 + lcavol + lweight + age + lbph + svi + lcp + gleason, data =
prostate.train)
Coefficients:
> summary(model.ridge2)
Call:
Residuals:
Coefficients:
15
Project 1
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
> y.validation<-data.frame(prostate.train[,10])
> predict.model1<-predict(model1,x.validation)
> ls()
16
Project 1
> predict.model2<-predict(model2,x.validation)
> predict.model3<-predict(model3,x.validation)
> predict.model4<-predict(model.ridge2,x.validation)
> rss.model1<-sum((y.validation-predict.model1)^2)
> rss.model2<-sum((y.validation-predict.model2)^2)
> rss.model3<-sum((y.validation-predict.model3)^2)
> rss.model4<-sum((y.validation-predict.model4)^2)
> rss.model1
[1] 19.17387
> rss.model2
[1] 20.51985
> rss.model3
[1] 19.45737
> rss.model4
[1] 19.20584
Call:
Residuals:
17
Project 1
Coefficients:
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
18