【R语言数据科学】:交叉验证再回首
文章目录
前言
1 K折交叉验证
2 K-fold交叉验证代码实现
# Load the packages used below. R package names are case-sensitive, so the
# data package must be spelled ISLR2 (the Auto data set is expected to come
# from it); boot provides cv.glm().
library(ISLR2)
library(boot)

# Fix the RNG seed so the random fold assignment is reproducible
# (7 is simply the author's lucky number).
set.seed(7)

# Pre-allocate a vector for the 10-fold CV error of each polynomial degree.
cv_error_10 <- numeric(10)
for (i in seq_along(cv_error_10)) {
  # Fit mpg on a degree-i polynomial in horsepower.
  glm.fit <- glm(mpg ~ poly(horsepower, i), data = Auto)
  # Keep delta[1], the first of the two error estimates cv.glm() returns.
  cv_error_10[i] <- cv.glm(Auto, glm.fit, K = 10)$delta[1]
}
cv_error_10
- 24.1463716629577
- 19.3130825829741
- 19.434897545051
- 19.5493689322887
- 19.0736379228708
- 18.7058531603005
- 19.2522869995751
- 18.8552270777634
- 18.9304332711781
- 20.4425474405408
# Plot the 10-fold CV error (y) against the polynomial degree (x).
library(ggplot2)
x <- 1:10
kcv <- data.frame(x = x, cv = cv_error_10)
ggplot(data = kcv, mapping = aes(x = x, y = cv)) +
  geom_point() +
  geom_line(lwd = 1, colour = "blue")
3 留一法交叉验证(LOOCV)
4 留一法交叉验证代码实现
# Cross-validate the plain linear fit of mpg on horsepower. cv.glm() is
# called without K, so it falls back to its default number of folds
# (documented in boot as the number of observations, i.e. leave-one-out).
library(boot)
glm.fit <- glm(formula = mpg ~ horsepower, data = Auto)
cv.err <- cv.glm(data = Auto, glmfit = glm.fit)
# Print both error estimates stored in the delta component.
cv.err[["delta"]]
- 24.2315135179292
- 24.2311440937562
# Repeat the default-K cross-validation for polynomial degrees 1 through 10,
# storing the first delta component for each degree.
cv.error <- numeric(10)
for (i in seq_len(10)) {
  # Refit with a degree-i polynomial in horsepower, then cross-validate it.
  glm.fit <- glm(formula = mpg ~ poly(horsepower, i), data = Auto)
  cv.error[i] <- cv.glm(data = Auto, glmfit = glm.fit)[["delta"]][1]
}
cv.error
- 24.2315135179293
- 19.2482131244897
- 19.334984064029
- 19.4244303104303
- 19.0332138547041
- 18.9786436582254
- 18.8330450653183
- 18.9611507120531
- 19.0686299814599
- 19.490932299334
# Plot the LOOCV error (y) against the polynomial degree (x).
library(ggplot2)
# Build the degree index from cv.error itself so this step does not depend on
# an `x` vector left over from an earlier part of the script.
loocv <- data.frame(x = seq_along(cv.error), cv = cv.error)
# Map y to the data frame's own `cv` column. Mapping to `cv.error` only worked
# because ggplot2 fell back to the global vector of that name, and it was
# inconsistent with the k-fold plot, which maps to `cv`.
ggplot(loocv, aes(x, cv)) +
  geom_point() +
  geom_line(lwd = 1, col = "blue")