更多代码解释及示例数据请阅读原文:R语言|14. 森林图-3: 亚组分析三线表及森林图
载入R包和数据
#1.载入包
library(survival)
library(plyr)
##制作table1
library(tableone)
##制作森林图
library(forestplot)
#2.清理工作环境
rm(list = ls())
#3.数据放入工作路径,加载
R14<- read.csv('R14.csv')
#4.查看数据前6行
head(R14)
#5.查看数据数据性质
str(R14)
#6.查看结局:0为局部区域复发(55人)
R14$status<-factor(R14$status)
summary(R14$status)
所有需要修改的代码均用红色标出(详见原文链接,简书无法正常显示)
一、亚组分析:HR、95%CI及P值批量获取
1、所有亚变量都为“1”的亚组分析
#1-建一个名为"HR","5%CI","95%CI","P"的4列表:a表。
#用于放批量获取的结果。
a<-c("HR","5%CI","95%CI","P")
##2建两个for循环,第一个是批量Cox亚组分析,
##第二个是将结果批量提取放入a表
for(i in 5:dim(R14)[2]){
cox1<-coxph(Surv(time,status==0)~RT,subset=(R14[,i]=='1'),data=R14)
aa<-summary(cox1)
for(j in 1:dim(aa$coefficients)[1]){
a<-rbind(a,c(round(aa$coefficients[j,2],2),
round(aa$conf.int[j,3],2),
round(aa$conf.int[j,4],2),
round(aa$coefficients[j,5],3)))}}
# coxph(formula = Surv(time, status == 0) ~ RT, data = R14)
# n= 350, number of events= 55
#
# coef exp(coef) se(coef) z Pr(>|z|)
# RTYes 0.0003434 1.0003435 0.2714870 0.001 0.999
#
# exp(coef) exp(-coef) lower .95 upper .95
# RTYes 1 0.9997 0.5876 1.703
#
# Concordance= 0.513 (se = 0.036 )
# Likelihood ratio test= 0 on 1 df, p=1
# Wald test = 0 on 1 df, p=1
# Score (logrank) test = 0 on 1 df, p=1
#3-a表加入第一列名字,
#即变量T/ER/PR/KI67/LVI/N/Grade中亚变量名为1的实际含义
rownames(a)<-c("Characteristics",
"T1",
"Negative",
"Negative",
"Low",
"Absent",
"N0",
"I")
#4- 将a表的"HR","5%CI","95%CI"组合成HR(5CI-95CI)格式
#上图X1列是HR,X2,X3是95%CI
a$HR.CI95<-paste0(a$X1," (",a$X2,"-",a$X3,")");a
2、所有亚变量都为“2”的亚组分析
#1- 建立b表
b<-c("HR","5%CI","95%CI","P")
#2- 所有变量为2的亚组分析for循环
for(i in 5:dim(R14)[2]){
cox1<-coxph(Surv(time,status==0)~RT,subset=(R14[,i]=='2'),data=R14)
bb<-summary(cox1)
for(j in 1:dim(bb$coefficients)[1]){
b<-rbind(b,c(round(bb$coefficients[j,2],2),
round(bb$conf.int[j,3],2),
round(bb$conf.int[j,4],2),
round(bb$coefficients[j,5],3)))}}
#3- b表第一列名字,即变量T/ER/PR/KI67/LVI/N/Grade中亚变量名为2的实际含义
rownames(b)<-c("Characteristics",
"T2",
"Positive",
"Positive",
"High",
"Present",
"N1",
"II")
b <- data.frame(b);b
#4- 加入HR(95% CI)格式
b$HR.CI95<-paste0(b$X1," (",b$X2,"-",b$X3,")");b
3、所有亚变量都为“3”的亚组分析
#1- 建立c表
c<-c("HR","5%CI","95%CI","P")
#2- 亚变量为3的亚组分析
for(i in 10:dim(R14)[2]){
cox2<-coxph(Surv(time,status==0)~RT,subset=(R14[,i]=='3'),data=R14)
cc<-summary(cox2)
for(j in 1:dim(cc$coefficients)[1]){
c<-rbind(c,c(round(cc$coefficients[j,2],2),
round(cc$conf.int[j,3],2),
round(cc$conf.int[j,4],2),
round(cc$coefficients[j,5],3)))}}
#3- c表的第一列名字,即变量N/Grade中亚变量名为3的实际含义
rownames(c)<-c("Characteristics",
"N2~3",
"III")
c <- data.frame(c);c
#4- 加入HR(95% CI)列
c$HR.CI95<-paste0(c$X1," (",c$X2,"-",c$X3,")");c
4、在所有患者中做RT的单因素分析
#1- RT单因素分析
cox3<-coxph(Surv(time,status==0)~RT,data=R14)
dd<-summary(cox3)
#2- 提取信息
dd$coefficients
dd$conf.int
HR<- round(dd$coefficients[,2],2)
PValue <- round(dd$coefficients[,5],3)
CI5<- round(dd$conf.int[,3],2)
CI95<- round(dd$conf.int[,4],2)
HR.CI95<-paste0(HR," (",CI5,"-",CI95,")")
#3- d表是最后建的,就是把信息提取出来组合为d表
d<- data.frame("X1" = HR,
"X2" = CI5,
"X3" = CI95,
"X4" = PValue,
"HR.CI95" = HR.CI95)
#4- 行名,命名为所有人群
rownames(d)<-"All parents"
二、放疗患者在各亚组的数目
#1- 查看数据变量名
names(R14)
#"status" "time""RT" "AGE""T" "ER" "PR" "KI67""LVI" "N" "Grade"
#2- 指定需进行基线统计的变量,纳入的变量为上面进行的亚组分析的变量
myVars <- c("T","ER" ,"PR" ,"KI67","LVI","N","Grade")
#指定基线表中哪些变量是分类变量
catVars<-c("T","ER" ,"PR" ,"KI67","LVI","N","Grade")
#3- 构建table1函数,RT为分类标准
table<- print(CreateTableOne(vars=myVars,
data=R14,
strata="RT",
factorVars=catVars),
catDigits = 2,contDigits = 2,showAllLevels=TRUE)
#4- 去掉第4.5列(上图p和test列),新表命名为table1(17行3列)
table1<-table[,c(-4,-5)]
三、4个亚组分析表合成一个表并与table1表合并
#1- 按照table1的格式将abcd表合成一个表
res1<-rbind(d[1,],#abcd表合为res1表
a[2,],b[2,],#T的亚变量
a[3,],b[3,],#ER
a[4,],b[4,],#PR
a[5,],b[5,],#KI67
a[6,],b[6,],#LVI
a[7,],b[7,],c[2,],#N
a[8,],b[8,],c[3,])#G
#2- res1表列名进入表格内,并命名为Characteristics
>此时的res1表变为17行6列表(多了亚变量名列)
res1<-tibble::rownames_to_column(res1, var = "Characteristics")
#3- table与res1合成为res2
res2<-cbind(table1,res1)