我有一个DataFrame df1(为了节省空间,它的dput放在底部),我正在运行DFS分析,如下所示:

# Load the packages used below. library() stops with an error when a package
# is missing, whereas require() only returns FALSE and lets the script run on
# to a less obvious failure further down.
library(survival)
library(survminer)
library(ggplot2)

# Cox proportional-hazards fit of Surv(DFS, Relapsed) on Gender, Stage and MSI.
cox_fit <- coxph(Surv(DFS, Relapsed) ~ Gender + Stage + MSI, data = df1)
# Forest plot of the fit; reference levels are labelled "Reference Group".
ggforest(cox_fit, data = df1, refLabel = "Reference Group")

[Image: forest plot]

You can see that the Gender and MSI variables each have one level shown as "Reference Group", as expected, but the Stage variable does not, and I don't understand why. When I simply print the cox_fit object, it looks fine, with all three variables present:

# Print the fitted model. The pasted output below lists one coefficient row
# for each of the three covariates: GenderMale, StageIII and MSIMSI-Hi.
cox_fit 
# Call:
#   coxph(formula = Surv(DFS, Relapsed) ~ Gender + Stage + MSI, data = df1)
# 
#              coef exp(coef) se(coef)     z        p
# GenderMale 0.7952    2.2149   0.2916 2.727  0.00639
# StageIII   1.8824    6.5693   0.4342 4.335 1.46e-05
# MSIMSI-Hi  0.4551    1.5763   0.3677 1.238  0.21579
# 
# Likelihood ratio test=35.43  on 3 df, p=9.872e-08
# n= 337, number of events= 57 

我不明白 Stage 变量有什么特别之处,使它在森林图(forest plot)上的显示不正常(没有"Reference Group"这一行)。

df1 <- structure(
  list(
    DFS = c(474,280,439,468,155,390,484,432,254,453,375,426,540,236,396,315,255,444,336,414,394,362,376,280,366,346,163,363,308,297,253,288,261,111,288,339,274,282,136,213,85,113,35,76,67,75,28,136,1339,1238,1324,117,75,1191,1234,1222,1320,1162,651,1234,1224,270,1233,1266,1071,1149,1159,1140,1224,1071,1174,1056,561,1022,315,1110,1081,1183,1166,1085,1171,689,86,1048,1087,527,539,1199,1106,1102,1113,1108,870,228,1019,89,1056,1140,1063,1082,193,1030,777,231,137,394,1079,408,994,100,805,978,1022,518,36,516,634,930,934,847,904,345,817,852,342,97,663,1031,888,349,684,188,431,114,994,922,1030,743,935,888,287,915,613,927,2,137,842,793,770,94,160,884,892,711,679,676,409,924,806,887,978,855,189,801,518,843,807,944,657,775,988,891,756,1013,892,404,883,738,853,161,496,241,870,704,97,677,865,862,650,722,873,737,851,816,891,603,726,65,833,789,93,787,812,611,505,715,833,756,707,710,149,828,830,678,371,78,210,874,658,564,705,734,648,729,223,440,752,789,863,222,563,540,794,160,651,792,578,781,586,351,623,487,442,807,673,716,369,735,301,255,729,514,701,116,160,654,226,700,751,672,52,728,423,766,627,217,784,161,607,357,606,722,152,631,365,416,590,28,573,590,656,114,166,420,196,538,693,623,676,65,725,626,646,627,681,8,680,65,655,157,613,620,693,153,630,147,645,232,591,494,646,595,291,604,629,424,251,499,513,424,487,592,555,570,543,491,43,501,563,558,469,645,511,453,475,427,314),
    Relapsed = c(T,T,F,F,F,F,F,F,F,F,F,F,F,T,F,F,F,F,F,F,F,F,F,F,F,F,F,T,F,F,F,F,F,F,F,F,F,F,F,F,F,F,T,F,F,F,T,T,F,F,F,T,T,F,F,F,F,F,F,F,F,T,F,F,F,F,F,F,F,F,F,F,F,F,T,F,F,F,F,F,F,F,T,F,F,F,F,F,F,F,F,F,T,T,F,F,F,F,F,F,T,F,F,T,F,F,F,F,F,T,F,T,F,F,T,F,F,F,F,F,F,T,T,F,F,T,F,F,F,T,F,F,F,T,F,F,F,F,F,F,F,F,F,F,T,T,F,F,F,T,F,F,F,F,F,F,F,F,F,F,F,F,T,F,F,F,F,F,F,F,F,F,F,F,F,T,F,F,F,F,T,T,T,F,F,F,F,F,F,F,F,F,F,F,F,F,F,T,F,F,T,F,F,F,F,F,F,F,F,F,T,F,F,T,T,T,F,F,F,F,F,F,F,F,T,T,F,F,F,F,F,F,F,F,F,F,F,F,F,T,F,F,F,F,F,F,F,F,T,F,F,F,F,F,F,T,F,F,F,F,F,F,F,F,F,F,F,T,F,F,F,F,T,F,F,F,F,T,F,F,F,F,F,T,F,F,F,F,F,F,F,F,F,F,F,T,F,T,F,T,F,F,F,F,F,F,F,T,F,T,F,F,F,F,F,F,T,T,F,T,F,F,F,F,F,F,F,T,F,F,F,F,F,F,F,F,T),
    Gender = structure(
      c(2,2,2,1,1,1,2,2,2,1,1,2,1,2,1,1,1,2,2,1,1,1,2,2,2,1,2,1,2,1,2,1,2,1,2,1,1,2,2,2,1,1,2,2,2,1,1,1,2,2,2,2,2,2,1,1,2,2,2,1,1,2,2,2,2,1,1,1,1,1,2,1,2,1,1,1,1,2,1,1,1,1,1,2,1,2,1,2,1,1,1,1,2,2,2,2,2,2,1,2,2,1,1,2,1,2,1,2,1,1,2,2,2,1,2,2,1,2,1,2,2,2,2,1,2,1,2,1,2,2,1,2,2,2,1,2,2,2,1,2,1,2,1,2,2,2,2,1,1,1,1,2,1,2,1,2,2,2,2,2,1,2,1,1,2,1,1,2,1,1,2,1,2,2,2,2,2,1,2,1,2,2,2,1,2,2,2,2,2,1,1,1,1,2,2,1,1,2,2,2,2,1,2,2,2,2,1,1,1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,2,2,2,1,1,1,1,1,1,2,2,2,1,1,2,2,2,1,1,1,2,1,2,1,2,2,1,1,1,1,1,2,2,1,1,1,1,1,2,2,1,2,1,1,1,2,2,1,1,2,2,2,2,1,2,1,2,2,1,2,1,1,2,2,1,1,1,1,2,1,1,2,1,2,1,2,2,2,2,1,2,1,2,1,2,1,2,2,1,1,1,2,2,2,2,1,1,2,2,1,1,1,2,2,1,2,2,2,1,2,2),
      levels = c("Female","Male"),
      class = "factor"
    ),
    Stage = structure(
c(1,2,1,1,1,2,1,2,1,2,2,2,2,2,2,1,2,2,1,2,1,1,2,2,2,2,2,1,1,1,1,1,2,1,1,2,1,2,1,2,2,2,2,2,1,1,2,2,1,1,2,2,1,2,2,2,2,1,1,2,2,2,1,2,2,2,1,1,1,2,2,1,2,2,2,2,1,2,2,2,1,2,2,1,2,2,1,2,2,1,2,2,2,1,1,1,1,1,2,1,2,2,2,2,2,1,2,1,1,2,1,2,1,1,2,2,2,2,2,1,2,1,2,2,2,2,2,2,2,2,1,2,1,2,2,1,1,2,2,1,1,1,1,2,1,2,2,2,1,2,1,1,1,2,2,1,1,1,2,2,2,1,2,1,1,1,1,1,1,2,2,2,1,2,2,2,1,1,1,2,2,2,2,1,2,1,2,1,2,2,1,2,1,2,2,1,2,2,1,1,2,1,2,2,1,2,1,2,2,2,2,2,1,2,2,2,1,2,1,2,2,1,1,1,2,2,1,2,2,1,2,2,2,1,2,1,2,1,2,2,2,1,2,2,1,2,1,1,2,2,1,2,2,2,2,2,2,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,2,1,2,2,2,2,1,2,2,1,2,2,1,2,1,1,2,1,1,2,1,1,1,2,2,2,1,2,2,1,2,2,2,1,1,2,2,2,2,2,1,2,2,2,2,2,1,2,1,1,2,2,2,2,2,2,1,1,2,1,1,2,1,2,2),
      levels = c("II","III"),
      class = "factor"
    ),
    MSI = structure( c(1,1,1,1,1,2,1,1,1,2,1,1,1,1,1,1,1,1,2,1,2,2,2,1,1,2,1,1,1,2,1,2,1,1,1,1,1,1,1,1,1,2,2,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,2,1,1,2,1,2,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,2,2,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,2,1,1,1,1,2,1,1,1,1,1,1,2,1,1,2,2,1,1,1,2,1,2,1,2,1,2,1,2,2,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,2,1,1,1,2,1,1,1,1,2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,2,1,1,1,1,2,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,2,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,2,1,2,1,1,1,1,1,1,1,1,2,1,1,2,1,1 ),
      levels = c("MSS","MSI-Hi"),
      class = "factor"
    )
  ),
  class = "data.frame")

推荐答案

ggforest 源码中按行名取子集的那一行代码会对行名做部分匹配(partial matching),因此 "StageII" 被匹配到了 "StageIII"。下面的例子可以重现这一现象:

# Coefficient table with one row per estimated term; the term name is also
# used as the row name.
coef <- structure(
  list(
    term = c("GenderMale", "StageIII", "MSIMSI-Hi"),
    estimate = c(0.795207848408587, 1.88240540447955, 0.455091789862389),
    std.error = c(0.291625356633886, 0.43423050039025, 0.36766034424876),
    statistic = c(2.72681311936435, 4.33503727349369, 1.237804938665),
    p.value = c(0.00639492461449034, 1.45735426457274e-05, 0.215788396790699),
    conf.low = c(0.223632652427522, 1.03132926272586, -0.265509243408779),
    conf.high = c(1.36678304438965, 2.73348154623325, 1.17569282313356)
  ),
  row.names = c("GenderMale", "StageIII", "MSIMSI-Hi"),
  class = "data.frame"
)

# Every variable/level label to look up, reference levels included.
inds <- c(
  "GenderFemale", "GenderMale",
  "StageII", "StageIII",
  "MSIMSS", "MSIMSI-Hi"
)

# Character row indexing of a data.frame partially matches row names, so
# "StageII" picks up the "StageIII" row instead of producing an NA row.
coef[inds, ]
#                  term  estimate std.error statistic      p.value   conf.low conf.high
# NA               <NA>        NA        NA        NA           NA         NA        NA
# GenderMale GenderMale 0.7952078 0.2916254  2.726813 6.394925e-03  0.2236327  1.366783
# StageIII     StageIII 1.8824054 0.4342305  4.335037 1.457354e-05  1.0313293  2.733482
# StageIII.1   StageIII 1.8824054 0.4342305  4.335037 1.457354e-05  1.0313293  2.733482
# NA.1             <NA>        NA        NA        NA           NA         NA        NA
# MSIMSI-Hi   MSIMSI-Hi 0.4550918 0.3676603  1.237805 2.157884e-01 -0.2655092  1.175693

而预期的结果应该是这样的("StageII" 对应一行 NA):

#                  term  estimate std.error statistic      p.value   conf.low conf.high
# NA               <NA>        NA        NA        NA           NA         NA        NA
# GenderMale GenderMale 0.7952078 0.2916254  2.726813 6.394925e-03  0.2236327  1.366783
# NA.1             <NA>        NA        NA        NA           NA         NA        NA
# StageIII     StageIII 1.8824054 0.4342305  4.335037 1.457354e-05  1.0313293  2.733482
# NA.2             <NA>        NA        NA        NA           NA         NA        NA
# MSIMSI-Hi   MSIMSI-Hi 0.4550918 0.3676603  1.237805 2.157884e-01 -0.2655092  1.175693

改用类似下面这样的精确匹配写法会更好:

# Look up each label by exact equality via match(); labels with no matching
# row name (the reference levels) come back as all-NA rows.
coef[
  match(inds, row.names(coef)),
]

#                  term  estimate std.error statistic      p.value   conf.low conf.high
# NA               <NA>        NA        NA        NA           NA         NA        NA
# GenderMale GenderMale 0.7952078 0.2916254  2.726813 6.394925e-03  0.2236327  1.366783
# NA.1             <NA>        NA        NA        NA           NA         NA        NA
# StageIII     StageIII 1.8824054 0.4342305  4.335037 1.457354e-05  1.0313293  2.733482
# NA.2             <NA>        NA        NA        NA           NA         NA        NA
# MSIMSI-Hi   MSIMSI-Hi 0.4550918 0.3676603  1.237805 2.157884e-01 -0.2655092  1.175693

在该错误被修复之前,您可以改用不会发生部分匹配的因子水平标签,例如用 2/3 代替 II/III。

R相关问答推荐

使用rlang s arg_match判断函数输入列表

如何在xyplot中为每个面板打印R^2

有没有一个R函数允许你从一个数字变量中提取一个数字,而不考虑它的位置(不仅仅是第一个或最后一个数字?

在特定Quarto(reveal.js)幻灯片上隐藏徽标

我想在R中汇总一个巨大的数据框,只保留唯一的lat、lon、Date(Year)及其最大值(Maximum Value)

如何优化向量的以下条件赋值?

使用整齐的计算(curl -curl )和杂音

在使用tidyModels和XGBoost的二进制分类机器学习任务中,所有模型都失败

如何指定我的函数应该查找哪个引用表?

基于Key->Value数据帧的基因子集相关性提取

在`R`中用`ggmosaic`绘制的马赛克图中使用图案而不是颜色

Ggplot2如何找到存储在对象中的残差和拟合值?

按组使用dummy r获取高于标准的行的平均值

以R表示的NaN值的IS.NA状态

图中显示错误 colored颜色 的图例geom_sf

为什么R列名称忽略具有指定名称的向量,而只关注索引?

如何在一个GGPLATE中绘制多个灰度平滑?

如何将两个用不同的运算符替换*的矩阵相乘

如果y中存在x中的值,则将y行中的多个值复制到相应的x行中

移除y轴断开的geom_bar图的外框