下面的代码创建了一个包含3列的数据框.

set.seed(142222)
num_lots <- 5

# Create an empty data frame to store the simulated data
data <- data.frame(Lot = rep(1:num_lots, each = 9),
                   Time = rep(3 * 0:8, times = num_lots),
                   Measurement = numeric(num_lots * 9))

# Simulate purity data for each lot and time point
for (lot in 1:num_lots) {
  # Generate random intercept and slope for each lot
  intercept <- rnorm(1, mean = 95, sd = 2)
  slope <- runif(1, min = -.7, max = 0)
  
  for (month in 0:8) {
    # Simulate purity data with noise
    data[data$Lot == lot & data$Time == month * 3, "Purity"] <- intercept + slope * month * 3 + rnorm(1, mean = 0, sd = .35)
  }
}

然后对模拟数据进行了混合效应模型的拟合.详情如下:

ggplot(data, aes(x = Time, y = Purity, color = as.factor(Lot), shape = as.factor(Lot))) +
  geom_point() +
  geom_smooth(method = "lm", se=FALSE, type = 1) +
  labs(
    title = "Test",
    x = "month",
    y = "Purity",
    color = "Lot",    # Set legend title for color
    shape = "Lot"     # Set legend title for shape
  ) +
  theme_minimal() +
  scale_x_continuous(breaks = c(0, 3, 6, 9, 12, 15, 18, 21, 24))

结果如下所示:

enter image description here

我只想展示worst regression line号公路上的95% lower confidence bound号.我怎么能做到这一点?

Worst regression line是比其他线更早与水平线==80相交的线. 我知道如果我设置为se == TRUE,那么所有行的所有置信限都会显示出来.但我只想对最差的线有较低的置信度.

Bonus question:如何修复图例,使其只显示符号(而不显示符号上方的线条)?

推荐答案

你可以画出两个geom_smooth()--一个代表4条‘好’线,一个代表1条‘最差’线,例如

library(tidyverse)

set.seed(142222)
num_lots <- 5

# Create an empty data frame to store the simulated data
data <- data.frame(Lot = rep(1:num_lots, each = 9),
                   Time = rep(3 * 0:8, times = num_lots),
                   Measurement = numeric(num_lots * 9))

# Simulate purity data for each lot and time point
for (lot in 1:num_lots) {
  # Generate random intercept and slope for each lot
  intercept <- rnorm(1, mean = 95, sd = 2)
  slope <- runif(1, min = -.7, max = 0)
  
  for (month in 0:8) {
    # Simulate purity data with noise
    data[data$Lot == lot & data$Time == month * 3, "Purity"] <- intercept + slope * month * 3 + rnorm(1, mean = 0, sd = .35)
  }
}

ggplot(data = data,
       aes(x = Time, y = Purity, 
           color = as.factor(Lot), 
           shape = as.factor(Lot))) +
  geom_point(key_glyph = "point") +
  geom_smooth(data = data %>% filter(Lot == 2),
              method = "lm", se=TRUE, type = 1,
              key_glyph = "point") +
  geom_smooth(data = data %>% filter(Lot != 2),
              method = "lm", se=FALSE, type = 1,
              key_glyph = "point") +
  labs(
    title = "Test",
    x = "month",
    y = "Purity",
    color = "Lot",    # Set legend title for color
    shape = "Lot"     # Set legend title for shape
  ) +
  theme_minimal() +
  scale_x_continuous(breaks = c(0, 3, 6, 9, 12, 15, 18, 21, 24))
#> Warning in geom_smooth(data = data %>% filter(Lot == 2), method = "lm", :
#> Ignoring unknown parameters: `type`
#> Warning in geom_smooth(data = data %>% filter(Lot != 2), method = "lm", :
#> Ignoring unknown parameters: `type`
#> `geom_smooth()` using formula = 'y ~ x'
#> `geom_smooth()` using formula = 'y ~ x'

创建于2023-10-12年第reprex v2.0.2

编辑

与其使用key_glyph = "point",不如根据@stefan的 comments 使用show_legend = FALSE:

library(tidyverse)

set.seed(142222)
num_lots <- 5

# Create an empty data frame to store the simulated data
data <- data.frame(Lot = rep(1:num_lots, each = 9),
                   Time = rep(3 * 0:8, times = num_lots),
                   Measurement = numeric(num_lots * 9))

# Simulate purity data for each lot and time point
for (lot in 1:num_lots) {
  # Generate random intercept and slope for each lot
  intercept <- rnorm(1, mean = 95, sd = 2)
  slope <- runif(1, min = -.7, max = 0)
  
  for (month in 0:8) {
    # Simulate purity data with noise
    data[data$Lot == lot & data$Time == month * 3, "Purity"] <- intercept + slope * month * 3 + rnorm(1, mean = 0, sd = .35)
  }
}

ggplot(data = data,
       aes(x = Time, y = Purity, 
           color = as.factor(Lot), 
           shape = as.factor(Lot))) +
  geom_point() +
  geom_smooth(data = data %>% filter(Lot == 2),
              method = "lm", formula = "y ~ x",
              se=TRUE,
              show.legend = FALSE) +
  geom_smooth(data = data %>% filter(Lot != 2),
              method = "lm", formula = "y ~ x",
              se=FALSE,
              show.legend = FALSE) +
  labs(
    title = "Test",
    x = "month",
    y = "Purity",
    color = "Lot",    # Set legend title for color
    shape = "Lot"     # Set legend title for shape
  ) +
  theme_minimal() +
  scale_x_continuous(breaks = c(0, 3, 6, 9, 12, 15, 18, 21, 24))

创建于2023-10-12年第reprex v2.0.2

编辑 2

你可以用不同的方法自动 Select 最差的线;最简单的方法是在时间=0时 Select 纯度最低的批次,但这可能会根据你的数据而变化(即,你可能想要在时间=24?时 Select 纯度最低的批次?).你只能画出上界,但你必须自己计算坐标.

library(tidyverse)

set.seed(142222)
num_lots <- 5

# Create an empty data frame to store the simulated data
data <- data.frame(Lot = rep(1:num_lots, each = 9),
                   Time = rep(3 * 0:8, times = num_lots),
                   Measurement = numeric(num_lots * 9))

# Simulate purity data for each lot and time point
for (lot in 1:num_lots) {
  # Generate random intercept and slope for each lot
  intercept <- rnorm(1, mean = 95, sd = 2)
  slope <- runif(1, min = -.7, max = 0)
  
  for (month in 0:8) {
    # Simulate purity data with noise
    data[data$Lot == lot & data$Time == month * 3, "Purity"] <- intercept + slope * month * 3 + rnorm(1, mean = 0, sd = .35)
  }
}

# Select the worst regression line
worst <- data %>% filter(Purity == min(Purity)) %>% pull(Lot)

# Build the 5 linear models
output <- data %>%
  nest_by(Lot) %>%
  reframe(model = list(lm(data = data, formula = Purity ~ Time)))

# Apply the models and extract the coordinates
preds <- predict(output$model[[worst]], newdata = data, se.fit = TRUE)
input_df <- data.frame(fit = preds$fit, se.fit = preds$se.fit) %>%
  bind_cols(data)
  
# Plot data and input_df
ggplot(data = data,
       aes(x = Time, y = Purity, 
           color = as.factor(Lot), 
           shape = as.factor(Lot))) +
  geom_point() +
  geom_smooth(method = "lm", formula = "y ~ x",
              se=FALSE,
              show.legend = FALSE) +
  geom_ribbon(data = input_df, aes(x = Time, y = Purity,
                                   ymin = fit, ymax = fit + se.fit * 2),
              inherit.aes = FALSE, lty = 2, fill = "blue", alpha = 0.25) +
  labs(
    title = "Test",
    x = "month",
    y = "Purity",
    color = "Lot",    # Set legend title for color
    shape = "Lot"     # Set legend title for shape
  ) +
  theme_minimal() +
  scale_x_continuous(breaks = c(0, 3, 6, 9, 12, 15, 18, 21, 24))

创建于2023-10-12年第reprex v2.0.2

R相关问答推荐

通过R访问MoveApps API

指定要保留在wrap_plots中的传奇

在R中,将一个函数作为输入传递给另一个函数时进行参数判断

给定R中另一行中的值,如何插补缺失值

在特定列上滞后n行,同时扩展框架的长度

使用对管道内单元格的引用生成新变量

如何在R中合并和合并多个rabrame?

R根据条件进行累积更改

Ggplot2中的重复注记

使用R闪光显示所有数据点作为默认设置

使用Facet_WRAP时更改框图中线的 colored颜色

在使用bslb和bootstrap5时,有没有办法更改特定dt行的 colored颜色 ?

如何平滑或忽略R中变量的微小变化?

使用gt_summary是否有一种方法来限制每个变量集进行配对比较?

在不对R中的变量分组的情况下取两行的平均值

防止正则表达式覆盖以前的语句

使用ifElse语句在ggploy中设置aes y值

如何更改包中函数中的参数?

如果满足条件,则替换列的前一个值和后续值

策略表单连接两个非常大的箭头数据集,而不会 destruct 内存使用