R 如何在ggplot2中创建多个y轴(每个变量一个)

发布于02月21日

我想为描述性统计绘制一个类似于平行坐标的曲线图.我想画出每个变量按性别分层的平均值和标准差.

不幸的是，我无法找到一种方法来 for each 变量创建自己的y轴.

它看起来应该类似于这个曲线图，但带有平均值和标准差，而不是每行.

在this post中，他们创建了另一个图，但我无法根据我的需要和数据对其进行调整，因此平均值和标准差如上图所示.

这是我的数据和代码:


library(dplyr)
library(tidyr)
library(ggplot2)

my_data <- data.frame(
  sex = c("m", "w", "m", "w", "m", "w"),
  age = c(25, 30, 22, 35, 28, 46),
  testosterone = c(450, 200, 400, 300, 500, 350),
  cognition = c(75, 80, 70, 85, 78, 90),
  estrogen = c(20, 40, 15, 50, 10, 45)
)

numeric_vars <- c("age", "testosterone", "cognition", "estrogen")

# Calculate means and standard deviations for each sex and each variable
df_means <- my_data %>%
  group_by(sex) %>%
  summarise(across(all_of(numeric_vars), mean, na.rm = TRUE))

df_sds <- my_data %>%
  group_by(sex) %>%
  summarise(across(all_of(numeric_vars), sd, na.rm = TRUE))

# Reshape data for ggplot
df_melted <- reshape2::melt(df_means, id.vars = 'sex')
df_sds_melted <- reshape2::melt(df_sds, id.vars = 'sex')

# Combine means and standard deviations
df_combined <- merge(df_melted, df_sds_melted, by = c('sex', 'variable'))
names(df_combined)[names(df_combined) == 'value.x'] <- 'mean'
names(df_combined)[names(df_combined) == 'value.y'] <- 'sd'

# Create a parallel coordinates plot without normalization
ggplot(df_combined, aes(x = variable, y = mean, group = sex, color = sex)) +
  geom_line() +
  geom_ribbon(aes(ymin = mean - sd, ymax = mean + sd, fill = sex), alpha = 0.2) +
  scale_color_manual(values = c("m" = "#007BC3", "w" = "#EA5451")) +
  scale_fill_manual(values = c("m" = "#007BC3", "w" = "#EA5451")) +
  theme_minimal()

library(dplyr) library(tidyr) library(ggplot2) long_data <- my_data %>% summarize( across(all_of(numeric_vars), list(mu = mean, sd = sd, min = min, max = max)), .by = sex ) %>% pivot_longer(-sex, names_pattern = "(.*)_(.*)", names_to = c("variable", ".value")) %>% mutate( variable = factor(variable, levels = c("age", "testosterone", "cognition", "estrogen")), x = as.numeric(variable) ) %>% mutate( min = min(min), max = max(max), mu_01 = scales::rescale(mu, from = c(min[1], max[1]), to = 0:1), sd_01 = sd / abs(min[1] - max[1]), .by = variable ) ticks <- summarize(long_data, .by = c(variable, x), y = pretty(c(min, max), n = 8), y_01 = scales::rescale(y, from = c(min[1], max[1]), to = 0:1)) tick_ends <- c(-0.05, 0) ggplot(long_data, aes(x, mu_01, group = sex, color = sex)) + geom_segment( aes(x = x + tick_ends[1], xend = x + tick_ends[2], y = y_01, yend = y_01), data = ticks, inherit.aes = FALSE ) + geom_line( aes(x = x, y = y_01, group = variable), data = summarize(ticks, .by = variable, x = x[1] + tick_ends[2], y_01 = range(y_01)), inherit.aes = FALSE ) + geom_ribbon(aes(ymin = mu_01 - sd_01, ymax = mu_01 + sd_01, fill = sex), alpha = 0.2) + geom_line() + geom_text( aes(x = x + tick_ends[1], y = y_01, label = y), hjust = 1.2, data = ticks, inherit.aes = FALSE ) + scale_x_continuous( name = NULL, breaks = seq_along(levels(long_data$variable)), labels = levels(long_data$variable), limits = range(as.numeric(long_data$variable)) + c(-0.1, 0.1), minor_breaks = seq(1, 10, 1), position = "top" ) + scale_y_continuous(name = NULL, breaks = NULL) + scale_color_manual(values = c("m" = "#007BC3", "w" = "#EA5451")) + scale_fill_manual(values = c("m" = "#007BC3", "w" = "#EA5451")) + theme_minimal()

延拓

为了使内轴对齐，我不知道有没有比这样控制重新zoom 更好的方法.现在使用新数据:

my_data <- data.frame( sex = c("m", "w", "m", "w", "m", "w"), age = c(25, 30, 22, 35, 28, 46), testosterone = c(450, 5, 400, 10, 500, 15), cognition = c(0.1, 0.5, 0.3, 0.6, 0.4, 0.1), estrogen = c(20, 80, 15, 60, 10, 70), Var6 = c(1000, 900, 600, 800, 700, 500), Var7 = c(3, 5, 6, 7, 3, 4), Var8 = c(20, 15, 30, 35, 25, 60), Var9 = c(0.6, 0.9, 1.4, 2.1, 1.2, 0.6), Var10 = c(200, 150, 300, 200, 130, 400), Var11 = c(900, 800, 450, 600, 650, 750), Var12 = c(30, 45, 60, 40, 30, 45) ) numeric_vars <- setdiff(names(my_data), "sex")

使用该代码和略微更新的代码:

extend_rescale <- function(x, name = "variable") { p <- pretty(range(x)) d <- diff(p)[1] p <- c(p[1], p[length(p)]) + d * c(-1, 1) out <- data.frame( x = scales::rescale(x, from = p, to = 0:1), step = d, min = p[1], max = p[2] ) names(out)[1] <- name out } long_data <- my_data %>% summarize( across(all_of(numeric_vars), list(mu = mean, sd = sd, min = min, max = max)), .by = sex ) %>% pivot_longer(-sex, names_pattern = "(.*)_(.*)", names_to = c("variable", ".value")) %>% mutate( variable = factor(variable), # set levels= if desired x = as.numeric(variable) ) %>% mutate( extend_rescale(c(range(c(mu + sd, mu - sd)), mu), name = "mu_01")[-(1:2),], sd_01 = sd / abs(max[1] - min[1]), .by = variable) ticks <- summarize(long_data, .by = c(variable, x), y = seq(min[1], max[2], by = step[1]), y_01 = scales::rescale(y, from = c(min[1], max[1]), to = 0:1)) tick_ends <- c(-0.05, 0)

(相同的打印代码.)

R 如何在ggplot2中创建多个y轴(每个变量一个)

推荐答案

延拓

R相关问答推荐

使用R的序列覆盖

如何删除R中除某些特定名称外的所有字符串？

R箱形图gplot 2 4组但6个参数

向gggplot 2中的数据和轴标签添加大写和星号

矩阵%*%矩阵中的错误：需要数字/复杂矩阵/向量参数

使用R中相同值创建分组观测指标

使用sf或terra的LINESTRAING的累积长度

为什么舍入POSIXct会更改能力以匹配等效的POSIXct？

使用Facet_WRAP时更改框图中线的 colored颜色

绘制采样开始和采样结束之间的事件

在R函数中使用加号

在使用具有Bray-Curtis相似性的pvCluust时计算p值

从数据创建数字的命名列表.R中的框

有没有办法定制Plot(allEffects())面板标题？

为R中的16组参数生成10000个样本的有效方法是什么？

如何在使用Alpha时让geom_curve在箭头中显示恒定透明度

访问数据帧中未定义的列时出现R错误

在r中整理图例和堆叠图的问题

构建一个6/49彩票模拟系统

整理曲线图、曲线图和点图