我用下面的代码绘制了一张小提琴图,并对数据做了方差分析和 Games-Howell 事后检验。检验已成功运行,结果显示 A、B、C 和 D 亚基因组之间存在显著差异。然而,我无法在图中把这些显著差异标注出来。我认为只需要修复代码的最后一部分,即 `# Annotate significant differences` 这一段。

方差分析的事后检验给出了下面的显著性结果。我想做的只是在图上用 * 标出 A 与 B、A 与 C、A 与 D 之间的显著性。

各处理(treatment)均值之间的比较:

            difference pvalue signif.          LCL         UCL
    A - B -0.037101857 0.0000       * -0.046188341 -0.02801537
    A - C -0.028211022 0.0000       * -0.037394783 -0.01902726
    A - D -0.030163234 0.0000       * -0.039466699 -0.02085977
    B - C  0.008890835 0.1059          -0.00118665  0.01896834
    B - D  0.006938623 0.2979         -0.003248084  0.01712533
    C - D -0.001952212 0.9618         -0.012225785  0.00832136

我尝试调整 ymax 的上限,但收到以下警告消息:

Warning messages:
1: Removed 1 row containing missing values or values outside the scale range
(`geom_text()`). 
2: Removed 1 row containing missing values or values outside the scale range
(`geom_text()`). 
3: Removed 1 row containing missing values or values outside the scale range
(`geom_text()`). 

我的代码:

library(dplyr)
library(agricolae)
library(stringr)
library(ggplot2)
library(Hmisc)

# Read the tab-separated input file and treat subgenome as a categorical variable
df <- read.table('ABCD-meth-r1.tsv', header = TRUE, sep = "\t")
df$subgenome <- factor(df$subgenome)

# Build the x-axis labels: each factor level followed, on a new line, by its row count
my_xlab <- paste(levels(df$subgenome), "\n ",table(df$subgenome),sep="")

# Fit a one-way ANOVA of value on subgenome and print its summary table
anova_result <- aov(value ~ subgenome, data = df)
summary(anova_result)

# Initialize sig_diff as an empty data frame so that it exists even when no
# comparison turns out to be significant
sig_diff <- data.frame(group1 = character(), group2 = character(), pvalue = numeric())

# Run the post-hoc pairwise comparison only when the ANOVA p-value is below 0.05.
# NOTE(review): agricolae documents HSD.test as Tukey's HSD, not Games-Howell;
# confirm which test is actually intended.
if (summary(anova_result)[[1]][["Pr(>F)"]][1] < 0.05) {
    print("Significant differences detected, performing Games-Howell test")
    games_howell_result <- HSD.test(anova_result, "subgenome", group=FALSE, console=TRUE)

# Keep only the comparisons with p < 0.05; this overwrites the empty sig_diff.
# NOTE(review): the printed comparison table has the columns difference, pvalue,
# signif., LCL and UCL, with the pair label (e.g. "A - B") as the row name, so
# sig_diff presumably has no group1/group2 columns after this step; confirm.
if (nrow(games_howell_result$comparison) > 0) {
    sig_diff <- games_howell_result$comparison[games_howell_result$comparison$pvalue < 
    0.05,]
       }
    }

# Reference height for the annotations: 10% above the largest observed value
ymax <- max(df$value, na.rm = TRUE) * 1.1
print(paste("ymax for annotation:", ymax))

# Violin plot with a narrow white box plot and a dark green point at the mean,
# using the count labels built above on the x axis
p <- ggplot(df, aes(x = subgenome, y = value, fill = subgenome)) +
      geom_violin() + 
      geom_boxplot(width=0.08, fill="white")+
      stat_summary(fun=mean, geom="point", shape=20, size=1, color="darkgreen") +
      scale_x_discrete(labels=my_xlab)+
      xlab("") +
      ylab("CDS methylation") +
      theme_bw()
      # NOTE(review): the line above does not end with `+`, so the ylim() call
      # below is a separate top-level expression and is never added to p.
      ylim(0, ymax)

# For every retained comparison, look up the x positions of its two groups and
# add a "*" label plus a red horizontal segment between them.
# NOTE(review): if sig_diff has no group1/group2 columns, which() returns
# integer(0) and mid_x becomes NaN; that would presumably explain the
# "Removed 1 row ... (geom_text())" warnings. Confirm.
# NOTE(review): every segment is drawn at the same height (ymax), so several
# significant pairs would overlap.
if (nrow(sig_diff) > 0) {
    for(i in 1:nrow(sig_diff)) {
        sub1_index <- which(levels(df$subgenome) == sig_diff$group1[i])
        sub2_index <- which(levels(df$subgenome) == sig_diff$group2[i])
        mid_x <- mean(c(sub1_index, sub2_index))
        print(paste("Annotating between:", sig_diff$group1[i], "and", sig_diff$group2[i], 
    "at", mid_x))
        p <- p + annotate("text", label = "*", x = mid_x, y = ymax * 0.95, size = 5, 
    vjust = 0)
        p <- p + annotate("segment", x = sub1_index, xend = sub2_index, y = ymax, yend = 
    ymax, color = "red", linewidth = 1)
    }
}

# Render the finished plot
print(p)

输出图:

enter image description here

  I made the following change to the last line of the plotting code;
  everything else stays the same:
  # Plotting code
# Violin plot of value per subgenome with a narrow white box plot and a dark
# green point at each group mean. The y range is set on the coordinate system
# with clipping turned off.
p <- ggplot(df, aes(x = subgenome, y = value, fill = subgenome)) +
  geom_violin() +
  geom_boxplot(width = 0.08, fill = "white") +
  stat_summary(
    fun = mean, geom = "point",
    shape = 20, size = 1, color = "darkgreen"
  ) +
  scale_x_discrete(labels = my_xlab) +
  labs(x = "", y = "CDS methylation") +
  theme_bw() +
  coord_cartesian(ylim = c(0, ymax), clip = "off")

Made the following change in the plot:
# Start from an empty table so that sig_diff always exists, even when no
# comparison turns out to be significant.
sig_diff <- data.frame(group1 = character(), group2 = character(), pvalue = numeric())

# The two lines below are the newly added ones: they split row names of the
# form "A - B" into the two group labels. The Markdown bold markers that had
# leaked into the code were removed so that the lines parse as R.
# NOTE(review): sig_diff has no rows at this point, so both assignments are
# no-ops here; the extraction only has an effect once sig_diff holds the
# filtered comparison rows.
sig_diff$group2 <- sub(".+ \\- (.+)", "\\1", row.names(sig_diff))
sig_diff$group1 <- sub("(.+) \\- .+", "\\1", row.names(sig_diff))

enter image description here

推荐答案

我做了一些改动,现在效果很好。设置 ymax 时必须小心,并且要在各条标注线段之间留出一些间隔。您是否把对数据框(sig_diff)的修正放在了正确的位置?我在这里使用了一些虚构的数据。

library(dplyr)
library(ggplot2)
library(agricolae)
set.seed(123)
# Made-up data: four subgenomes (A-D) with 250 rows each; the values are drawn
# without replacement from 1:100000 and scaled by 1 / 100000.
df <- data.frame(
  subgenome = factor(rep(c("A", "B", "C", "D"), each = 250)),
  value = sample(x = 1:100000, size = 1000) / 100000
)
head(df)
#   subgenome   value
# 1         A 0.51663
# 2         A 0.57870
# 3         A 0.02986
# ...

# x-axis labels: level name with its row count on a second line
xlabels <- paste0(levels(df$subgenome), "\n", table(df$subgenome))

anova_result <- aov(value ~ subgenome, data = df)
# NOTE(review): agricolae documents HSD.test as Tukey's HSD; the variable name
# is kept from the question, but confirm whether Games-Howell is required.
games_howell_result <- HSD.test(anova_result, "subgenome", group = FALSE, console = TRUE)

# Significance cut-off. 0.85 is used only so that this dummy data set yields
# some "significant" pairs; use 0.05 on real data.
alpha <- 0.85
comparisons <- games_howell_result$comparison
sig_diff <- comparisons[comparisons$pvalue < alpha, ]

# The pair labels live in the row names ("A - B"); split them into two columns
# AFTER filtering, so every retained row gets its group labels.
sig_diff$group1 <- sub("(.+) \\- .+", "\\1", row.names(sig_diff))
sig_diff$group2 <- sub(".+ \\- (.+)", "\\1", row.names(sig_diff))
sig_diff
#       difference pvalue signif.         LCL        UCL group1 group2
# A - B 0.02266492 0.8184         -0.04407579 0.08940563      A      B
# A - C 0.02760976 0.7112         -0.03913095 0.09435047      A      C
# A - D 0.02409436 0.7893         -0.04264635 0.09083507      A      D

ymax <- max(df$value, na.rm = TRUE)
# [1] 0.99889

p <- ggplot(df, aes(x = subgenome, y = value, fill = subgenome)) +
  geom_violin() +
  geom_boxplot(width = 0.08, fill = "white") +
  stat_summary(fun = mean, geom = "point", shape = 20, size = 1, color = "darkgreen") +
  # apply the count labels built above (they were computed but unused before)
  scale_x_discrete(labels = xlabels) +
  xlab("") +
  ylab("CDS methylation") +
  theme_bw()

# Vertical offset of the current annotation above ymax; it grows with every
# pair so that the segments do not overlap.
sb <- 0.1 # starting value

# seq_len() yields an empty sequence when sig_diff has no rows, so no separate
# nrow() guard is needed.
for (i in seq_len(nrow(sig_diff))) {
  sub1_index <- which(levels(df$subgenome) == sig_diff$group1[i])
  sub2_index <- which(levels(df$subgenome) == sig_diff$group2[i])
  mid_x <- mean(c(sub1_index, sub2_index))
  print(paste(
    "Annotating between:", sig_diff$group1[i], "and", sig_diff$group2[i],
    "at", mid_x
  ))
  # "*" centred between the two groups, sitting on top of the segment
  p <- p + annotate("text", label = "*",
    x = mid_x, y = ymax + sb, size = 5, vjust = 0
  )
  # red horizontal segment spanning the two groups
  p <- p + annotate("segment", color = "red", linewidth = 1,
    x = sub1_index, xend = sub2_index,
    y = ymax + sb, yend = ymax + sb
  )
  # Add space before the next pair
  sb <- sb + 0.15
}
# [1] "Annotating between: A and B at 1.5"
# [1] "Annotating between: A and C at 2"
# [1] "Annotating between: A and D at 2.5"

# Extend the visible y range up to the final offset so that every annotation
# falls inside the panel.
p + coord_cartesian(ylim = c(0, ymax + sb), expand = FALSE, clip = "off")

上图把注释放在绘图区域之内;您也可以把 ylim 的上限设得低一些(同时设置 clip = "off"),并加大上边距,从而把注释放在绘图区域之外:

# Keep the panel's y range at the data maximum, leave clipping off, and enlarge
# the top plot margin to 1 inch (0.1 inch on the other three sides).
p +
  theme(
    plot.margin = margin(t = 1, r = 0.1, b = 0.1, l = 0.1, unit = "in")
  ) +
  coord_cartesian(ylim = c(0, ymax), expand = FALSE, clip = "off")

R相关问答推荐

R:如何自动化变量创建过程,其中我需要基于ifelse()为现有变量的每个级别创建一个单独的变量

给定R中另一行中的值,如何插补缺失值

如何在ggplot 2线性图的每个方面显示每个组的误差条?

ggplot geom_smooth()用于线性回归虚拟变量-没有回归线

如何对数据集进行逆向工程?

dplyr summarise 每个组返回多行?

ggplot的轴标签保存在officer中时被剪切

在for循环中转换rabrame

如何在区分不同条件的同时可视化跨时间的连续变量?

如何直接从R中的风险分数计算c指数?

对于变量的每个值,仅 Select 包含列表中所有值的值.R

我正在努力用R计算数据集中的中值逐步距离

根据列表中项目的名称合并数据框和列表

计算直线上点到参考点的总距离

计算Mean by分组和绑定到R中的数据集

对R中的列表列执行ROW Mean操作

如果满足条件,则替换列的前一个值和后续值

position_dodge 在 geom_point 图中不起作用(点没有错开)

R/shiny APP:如何充分利用窗口?

将Geojson保存为R中的shapefile