Introduction

大家好.这是我在这里的第一个帖子-我正在尽我最大的努力给你一切所需的东西来帮助我,希望我不会忘记任何东西.

我正在尝试构建一个循环,用ggplot批量创建堆叠条形图,但我失败了,因为循环变量在ggplot的标签和‘group_by’命令中都无法被识别.

首先,让我分享一下我的数据框的一部分:

Data frame snippet

# Example data: a 15-row tibble written out in structure() form (presumably
# dput() output). `Weight` is a numeric column; every other column is a factor
# carrying a `label` attribute (the question text) and a `labels` attribute
# (named value labels).
Germany <-
  structure(
    list(
      Weight = structure(
        c(
          0.9254366,
          0.9673721,
          1.1321498,
          2.7208848,
          0.7328256,
          0.9142997,
          1.53218,
          0.9577866,
          0.2420226,
          0.7830253,
          1.1321498,
          0.9828443,
          0.9770092,
          0.7830253,
          0.8787283
        ),
        label = "Weight",
        format.spss = "F10.7",
        display_width = 12L
      ),
      Q6 = structure(
        c(10L, 9L, 10L, 6L, 10L, 10L, 10L, 10L, 8L,
          7L, 9L, 10L, 7L, 10L, 10L),
        levels = c("1", "2", "3", "4",
                   "5", "6", "7", "8", "9", "10", "99"),
        labels = c(
          `1 - not at all important` = 1,
          `10 - absolutely important` = 10,
          `No answer` = 99
        ),
        label = "Q6: Support for Democracy - How important is it for you to live in a country that is governed democratically?",
        class = "factor"
      ),
      Q7 = structure(
        c(3L, 8L, 10L, 8L, 10L, 4L, 8L, 8L, 5L, 4L,
          10L, 10L, 9L, 2L, 9L),
        levels = c("1", "2", "3", "4", "5",
                   "6", "7", "8", "9", "10", "99"),
        labels = c(
          `1 - not at all democratic` = 1,
          `10 - completely democratic` = 10,
          `No answer` = 99
        ),
        label = "Q7: Support for Democracy - And how democratically is #COUNTRY_NAME being governed today?",
        class = "factor"
      ),
      Q8 = structure(
        c(4L, 9L, 9L, 7L, 10L, 4L, 8L, 8L, 9L, 2L,
          7L, 9L, 5L, 2L, 8L),
        levels = c("1", "2", "3", "4", "5",
                   "6", "7", "8", "9", "10", "99"),
        labels = c(
          `1 - not satisfied` = 1,
          `10 - very satisfied` = 10,
          `No answer` = 99
        ),
        label = "Q8: Support for Democracy - Overall, could you tell me how satisfied you are with the way democracy works in #COUNTRY_NAME?",
        class = "factor"
      ),
      Q9 = structure(
        c(2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L,
          2L, 1L, 2L, 1L, 2L),
        levels = c("1", "2", "3", "4", "99"),
        labels = c(
          `Strongly agree` = 1,
          Agree = 2,
          Disagree = 3,
          `Strongly disagree` = 4,
          `No answer` = 99
        ),
        label = "Q9: Support for Democracy - To what extent do you agree or disagree with this statement?",
        class = "factor"
      ),
      D2_GENDER_BINARY = structure(
        c(2L, 1L, 2L, 2L, 1L, 1L, 2L,
          2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L),
        levels = c("1", "2"),
        labels = c(Male = 1,
                   Female = 2),
        label = "Gender binary",
        class = "factor"
      ),
      D1a_AGEGROUPS_75 = structure(
        c(3L, 1L, 4L, 2L, 5L, 3L, 1L,
          5L, 1L, 1L, 4L, 3L, 1L, 1L, 2L),
        levels = c("18-29", "30-39",
                   "40-49", "50-65", "66-75"),
        labels = c(
          `Younger than 18` = 1,
          `18-29` = 2,
          `30-39` = 3,
          `40-49` = 4,
          `50-65` = 5,
          `66-75` = 6
        ),
        label = "Age Groups (max age 75)",
        class = "factor"
      ),
      Q5_14 = structure(
        c(1L, 6L, 2L, 2L, 1L, 2L, 2L, 1L, 4L, 2L,
          1L, 10L, 7L, 1L, 5L),
        levels = c("1", "2", "3", "4", "5",
                   "6", "7", "8", "9", "10"),
        labels = c(
          `1 - not at all democratic` = 1,
          `10 - completely democratic` = 10,
          `No answer` = 99
        ),
        label = "Q5: Support for Democracy - The country’s security agencies collect data on their citizens’ internet activity : Countries around the world differ in how democratic they are. We sampled the following practices from around the world. How democratic do yo",
        class = "factor"
      ),
      Q14 = structure(
        c(39L, 9L, 7L, 6L, 8L, 9L, 8L, 11L, 7L, 11L,
          39L, 6L, 7L, 7L, 8L),
        levels = c(
          "1",
          "2",
          "3",
          "4",
          "5",
          "6",
          "7",
          "8",
          "9",
          "10",
          "11",
          "12",
          "13",
          "14",
          "15",
          "16",
          "17",
          "18",
          "19",
          "20",
          "21",
          "22",
          "23",
          "24",
          "25",
          "26",
          "27",
          "28",
          "29",
          "30",
          "31",
          "32",
          "33",
          "34",
          "35",
          "36",
          "37",
          "38",
          "96",
          "99"
        ),
        labels = c(
          Reform = 1,
          Centre = 2,
          EKRE = 3,
          SDE = 4,
          Isamaa = 5,
          `CDU/CSU` = 6,
          SPD = 7,
          `Bündnis 90/Die Grünen` = 8,
          AfD = 9,
          FDP = 10,
          `Die Linke` = 11,
          `United Right (Zjednoczona Prawica)` = 12,
          `Civic Coalition (Koalicja Obywatelska)` = 13,
          `The Left (Lewica)` = 14,
          `Polish Coalition (Koalicja Polska)` = 15,
          `Confederation (Konfederacja)` = 16,
          `SNS coalition` = 17,
          `SPS-JS-KP-ZS` = 18,
          SPAS = 19,
          VMSZ = 20,
          `SPP-DPM` = 21,
          PSOE = 22,
          PP = 23,
          Vox = 24,
          Ciudadanos = 25,
          `Unidas Podemos` = 26,
          ERC = 27,
          Socialdemokraterna = 28,
          Moderaterna = 29,
          Sverigedemokraterna = 30,
          Vänsterpartiet = 31,
          Centerpartiet = 32,
          `Miljöpartiet de Gröna` = 33,
          `Servant of the People (Sluha Narodu)` = 34,
          `Opposition Platform- for Life` = 35,
          Fatherland = 36,
          `European Solidarity` = 37,
          `Voice (Holos)` = 38,
          Other = 96,
          `No answer` = 99
        ),
        label = "Q14: Which party did you vote in the #DATE1 parliamentary election? Please click on the answer option that applies to you",
        class = "factor"
      ),
      Q20 = structure(
        c(10L, 11L, 7L, 6L, 8L, 11L, 8L, 10L, 7L,
          9L, 40L, 6L, 7L, 7L, 8L),
        levels = c(
          "1",
          "2",
          "3",
          "4",
          "5",
          "6",
          "7",
          "8",
          "9",
          "10",
          "11",
          "12",
          "13",
          "14",
          "15",
          "16",
          "17",
          "18",
          "19",
          "20",
          "21",
          "22",
          "23",
          "24",
          "25",
          "26",
          "27",
          "28",
          "29",
          "30",
          "31",
          "32",
          "33",
          "34",
          "35",
          "36",
          "37",
          "38",
          "39",
          "97",
          "98",
          "99"
        ),
        labels = c(
          Reform = 1,
          Centre = 2,
          EKRE = 3,
          SDE = 4,
          Isamaa = 5,
          `CDU/CSU` = 6,
          SPD = 7,
          `Bündnis 90/Die Grünen` = 8,
          FDP = 9,
          `Die Linke` = 10,
          AfD = 11,
          PiS = 12,
          PO = 13,
          Polska2050 = 14,
          Lewica = 15,
          Konfederacja = 16,
          SNS = 17,
          SPS = 18,
          `Dosta je bilo` = 19,
          SSP = 20,
          Dveri = 21,
          `Demokratska Stranka` = 22,
          PSOE = 23,
          PP = 24,
          Vox = 25,
          Ciudadanos = 26,
          `Unidas Podemos` = 27,
          ERC = 28,
          Socialdemokraterna = 29,
          Moderaterna = 30,
          Sverigedemokraterna = 31,
          Vänsterpartiet = 32,
          Centerpartiet = 33,
          `Miljöpartiet de Gröna` = 34,
          `Servant of the People (Sluha Narodu)` = 35,
          `Opposition Platform- for Life` = 36,
          Fatherland = 37,
          `European Solidarity` = 38,
          `Voice (Holos)` = 39,
          Other = 97,
          `Would not vote` = 98,
          `No answer` = 99
        ),
        label = "Q20: Party Preferences - If there were a parliamentary election in the following days, which party would you vote for?",
        class = "factor"
      ),
      Q42 = structure(
        c(2L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 2L,
          2L, 2L, 1L, 2L, 2L),
        levels = c("1", "2"),
        labels = c(
          `Most people can be trusted` = 1,
          `Need to be very careful` = 2
        ),
        label = "Q42: Interest in Politics - Generally speaking, would you say that most people can be trusted or that you need to be very careful in dealing with people?",
        class = "factor"
      )
    ),
    row.names = c(NA,-15L),
    class = c("tbl_df", "tbl", "data.frame")
  )

Reproducible Working Example

在生成单个堆叠条形图时,使用以下代码即可正常工作:

library(tidyverse)
library(sjlabelled)
library(surveytoolbox) # install with devtools::install_github("martinctc/surveytoolbox")
library(hrbrthemes)



# Break a long label into several lines.
#
# label: character vector of labels to wrap.
# width: target line width, forwarded to str_wrap().
#
# Returns the wrapped label(s) as produced by str_wrap().
wrap_label <- function(label, width) {
  wrapped <- str_wrap(label, width = width)
  wrapped
}

# Weighted counts per answer option (Q6) and age group.
# Rows with a missing answer are dropped first. `.groups = "drop"` returns an
# ungrouped result directly, which avoids summarise()'s regrouping message and
# the separate ungroup() step.

counts <- Germany %>%
  filter(!is.na(Q6)) %>%
  group_by(Q6, D1a_AGEGROUPS_75) %>%
  summarise(weighted_count = sum(Weight), .groups = "drop")

# Total weight of all rows in the data, including rows without a Q6 answer

total_obs <- sum(Germany$Weight)

# Weighted percentages, computed within each answer option:
#   total_weight       - summed weight of the answer option
#   weighted_pct       - share of each age group within the answer option
#   total_weighted_pct - share of the answer option among all respondents

percentages <- counts %>%
  group_by(Q6) %>%
  mutate(
    total_weight = sum(weighted_count),
    weighted_pct = weighted_count / total_weight * 100,
    # reuse total_weight instead of summing weighted_count a second time
    total_weighted_pct = total_weight / total_obs * 100
  ) %>%
  ungroup()



# The plot itself:
# stacked bar chart of weighted percentages, one bar per Q6 answer option,
# split by age group.

ggplot(percentages, aes(x = Q6, y = weighted_pct, fill = D1a_AGEGROUPS_75)) +
  geom_col() +
  # Legend title: the age-group variable label, wrapped at 65 characters.
  # `width` belongs to str_wrap(); the earlier version passed it to
  # get_label(), so the wrap width was never applied. The title is set here
  # only, instead of once on the scale and again through guides().
  scale_fill_brewer(
    name = str_wrap(get_label(Germany$D1a_AGEGROUPS_75), width = 65),
    palette = "Set2"
  ) +
  # Percentage of each age group, centred inside its bar segment
  geom_text(
    aes(label = round(weighted_pct), group = D1a_AGEGROUPS_75),
    position = position_stack(vjust = 0.5),
    size = 3,
    color = "white"
  ) +
  # Share of each answer option among all respondents. Every stacked bar sums
  # to 100, so y = 102.5 places the text just above the bar. `x` is inherited
  # from the top-level aes().
  geom_text(
    aes(y = 100 + 2.5, label = paste0(round(total_weighted_pct), "%")),
    size = 3
  ) +
  labs(
    # var_labels() is sjlabelled's label *setter*, so the axis text is passed
    # directly instead of through var_labels("Answer")
    x = wrap_label("Answer", width = 60),
    y = "Weighted percentage",
    title = str_wrap(get_label(Germany$Q6)),
    subtitle = paste0("Germany (n=", round(total_obs), ")")
  ) +
  theme_minimal(base_size = 11) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Reproducible failing example

在尝试构建循环时,我使用嵌套的for循环编写了以下代码.当我最终设法让代码运行起来时,ggplot标签和group_by命令中的循环变量名被忽略了,因此绘图看起来与单独创建的绘图完全不同.我确实阅读了很多关于符号(symbol)、字符串之类的内容,并尝试调整代码,但代码在‘group_by’命令处失败.

这是我当前的循环代码.我希望你能帮我找出它出了什么问题.如果我能为您提供更多的信息,请告诉我.


# Define variables to loop over
# NOTE: these are column names stored as character strings. Only some of them
# (Q6, Q7, Q8, Q9, Q5_14) appear in the data frame snippet shown earlier.
variables <- c("Q6", "Q7", "Q8", "Q9", "Q10_1", "Q10_2", "Q10_3",
               "Q10_4", "Q5_1", "Q5_2", "Q5_3", "Q5_4",
               "Q5_5", "Q5_6", "Q5_7", "Q5_8",
               "Q5_9", "Q5_10","Q5_11","Q5_12","Q5_13","Q5_14")

# Define grouping variables
# (also column names as strings)
grouping_vars <- c("D1a_AGEGROUPS_75",
                   "D2_GENDER_BINARY",
                   "Q14",
                   "Q20",
                   "Q42")



# Loop over variables and grouping variables
# NOTE: this is the non-working version, kept to illustrate the problem.
for(var in variables){
  for(group_var in grouping_vars){
    
    
    # Compute weighted counts by answer and age group
    # NOTE: `var` and `group_var` hold strings here. No column in the snippet
    # of `Germany` is called `var` or `group_var`, so dplyr presumably falls
    # back to the loop variables: is.na(var) then tests the string itself, and
    # group_by() groups by the constant string values rather than by the
    # columns they name.
    counts <- Germany %>%
      filter(!is.na(var)) %>%
        group_by(var, group_var) %>% 
        summarise(weighted_count = sum(Weight)) %>% 
        ungroup()
    
    # Compute weighted percentages by answer and age group and
    # total number of observations and percentage for each answer option
    total_obs <- sum(Germany$Weight)
    percentages <- counts %>%
      group_by(var) %>%
      mutate(total_weight = sum(weighted_count),
             weighted_pct = weighted_count / total_weight * 100,
             total_weighted_pct = sum(weighted_count)/total_obs*100) %>% 
      ungroup()

    
    # The plot itself
    # NOTE: aes(x = var, fill = group_var) likewise refers to the loop
    # variables (strings), not to the columns they name.
    p <- ggplot(percentages, aes(x = var, y = weighted_pct, fill = group_var)) +
      geom_col() +
      # NOTE(review): var_labels() is sjlabelled's label setter; get_label()
      # (used for the title below) is presumably what is intended - confirm
      scale_fill_brewer(name = var_labels(group_var), palette = "Set2") +

      # Add text labels with percentage for each age group on top of bars
      geom_text(aes(label = round(weighted_pct), group = group_var),
                position = position_stack(vjust = 0.5),
                size=3, color="white") +

      # Add text label with percentage for each answer option above bars
      geom_text(aes(x=var, y=100+2.5,
                    label=paste0(round(total_weighted_pct),"%")),
                size=3) +
      # Graphic titles
      labs(x = wrap_label(var_labels(Germany[[var]]), width=60),
           y="Weighted percentage",
           title=str_wrap(get_label(Germany[[var]])),
           subtitle=paste0("Germany (n=", round(total_obs), ")")) +
      theme_minimal(base_size=11) +
      theme(axis.text.x=element_text(angle=45, hjust=1)) +
      guides(fill=guide_legend(title=str_wrap(get_label(Germany[[group_var]] ), width=65)))

    
# Save plot as png file with appropriate name based on variable and grouping variable names
# (assumes the Output/Plots/ directory already exists - TODO confirm)
ggsave(filename=paste0("Output/Plots/",
                           var, "_", group_var, ".png"),
           plot=p,
           dpi=300,
           height=4,
           width=7,
           units='in')
  }
}

推荐答案

问题是您的循环变量var和group_var是字符串.在dplyr动词或ggplot2函数中直接使用这些字符串,就好像它们是不带引号的列名一样,是不起作用的.相反,您必须告诉dplyr和/或ggplot2这些字符串是数据集中列的名称,您可以通过使用.data代词来实现这一点,例如,使用.data[[var]]而不只是var:

library(dplyr)
library(ggplot2)
library(stringr)
library(sjlabelled)

# Question columns to plot, given as column-name strings
variables <- c("Q6", "Q5_3")

# Grouping column(s) mapped to the bar fill, given as column-name strings
grouping_vars <- "D1a_AGEGROUPS_75"

# Total weight of all rows in the data; it does not depend on the loop
# variables, so it is computed once up front.
total_obs <- sum(Germany$Weight)

# Draw one stacked bar chart per (question, grouping variable) pair.
# `var` and `group_var` hold column names as strings, so every column
# reference inside dplyr / ggplot2 calls goes through the `.data` pronoun.
for (var in variables) {
  for (group_var in grouping_vars) {
    # Weighted counts per answer option and group, dropping missing answers.
    # `.groups = "drop"` returns an ungrouped result without the regrouping
    # message.
    counts <- Germany %>%
      filter(!is.na(.data[[var]])) %>%
      group_by(.data[[var]], .data[[group_var]]) %>%
      summarise(weighted_count = sum(Weight), .groups = "drop")

    # Percentages within each answer option, plus the answer option's share
    # of the total weight
    percentages <- counts %>%
      group_by(.data[[var]]) %>%
      mutate(
        total_weight = sum(weighted_count),
        weighted_pct = weighted_count / total_weight * 100,
        total_weighted_pct = total_weight / total_obs * 100
      ) %>%
      ungroup()

    p <- ggplot(
      percentages,
      aes(x = .data[[var]], y = weighted_pct, fill = .data[[group_var]])
    ) +
      geom_col() +
      # Legend title: label of the grouping variable. get_label() reads the
      # `label` attribute; var_labels() is the setter and would hand the
      # column itself to `name`.
      scale_fill_brewer(
        name = str_wrap(get_label(Germany[[group_var]]), width = 65),
        palette = "Set2"
      ) +
      # Percentage of each group, centred inside its bar segment
      geom_text(
        aes(label = round(weighted_pct), group = .data[[group_var]]),
        position = position_stack(vjust = 0.5),
        size = 3, color = "white"
      ) +
      # Share of each answer option, just above the bar (each bar sums to 100)
      geom_text(
        aes(
          y = 100 + 2.5,
          label = paste0(round(total_weighted_pct), "%")
        ),
        size = 3
      ) +
      labs(
        x = wrap_label(get_label(Germany[[var]]), width = 60),
        y = "Weighted percentage",
        title = str_wrap(get_label(Germany[[var]])),
        subtitle = paste0("Germany (n=", round(total_obs), ")")
      ) +
      theme_minimal(base_size = 11) +
      theme(axis.text.x = element_text(angle = 45, hjust = 1))

    # ggplot objects are not auto-printed inside a for loop
    print(p)
  }
}

R相关问答推荐

创建重复删除的唯一数据集组合列表

跨列应用多个摘要函数:summarise_all:列表对象无法强制为double类型

在ggplot的注释表格中突出显示最大值

根据R中的另一个日期从多列中 Select 最近的日期和相应的结果

如果行和大于值,则过滤

dplyr summarise每个组返回多行?

如何通过Docker部署我的shiny 应用程序(多个文件)

根据元素和前一个值之间的差值过滤矩阵的元素

R—将各种CSV数字列转换为日期

在不丢失空值的情况下取消列出嵌套列表

使用`Watch()`和`renderUI()`时,不再满足仍出现在SHILINY AFTER条件中的条件输入

按时间顺序对不同事件进行分组

QY数据的处理:如何定义QY因素的水平

列名具有特殊字符时的循环回归

删除在R中的write.table()函数期间创建的附加行

为R中的16组参数生成10000个样本的有效方法是什么?

R-找出存在其他变量的各种大小的所有组合

使用一个标签共享多个组图图例符号

了解nchar在列表上的意外行为

如何准确地指出Read_delim所面临的问题?