Introduction
大家好.这是我在这里的第一个帖子-我正在尽我最大的努力给你一切所需的东西来帮助我,希望我不会忘记任何东西.
我正在try 构建一个循环,该循环使用带循环的ggplot创建堆叠条形图,但我失败了,因为循环变量在ggploy标签和‘group_by’命令中都无法识别.
首先,让我分享一下我的数据框的一部分:
Data frame snippet
Germany <-
structure(
list(
Weight = structure(
c(
0.9254366,
0.9673721,
1.1321498,
2.7208848,
0.7328256,
0.9142997,
1.53218,
0.9577866,
0.2420226,
0.7830253,
1.1321498,
0.9828443,
0.9770092,
0.7830253,
0.8787283
),
label = "Weight",
format.spss = "F10.7",
display_width = 12L
),
Q6 = structure(
c(10L, 9L, 10L, 6L, 10L, 10L, 10L, 10L, 8L,
7L, 9L, 10L, 7L, 10L, 10L),
levels = c("1", "2", "3", "4",
"5", "6", "7", "8", "9", "10", "99"),
labels = c(
`1 - not at all important` = 1,
`10 - absolutely important` = 10,
`No answer` = 99
),
label = "Q6: Support for Democracy - How important is it for you to live in a country that is governed democratically?",
class = "factor"
),
Q7 = structure(
c(3L, 8L, 10L, 8L, 10L, 4L, 8L, 8L, 5L, 4L,
10L, 10L, 9L, 2L, 9L),
levels = c("1", "2", "3", "4", "5",
"6", "7", "8", "9", "10", "99"),
labels = c(
`1 - not at all democratic` = 1,
`10 - completely democratic` = 10,
`No answer` = 99
),
label = "Q7: Support for Democracy - And how democratically is #COUNTRY_NAME being governed today?",
class = "factor"
),
Q8 = structure(
c(4L, 9L, 9L, 7L, 10L, 4L, 8L, 8L, 9L, 2L,
7L, 9L, 5L, 2L, 8L),
levels = c("1", "2", "3", "4", "5",
"6", "7", "8", "9", "10", "99"),
labels = c(
`1 - not satisfied` = 1,
`10 - very satisfied` = 10,
`No answer` = 99
),
label = "Q8: Support for Democracy - Overall, could you tell me how satisfied you are with the way democracy works in #COUNTRY_NAME?",
class = "factor"
),
Q9 = structure(
c(2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L,
2L, 1L, 2L, 1L, 2L),
levels = c("1", "2", "3", "4", "99"),
labels = c(
`Strongly agree` = 1,
Agree = 2,
Disagree = 3,
`Strongly disagree` = 4,
`No answer` = 99
),
label = "Q9: Support for Democracy - To what extent do you agree or disagree with this statement?",
class = "factor"
),
D2_GENDER_BINARY = structure(
c(2L, 1L, 2L, 2L, 1L, 1L, 2L,
2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L),
levels = c("1", "2"),
labels = c(Male = 1,
Female = 2),
label = "Gender binary",
class = "factor"
),
D1a_AGEGROUPS_75 = structure(
c(3L, 1L, 4L, 2L, 5L, 3L, 1L,
5L, 1L, 1L, 4L, 3L, 1L, 1L, 2L),
levels = c("18-29", "30-39",
"40-49", "50-65", "66-75"),
labels = c(
`Younger than 18` = 1,
`18-29` = 2,
`30-39` = 3,
`40-49` = 4,
`50-65` = 5,
`66-75` = 6
),
label = "Age Groups (max age 75)",
class = "factor"
),
Q5_14 = structure(
c(1L, 6L, 2L, 2L, 1L, 2L, 2L, 1L, 4L, 2L,
1L, 10L, 7L, 1L, 5L),
levels = c("1", "2", "3", "4", "5",
"6", "7", "8", "9", "10"),
labels = c(
`1 - not at all democratic` = 1,
`10 - completely democratic` = 10,
`No answer` = 99
),
label = "Q5: Support for Democracy - The country’s security agencies collect data on their citizens’ internet activity : Countries around the world differ in how democratic they are. We sampled the following practices from around the world. How democratic do yo",
class = "factor"
),
Q14 = structure(
c(39L, 9L, 7L, 6L, 8L, 9L, 8L, 11L, 7L, 11L,
39L, 6L, 7L, 7L, 8L),
levels = c(
"1",
"2",
"3",
"4",
"5",
"6",
"7",
"8",
"9",
"10",
"11",
"12",
"13",
"14",
"15",
"16",
"17",
"18",
"19",
"20",
"21",
"22",
"23",
"24",
"25",
"26",
"27",
"28",
"29",
"30",
"31",
"32",
"33",
"34",
"35",
"36",
"37",
"38",
"96",
"99"
),
labels = c(
Reform = 1,
Centre = 2,
EKRE = 3,
SDE = 4,
Isamaa = 5,
`CDU/CSU` = 6,
SPD = 7,
`Bündnis 90/Die Grünen` = 8,
AfD = 9,
FDP = 10,
`Die Linke` = 11,
`United Right (Zjednoczona Prawica)` = 12,
`Civic Coalition (Koalicja Obywatelska)` = 13,
`The Left (Lewica)` = 14,
`Polish Coalition (Koalicja Polska)` = 15,
`Confederation (Konfederacja)` = 16,
`SNS coalition` = 17,
`SPS-JS-KP-ZS` = 18,
SPAS = 19,
VMSZ = 20,
`SPP-DPM` = 21,
PSOE = 22,
PP = 23,
Vox = 24,
Ciudadanos = 25,
`Unidas Podemos` = 26,
ERC = 27,
Socialdemokraterna = 28,
Moderaterna = 29,
Sverigedemokraterna = 30,
Vänsterpartiet = 31,
Centerpartiet = 32,
`Miljöpartiet de Gröna` = 33,
`Servant of the People (Sluha Narodu)` = 34,
`Opposition Platform- for Life` = 35,
Fatherland = 36,
`European Solidarity` = 37,
`Voice (Holos)` = 38,
Other = 96,
`No answer` = 99
),
label = "Q14: Which party did you vote in the #DATE1 parliamentary election? Please click on the answer option that applies to you",
class = "factor"
),
Q20 = structure(
c(10L, 11L, 7L, 6L, 8L, 11L, 8L, 10L, 7L,
9L, 40L, 6L, 7L, 7L, 8L),
levels = c(
"1",
"2",
"3",
"4",
"5",
"6",
"7",
"8",
"9",
"10",
"11",
"12",
"13",
"14",
"15",
"16",
"17",
"18",
"19",
"20",
"21",
"22",
"23",
"24",
"25",
"26",
"27",
"28",
"29",
"30",
"31",
"32",
"33",
"34",
"35",
"36",
"37",
"38",
"39",
"97",
"98",
"99"
),
labels = c(
Reform = 1,
Centre = 2,
EKRE = 3,
SDE = 4,
Isamaa = 5,
`CDU/CSU` = 6,
SPD = 7,
`Bündnis 90/Die Grünen` = 8,
FDP = 9,
`Die Linke` = 10,
AfD = 11,
PiS = 12,
PO = 13,
Polska2050 = 14,
Lewica = 15,
Konfederacja = 16,
SNS = 17,
SPS = 18,
`Dosta je bilo` = 19,
SSP = 20,
Dveri = 21,
`Demokratska Stranka` = 22,
PSOE = 23,
PP = 24,
Vox = 25,
Ciudadanos = 26,
`Unidas Podemos` = 27,
ERC = 28,
Socialdemokraterna = 29,
Moderaterna = 30,
Sverigedemokraterna = 31,
Vänsterpartiet = 32,
Centerpartiet = 33,
`Miljöpartiet de Gröna` = 34,
`Servant of the People (Sluha Narodu)` = 35,
`Opposition Platform- for Life` = 36,
Fatherland = 37,
`European Solidarity` = 38,
`Voice (Holos)` = 39,
Other = 97,
`Would not vote` = 98,
`No answer` = 99
),
label = "Q20: Party Preferences - If there were a parliamentary election in the following days, which party would you vote for?",
class = "factor"
),
Q42 = structure(
c(2L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 2L,
2L, 2L, 1L, 2L, 2L),
levels = c("1", "2"),
labels = c(
`Most people can be trusted` = 1,
`Need to be very careful` = 2
),
label = "Q42: Interest in Politics - Generally speaking, would you say that most people can be trusted or that you need to be very careful in dealing with people?",
class = "factor"
)
),
row.names = c(NA,-15L),
class = c("tbl_df", "tbl", "data.frame")
)
Reproducible Working Example
在生成单个堆叠条形图时,使用以下代码即可正常工作:
library(tidyverse)
library(sjlabelled)
library(surveytoolbox) # install with devtools::install_github("martinctc/surveytoolbox")
library(hrbrthemes)
# Define function to wrap long labels
wrap_label <- function(label, width) {
str_wrap(label, width = width)
}
# Compute weighted counts by answer and age group
counts <- Germany %>%
filter(!is.na(Q6)) %>%
group_by(Q6, D1a_AGEGROUPS_75) %>%
summarise(weighted_count = sum(Weight)) %>%
ungroup()
# Compute weighted percentages by answer and age group and
# total number of observations and percentage for each answer option
total_obs <- sum(Germany$Weight)
percentages <- counts %>%
group_by(Q6) %>%
mutate(
total_weight = sum(weighted_count),
weighted_pct = weighted_count / total_weight * 100,
total_weighted_pct = sum(weighted_count) / total_obs * 100
) %>%
ungroup()
# The plot itself
# Plot stacked bar chart with percentages
ggplot(percentages, aes(x = Q6, y = weighted_pct, fill = D1a_AGEGROUPS_75)) +
geom_col() +
scale_fill_brewer(name = var_labels("D1a_AGEGROUPS_75"), palette = "Set2") +
# Add text labels with percentage for each age group on top of bars
geom_text(
aes(label = round(weighted_pct), group = D1a_AGEGROUPS_75),
position = position_stack(vjust = 0.5),
size = 3,
color = "white"
) +
# Add text label with percentage for each answer option above bars
geom_text(aes(
x = Q6,
y = 100 + 2.5,
label = paste0(round(total_weighted_pct), "%")
),
size = 3) +
labs(
x = wrap_label(var_labels("Answer"), width = 60),
y = "Weighted percentage",
title = str_wrap(get_label(Germany$Q6)),
subtitle = paste0("Germany (n=", round(total_obs), ")") ) +
theme_minimal(base_size = 11) +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
guides(fill = guide_legend(title = str_wrap(
get_label(Germany$D1a_AGEGROUPS_75, width = 65)
)))
Reproducible failing example
在try 构建循环时,我使用嵌套的for循环编写了以下代码.当我最终设法运行代码时,gglot标签和by_group命令中的循环变量名被忽略,因此绘图看起来与单独创建的绘图完全不同.我确实阅读了很多关于符号链接、字符串和类似内容的内容,并try 调整代码,但代码在‘group_by’命令下失败.
这是我当前的循环代码.我希望你能帮我找出它出了什么问题.如果我能为您提供更多的信息,请告诉我.
# Define variables to loop over
variables <- c("Q6", "Q7", "Q8", "Q9", "Q10_1", "Q10_2", "Q10_3",
"Q10_4", "Q5_1", "Q5_2", "Q5_3", "Q5_4",
"Q5_5", "Q5_6", "Q5_7", "Q5_8",
"Q5_9", "Q5_10","Q5_11","Q5_12","Q5_13","Q5_14")
# Define grouping variables
grouping_vars <- c("D1a_AGEGROUPS_75",
"D2_GENDER_BINARY",
"Q14",
"Q20",
"Q42")
# Loop over variables and grouping variables
for(var in variables){
for(group_var in grouping_vars){
# Compute weighted counts by answer and age group
counts <- Germany %>%
filter(!is.na(var)) %>%
group_by(var, group_var) %>%
summarise(weighted_count = sum(Weight)) %>%
ungroup()
# Compute weighted percentages by answer and age group and
# total number of observations and percentage for each answer option
total_obs <- sum(Germany$Weight)
percentages <- counts %>%
group_by(var) %>%
mutate(total_weight = sum(weighted_count),
weighted_pct = weighted_count / total_weight * 100,
total_weighted_pct = sum(weighted_count)/total_obs*100) %>%
ungroup()
# The plot itself
p <- ggplot(percentages, aes(x = var, y = weighted_pct, fill = group_var)) +
geom_col() +
scale_fill_brewer(name = var_labels(group_var), palette = "Set2") +
# Add text labels with percentage for each age group on top of bars
geom_text(aes(label = round(weighted_pct), group = group_var),
position = position_stack(vjust = 0.5),
size=3, color="white") +
# Add text label with percentage for each answer option above bars
geom_text(aes(x=var, y=100+2.5,
label=paste0(round(total_weighted_pct),"%")),
size=3) +
# Graphic titles
labs(x = wrap_label(var_labels(Germany[[var]]), width=60),
y="Weighted percentage",
title=str_wrap(get_label(Germany[[var]])),
subtitle=paste0("Germany (n=", round(total_obs), ")")) +
theme_minimal(base_size=11) +
theme(axis.text.x=element_text(angle=45, hjust=1)) +
guides(fill=guide_legend(title=str_wrap(get_label(Germany[[group_var]] ), width=65)))
# Save plot as png file with appropriate name based on variable and grouping variable names
ggsave(filename=paste0("Output/Plots/",
var, "_", group_var, ".png"),
plot=p,
dpi=300,
height=4,
width=7,
units='in')
}
}