library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
library(data.table)
##
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
##
## between, first, last
## The following object is masked from 'package:base':
##
## %notin%
library(esc)
library(metafor)
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
## Loading required package: metadat
## Loading required package: numDeriv
##
## Loading the 'metafor' package (version 5.0-1). For an
## introduction to the package please type: help(metafor)
The thesis is used for the study description and materials. Numerical
extraction follows the cleaned 2025 analysis in
data/analiza_2025_bm.Rmd.
recode_likert <- function(x) {
dplyr::recode(
x,
`A1` = 1, `A2` = 2, `A3` = 3, `A4` = 4,
`A5` = 5, `A6` = 6, `A7` = 7
)
}
read_survey <- function(file) read.csv(file.path("../data", file), na.strings = "")
demografia <- read.csv("../data/demo/625672.csv", na.strings = "")
end <- distinct(read.csv("../data/end/255249.csv", na.strings = ""), uid, step)
demografia <- demografia[!(duplicated(demografia$uid) | duplicated(demografia$uid, fromLast = TRUE)), ]
demografia <- demografia[demografia$uid %in% end$uid, ]
setnames(demografia, c("p01", "p02", "p03", "p04"), c("plec", "rok", "wyksztalcenie", "filo"))
rename_pair <- function(dat, first, second) {
setnames(dat, c("PytanieWiedza1.SQ001.", "PytanieWiedza2.SQ001."), c(first, second))
dat
}
rename_both <- function(dat, first, second) {
setnames(dat, c("PytanieWiedza11.SQ001.", "PytanieWiedza12.SQ001."), c(first, second))
dat
}
Biedronka1_MpP <- rename_pair(read_survey("445154.csv"), "MICHAL", "PIOTR")
Biedronka1_PpM <- rename_pair(read_survey("223631.csv"), "PIOTR", "MICHAL")
Biedronka2_PpM <- rename_pair(read_survey("734581.csv"), "PIOTR", "MICHAL")
Biedronka2_MpP <- rename_pair(read_survey("715275.csv"), "MICHAL", "PIOTR")
Biedronka1_oba <- rename_both(read_survey("627626.csv")[, c(1, 2, 3, 4, 5, 7, 8, 9, 10, 11)], "MICHAL", "PIOTR")
Bank1_ApZ <- rename_pair(read_survey("928725.csv"), "ALA", "ZOSIA")
Bank1_ZpA <- rename_pair(read_survey("137416.csv"), "ZOSIA", "ALA")
Bank2_ZpA <- rename_pair(read_survey("581937.csv"), "ZOSIA", "ALA")
Bank2_ApZ <- rename_pair(read_survey("516534.csv"), "ALA", "ZOSIA")
Bank1_oba <- rename_both(read_survey("267519.csv")[, c(1, 2, 3, 4, 5, 7, 8, 9, 10, 11)], "ALA", "ZOSIA")
Orzeszki1_ZpM <- rename_pair(read_survey("929928.csv"), "ZUZA", "MARTA")
Orzeszki1_MpZ <- rename_pair(read_survey("464781.csv"), "MARTA", "ZUZA")
Orzeszki2_MpZ <- rename_pair(read_survey("695972.csv"), "MARTA", "ZUZA")
Orzeszki2_ZpM <- rename_pair(read_survey("479826.csv"), "ZUZA", "MARTA")
Orzeszki1_oba <- rename_both(read_survey("654951.csv")[, c(1, 2, 3, 4, 5, 7, 8, 9, 10, 11)], "ZUZA", "MARTA")
prep_scenario <- function(dfs, typ, high_name, low_name, histories, presentation, evidence_holder, correct_control) {
tagged <- Map(
function(dat, history, pres, holder) {
dat$Typ_historyjki <- typ
dat$Historyjka <- history
dat$Sposob_prezentacji <- pres
dat$Posiadacz_swiadectw <- holder
dat
},
dfs, histories, presentation, evidence_holder
)
dat <- rbindlist(tagged, fill = TRUE)
dat <- dat[!(duplicated(uid) | duplicated(uid, fromLast = TRUE)) & !is.na(uid) & !is.na(shorturl)]
dat <- left_join(as.data.frame(dat), demografia, by = "uid")
dat <- dat[dat$PytanieKontrolne == correct_control, ]
dat$Wysoka <- recode_likert(dat[[high_name]])
dat$Niska <- recode_likert(dat[[low_name]])
long <- pivot_longer(dat, cols = c("Wysoka", "Niska"), names_to = "Stawka", values_to = "Wiedza")
long %>%
mutate(Swiadectwa = case_when(
Stawka == "Wysoka" & Posiadacz_swiadectw == "High" ~ "Tak",
Stawka == "Niska" & Posiadacz_swiadectw == "Low" ~ "Tak",
Stawka == "Wysoka" & Posiadacz_swiadectw == "Low" ~ "Nie",
Stawka == "Niska" & Posiadacz_swiadectw == "High" ~ "Nie"
))
}
bank <- prep_scenario(
list(Bank1_ApZ, Bank1_ZpA, Bank1_oba, Bank2_ApZ, Bank2_ZpA),
"Bank", "ALA", "ZOSIA",
c("Bank1ApZ", "Bank1ZpA", "Bank1oba", "Bank2ApZ", "Bank2ZpA"),
c("High-first", "Low-first", "Both", "High-first", "Low-first"),
c("High", "High", "High", "Low", "Low"),
"A2"
)
biedronka <- prep_scenario(
list(Biedronka1_MpP, Biedronka1_PpM, Biedronka1_oba, Biedronka2_PpM, Biedronka2_MpP),
"Biedronka", "MICHAL", "PIOTR",
c("Biedronka1MpP", "Biedronka1PpM", "Biedronka1oba", "Biedronka2PpM", "Biedronka2MpP"),
c("High-first", "Low-first", "Both", "Low-first", "High-first"),
c("High", "High", "High", "Low", "Low"),
"A1"
)
orzeszki <- prep_scenario(
list(Orzeszki1_MpZ, Orzeszki1_ZpM, Orzeszki1_oba, Orzeszki2_ZpM, Orzeszki2_MpZ),
"Orzeszki", "MARTA", "ZUZA",
c("Orzeszki1MpZ", "Orzeszki1ZpM", "Orzeszki1oba", "Orzeszki2ZpM", "Orzeszki2MpZ"),
c("High-first", "Low-first", "Both", "Low-first", "High-first"),
c("Low", "Low", "Low", "High", "High"),
"A1"
)
full_data <- bind_rows(bank, biedronka, orzeszki)
data.frame(
scenario = c("Bank", "Biedronka", "Orzeszki"),
cleaned_scenario_records = c(n_distinct(bank$uid), n_distinct(biedronka$uid), n_distinct(orzeszki$uid))
)
## scenario cleaned_scenario_records
## 1 Bank 263
## 2 Biedronka 268
## 3 Orzeszki 259
stats_all <- full_data %>%
group_by(Typ_historyjki, Swiadectwa, Stawka) %>%
summarise(M = mean(Wiedza), SD = sd(Wiedza), N = n(), .groups = "drop") %>%
pivot_wider(names_from = Stawka, values_from = c(M, SD, N))
stats <- stats_all %>%
filter(Swiadectwa == "Tak") %>%
mutate(
effect_id = case_when(
Typ_historyjki == "Bank" ~ "s1_e2",
Typ_historyjki == "Biedronka" ~ "s1_e4",
Typ_historyjki == "Orzeszki" ~ "s1_e6"
)
) %>%
arrange(match(effect_id, c("s1_e2", "s1_e4", "s1_e6")))
stats
## # A tibble: 3 × 9
## Typ_historyjki Swiadectwa M_Niska M_Wysoka SD_Niska SD_Wysoka N_Niska N_Wysoka
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <int> <int>
## 1 Bank Tak 3.81 4.14 2.00 2.12 108 155
## 2 Biedronka Tak 4.39 4.06 1.99 2.11 105 163
## 3 Orzeszki Tak 3.04 3.70 2.03 2.08 156 103
## # ℹ 1 more variable: effect_id <chr>
paper_key <- "pabich2018porownywaniestawki"
study_id <- 1
compute_from_row <- function(effect_id) {
inp <- stats[stats$effect_id == effect_id, ]
compute_effect_size(
paper_key = paper_key,
study_id = study_id,
effect_id = effect_id,
method_used = "between_groups",
n_high = inp$N_Wysoka,
n_low = inp$N_Niska,
mean_high = inp$M_Wysoka,
mean_low = inp$M_Niska,
sd_high = inp$SD_Wysoka,
sd_low = inp$SD_Niska,
notes_on_assumptions = "Group summaries computed from cleaned raw survey files following analiza_2025_bm.Rmd; effect size computed with esc::esc_mean_sd."
)
}
res_s1_e2 <- compute_from_row("s1_e2")
res_s1_e4 <- compute_from_row("s1_e4")
res_s1_e6 <- compute_from_row("s1_e6")
res_s1_e2$audit
## paper_key study_id effect_id design
## 1 pabich2018porownywaniestawki 1 s1_e2 Between-Subjects
## method_used computed_from_suggested
## 1 between_groups groups
## inputs_used
## 1 method=between_groups, sign_convention=d = mean(low) - mean(high), n_low=108, n_high=155, mean_low=3.80555555555556, mean_high=4.13548387096774, sd_low=2.00214059900726, sd_high=2.11696124827221
## d v g v_g
## 1 -0.1593349 0.01575914 -0.1588766 0.01575914
## notes_on_assumptions
## 1 Group summaries computed from cleaned raw survey files following analiza_2025_bm.Rmd; effect size computed with esc::esc_mean_sd.
## imputed_flag needs_sensitivity
## 1 FALSE TRUE
cat(res_s1_e2$yaml_snippet)
## effect_size:
## metric: SMD
## d: -0.159334904523
## v: 0.015759137584
## computed_from: groups
## needs_review: false
## notes: "method=between_groups, sign_convention=d = mean(low) - mean(high), n_low=108, n_high=155, mean_low=3.80555555555556, mean_high=4.13548387096774, sd_low=2.00214059900726, sd_high=2.11696124827221"
res_s1_e4$audit
## paper_key study_id effect_id design
## 1 pabich2018porownywaniestawki 1 s1_e4 Between-Subjects
## method_used computed_from_suggested
## 1 between_groups groups
## inputs_used
## 1 method=between_groups, sign_convention=d = mean(low) - mean(high), n_low=105, n_high=163, mean_low=4.39047619047619, mean_high=4.05521472392638, sd_low=1.98792140232744, sd_high=2.10599243120095
## d v g v_g
## 1 0.1626981 0.01570816 0.162239 0.01570816
## notes_on_assumptions
## 1 Group summaries computed from cleaned raw survey files following analiza_2025_bm.Rmd; effect size computed with esc::esc_mean_sd.
## imputed_flag needs_sensitivity
## 1 FALSE TRUE
cat(res_s1_e4$yaml_snippet)
## effect_size:
## metric: SMD
## d: 0.162698137881
## v: 0.015708164454
## computed_from: groups
## needs_review: false
## notes: "method=between_groups, sign_convention=d = mean(low) - mean(high), n_low=105, n_high=163, mean_low=4.39047619047619, mean_high=4.05521472392638, sd_low=1.98792140232744, sd_high=2.10599243120095"
res_s1_e6$audit
## paper_key study_id effect_id design
## 1 pabich2018porownywaniestawki 1 s1_e6 Between-Subjects
## method_used computed_from_suggested
## 1 between_groups groups
## inputs_used
## 1 method=between_groups, sign_convention=d = mean(low) - mean(high), n_low=156, n_high=103, mean_low=3.03846153846154, mean_high=3.69902912621359, sd_low=2.0348087502574, sd_high=2.08088853314603
## d v g v_g
## 1 -0.3217226 0.01631881 -0.3207828 0.01631881
## notes_on_assumptions
## 1 Group summaries computed from cleaned raw survey files following analiza_2025_bm.Rmd; effect size computed with esc::esc_mean_sd.
## imputed_flag needs_sensitivity
## 1 FALSE TRUE
cat(res_s1_e6$yaml_snippet)
## effect_size:
## metric: SMD
## d: -0.321722594820
## v: 0.016318811703
## computed_from: groups
## needs_review: false
## notes: "method=between_groups, sign_convention=d = mean(low) - mean(high), n_low=156, n_high=103, mean_low=3.03846153846154, mean_high=3.69902912621359, sd_low=2.0348087502574, sd_high=2.08088853314603"
audits <- rbind(
res_s1_e2$audit,
res_s1_e4$audit,
res_s1_e6$audit
)
audits
## paper_key study_id effect_id design
## 1 pabich2018porownywaniestawki 1 s1_e2 Between-Subjects
## 2 pabich2018porownywaniestawki 1 s1_e4 Between-Subjects
## 3 pabich2018porownywaniestawki 1 s1_e6 Between-Subjects
## method_used computed_from_suggested
## 1 between_groups groups
## 2 between_groups groups
## 3 between_groups groups
## inputs_used
## 1 method=between_groups, sign_convention=d = mean(low) - mean(high), n_low=108, n_high=155, mean_low=3.80555555555556, mean_high=4.13548387096774, sd_low=2.00214059900726, sd_high=2.11696124827221
## 2 method=between_groups, sign_convention=d = mean(low) - mean(high), n_low=105, n_high=163, mean_low=4.39047619047619, mean_high=4.05521472392638, sd_low=1.98792140232744, sd_high=2.10599243120095
## 3 method=between_groups, sign_convention=d = mean(low) - mean(high), n_low=156, n_high=103, mean_low=3.03846153846154, mean_high=3.69902912621359, sd_low=2.0348087502574, sd_high=2.08088853314603
## d v g v_g
## 1 -0.1593349 0.01575914 -0.1588766 0.01575914
## 2 0.1626981 0.01570816 0.1622390 0.01570816
## 3 -0.3217226 0.01631881 -0.3207828 0.01631881
## notes_on_assumptions
## 1 Group summaries computed from cleaned raw survey files following analiza_2025_bm.Rmd; effect size computed with esc::esc_mean_sd.
## 2 Group summaries computed from cleaned raw survey files following analiza_2025_bm.Rmd; effect size computed with esc::esc_mean_sd.
## 3 Group summaries computed from cleaned raw survey files following analiza_2025_bm.Rmd; effect size computed with esc::esc_mean_sd.
## imputed_flag needs_sensitivity
## 1 FALSE TRUE
## 2 FALSE TRUE
## 3 FALSE TRUE