bialka2022stawkawycofanie
/data/papers/bialka2022stawkawycofanie/analysis/effect_sizes.qmd---
title: "Effect size computation: bialka2022stawkawycofanie"
format:
html:
toc: true
execute:
echo: true
warning: true
message: false
---
```{r}
library(dplyr)
library(tidyr)
library(esc)
library(metafor)
```
## Shared Helpers
```{r}
stop_if_missing <- function(x, name) {
if (is.na(x)) stop(sprintf("Missing required input: %s", name), call. = FALSE)
}
compute_effect_size <- function(
paper_key,
study_id,
effect_id,
method_used,
sign_convention = "d = mean(low) - mean(high)",
n_high = NA_integer_,
n_low = NA_integer_,
n_total = NA_integer_,
mean_high = NA_real_,
mean_low = NA_real_,
sd_high = NA_real_,
sd_low = NA_real_,
r_within = NA_real_,
notes_on_assumptions = "",
imputed_flag = FALSE,
needs_sensitivity = TRUE
) {
d <- NA_real_
v <- NA_real_
g <- NA_real_
v_g <- NA_real_
computed_from_suggested <- NA_character_
design_used <- if (startsWith(method_used, "between_")) {
"Between-Subjects"
} else if (startsWith(method_used, "within_")) {
"Within-Subjects"
} else {
NA_character_
}
if (method_used == "between_groups") {
computed_from_suggested <- "groups"
stop_if_missing(n_high, "n_high")
stop_if_missing(n_low, "n_low")
stop_if_missing(mean_high, "mean_high")
stop_if_missing(mean_low, "mean_low")
stop_if_missing(sd_high, "sd_high")
stop_if_missing(sd_low, "sd_low")
es_d <- esc::esc_mean_sd(
grp1m = mean_low, grp1sd = sd_low, grp1n = n_low,
grp2m = mean_high, grp2sd = sd_high, grp2n = n_high,
es.type = "d"
)
es_g <- esc::esc_mean_sd(
grp1m = mean_low, grp1sd = sd_low, grp1n = n_low,
grp2m = mean_high, grp2sd = sd_high, grp2n = n_high,
es.type = "g"
)
d <- as.numeric(es_d$es)
v <- as.numeric(es_d$var)
g <- as.numeric(es_g$es)
v_g <- as.numeric(es_g$var)
} else {
stop(sprintf("Unknown method_used: %s", method_used), call. = FALSE)
}
inputs_used <- paste(
c(
sprintf("method=%s", method_used),
sprintf("sign_convention=%s", sign_convention),
if (!is.na(n_low)) sprintf("n_low=%s", n_low) else NULL,
if (!is.na(n_high)) sprintf("n_high=%s", n_high) else NULL,
if (!is.na(n_total)) sprintf("n_total=%s", n_total) else NULL,
if (!is.na(mean_low)) sprintf("mean_low=%s", mean_low) else NULL,
if (!is.na(mean_high)) sprintf("mean_high=%s", mean_high) else NULL,
if (!is.na(sd_low)) sprintf("sd_low=%s", sd_low) else NULL,
if (!is.na(sd_high)) sprintf("sd_high=%s", sd_high) else NULL,
if (!is.na(r_within)) sprintf("r_within=%s", r_within) else NULL
),
collapse = ", "
)
audit <- data.frame(
paper_key = paper_key,
study_id = study_id,
effect_id = effect_id,
design = design_used,
method_used = method_used,
computed_from_suggested = computed_from_suggested,
inputs_used = inputs_used,
d = d,
v = v,
g = g,
v_g = v_g,
notes_on_assumptions = notes_on_assumptions,
imputed_flag = imputed_flag,
needs_sensitivity = needs_sensitivity
)
yaml_snippet <- sprintf(
"effect_size:\n metric: SMD\n d: %.12f\n v: %.12f\n computed_from: %s\n needs_review: false\n notes: \"%s\"\n",
d, v, computed_from_suggested, gsub(pattern = "\"", replacement = "'", x = inputs_used)
)
list(audit = audit, yaml_snippet = yaml_snippet)
}
```
## Source Data
The thesis is used for study description and materials. Numerical extraction follows the
local dataset and manual analysis script in `data/ZBiałka_Analiza danych_metaanalizy.rmd`.
```{r}
raw <- read.csv("../data/daneZBlic.csv", na.strings = "")
excluded_ids <- c(
14, 16, 35, 73, 89, 92, 94, 95, 107, 111, 112, 119, 120, 124,
132, 134, 139, 145, 147, 154, 157, 168, 169, 173, 174, 181,
185, 186, 188, 193, 203, 211, 219, 222, 223, 225, 226, 230,
232, 233, 238, 240, 246, 258, 261, 277, 291, 292, 295, 319,
209, 320
)
dat <- raw[!(raw$id %in% excluded_ids), ]
data.frame(
raw_n = nrow(raw),
excluded_n = length(excluded_ids),
final_n = nrow(dat)
)
```
The thesis reports 186 participants and 51 attention-check exclusions. The copied analysis
script uses 204 LimeSurvey-complete records and excludes the 52 IDs above. The YAML uses
the dataset/script values for effect-size computation and flags the discrepancy in `REPORT.md`.
## Recode
```{r}
recode_likert <- function(x) {
dplyr::recode(
x,
`A1` = 1, `A2` = 2, `A3` = 3, `A4` = 4,
`A5` = 5, `A6` = 6, `A7` = 7
)
}
likert <- as.data.frame(apply(
dat[, c(
"AngDLikert.SQ001.", "KartaDLikert.SQ001.", "BankDLikert.SQ001.",
"AngNLikert.SQ001.", "KartaNLikert.SQ001.", "BankNLikert.SQ001.",
"AngSLikert.SQ001.", "KartaSLikert.SQ001.", "BankSLikert.SQ001."
)],
2,
recode_likert
))
know <- as.data.frame(apply(
dat[, c("AngD", "KartaD", "BankD", "AngN", "KartaN", "BankN", "AngS", "KartaS", "BankS")],
2,
function(x) ifelse(x == "A1", 1, -1)
))
df <- cbind(likert, know)
for (scenario in c("Ang", "Karta", "Bank")) {
for (condition in c("N", "S", "D")) {
df[[paste0(scenario, condition, "LikertO")]] <-
df[[paste0(scenario, condition)]] *
df[[paste0(scenario, condition, "Likert.SQ001.")]]
}
}
```
## Inputs
Primary extraction uses high-vs-low stakes contrasts within the same scenario. The separate
`D`/`dowód` condition and scenario-confounded paired comparisons are not used as primary
meta-analytic effects.
```{r}
effect_map <- data.frame(
effect_id = c("s1_e1", "s1_e2", "s1_e3"),
scenario = c("wypracowanie", "karta", "bank"),
low_col = c("AngNLikertO", "KartaNLikertO", "BankNLikertO"),
high_col = c("AngSLikertO", "KartaSLikertO", "BankSLikertO")
)
make_inputs <- function(effect_id, scenario, low_col, high_col) {
x_low <- df[[low_col]]
x_high <- df[[high_col]]
test <- t.test(x_high, x_low)
data.frame(
effect_id = effect_id,
scenario = scenario,
n_low = sum(!is.na(x_low)),
n_high = sum(!is.na(x_high)),
mean_low = mean(x_low, na.rm = TRUE),
mean_high = mean(x_high, na.rm = TRUE),
sd_low = sd(x_low, na.rm = TRUE),
sd_high = sd(x_high, na.rm = TRUE),
t_high_minus_low = as.numeric(test$statistic),
df = as.numeric(test$parameter),
p = test$p.value
)
}
inputs <- bind_rows(Map(
make_inputs,
effect_map$effect_id,
effect_map$scenario,
effect_map$low_col,
effect_map$high_col
))
inputs
```
## Effect Computations
### Effect s1_e1: Wypracowanie
```{r}
paper_key <- "bialka2022stawkawycofanie"
study_id <- 1
effect_id <- "s1_e1"
inp <- inputs[inputs$effect_id == effect_id, ]
res_s1_e1 <- compute_effect_size(
paper_key = paper_key,
study_id = study_id,
effect_id = effect_id,
method_used = "between_groups",
n_high = inp$n_high,
n_low = inp$n_low,
mean_high = inp$mean_high,
mean_low = inp$mean_low,
sd_high = inp$sd_high,
sd_low = inp$sd_low,
notes_on_assumptions = "Group summaries computed from daneZBlic.csv after the exclusion vector in the local Rmd; effect size computed with esc::esc_mean_sd."
)
res_s1_e1$audit
cat(res_s1_e1$yaml_snippet)
```
### Effect s1_e2: Karta
```{r}
effect_id <- "s1_e2"
inp <- inputs[inputs$effect_id == effect_id, ]
res_s1_e2 <- compute_effect_size(
paper_key = paper_key,
study_id = study_id,
effect_id = effect_id,
method_used = "between_groups",
n_high = inp$n_high,
n_low = inp$n_low,
mean_high = inp$mean_high,
mean_low = inp$mean_low,
sd_high = inp$sd_high,
sd_low = inp$sd_low,
notes_on_assumptions = "Group summaries computed from daneZBlic.csv after the exclusion vector in the local Rmd; effect size computed with esc::esc_mean_sd."
)
res_s1_e2$audit
cat(res_s1_e2$yaml_snippet)
```
### Effect s1_e3: Bank
```{r}
effect_id <- "s1_e3"
inp <- inputs[inputs$effect_id == effect_id, ]
res_s1_e3 <- compute_effect_size(
paper_key = paper_key,
study_id = study_id,
effect_id = effect_id,
method_used = "between_groups",
n_high = inp$n_high,
n_low = inp$n_low,
mean_high = inp$mean_high,
mean_low = inp$mean_low,
sd_high = inp$sd_high,
sd_low = inp$sd_low,
notes_on_assumptions = "Group summaries computed from daneZBlic.csv after the exclusion vector in the local Rmd; effect size computed with esc::esc_mean_sd."
)
res_s1_e3$audit
cat(res_s1_e3$yaml_snippet)
```
## Audit Table
```{r}
audits <- rbind(res_s1_e1$audit, res_s1_e2$audit, res_s1_e3$audit)
audits
```