Effect size computation: bialka2022stawkawycofanie

library(dplyr)

Attaching package: 'dplyr'
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
library(tidyr)
library(esc)
library(metafor)
Loading required package: Matrix

Attaching package: 'Matrix'
The following objects are masked from 'package:tidyr':

    expand, pack, unpack
Loading required package: metadat
Loading required package: numDeriv

Loading the 'metafor' package (version 5.0-1). For an
introduction to the package please type: help(metafor)

Shared Helpers

# Abort with a uniform message when a required scalar input is absent.
#
# Args:
#   x:    value to validate; expected to be a single (length-1) value.
#   name: label for `x`, interpolated into the error message.
#
# Raises (without the call in the message):
#   "Missing required input: <name>" when `x` is NULL / zero-length or NA;
#   "Required input must be a single value: <name>" when `x` has more than
#   one element.
# Returns NULL invisibly when `x` is a single non-missing value.
stop_if_missing <- function(x, name) {
  # A zero-length value (e.g. a column taken from a row filter that matched
  # nothing) counts as missing; `if (is.na(x))` alone would fail here with
  # an unrelated "argument is of length zero" error.
  if (length(x) == 0L) {
    stop(sprintf("Missing required input: %s", name), call. = FALSE)
  }
  # `if ()` needs a scalar condition, so reject vectors explicitly.
  if (length(x) > 1L) {
    stop(
      sprintf("Required input must be a single value: %s", name),
      call. = FALSE
    )
  }
  if (is.na(x)) {
    stop(sprintf("Missing required input: %s", name), call. = FALSE)
  }
  invisible(NULL)
}

# Compute a standardized mean difference for one effect and package it for
# auditing.
#
# Only method_used = "between_groups" is implemented: Cohen's d and Hedges' g
# are obtained from group means, SDs and ns via esc::esc_mean_sd(), with the
# low group passed as group 1 and the high group as group 2. Any other
# method_used raises "Unknown method_used: <value>".
#
# Args:
#   paper_key, study_id, effect_id: identifiers copied into the audit row.
#   method_used: computation route; its prefix ("between_" / "within_") also
#     determines the `design` label in the audit row.
#   sign_convention: free-text description recorded in `inputs_used`.
#   n_high, n_low, mean_high, mean_low, sd_high, sd_low: group summaries;
#     all six are required for "between_groups" (checked by stop_if_missing).
#   n_total, r_within: optional; only echoed into `inputs_used` when not NA.
#   notes_on_assumptions, imputed_flag, needs_sensitivity: copied into the
#     audit row unchanged.
#
# Returns a list with:
#   audit        - one-row data.frame (identifiers, design, method, inputs,
#                  d, v, g, v_g, notes and flags);
#   yaml_snippet - character string with d and v printed to 12 decimals;
#                  `needs_review` is always written as false.
compute_effect_size <- function(
    paper_key,
    study_id,
    effect_id,
    method_used,
    sign_convention = "d = mean(low) - mean(high)",
    n_high = NA_integer_,
    n_low = NA_integer_,
    n_total = NA_integer_,
    mean_high = NA_real_,
    mean_low = NA_real_,
    sd_high = NA_real_,
    sd_low = NA_real_,
    r_within = NA_real_,
    notes_on_assumptions = "",
    imputed_flag = FALSE,
    needs_sensitivity = TRUE
) {
  # The design label follows the method prefix; anything else stays NA.
  design_used <- NA_character_
  if (startsWith(method_used, "between_")) {
    design_used <- "Between-Subjects"
  } else if (startsWith(method_used, "within_")) {
    design_used <- "Within-Subjects"
  }

  # Guard clause: the group-summary route is the only one available.
  if (method_used != "between_groups") {
    stop(sprintf("Unknown method_used: %s", method_used), call. = FALSE)
  }
  computed_from_suggested <- "groups"

  # Every group summary must be present before calling esc.
  required <- list(
    n_high = n_high,
    n_low = n_low,
    mean_high = mean_high,
    mean_low = mean_low,
    sd_high = sd_high,
    sd_low = sd_low
  )
  for (input_name in names(required)) {
    stop_if_missing(required[[input_name]], input_name)
  }

  # Group 1 = low, group 2 = high, so the sign is mean(low) - mean(high).
  smd_from_groups <- function(es_type) {
    esc::esc_mean_sd(
      grp1m = mean_low, grp1sd = sd_low, grp1n = n_low,
      grp2m = mean_high, grp2sd = sd_high, grp2n = n_high,
      es.type = es_type
    )
  }
  es_d <- smd_from_groups("d")
  es_g <- smd_from_groups("g")

  d <- as.numeric(es_d$es)
  v <- as.numeric(es_d$var)
  g <- as.numeric(es_g$es)
  # NOTE(review): in this document's rendered audit rows v_g is identical to
  # v, so esc appears to return the d variance unchanged for es.type = "g".
  # Confirm whether v_g should carry the small-sample (J^2) correction.
  v_g <- as.numeric(es_g$var)

  # Provenance string: method and sign convention first, then every input
  # that was actually supplied (non-NA), in a fixed order.
  echoed <- list(
    n_low = n_low,
    n_high = n_high,
    n_total = n_total,
    mean_low = mean_low,
    mean_high = mean_high,
    sd_low = sd_low,
    sd_high = sd_high,
    r_within = r_within
  )
  supplied <- unlist(lapply(names(echoed), function(input_name) {
    value <- echoed[[input_name]]
    if (!is.na(value)) sprintf("%s=%s", input_name, value) else NULL
  }))
  inputs_used <- paste(
    c(
      sprintf("method=%s", method_used),
      sprintf("sign_convention=%s", sign_convention),
      supplied
    ),
    collapse = ", "
  )

  audit <- data.frame(
    paper_key = paper_key,
    study_id = study_id,
    effect_id = effect_id,
    design = design_used,
    method_used = method_used,
    computed_from_suggested = computed_from_suggested,
    inputs_used = inputs_used,
    d = d,
    v = v,
    g = g,
    v_g = v_g,
    notes_on_assumptions = notes_on_assumptions,
    imputed_flag = imputed_flag,
    needs_sensitivity = needs_sensitivity
  )

  # Double quotes inside the notes are swapped for single quotes so the
  # value can sit inside a double-quoted YAML scalar.
  yaml_template <- paste0(
    "effect_size:\n",
    "  metric: SMD\n",
    "  d: %.12f\n",
    "  v: %.12f\n",
    "  computed_from: %s\n",
    "  needs_review: false\n",
    "  notes: \"%s\"\n"
  )
  yaml_snippet <- sprintf(
    yaml_template,
    d, v, computed_from_suggested, gsub("\"", "'", inputs_used)
  )

  list(audit = audit, yaml_snippet = yaml_snippet)
}

Source Data

The thesis is used for study description and materials. Numerical extraction follows the local dataset and manual analysis script in data/ZBiałka_Analiza danych_metaanalizy.rmd.

# Load the survey export; empty cells are read as NA.
raw <- read.csv("../data/daneZBlic.csv", na.strings = "")

# Record ids dropped before analysis (order kept as in the original list).
excluded_ids <- c(
  14, 16, 35, 73, 89, 92, 94, 95, 107, 111,
  112, 119, 120, 124, 132, 134, 139, 145, 147, 154,
  157, 168, 169, 173, 174, 181, 185, 186, 188, 193,
  203, 211, 219, 222, 223, 225, 226, 230, 232, 233,
  238, 240, 246, 258, 261, 277, 291, 292, 295, 319,
  209, 320
)

# Keep every row whose id is not on the exclusion list.
dat <- raw[!is.element(raw$id, excluded_ids), ]

# Row counts before and after exclusion, printed for the record.
data.frame(
  raw_n = nrow(raw),
  excluded_n = length(excluded_ids),
  final_n = nrow(dat)
)
  raw_n excluded_n final_n
1   204         52     152

The thesis reports 186 participants and 51 attention-check exclusions. The copied analysis script instead starts from 204 LimeSurvey-complete records and excludes the 52 IDs listed above, leaving 152 analysed records. The YAML uses the dataset/script values for effect-size computation, and the discrepancy with the thesis is flagged in REPORT.md.

Recode

# Convert answer codes "A1".."A7" to the numeric scores 1..7.
#
# x: vector of answer codes. Returns the recoded vector as produced by
# dplyr::recode() with one replacement per code.
recode_likert <- function(x) {
  # Lookup vector: names are the answer codes, values the numeric scores.
  score_by_code <- stats::setNames(as.numeric(1:7), paste0("A", 1:7))
  dplyr::recode(x, !!!score_by_code)
}

# Column stems are scenario prefix (Ang/Karta/Bank) followed by a condition
# letter (D/N/S); outer() yields them in the order AngD, KartaD, BankD,
# AngN, ..., BankS. Each stem has a companion "<stem>Likert.SQ001." column.
know_cols <- as.vector(
  outer(c("Ang", "Karta", "Bank"), c("D", "N", "S"), paste0)
)
likert_cols <- paste0(know_cols, "Likert.SQ001.")

# Likert answers recoded to 1..7, column by column.
likert <- as.data.frame(apply(dat[, likert_cols], 2, recode_likert))

# Answer "A1" on a stem column becomes +1, any other answer -1.
know <- as.data.frame(apply(
  dat[, know_cols],
  2,
  function(answer) ifelse(answer == "A1", 1, -1)
))

df <- cbind(likert, know)

# Signed score per scenario x condition: the +1/-1 code times the Likert
# score, stored as "<stem>LikertO".
for (scenario in c("Ang", "Karta", "Bank")) {
  for (condition in c("N", "S", "D")) {
    stem <- paste0(scenario, condition)
    df[[paste0(stem, "LikertO")]] <-
      df[[stem]] * df[[paste0(stem, "Likert.SQ001.")]]
  }
}

Inputs

Primary extraction uses high-vs-low stakes contrasts within the same scenario (the S columns versus the N columns in the effect map below). The separate D/dowód condition and the scenario-confounded paired comparisons are not used as primary meta-analytic effects.

# One row per extracted effect: its id, a scenario label, and the names of
# the low ("N") and high ("S") signed-score columns to compare.
effect_map <- local({
  stems <- c("Ang", "Karta", "Bank")
  data.frame(
    effect_id = paste0("s1_e", seq_along(stems)),
    scenario = c("wypracowanie", "karta", "bank"),
    low_col = paste0(stems, "NLikertO"),
    high_col = paste0(stems, "SLikertO")
  )
})

# Summarise one high-vs-low contrast from two columns of a data frame.
#
# Args:
#   effect_id, scenario: labels copied into the output row.
#   low_col, high_col:   names of the columns holding the low- and
#                        high-condition scores; NAs mark rows without a
#                        score in that column.
#   data:                data frame to read the columns from. Defaults to
#                        the script-level `df`, so existing four-argument
#                        calls behave as before.
#
# Returns a one-row data.frame with the non-missing n, mean and SD of each
# column plus the statistic, degrees of freedom and p-value of
# t.test(high, low) (R's default, i.e. the Welch test).
#
# Raises an error naming the column(s) when `low_col` or `high_col` is not
# present in `data`.
make_inputs <- function(effect_id, scenario, low_col, high_col, data = df) {
  # `[[` returns NULL for an unknown column, which would only surface later
  # as an obscure t.test() failure, so check up front.
  missing_cols <- setdiff(c(low_col, high_col), names(data))
  if (length(missing_cols) > 0) {
    stop(
      sprintf(
        "Column(s) not found in data: %s",
        paste(missing_cols, collapse = ", ")
      ),
      call. = FALSE
    )
  }

  x_low <- data[[low_col]]
  x_high <- data[[high_col]]
  test <- t.test(x_high, x_low)

  data.frame(
    effect_id = effect_id,
    scenario = scenario,
    n_low = sum(!is.na(x_low)),
    n_high = sum(!is.na(x_high)),
    mean_low = mean(x_low, na.rm = TRUE),
    mean_high = mean(x_high, na.rm = TRUE),
    sd_low = sd(x_low, na.rm = TRUE),
    sd_high = sd(x_high, na.rm = TRUE),
    t_high_minus_low = as.numeric(test$statistic),
    df = as.numeric(test$parameter),
    p = test$p.value
  )
}

# Build one summary row per effect in effect_map and stack them.
inputs <- bind_rows(lapply(
  seq_len(nrow(effect_map)),
  function(i) {
    make_inputs(
      effect_map$effect_id[i],
      effect_map$scenario[i],
      effect_map$low_col[i],
      effect_map$high_col[i]
    )
  }
))

inputs
  effect_id     scenario n_low n_high  mean_low  mean_high   sd_low  sd_high
1     s1_e1 wypracowanie    49     44 2.7959184 -0.5909091 4.974852 5.474330
2     s1_e2        karta    44     41 4.5909091  4.4390244 3.780723 3.987786
3     s1_e3         bank    41     49 0.1219512 -2.4081633 5.653296 4.799819
  t_high_minus_low       df           p
1       -3.1096921 87.37661 0.002528755
2       -0.1799091 81.73062 0.857669277
3       -2.2632975 78.88849 0.026365635

Effect Computations

Effect s1_e1: Wypracowanie

# Identifiers shared by all effects of this paper; effect_id selects the
# matching row of `inputs`.
paper_key <- "bialka2022stawkawycofanie"
study_id <- 1
effect_id <- "s1_e1"
inp <- inputs[inputs[["effect_id"]] == effect_id, ]

# The six group summaries are forwarded by name straight from the input row.
res_s1_e1 <- do.call(
  compute_effect_size,
  c(
    list(
      paper_key = paper_key,
      study_id = study_id,
      effect_id = effect_id,
      method_used = "between_groups"
    ),
    as.list(
      inp[c("n_high", "n_low", "mean_high", "mean_low", "sd_high", "sd_low")]
    ),
    list(
      notes_on_assumptions = "Group summaries computed from daneZBlic.csv after the exclusion vector in the local Rmd; effect size computed with esc::esc_mean_sd."
    )
  )
)
res_s1_e1[["audit"]]
                  paper_key study_id effect_id           design    method_used
1 bialka2022stawkawycofanie        1     s1_e1 Between-Subjects between_groups
  computed_from_suggested
1                  groups
                                                                                                                                                                                        inputs_used
1 method=between_groups, sign_convention=d = mean(low) - mean(high), n_low=49, n_high=44, mean_low=2.79591836734694, mean_high=-0.590909090909091, sd_low=4.97485172239977, sd_high=5.4743298700779
          d          v         g        v_g
1 0.6492115 0.04540143 0.6438461 0.04540143
                                                                                                                  notes_on_assumptions
1 Group summaries computed from daneZBlic.csv after the exclusion vector in the local Rmd; effect size computed with esc::esc_mean_sd.
  imputed_flag needs_sensitivity
1        FALSE              TRUE
# Emit the YAML fragment for this effect verbatim.
cat(res_s1_e1[["yaml_snippet"]])
effect_size:
  metric: SMD
  d: 0.649211530121
  v: 0.045401433900
  computed_from: groups
  needs_review: false
  notes: "method=between_groups, sign_convention=d = mean(low) - mean(high), n_low=49, n_high=44, mean_low=2.79591836734694, mean_high=-0.590909090909091, sd_low=4.97485172239977, sd_high=5.4743298700779"

Effect s1_e2: Karta

# Select the input row for this effect; paper_key and study_id are reused
# from the first effect.
effect_id <- "s1_e2"
inp <- inputs[inputs[["effect_id"]] == effect_id, ]

# The six group summaries are forwarded by name straight from the input row.
res_s1_e2 <- do.call(
  compute_effect_size,
  c(
    list(
      paper_key = paper_key,
      study_id = study_id,
      effect_id = effect_id,
      method_used = "between_groups"
    ),
    as.list(
      inp[c("n_high", "n_low", "mean_high", "mean_low", "sd_high", "sd_low")]
    ),
    list(
      notes_on_assumptions = "Group summaries computed from daneZBlic.csv after the exclusion vector in the local Rmd; effect size computed with esc::esc_mean_sd."
    )
  )
)
res_s1_e2[["audit"]]
                  paper_key study_id effect_id           design    method_used
1 bialka2022stawkawycofanie        1     s1_e2 Between-Subjects between_groups
  computed_from_suggested
1                  groups
                                                                                                                                                                                      inputs_used
1 method=between_groups, sign_convention=d = mean(low) - mean(high), n_low=44, n_high=41, mean_low=4.59090909090909, mean_high=4.4390243902439, sd_low=3.78072333314353, sd_high=3.98778623102972
           d          v          g        v_g
1 0.03912646 0.04712652 0.03877184 0.04712652
                                                                                                                  notes_on_assumptions
1 Group summaries computed from daneZBlic.csv after the exclusion vector in the local Rmd; effect size computed with esc::esc_mean_sd.
  imputed_flag needs_sensitivity
1        FALSE              TRUE
# Emit the YAML fragment for this effect verbatim.
cat(res_s1_e2[["yaml_snippet"]])
effect_size:
  metric: SMD
  d: 0.039126464904
  v: 0.047126521808
  computed_from: groups
  needs_review: false
  notes: "method=between_groups, sign_convention=d = mean(low) - mean(high), n_low=44, n_high=41, mean_low=4.59090909090909, mean_high=4.4390243902439, sd_low=3.78072333314353, sd_high=3.98778623102972"

Effect s1_e3: Bank

# Select the input row for this effect; paper_key and study_id are reused
# from the first effect.
effect_id <- "s1_e3"
inp <- inputs[inputs[["effect_id"]] == effect_id, ]

# The six group summaries are forwarded by name straight from the input row.
res_s1_e3 <- do.call(
  compute_effect_size,
  c(
    list(
      paper_key = paper_key,
      study_id = study_id,
      effect_id = effect_id,
      method_used = "between_groups"
    ),
    as.list(
      inp[c("n_high", "n_low", "mean_high", "mean_low", "sd_high", "sd_low")]
    ),
    list(
      notes_on_assumptions = "Group summaries computed from daneZBlic.csv after the exclusion vector in the local Rmd; effect size computed with esc::esc_mean_sd."
    )
  )
)
res_s1_e3[["audit"]]
                  paper_key study_id effect_id           design    method_used
1 bialka2022stawkawycofanie        1     s1_e3 Between-Subjects between_groups
  computed_from_suggested
1                  groups
                                                                                                                                                                                         inputs_used
1 method=between_groups, sign_convention=d = mean(low) - mean(high), n_low=41, n_high=49, mean_low=0.121951219512195, mean_high=-2.40816326530612, sd_low=5.65329603837982, sd_high=4.79981929931976
        d          v         g        v_g
1 0.48608 0.04611104 0.4819254 0.04611104
                                                                                                                  notes_on_assumptions
1 Group summaries computed from daneZBlic.csv after the exclusion vector in the local Rmd; effect size computed with esc::esc_mean_sd.
  imputed_flag needs_sensitivity
1        FALSE              TRUE
# Emit the YAML fragment for this effect verbatim.
cat(res_s1_e3[["yaml_snippet"]])
effect_size:
  metric: SMD
  d: 0.486079952874
  v: 0.046111038949
  computed_from: groups
  needs_review: false
  notes: "method=between_groups, sign_convention=d = mean(low) - mean(high), n_low=41, n_high=49, mean_low=0.121951219512195, mean_high=-2.40816326530612, sd_low=5.65329603837982, sd_high=4.79981929931976"

Audit Table

# Stack the one-row audit records of the three effects into a single table.
audits <- do.call(
  rbind,
  lapply(list(res_s1_e1, res_s1_e2, res_s1_e3), `[[`, "audit")
)
audits
                  paper_key study_id effect_id           design    method_used
1 bialka2022stawkawycofanie        1     s1_e1 Between-Subjects between_groups
2 bialka2022stawkawycofanie        1     s1_e2 Between-Subjects between_groups
3 bialka2022stawkawycofanie        1     s1_e3 Between-Subjects between_groups
  computed_from_suggested
1                  groups
2                  groups
3                  groups
                                                                                                                                                                                         inputs_used
1  method=between_groups, sign_convention=d = mean(low) - mean(high), n_low=49, n_high=44, mean_low=2.79591836734694, mean_high=-0.590909090909091, sd_low=4.97485172239977, sd_high=5.4743298700779
2    method=between_groups, sign_convention=d = mean(low) - mean(high), n_low=44, n_high=41, mean_low=4.59090909090909, mean_high=4.4390243902439, sd_low=3.78072333314353, sd_high=3.98778623102972
3 method=between_groups, sign_convention=d = mean(low) - mean(high), n_low=41, n_high=49, mean_low=0.121951219512195, mean_high=-2.40816326530612, sd_low=5.65329603837982, sd_high=4.79981929931976
           d          v          g        v_g
1 0.64921153 0.04540143 0.64384615 0.04540143
2 0.03912646 0.04712652 0.03877184 0.04712652
3 0.48607995 0.04611104 0.48192542 0.04611104
                                                                                                                  notes_on_assumptions
1 Group summaries computed from daneZBlic.csv after the exclusion vector in the local Rmd; effect size computed with esc::esc_mean_sd.
2 Group summaries computed from daneZBlic.csv after the exclusion vector in the local Rmd; effect size computed with esc::esc_mean_sd.
3 Group summaries computed from daneZBlic.csv after the exclusion vector in the local Rmd; effect size computed with esc::esc_mean_sd.
  imputed_flag needs_sensitivity
1        FALSE              TRUE
2        FALSE              TRUE
3        FALSE              TRUE