Effect size computations: wundfolkspuristspragmatic

Computes standardized mean differences (d) and sampling variances (v) for the extraction YAML papers/wundfolkspuristspragmatic/wundfolkspuristspragmatic.yaml.

Data sources

Experiment 1 knowledge attribution counts: ../out/tables/camelot_stream_p12_t2.csv
Experiment 2 knowledge attribution counts: ../out/tables/camelot_stream_p18_t5.csv
Experiment 3 knowledge attribution counts: ../out/tables/camelot_stream_p23_t7.csv

Outcome coding

Experiments 1–2: knowledge attribution = options A&B (knows) vs C&D (does not know).
Experiment 3: knowledge attribution = option A (knows without checking) vs option B.

Stakes contrast sign convention (per extraction instructions): d = mean(low) - mean(high) where the “mean” is the proportion of knowledge attributions.

Conversion: exact 2×2 counts (OR) → d

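For each stakes contrast, we use the exact 2×2 counts and compute Cohen’s d via esc::esc_2x2(..., es.type = "d"), which converts the odds ratio (OR) to d with the logit method: $d = \ln(\mathrm{OR}) \cdot \sqrt{3} / \pi$, with sampling variance $v = \left(1/n_{\text{yes,low}} + 1/n_{\text{no,low}} + 1/n_{\text{yes,high}} + 1/n_{\text{no,high}}\right) \cdot 3 / \pi^2$. For example, s1_e1 has OR = 1.5, so d = ln(1.5) · √3 / π ≈ 0.2235.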

Group mapping is explicit so that the sign is unambiguous:

- grp1 = low stakes
- grp2 = high stakes

so positive d means higher knowledge attribution in low-stakes than high-stakes.

# Key of the paper whose effects are computed in this file.
paper_key <- "wundfolkspuristspragmatic"

# Effect table: one row per scenario within each experiment (3 x 3 = 9 rows),
# ordered by experiment and then by scenario (bank, airport, spelling).
effects <- data.frame(
  study_id = rep(c(1, 2, 3), each = 3),
  effect_id = paste0("s", rep(1:3, each = 3), "_e", rep(1:3, times = 3)),
  scenario = rep(c("bank", "airport", "spelling"), times = 3),
  # Source table and page are constant within an experiment.
  table_ref = rep(
    c(
      "camelot_stream_p12_t2.csv",
      "camelot_stream_p18_t5.csv",
      "camelot_stream_p23_t7.csv"
    ),
    each = 3
  ),
  page = rep(c(12, 18, 23), each = 3),
  # Cell counts of the 2x2 table: knowledge attributed (yes) or not (no),
  # under low and high stakes. One line per experiment.
  yes_low = c(
    40, 40, 29,
    36, 35, 22,
    36, 34, 33
  ),
  no_low = c(
    10, 6, 19,
    6, 9, 20,
    12, 8, 31
  ),
  yes_high = c(
    32, 41, 27,
    36, 48, 29,
    47, 57, 25
  ),
  no_high = c(
    12, 4, 12,
    12, 6, 13,
    21, 16, 26
  ),
  stringsAsFactors = FALSE
)

# Convert the 2x2 counts of one stakes contrast into an effect size.
#
# Group 1 is the low-stakes condition and group 2 the high-stakes condition,
# so a positive d means a higher rate of knowledge attribution under low
# stakes than under high stakes.
#
# Args:
#   yes_low:  number of "knows" responses in the low-stakes condition.
#   no_low:   number of "does not know" responses in the low-stakes condition.
#   yes_high: number of "knows" responses in the high-stakes condition.
#   no_high:  number of "does not know" responses in the high-stakes condition.
#
# Returns:
#   A named list with the group sizes (n_low, n_high), the proportions of
#   knowledge attributions (p_low, p_high), the odds ratio of low vs high
#   stakes (odds_ratio), and the effect size d with its variance v as reported
#   by esc::esc_2x2(..., es.type = "d").
#
# Raises:
#   An error if any count is non-numeric, missing, non-finite, or not strictly
#   positive. An empty cell makes the odds ratio and its variance undefined
#   (division by zero); failing loudly keeps Inf/NaN out of the extracted
#   values and leaves the choice of a continuity correction to the analyst.
compute_from_2x2 <- function(yes_low, no_low, yes_high, no_high) {
  cells <- list(
    yes_low = yes_low,
    no_low = no_low,
    yes_high = yes_high,
    no_high = no_high
  )
  for (cell_name in names(cells)) {
    cell <- cells[[cell_name]]
    if (!is.numeric(cell) || anyNA(cell) ||
          any(!is.finite(cell)) || any(cell <= 0)) {
      stop(
        sprintf(
          "`%s` must contain strictly positive, finite counts; the odds ratio is undefined otherwise.",
          cell_name
        ),
        call. = FALSE
      )
    }
  }

  n_low <- yes_low + no_low
  n_high <- yes_high + no_high
  p_low <- yes_low / n_low
  p_high <- yes_high / n_high
  odds_ratio <- (yes_low / no_low) / (yes_high / no_high)

  # grp1 = low stakes, grp2 = high stakes: this ordering fixes the sign of d.
  fit <- esc::esc_2x2(
    grp1yes = yes_low,
    grp1no = no_low,
    grp2yes = yes_high,
    grp2no = no_high,
    es.type = "d"
  )

  list(
    n_low = n_low,
    n_high = n_high,
    p_low = p_low,
    p_high = p_high,
    odds_ratio = odds_ratio,
    d = as.numeric(fit$es),
    v = as.numeric(fit$var)
  )
}

# For every effect, append the computed statistics to its input row.
rows <- lapply(seq_len(nrow(effects)), function(idx) {
  effect_row <- effects[idx, ]
  # The four count columns carry the same names as the function's parameters,
  # so they can be handed over as a named argument list.
  counts <- as.list(effect_row[c("yes_low", "no_low", "yes_high", "no_high")])
  stats <- do.call(compute_from_2x2, counts)
  cbind(effect_row, as.data.frame(stats, stringsAsFactors = FALSE))
})

# Stack the one-row data frames into the final table and display it.
results <- do.call(rbind, rows)
results

  study_id effect_id scenario                 table_ref page yes_low no_low
1        1     s1_e1     bank camelot_stream_p12_t2.csv   12      40     10
2        1     s1_e2  airport camelot_stream_p12_t2.csv   12      40      6
3        1     s1_e3 spelling camelot_stream_p12_t2.csv   12      29     19
4        2     s2_e1     bank camelot_stream_p18_t5.csv   18      36      6
5        2     s2_e2  airport camelot_stream_p18_t5.csv   18      35      9
6        2     s2_e3 spelling camelot_stream_p18_t5.csv   18      22     20
7        3     s3_e1     bank camelot_stream_p23_t7.csv   23      36     12
8        3     s3_e2  airport camelot_stream_p23_t7.csv   23      34      8
9        3     s3_e3 spelling camelot_stream_p23_t7.csv   23      33     31
  yes_high no_high n_low n_high     p_low    p_high odds_ratio           d
1       32      12    50     44 0.8000000 0.7272727  1.5000000  0.22354463
2       41       4    46     45 0.8695652 0.9111111  0.6504065 -0.23715838
3       27      12    48     39 0.6041667 0.6923077  0.6783626 -0.21395606
4       36      12    42     48 0.8571429 0.7500000  2.0000000  0.38215207
5       48       6    44     54 0.7954545 0.8888889  0.4861111 -0.39768349
6       29      13    42     42 0.5238095 0.6904762  0.4931034 -0.38980954
7       47      21    48     68 0.7500000 0.6911765  1.3404255  0.16153227
8       57      16    42     73 0.8095238 0.7808219  1.1929825  0.09728553
9       25      26    64     51 0.5156250 0.4901961  1.1070968  0.05609279
           v
1 0.07282460
2 0.14166431
3 0.06306779
4 0.09287775
5 0.09945157
6 0.06287802
7 0.05471549
8 0.07126596
9 0.04286574

YAML copy/paste snippets

# Emit one copy/paste line per effect. sprintf() is vectorized over the
# columns, and every formatted line already ends in a newline, so the lines
# are concatenated without any separator.
cat(
  sprintf(
    "%s (study %d; %s): d=%.12f v=%.12f (p_low=%.4f, p_high=%.4f; OR=%.6f)\n",
    results$effect_id,
    results$study_id,
    results$scenario,
    results$d,
    results$v,
    results$p_low,
    results$p_high,
    results$odds_ratio
  ),
  sep = ""
)

s1_e1 (study 1; bank): d=0.223544630185 v=0.072824600743 (p_low=0.8000, p_high=0.7273; OR=1.500000)
s1_e2 (study 1; airport): d=-0.237158381010 v=0.141664313471 (p_low=0.8696, p_high=0.9111; OR=0.650407)
s1_e3 (study 1; spelling): d=-0.213956059887 v=0.063067788588 (p_low=0.6042, p_high=0.6923; OR=0.678363)
s2_e1 (study 2; bank): d=0.382152069423 v=0.092877751672 (p_low=0.8571, p_high=0.7500; OR=2.000000)
s2_e2 (study 2; airport): d=-0.397683487904 v=0.099451566563 (p_low=0.7955, p_high=0.8889; OR=0.486111)
s2_e3 (study 2; spelling): d=-0.389809538289 v=0.062878015954 (p_low=0.5238, p_high=0.6905; OR=0.493103)
s3_e1 (study 3; bank): d=0.161532267823 v=0.054715492281 (p_low=0.7500, p_high=0.6912; OR=1.340426)
s3_e2 (study 3; airport): d=0.097285532690 v=0.071265964114 (p_low=0.8095, p_high=0.7808; OR=1.192982)
s3_e3 (study 3; spelling): d=0.056092791816 v=0.042865740373 (p_low=0.5156, p_high=0.4902; OR=1.107097)