Effect size computation: chemla2013experimentingcontextualismnat

Overview

This notebook reconstructs the knowledge-vignette effects from the supplementary Analyses-SHARE archive stored under ../out/external/Analyses-SHARE/.

It follows the original Analyses.R recoding and filtering, but computes scenario-level paired contrasts for the four knowledge scenarios (K1-K4) instead of only the aggregate Type == "K" result reported in Figure 5.

Helpers

stop_if_missing <- function(x, name) {
  if (is.na(x)) stop(sprintf("Missing required input: %s", name), call. = FALSE)
}

sd_pooled_within <- function(sd_low, sd_high) {
  sqrt((sd_low^2 + sd_high^2) / 2)
}

d_within_smcrp <- function(mean_low, mean_high, sd_low, sd_high) {
  (mean_low - mean_high) / sd_pooled_within(sd_low, sd_high)
}

var_d_within_smcrp <- function(d, r, n_total) {
  (2 * (1 - r) / n_total) + (d^2) * (1 + r^2) / (4 * n_total)
}

compute_effect <- function(df, scenario_num, polarity, effect_id, scenario_label) {
  subset_df <- subset(df, TYPE == "K" & SCENARIO == scenario_num & POS == polarity)
  low <- subset_df[subset_df$HI == 0, c("subject", "Ireponse")]
  high <- subset_df[subset_df$HI == 1, c("subject", "Ireponse")]
  names(low)[2] <- "low"
  names(high)[2] <- "high"
  paired <- merge(low, high, by = "subject")

  if (nrow(paired) == 0) {
    stop(sprintf("No paired rows for scenario %s polarity %s", scenario_num, polarity), call. = FALSE)
  }

  test <- t.test(paired$low, paired$high, paired = TRUE)
  mean_low <- mean(paired$low)
  mean_high <- mean(paired$high)
  sd_low <- sd(paired$low)
  sd_high <- sd(paired$high)
  r_within <- cor(paired$low, paired$high)
  d <- d_within_smcrp(mean_low, mean_high, sd_low, sd_high)
  v <- var_d_within_smcrp(d = d, r = r_within, n_total = nrow(paired))

  data.frame(
    effect_id = effect_id,
    scenario_num = scenario_num,
    scenario_label = scenario_label,
    polarity = ifelse(polarity == 1, "positive", "negative"),
    n_total = nrow(paired),
    mean_low = mean_low,
    sd_low = sd_low,
    mean_high = mean_high,
    sd_high = sd_high,
    r_within = r_within,
    t_value = as.numeric(test$statistic),
    df = as.numeric(test$parameter),
    p_value = as.numeric(test$p.value),
    d = d,
    v = v
  )
}

Load and recode archive data

archive_dir <- normalizePath("../out/external/Analyses-SHARE", mustWork = FALSE)
if (!dir.exists(archive_dir)) {
  stop("Missing extracted archive directory: ../out/external/Analyses-SHARE", call. = FALSE)
}

DDDpro <- read.table(file.path(archive_dir, "raw-results-pro.txt"), quote = "", header = TRUE)
DDDpro$PROBIAS <- 1

DDDanti <- read.table(file.path(archive_dir, "raw-results-anti.txt"), quote = "", header = TRUE)
DDDanti$PROBIAS <- 0

DDD <- rbind(DDDpro, DDDanti)
DDD$subject <- factor(DDD$subject)

DDD$TYPE <- sub("(.*)==(.*)==(.*)==(.*)", "\\1", DDD$Gname)
DDD$SCENARIO <- as.integer(sub("(.*)==(.*)==(.*)==(.*)", "\\2", DDD$Gname))
DDD$POS <- as.integer(sub("(.*)==(.*)==(.*)==(.*)", "\\3", DDD$Gname))
DDD$HI <- as.integer(sub("(.*)==(.*)==(.*)==(.*)", "\\4", DDD$Gname))

# Preserve the original script's special-case fixes.
DDD$TYPE[DDD$SCENARIO == 5 & DDD$PRO == 1] <- "KB+"
DDD$TYPE[DDD$SCENARIO == 5 & DDD$PRO == 0] <- "KB-"
DDD$HI[DDD$SCENARIO == 5 & DDD$Iname == 0] <- 1
DDD$HI[DDD$SCENARIO == 5 & DDD$Iname == 1] <- 0
DDD$POS[DDD$SCENARIO == 5] <- 1

TEMP <- DDD$POS == DDD$HI
DDD$POS[DDD$SCENARIO == 1 & DDD$TYPE == "M" & TEMP] <- 1
DDD$POS[DDD$SCENARIO == 1 & DDD$TYPE == "M" & !TEMP] <- 0

DD <- subset(
  DDD,
  Ireponse > -1 &
    !(TYPE %in% c("E")) &
    Qreponse_3_languages %in% c("American_E", "Other_Engl")
)

# Express the response scale in the same 0-100 percentage units used in the paper.
DD$Ireponse <- 100 * DD$Ireponse

summary_df <- data.frame(
  analyzed_subjects = length(unique(DD$subject)),
  analyzed_rows = nrow(DD),
  english_subjects = length(unique(DD$subject))
)
summary_df
  analyzed_subjects analyzed_rows english_subjects
1                39          1778               39

Vignette-level knowledge effects

effect_specs <- data.frame(
  scenario_num = c(1, 1, 2, 2, 3, 3, 4, 4),
  polarity = c(1, 0, 1, 0, 1, 0, 1, 0),
  effect_id = c("s1_e1", "s1_e2", "s1_e3", "s1_e4", "s1_e5", "s1_e6", "s1_e7", "s1_e8"),
  scenario_label = c(
    "Bank",
    "Bank",
    "Truck/bridge",
    "Truck/bridge",
    "Train",
    "Train",
    "Spelling/typos",
    "Spelling/typos"
  ),
  stringsAsFactors = FALSE
)

audit_rows <- do.call(
  rbind,
  lapply(
    seq_len(nrow(effect_specs)),
    function(i) {
      row <- effect_specs[i, ]
      compute_effect(
        df = DD,
        scenario_num = row$scenario_num,
        polarity = row$polarity,
        effect_id = row$effect_id,
        scenario_label = row$scenario_label
      )
    }
  )
)

audit_rows
  effect_id scenario_num scenario_label polarity n_total mean_low   sd_low
1     s1_e1            1           Bank positive      39 72.22051 23.01609
2     s1_e2            1           Bank negative      38 73.52105 24.55619
3     s1_e3            2   Truck/bridge positive      38 67.67895 24.62816
4     s1_e4            2   Truck/bridge negative      38 67.96842 30.45921
5     s1_e5            3          Train positive      39 79.81026 23.96111
6     s1_e6            3          Train negative      38 64.50000 28.22074
7     s1_e7            4 Spelling/typos positive      39 76.22564 25.61522
8     s1_e8            4 Spelling/typos negative      37 67.74054 29.73490
  mean_high  sd_high  r_within    t_value df     p_value          d          v
1  66.80513 25.19192 0.4568915  1.3425509 38 0.187383089  0.2244389 0.02824202
2  77.43684 21.06176 0.6602117 -1.2657503 37 0.213514895 -0.1711761 0.01816039
3  54.68421 29.60784 0.4627485  2.8175410 37 0.007721187  0.4771852 0.03009525
4  73.56842 23.14862 0.3341793 -1.0958297 37 0.280235059 -0.2070087 0.03535660
5  73.81538 20.72626 0.3957942  1.5150856 38 0.138025439  0.2676026 0.03151587
6  71.03158 22.91330 0.3170715 -1.3337111 37 0.190451026 -0.2541038 0.03641110
7  67.23590 30.10154 0.6280576  2.3040622 38 0.026780188  0.3216534 0.01999879
8  71.50270 30.10598 0.4828547 -0.7520127 36 0.456932319 -0.1257364 0.02808553

Paste-ready effect_size YAML snippets

make_yaml_snippet <- function(row) {
  sprintf(
    paste(
      "effect_size:",
      "  metric: SMD",
      "  d: %.12f",
      "  v: %.12f",
      "  computed_from: groups",
      "  needs_review: false",
      "  notes: \"Computed from Analyses-SHARE raw data after reproducing the original Analyses.R recoding/filtering; within-subject d uses the project's SMCRP convention with r recovered from the paired contrast.\"",
      sep = "\n"
    ),
    row$d,
    row$v
  )
}

cat(
  paste(
    vapply(
      seq_len(nrow(audit_rows)),
      function(i) make_yaml_snippet(audit_rows[i, ]),
      character(1)
    ),
    collapse = "\n\n"
  )
)
effect_size:
  metric: SMD
  d: 0.224438935137
  v: 0.028242024805
  computed_from: groups
  needs_review: false
  notes: "Computed from Analyses-SHARE raw data after reproducing the original Analyses.R recoding/filtering; within-subject d uses the project's SMCRP convention with r recovered from the paired contrast."

effect_size:
  metric: SMD
  d: -0.171176096003
  v: 0.018160389148
  computed_from: groups
  needs_review: false
  notes: "Computed from Analyses-SHARE raw data after reproducing the original Analyses.R recoding/filtering; within-subject d uses the project's SMCRP convention with r recovered from the paired contrast."

effect_size:
  metric: SMD
  d: 0.477185244546
  v: 0.030095248511
  computed_from: groups
  needs_review: false
  notes: "Computed from Analyses-SHARE raw data after reproducing the original Analyses.R recoding/filtering; within-subject d uses the project's SMCRP convention with r recovered from the paired contrast."

effect_size:
  metric: SMD
  d: -0.207008700785
  v: 0.035356602312
  computed_from: groups
  needs_review: false
  notes: "Computed from Analyses-SHARE raw data after reproducing the original Analyses.R recoding/filtering; within-subject d uses the project's SMCRP convention with r recovered from the paired contrast."

effect_size:
  metric: SMD
  d: 0.267602569194
  v: 0.031515870642
  computed_from: groups
  needs_review: false
  notes: "Computed from Analyses-SHARE raw data after reproducing the original Analyses.R recoding/filtering; within-subject d uses the project's SMCRP convention with r recovered from the paired contrast."

effect_size:
  metric: SMD
  d: -0.254103783945
  v: 0.036411104162
  computed_from: groups
  needs_review: false
  notes: "Computed from Analyses-SHARE raw data after reproducing the original Analyses.R recoding/filtering; within-subject d uses the project's SMCRP convention with r recovered from the paired contrast."

effect_size:
  metric: SMD
  d: 0.321653379122
  v: 0.019998789289
  computed_from: groups
  needs_review: false
  notes: "Computed from Analyses-SHARE raw data after reproducing the original Analyses.R recoding/filtering; within-subject d uses the project's SMCRP convention with r recovered from the paired contrast."

effect_size:
  metric: SMD
  d: -0.125736443703
  v: 0.028085528124
  computed_from: groups
  needs_review: false
  notes: "Computed from Analyses-SHARE raw data after reproducing the original Analyses.R recoding/filtering; within-subject d uses the project's SMCRP convention with r recovered from the paired contrast."