chemla2013experimentingcontextualismnat
/data/papers/chemla2013experimentingcontextualismnat/analysis/effect_sizes.qmd
---
title: "Effect size computation: chemla2013experimentingcontextualismnat"
format:
  html:
    toc: true
execute:
  echo: true
  warning: true
  message: false
---

## Overview

This notebook reconstructs the knowledge-vignette effects from the supplementary
`Analyses-SHARE` archive stored under `../out/external/Analyses-SHARE/`.

It follows the original `Analyses.R` recoding and filtering, but computes
scenario-level paired contrasts for the four knowledge scenarios (`K1`-`K4`)
instead of only the aggregate `Type == "K"` result reported in Figure 5.

## Helpers

```{r}
stop_if_missing <- function(x, name) {
  if (is.na(x)) stop(sprintf("Missing required input: %s", name), call. = FALSE)
}

sd_pooled_within <- function(sd_low, sd_high) {
  sqrt((sd_low^2 + sd_high^2) / 2)
}

d_within_smcrp <- function(mean_low, mean_high, sd_low, sd_high) {
  (mean_low - mean_high) / sd_pooled_within(sd_low, sd_high)
}

var_d_within_smcrp <- function(d, r, n_total) {
  (2 * (1 - r) / n_total) + (d^2) * (1 + r^2) / (4 * n_total)
}

compute_effect <- function(df, scenario_num, polarity, effect_id, scenario_label) {
  subset_df <- subset(df, TYPE == "K" & SCENARIO == scenario_num & POS == polarity)
  low <- subset_df[subset_df$HI == 0, c("subject", "Ireponse")]
  high <- subset_df[subset_df$HI == 1, c("subject", "Ireponse")]
  names(low)[2] <- "low"
  names(high)[2] <- "high"
  paired <- merge(low, high, by = "subject")

  if (nrow(paired) == 0) {
    stop(sprintf("No paired rows for scenario %s polarity %s", scenario_num, polarity), call. = FALSE)
  }

  test <- t.test(paired$low, paired$high, paired = TRUE)
  mean_low <- mean(paired$low)
  mean_high <- mean(paired$high)
  sd_low <- sd(paired$low)
  sd_high <- sd(paired$high)
  r_within <- cor(paired$low, paired$high)
  d <- d_within_smcrp(mean_low, mean_high, sd_low, sd_high)
  v <- var_d_within_smcrp(d = d, r = r_within, n_total = nrow(paired))

  data.frame(
    effect_id = effect_id,
    scenario_num = scenario_num,
    scenario_label = scenario_label,
    polarity = ifelse(polarity == 1, "positive", "negative"),
    n_total = nrow(paired),
    mean_low = mean_low,
    sd_low = sd_low,
    mean_high = mean_high,
    sd_high = sd_high,
    r_within = r_within,
    t_value = as.numeric(test$statistic),
    df = as.numeric(test$parameter),
    p_value = as.numeric(test$p.value),
    d = d,
    v = v
  )
}
```

## Load and recode archive data

```{r}
archive_dir <- normalizePath("../out/external/Analyses-SHARE", mustWork = FALSE)
if (!dir.exists(archive_dir)) {
  stop("Missing extracted archive directory: ../out/external/Analyses-SHARE", call. = FALSE)
}

DDDpro <- read.table(file.path(archive_dir, "raw-results-pro.txt"), quote = "", header = TRUE)
DDDpro$PROBIAS <- 1

DDDanti <- read.table(file.path(archive_dir, "raw-results-anti.txt"), quote = "", header = TRUE)
DDDanti$PROBIAS <- 0

DDD <- rbind(DDDpro, DDDanti)
DDD$subject <- factor(DDD$subject)

DDD$TYPE <- sub("(.*)==(.*)==(.*)==(.*)", "\\1", DDD$Gname)
DDD$SCENARIO <- as.integer(sub("(.*)==(.*)==(.*)==(.*)", "\\2", DDD$Gname))
DDD$POS <- as.integer(sub("(.*)==(.*)==(.*)==(.*)", "\\3", DDD$Gname))
DDD$HI <- as.integer(sub("(.*)==(.*)==(.*)==(.*)", "\\4", DDD$Gname))

# Preserve the original script's special-case fixes.
DDD$TYPE[DDD$SCENARIO == 5 & DDD$PRO == 1] <- "KB+"
DDD$TYPE[DDD$SCENARIO == 5 & DDD$PRO == 0] <- "KB-"
DDD$HI[DDD$SCENARIO == 5 & DDD$Iname == 0] <- 1
DDD$HI[DDD$SCENARIO == 5 & DDD$Iname == 1] <- 0
DDD$POS[DDD$SCENARIO == 5] <- 1

TEMP <- DDD$POS == DDD$HI
DDD$POS[DDD$SCENARIO == 1 & DDD$TYPE == "M" & TEMP] <- 1
DDD$POS[DDD$SCENARIO == 1 & DDD$TYPE == "M" & !TEMP] <- 0

DD <- subset(
  DDD,
  Ireponse > -1 &
    !(TYPE %in% c("E")) &
    Qreponse_3_languages %in% c("American_E", "Other_Engl")
)

# Express the response scale in the same 0-100 percentage units used in the paper.
DD$Ireponse <- 100 * DD$Ireponse

summary_df <- data.frame(
  analyzed_subjects = length(unique(DD$subject)),
  analyzed_rows = nrow(DD),
  english_subjects = length(unique(DD$subject))
)
summary_df
```

## Vignette-level knowledge effects

```{r}
effect_specs <- data.frame(
  scenario_num = c(1, 1, 2, 2, 3, 3, 4, 4),
  polarity = c(1, 0, 1, 0, 1, 0, 1, 0),
  effect_id = c("s1_e1", "s1_e2", "s1_e3", "s1_e4", "s1_e5", "s1_e6", "s1_e7", "s1_e8"),
  scenario_label = c(
    "Bank",
    "Bank",
    "Truck/bridge",
    "Truck/bridge",
    "Train",
    "Train",
    "Spelling/typos",
    "Spelling/typos"
  ),
  stringsAsFactors = FALSE
)

audit_rows <- do.call(
  rbind,
  lapply(
    seq_len(nrow(effect_specs)),
    function(i) {
      row <- effect_specs[i, ]
      compute_effect(
        df = DD,
        scenario_num = row$scenario_num,
        polarity = row$polarity,
        effect_id = row$effect_id,
        scenario_label = row$scenario_label
      )
    }
  )
)

audit_rows
```

## Paste-ready `effect_size` YAML snippets

```{r}
make_yaml_snippet <- function(row) {
  sprintf(
    paste(
      "effect_size:",
      "  metric: SMD",
      "  d: %.12f",
      "  v: %.12f",
      "  computed_from: groups",
      "  needs_review: false",
      "  notes: \"Computed from Analyses-SHARE raw data after reproducing the original Analyses.R recoding/filtering; within-subject d uses the project's SMCRP convention with r recovered from the paired contrast.\"",
      sep = "\n"
    ),
    row$d,
    row$v
  )
}

cat(
  paste(
    vapply(
      seq_len(nrow(audit_rows)),
      function(i) make_yaml_snippet(audit_rows[i, ]),
      character(1)
    ),
    collapse = "\n\n"
  )
)
```