chemla2013experimentingcontextualismnat
/data/papers/chemla2013experimentingcontextualismnat/analysis/effect_sizes.qmd---
title: "Effect size computation: chemla2013experimentingcontextualismnat"
format:
html:
toc: true
execute:
echo: true
warning: true
message: false
---
## Overview
This notebook reconstructs the knowledge-vignette effects from the supplementary
`Analyses-SHARE` archive stored under `../out/external/Analyses-SHARE/`.
It follows the original `Analyses.R` recoding and filtering, but computes
scenario-level paired contrasts for the four knowledge scenarios (`K1`-`K4`)
instead of only the aggregate `Type == "K"` result reported in Figure 5.
## Helpers
```{r}
stop_if_missing <- function(x, name) {
if (is.na(x)) stop(sprintf("Missing required input: %s", name), call. = FALSE)
}
sd_pooled_within <- function(sd_low, sd_high) {
sqrt((sd_low^2 + sd_high^2) / 2)
}
d_within_smcrp <- function(mean_low, mean_high, sd_low, sd_high) {
(mean_low - mean_high) / sd_pooled_within(sd_low, sd_high)
}
var_d_within_smcrp <- function(d, r, n_total) {
(2 * (1 - r) / n_total) + (d^2) * (1 + r^2) / (4 * n_total)
}
compute_effect <- function(df, scenario_num, polarity, effect_id, scenario_label) {
subset_df <- subset(df, TYPE == "K" & SCENARIO == scenario_num & POS == polarity)
low <- subset_df[subset_df$HI == 0, c("subject", "Ireponse")]
high <- subset_df[subset_df$HI == 1, c("subject", "Ireponse")]
names(low)[2] <- "low"
names(high)[2] <- "high"
paired <- merge(low, high, by = "subject")
if (nrow(paired) == 0) {
stop(sprintf("No paired rows for scenario %s polarity %s", scenario_num, polarity), call. = FALSE)
}
test <- t.test(paired$low, paired$high, paired = TRUE)
mean_low <- mean(paired$low)
mean_high <- mean(paired$high)
sd_low <- sd(paired$low)
sd_high <- sd(paired$high)
r_within <- cor(paired$low, paired$high)
d <- d_within_smcrp(mean_low, mean_high, sd_low, sd_high)
v <- var_d_within_smcrp(d = d, r = r_within, n_total = nrow(paired))
data.frame(
effect_id = effect_id,
scenario_num = scenario_num,
scenario_label = scenario_label,
polarity = ifelse(polarity == 1, "positive", "negative"),
n_total = nrow(paired),
mean_low = mean_low,
sd_low = sd_low,
mean_high = mean_high,
sd_high = sd_high,
r_within = r_within,
t_value = as.numeric(test$statistic),
df = as.numeric(test$parameter),
p_value = as.numeric(test$p.value),
d = d,
v = v
)
}
```
## Load and recode archive data
```{r}
archive_dir <- normalizePath("../out/external/Analyses-SHARE", mustWork = FALSE)
if (!dir.exists(archive_dir)) {
stop("Missing extracted archive directory: ../out/external/Analyses-SHARE", call. = FALSE)
}
DDDpro <- read.table(file.path(archive_dir, "raw-results-pro.txt"), quote = "", header = TRUE)
DDDpro$PROBIAS <- 1
DDDanti <- read.table(file.path(archive_dir, "raw-results-anti.txt"), quote = "", header = TRUE)
DDDanti$PROBIAS <- 0
DDD <- rbind(DDDpro, DDDanti)
DDD$subject <- factor(DDD$subject)
DDD$TYPE <- sub("(.*)==(.*)==(.*)==(.*)", "\\1", DDD$Gname)
DDD$SCENARIO <- as.integer(sub("(.*)==(.*)==(.*)==(.*)", "\\2", DDD$Gname))
DDD$POS <- as.integer(sub("(.*)==(.*)==(.*)==(.*)", "\\3", DDD$Gname))
DDD$HI <- as.integer(sub("(.*)==(.*)==(.*)==(.*)", "\\4", DDD$Gname))
# Preserve the original script's special-case fixes.
DDD$TYPE[DDD$SCENARIO == 5 & DDD$PRO == 1] <- "KB+"
DDD$TYPE[DDD$SCENARIO == 5 & DDD$PRO == 0] <- "KB-"
DDD$HI[DDD$SCENARIO == 5 & DDD$Iname == 0] <- 1
DDD$HI[DDD$SCENARIO == 5 & DDD$Iname == 1] <- 0
DDD$POS[DDD$SCENARIO == 5] <- 1
TEMP <- DDD$POS == DDD$HI
DDD$POS[DDD$SCENARIO == 1 & DDD$TYPE == "M" & TEMP] <- 1
DDD$POS[DDD$SCENARIO == 1 & DDD$TYPE == "M" & !TEMP] <- 0
DD <- subset(
DDD,
Ireponse > -1 &
!(TYPE %in% c("E")) &
Qreponse_3_languages %in% c("American_E", "Other_Engl")
)
# Express the response scale in the same 0-100 percentage units used in the paper.
DD$Ireponse <- 100 * DD$Ireponse
summary_df <- data.frame(
analyzed_subjects = length(unique(DD$subject)),
analyzed_rows = nrow(DD),
english_subjects = length(unique(DD$subject))
)
summary_df
```
## Vignette-level knowledge effects
```{r}
effect_specs <- data.frame(
scenario_num = c(1, 1, 2, 2, 3, 3, 4, 4),
polarity = c(1, 0, 1, 0, 1, 0, 1, 0),
effect_id = c("s1_e1", "s1_e2", "s1_e3", "s1_e4", "s1_e5", "s1_e6", "s1_e7", "s1_e8"),
scenario_label = c(
"Bank",
"Bank",
"Truck/bridge",
"Truck/bridge",
"Train",
"Train",
"Spelling/typos",
"Spelling/typos"
),
stringsAsFactors = FALSE
)
audit_rows <- do.call(
rbind,
lapply(
seq_len(nrow(effect_specs)),
function(i) {
row <- effect_specs[i, ]
compute_effect(
df = DD,
scenario_num = row$scenario_num,
polarity = row$polarity,
effect_id = row$effect_id,
scenario_label = row$scenario_label
)
}
)
)
audit_rows
```
## Paste-ready `effect_size` YAML snippets
```{r}
make_yaml_snippet <- function(row) {
sprintf(
paste(
"effect_size:",
" metric: SMD",
" d: %.12f",
" v: %.12f",
" computed_from: groups",
" needs_review: false",
" notes: \"Computed from Analyses-SHARE raw data after reproducing the original Analyses.R recoding/filtering; within-subject d uses the project's SMCRP convention with r recovered from the paired contrast.\"",
sep = "\n"
),
row$d,
row$v
)
}
cat(
paste(
vapply(
seq_len(nrow(audit_rows)),
function(i) make_yaml_snippet(audit_rows[i, ]),
character(1)
),
collapse = "\n\n"
)
)
```