francis2019stakesscalesskepticism
/data/papers/francis2019stakesscalesskepticism/analysis/effect_sizes.qmd---
title: "Effect size computation: francis2019stakesscalesskepticism"
format:
html:
toc: true
execute:
echo: true
warning: true
message: false
---
This document is the source of truth for raw-data effect-size recovery in
`papers/francis2019stakesscalesskepticism/francis2019stakesscalesskepticism.yaml`.
It follows the project template in `docs/quarto_effect_template.qmd`:
- Sign convention: `d = mean(low) - mean(high)`.
- Within-subject scalar studies use the template's `within_smcrp_r` method.
- The registered replication uses the template's `between_groups` method.
- Study 1 negative-polarity evidence-fixed prompts are reverse-coded to
knowledge-attribution direction (`8 - raw agreement with "doesn't know"`)
before computing the low-high contrast.
- For evidence-seeking outcomes, `never`, blank, and non-positive values are
excluded from the continuous contrast. Extreme values are removed using the
authors' documented log-MAD rule as closely as the data documentation permits.
- The audit retains the raw low-minus-high `d` for YAML. Downstream
meta-analysis code reverses evidence-seeking effects programmatically.
The final audit table is written to `analysis/effect_sizes_raw_data.csv` and is
used to update the YAML.
## Setup
```{r}
paper_key <- "francis2019stakesscalesskepticism"
sign_convention <- "d = mean(low) - mean(high)"
if (!requireNamespace("esc", quietly = TRUE)) {
stop("Package 'esc' is required for this template.", call. = FALSE)
}
suppressPackageStartupMessages(library(esc))
candidate_paper_dirs <- c(
file.path("papers", paper_key),
"..",
"."
)
paper_dir <- candidate_paper_dirs[
vapply(
candidate_paper_dirs,
function(x) file.exists(file.path(x, "out", "external")),
logical(1)
)
][1]
if (is.na(paper_dir)) stop("Could not locate paper directory.", call. = FALSE)
paper_dir <- normalizePath(paper_dir, mustWork = TRUE)
external_dir <- file.path(paper_dir, "out", "external")
analysis_dir <- file.path(paper_dir, "analysis")
audit_csv <- file.path(analysis_dir, "effect_sizes_raw_data.csv")
```
## Template Methods
The functions below are copied from the project effect-size template, with a
thin wrapper for batch computation from raw CSV files.
```{r}
hedges_correction <- function(df) {
ifelse(df <= 1, NA_real_, exp(lgamma(df / 2) - log(sqrt(df / 2)) - lgamma((df - 1) / 2)))
}
sd_pooled_within <- function(sd_low, sd_high) {
sqrt((sd_low^2 + sd_high^2) / 2)
}
d_within_smcrp <- function(mean_low, mean_high, sd_low, sd_high) {
(mean_low - mean_high) / sd_pooled_within(sd_low, sd_high)
}
var_d_within_smcrp <- function(d, r, n_total) {
(2 * (1 - r) / n_total) + (d^2) * (1 + r^2) / (4 * n_total)
}
extract_esc <- function(x) {
list(d = as.numeric(x$es), v = as.numeric(x$var))
}
compute_with_esc <- function(fun, ...) {
d_obj <- fun(..., es.type = "d")
g_obj <- fun(..., es.type = "g")
d_out <- extract_esc(d_obj)
g_out <- extract_esc(g_obj)
list(d = d_out$d, v = d_out$v, g = g_out$d, v_g = g_out$v)
}
compute_within_smcrp_r <- function(mean_low, mean_high, sd_low, sd_high, n_total, r_within) {
d <- d_within_smcrp(mean_low, mean_high, sd_low, sd_high)
v <- var_d_within_smcrp(d = d, r = r_within, n_total = n_total)
df_used <- 2 * (n_total - 1) / (1 + r_within^2)
J <- hedges_correction(df_used)
g <- J * d
v_g <- (2 * (1 - r_within) / n_total) + (g^2) * (1 + r_within^2) / (4 * n_total)
list(d = d, v = v, g = g, v_g = v_g)
}
compute_between_groups <- function(mean_low, mean_high, sd_low, sd_high, n_low, n_high) {
res <- compute_with_esc(
esc::esc_mean_sd,
grp1m = mean_low,
grp1sd = sd_low,
grp1n = n_low,
grp2m = mean_high,
grp2sd = sd_high,
grp2n = n_high
)
list(d = res$d, v = res$v, g = res$g, v_g = res$v_g)
}
```
## Data Helpers
```{r}
trim_names <- function(x) {
out <- trimws(x)
make.unique(out, sep = ".")
}
read_qualtrics_csv <- function(path, skip = 0) {
dat <- read.csv(
path,
check.names = FALSE,
stringsAsFactors = FALSE,
fill = TRUE,
skip = skip
)
names(dat) <- trim_names(names(dat))
finished_col <- names(dat)[tolower(gsub(" ", "", names(dat))) == "finished"]
if (length(finished_col) > 0) {
dat <- dat[as.character(dat[[finished_col[1]]]) == "TRUE", , drop = FALSE]
}
dat
}
read_replication_csv <- function(path) {
raw <- read.csv(
path,
header = FALSE,
check.names = FALSE,
stringsAsFactors = FALSE,
fill = TRUE
)
condition <- trimws(as.character(raw[1, ]))
variable <- trimws(as.character(raw[2, ]))
active_condition <- ""
for (i in seq_along(condition)) {
if (!is.na(condition[i]) && condition[i] != "") {
active_condition <- condition[i]
} else {
condition[i] <- active_condition
}
}
names_out <- ifelse(
condition %in% c("Basic Low", "Basic High", "Implicit Low", "Explicit High", "Ignorant Low", "Ignorant High"),
paste(condition, variable, sep = "__"),
ifelse(variable != "", variable, condition)
)
names_out[names_out == ""] <- paste0("unnamed_", which(names_out == ""))
dat <- raw[-c(1, 2), , drop = FALSE]
names(dat) <- trim_names(names_out)
finished_col <- names(dat)[tolower(gsub(" ", "", names(dat))) == "finished"]
if (length(finished_col) > 0) {
dat <- dat[as.character(dat[[finished_col[1]]]) == "TRUE", , drop = FALSE]
}
dat
}
parse_numeric_response <- function(x) {
y <- trimws(as.character(x))
y[y == ""] <- NA_character_
y[grepl("never", y, ignore.case = TRUE)] <- NA_character_
match <- regexpr("-?[0-9]+(\\.[0-9]+)?", y, perl = TRUE)
out <- rep(NA_real_, length(y))
hit <- !is.na(y) & match > 0
out[hit] <- as.numeric(regmatches(y[hit], regexpr("-?[0-9]+(\\.[0-9]+)?", y[hit], perl = TRUE)))
out
}
log_mad_bounds <- function(values) {
values <- values[!is.na(values) & values > 0]
if (length(values) < 3) return(c(lower = -Inf, upper = Inf))
logged <- log(values)
center <- median(logged, na.rm = TRUE)
mad_value <- mad(logged, center = center, constant = 1, na.rm = TRUE)
if (is.na(mad_value) || mad_value == 0) return(c(lower = -Inf, upper = Inf))
c(lower = center - 2.5 * mad_value, upper = center + 2.5 * mad_value)
}
apply_log_mad <- function(values, bounds) {
out <- values
bad <- !is.na(out) & out > 0 & (log(out) < bounds[["lower"]] | log(out) > bounds[["upper"]])
out[bad] <- NA_real_
out
}
summarise_within_pair <- function(
dat,
low_col,
high_col,
all_stake_cols,
outcome_type,
score_transform = identity,
transform_note = NULL
) {
low <- score_transform(parse_numeric_response(dat[[low_col]]))
high <- score_transform(parse_numeric_response(dat[[high_col]]))
n_finished <- nrow(dat)
n_pair_raw <- sum(!is.na(low) & !is.na(high))
cleaning <- "complete low-high pairs from post-removal CSV"
if (!is.null(transform_note)) {
cleaning <- paste(cleaning, transform_note, sep = "; ")
}
if (outcome_type == "evidence_seeking") {
low[low <= 0] <- NA_real_
high[high <= 0] <- NA_real_
all_values <- unlist(lapply(all_stake_cols, function(col) parse_numeric_response(dat[[col]])), use.names = FALSE)
all_values[all_values <= 0] <- NA_real_
bounds <- log_mad_bounds(all_values)
low <- apply_log_mad(low, bounds)
high <- apply_log_mad(high, bounds)
cleaning <- paste(
"complete low-high pairs; 'never'/blank/non-positive excluded;",
"log-MAD outlier removal over the scenario's four stakes cells"
)
}
keep <- !is.na(low) & !is.na(high)
low <- low[keep]
high <- high[keep]
n_total <- length(low)
if (n_total < 3) stop(sprintf("Too few complete pairs for %s vs %s", low_col, high_col), call. = FALSE)
mean_low <- mean(low)
mean_high <- mean(high)
sd_low <- sd(low)
sd_high <- sd(high)
r_within <- suppressWarnings(cor(low, high))
res <- compute_within_smcrp_r(
mean_low = mean_low,
mean_high = mean_high,
sd_low = sd_low,
sd_high = sd_high,
n_total = n_total,
r_within = r_within
)
data.frame(
design = "Within-Subjects",
method_used = "within_smcrp_r",
computed_from_suggested = "groups",
n_finished = n_finished,
n_pair_raw = n_pair_raw,
n_low = n_total,
n_high = n_total,
n_total = n_total,
mean_low = mean_low,
mean_high = mean_high,
sd_low = sd_low,
sd_high = sd_high,
r_within = r_within,
d = res$d,
v = res$v,
g = res$g,
v_g = res$v_g,
cleaning = cleaning,
stringsAsFactors = FALSE
)
}
summarise_between_groups <- function(dat, low_col, high_col, transform = identity) {
low <- transform(parse_numeric_response(dat[[low_col]]))
high <- transform(parse_numeric_response(dat[[high_col]]))
low <- low[!is.na(low)]
high <- high[!is.na(high)]
n_low <- length(low)
n_high <- length(high)
if (n_low < 3 || n_high < 3) stop(sprintf("Too few between-group observations for %s vs %s", low_col, high_col), call. = FALSE)
mean_low <- mean(low)
mean_high <- mean(high)
sd_low <- sd(low)
sd_high <- sd(high)
res <- compute_between_groups(
mean_low = mean_low,
mean_high = mean_high,
sd_low = sd_low,
sd_high = sd_high,
n_low = n_low,
n_high = n_high
)
data.frame(
design = "Between-Subjects",
method_used = "between_groups",
computed_from_suggested = "groups",
n_finished = nrow(dat),
n_pair_raw = NA_integer_,
n_low = n_low,
n_high = n_high,
n_total = n_low + n_high,
mean_low = mean_low,
mean_high = mean_high,
sd_low = sd_low,
sd_high = sd_high,
r_within = NA_real_,
d = res$d,
v = res$v,
g = res$g,
v_g = res$v_g,
cleaning = "available low and high between-participant responses from post-removal CSV",
stringsAsFactors = FALSE
)
}
```
## Scalar Experiments
```{r}
scenario_labels <- c(
paramedic = "Paramedic",
vaccine = "Vaccine",
mountaineering = "Mountaineering",
game_show = "Game show",
introduction = "Introduction",
possessions = "Possessions/Arson"
)
full_map <- list(
paramedic = c(low = "Paramedic Low", s2 = "Paramedic 1", s3 = "Paramedic 2", high = "Paramedic 3"),
vaccine = c(low = "Vaccine Low", s2 = "Vaccine 1", s3 = "Vaccine 2", high = "Vaccine 3"),
mountaineering = c(low = "Mountaineering Low", s2 = "Mountaineering 1", s3 = "Mountaineering 2", high = "Mountaineering 3"),
game_show = c(low = "GameShow low", s2 = "GameShow 1", s3 = "GameShow 2", high = "GameShow 3"),
introduction = c(low = "Intro Low", s2 = "Intro 1", s3 = "Intro 2", high = "Intro 3"),
possessions = c(low = "Personal Val Low", s2 = "Personal Val 1", s3 = "Personal Val 2", high = "Personal Val 3")
)
exp2_neg_map <- list(
paramedic = c(low = "Paramedic Low", s2 = "Paramedic 1", s3 = "Paramedic 2", high = "Paramedic 3"),
vaccine = c(low = "Vaccine Low", s2 = "Vaccine 1", s3 = "Vaccine 2", high = "Vaccine 3"),
mountaineering = c(low = "Mountaineering Low", s2 = "Mountaineering 1", s3 = "Mountaineering 2", high = "Mountaineering 3"),
game_show = c(low = "Game Show Low", s2 = "Game Show 1", s3 = "Game Show 2", high = "Game Show 3"),
introduction = c(low = "Introductions Low", s2 = "Intro 1", s3 = "Intro 2", high = "Intro 3"),
possessions = c(low = "Pvalue Low", s2 = "Pvalue 1", s3 = "Pvalue 2", high = "Pvalue 3")
)
abbr_map <- list(
paramedic = c(low = "Para Low", s2 = "Para 1", s3 = "Para 2", high = "Para 3"),
vaccine = c(low = "Vacc Low", s2 = "Vacc 1", s3 = "Vacc 2", high = "Vacc 3"),
mountaineering = c(low = "Mount Low", s2 = "Mount 1", s3 = "Mount 2", high = "Mount 3"),
game_show = c(low = "Game Low", s2 = "Game 1", s3 = "Game 2", high = "Game 3"),
introduction = c(low = "Intro Low", s2 = "Intro 1", s3 = "Intro 2", high = "Intro 3"),
possessions = c(low = "Pval Low", s2 = "Pval 1", s3 = "Pval 2", high = "Pval 3")
)
scalar_sets <- list(
list(
study_id = 1,
experiment = "Experiment 1: Evidence-fixed design",
outcome_type = "evidence_fixed",
pos_file = file.path(external_dir, "Exp1_Evidence_Fixed_Design", "x1_EF_Pos_data_post_removal.csv"),
neg_file = file.path(external_dir, "Exp1_Evidence_Fixed_Design", "x1_EF_Neg_data_post_removal.csv"),
pos_map = full_map,
neg_map = full_map
),
list(
study_id = 3,
experiment = "Experiment 2: Evidence-seeking design",
outcome_type = "evidence_seeking",
pos_file = file.path(external_dir, "Exp2_Evidence_Seeking_Design", "x2_ES_Pos_data_post_removal.csv"),
neg_file = file.path(external_dir, "Exp2_Evidence_Seeking_Design", "x2_ES_Neg_data_post_removal.csv"),
pos_map = full_map,
neg_map = exp2_neg_map
),
list(
study_id = 4,
experiment = "Appendix IV: Symmetrical evidence-seeking experiment",
outcome_type = "evidence_seeking",
pos_file = file.path(external_dir, "Exp2.1_Symmetrical_Experiment", "x2.1_Sym_Pos_data_post_removal.csv"),
neg_file = file.path(external_dir, "Exp2.1_Symmetrical_Experiment", "x2.1_Sym_Neg_data_post_removal.csv"),
pos_map = abbr_map,
neg_map = abbr_map
),
list(
study_id = 5,
experiment = "Appendix IV: Matched evidence-seeking experiment",
outcome_type = "evidence_seeking",
pos_file = file.path(external_dir, "Exp2.2_Matched_Experiment", "x2.2_Matc_Pos_data_post_removal.csv"),
neg_file = file.path(external_dir, "Exp2.2_Matched_Experiment", "x2.2_Matc_Neg_data_post_removal.csv"),
pos_map = abbr_map,
neg_map = abbr_map
)
)
compute_scalar_set <- function(spec) {
out <- list()
effect_index <- 1
for (polarity in c("pos", "neg")) {
path <- if (polarity == "pos") spec$pos_file else spec$neg_file
col_map <- if (polarity == "pos") spec$pos_map else spec$neg_map
dat <- read_qualtrics_csv(path)
polarity_label <- if (polarity == "pos") "positive polarity" else "negative polarity"
score_transform <- identity
transform_note <- NULL
if (spec$outcome_type == "evidence_fixed" && polarity == "neg") {
score_transform <- function(x) 8 - x
transform_note <- "negative-polarity agreement-with-denial responses reverse-coded to knowledge-attribution direction"
}
for (scenario_code in names(scenario_labels)) {
cols <- col_map[[scenario_code]]
stats <- summarise_within_pair(
dat = dat,
low_col = unname(cols[["low"]]),
high_col = unname(cols[["high"]]),
all_stake_cols = unname(cols),
outcome_type = spec$outcome_type,
score_transform = score_transform,
transform_note = transform_note
)
effect_id <- sprintf("s%d_e%d", spec$study_id, effect_index)
subgroup <- sprintf("%s -- %s -- %s", spec$experiment, scenario_labels[[scenario_code]], polarity_label)
out[[length(out) + 1]] <- cbind(
paper_key = paper_key,
study_id = spec$study_id,
effect_id = effect_id,
experiment = spec$experiment,
subgroup = subgroup,
scenario_code = scenario_code,
scenario_label = scenario_labels[[scenario_code]],
polarity = polarity,
polarity_label = polarity_label,
outcome_type = spec$outcome_type,
source_file = path,
source_file_short = file.path(basename(dirname(path)), basename(path)),
low_col = unname(cols[["low"]]),
high_col = unname(cols[["high"]]),
all_stake_cols = paste(unname(cols), collapse = " | "),
stats,
stringsAsFactors = FALSE
)
effect_index <- effect_index + 1
}
}
do.call(rbind, out)
}
scalar_audit <- do.call(rbind, lapply(scalar_sets, compute_scalar_set))
scalar_audit
```
## Registered Replication
For the knowledge prompt in the replication, the raw scale is `1 = strongly agree`
and `7 = strongly disagree`; the QMD reverse-codes it to agreement (`8 - raw`)
before computing `d = mean(low) - mean(high)`.
```{r}
replication_path <- file.path(external_dir, "Replication_Experiment", "xReplic_data_post_removal.csv")
replication_dat <- read_replication_csv(replication_path)
agreement_transform <- function(x) 8 - x
rep_specs <- data.frame(
study_id = 2L,
effect_id = paste0("s2_e", 1:6),
experiment = "Appendix II: Registered replication of Sripada & Stanley (2012)",
scenario_code = "peanuts",
scenario_label = c(
"Basic -- Evidence strength",
"Basic -- Knowledge attribution",
"Implicit/Explicit -- Evidence strength",
"Implicit/Explicit -- Knowledge attribution",
"Ignorant -- Evidence strength",
"Ignorant -- Knowledge attribution"
),
polarity = NA_character_,
polarity_label = NA_character_,
outcome_type = c("evidence_strength", "knowledge_attribution", "evidence_strength", "knowledge_attribution", "evidence_strength", "knowledge_attribution"),
low_col = c(
"Basic Low__Strength_Evidence",
"Basic Low__Know_Prompt",
"Implicit Low__Strength_Evidence",
"Implicit Low__Know_Prompt",
"Ignorant Low__Strength_Evidence",
"Ignorant Low__Know_Prompt"
),
high_col = c(
"Basic High__Strength_Evidence",
"Basic High__Know_Prompt",
"Explicit High__Strength_Evidence",
"Explicit High__Know_Prompt",
"Ignorant High__Strength_Evidence",
"Ignorant High__Know_Prompt"
),
stringsAsFactors = FALSE
)
rep_out <- lapply(seq_len(nrow(rep_specs)), function(i) {
row <- rep_specs[i, ]
transform <- if (row$outcome_type == "knowledge_attribution") agreement_transform else identity
stats <- summarise_between_groups(replication_dat, row$low_col, row$high_col, transform = transform)
subgroup <- sprintf("%s -- %s", row$experiment, row$scenario_label)
cbind(
paper_key = paper_key,
study_id = row$study_id,
effect_id = row$effect_id,
experiment = row$experiment,
subgroup = subgroup,
scenario_code = row$scenario_code,
scenario_label = row$scenario_label,
polarity = row$polarity,
polarity_label = row$polarity_label,
outcome_type = row$outcome_type,
source_file = replication_path,
source_file_short = file.path(basename(dirname(replication_path)), basename(replication_path)),
low_col = row$low_col,
high_col = row$high_col,
all_stake_cols = paste(row$low_col, row$high_col, sep = " | "),
stats,
stringsAsFactors = FALSE
)
})
replication_audit <- do.call(rbind, rep_out)
replication_audit
```
## Combined Audit
```{r}
audit <- rbind(scalar_audit, replication_audit)
audit$sign_convention <- sign_convention
audit$inputs_used <- paste(
sprintf("method=%s", audit$method_used),
sprintf("sign_convention=%s", audit$sign_convention),
sprintf("n_low=%s", audit$n_low),
sprintf("n_high=%s", audit$n_high),
sprintf("n_total=%s", audit$n_total),
sprintf("mean_low=%s", signif(audit$mean_low, 12)),
sprintf("mean_high=%s", signif(audit$mean_high, 12)),
sprintf("sd_low=%s", signif(audit$sd_low, 12)),
sprintf("sd_high=%s", signif(audit$sd_high, 12)),
ifelse(is.na(audit$r_within), "", sprintf(", r_within=%s", signif(audit$r_within, 12))),
sep = ", "
)
audit$notes_on_assumptions <- paste(
audit$cleaning,
"Computed from the open University of Reading dataset.",
sep = "; "
)
audit$imputed_flag <- FALSE
audit$needs_sensitivity <- audit$outcome_type == "evidence_seeking"
audit$yaml_sign_multiplier <- 1
audit$d_for_yaml <- audit$yaml_sign_multiplier * audit$d
audit$yaml_sign_note <- ifelse(
audit$outcome_type == "evidence_seeking",
"YAML uses the raw low-minus-high d; downstream meta-analysis reverses evidence-seeking effects programmatically.",
"YAML uses the raw low-minus-high d."
)
write.csv(audit, audit_csv, row.names = FALSE)
audit
```
## Paste-Ready YAML Snippets
```{r}
for (i in seq_len(nrow(audit))) {
row <- audit[i, ]
cat(sprintf(
"\n# %s / %s\n",
row$study_id,
row$effect_id
))
cat(sprintf(
"effect_size:\n metric: SMD\n d: %.12f\n v: %.12f\n computed_from: %s\n needs_review: false\n notes: \"%s\"\n",
row$d_for_yaml,
row$v,
row$computed_from_suggested,
gsub('"', "'", paste(row$inputs_used, row$yaml_sign_note, sep = "; "))
))
}
```