paper_key <- "francis2019stakesscalesskepticism"
required_packages <- c("afex", "dplyr", "tidyr", "knitr")
missing_packages <- required_packages[
!vapply(required_packages, requireNamespace, logical(1), quietly = TRUE)
]
if (length(missing_packages) > 0) {
stop(
"Missing required package(s): ",
paste(missing_packages, collapse = ", "),
call. = FALSE
)
}
suppressPackageStartupMessages({
library(afex)
library(dplyr)
library(tidyr)
})
extra_r_lib <- Sys.getenv("FRANCIS_GEE_LIB", unset = "")
if (nzchar(extra_r_lib) && dir.exists(extra_r_lib)) {
.libPaths(c(extra_r_lib, .libPaths()))
}
has_geepack <- requireNamespace("geepack", quietly = TRUE)
candidate_paper_dirs <- c(
file.path("papers", paper_key),
"..",
"."
)
paper_dir <- candidate_paper_dirs[
vapply(
candidate_paper_dirs,
function(x) file.exists(file.path(x, "out", "external")),
logical(1)
)
][1]
if (is.na(paper_dir)) stop("Could not locate paper directory.", call. = FALSE)
paper_dir <- normalizePath(paper_dir, mustWork = TRUE)
external_dir <- file.path(paper_dir, "out", "external")
analysis_dir <- file.path(paper_dir, "analysis")
output_csv <- file.path(analysis_dir, "original_analysis_check.csv")
afex_options(correction_aov = "GG", type = 3)
options(contrasts = c("contr.sum", "contr.poly"))Original analysis reproduction check: Francis et al. 2019
This document is a diagnostic check on the raw-data interpretation for Experiment 1. It is intentionally separate from the meta-analysis effect-size coding in analysis/effect_sizes.qmd.
The paper’s Appendix I reports, for each evidence-fixed scenario, a 2 x 4 mixed ANOVA on raw levels of agreement with polarity (know, doesn't know) as a between-subjects factor and stakes (one [low], two, three, four [high]) as a within-subjects factor. The same model was also checked with a linear GEE. For this original-analysis reproduction, negative-polarity responses are not reverse-coded: the paper analyzed agreement with the doesn't know prompts directly.
The final comparison table is written to analysis/original_analysis_check.csv.
Setup
Data Preparation
trim_names <- function(x) {
out <- trimws(x)
make.unique(out, sep = ".")
}
read_exp1 <- function(path, polarity) {
dat <- read.csv(
path,
check.names = FALSE,
stringsAsFactors = FALSE,
fill = TRUE
)
names(dat) <- trim_names(names(dat))
dat$polarity <- polarity
dat
}
exp1_dir <- file.path(external_dir, "Exp1_Evidence_Fixed_Design")
pos <- read_exp1(file.path(exp1_dir, "x1_EF_Pos_data_post_removal.csv"), "know")
neg <- read_exp1(file.path(exp1_dir, "x1_EF_Neg_data_post_removal.csv"), "doesnt_know")
scenario_map <- list(
paramedic = c("Paramedic Low", "Paramedic 1", "Paramedic 2", "Paramedic 3"),
vaccine = c("Vaccine Low", "Vaccine 1", "Vaccine 2", "Vaccine 3"),
mountaineering = c("Mountaineering Low", "Mountaineering 1", "Mountaineering 2", "Mountaineering 3"),
game_show = c("GameShow low", "GameShow 1", "GameShow 2", "GameShow 3"),
introduction = c("Intro Low", "Intro 1", "Intro 2", "Intro 3"),
possessions = c("Personal Val Low", "Personal Val 1", "Personal Val 2", "Personal Val 3")
)
scenario_labels <- c(
paramedic = "Paramedic",
vaccine = "Vaccine",
mountaineering = "Mountaineering",
game_show = "Game show",
introduction = "Introduction",
possessions = "Possessions"
)
all_response_cols <- unlist(scenario_map, use.names = FALSE)
wide <- bind_rows(pos, neg) |>
mutate(across(all_of(all_response_cols), ~ suppressWarnings(as.numeric(.x)))) |>
filter(if_all(all_of(all_response_cols), ~ !is.na(.x))) |>
mutate(
subject = row_number(),
polarity = factor(polarity, levels = c("know", "doesnt_know"))
)New names:
New names:
• `` -> `...1`
sample_check <- wide |>
count(polarity, name = "n")
sample_check polarity n
1 know 55
2 doesnt_know 42
The complete-case filter is across the 24 Experiment 1 scenario-by-stakes response columns. This yields N = 97: 55 positive-polarity participants and 42 negative-polarity participants, matching the F(1, 95) denominator reported in Appendix I.
long <- bind_rows(lapply(names(scenario_map), function(scenario) {
cols <- scenario_map[[scenario]]
out <- wide[, c("subject", "polarity", cols)]
names(out) <- c("subject", "polarity", "one_low", "two", "three", "four_high")
pivot_longer(
out,
cols = c("one_low", "two", "three", "four_high"),
names_to = "stakes",
values_to = "rating"
) |>
mutate(
scenario = scenario,
stakes = factor(stakes, levels = c("one_low", "two", "three", "four_high")),
stake_index = as.integer(stakes)
)
}))Paper Targets
paper_targets <- tibble::tribble(
~scenario, ~paper_anova_p_stakes, ~paper_anova_p_interaction, ~paper_anova_F_polarity, ~paper_gee_p_stakes, ~paper_gee_p_interaction, ~paper_gee_wald_polarity,
"paramedic", .813, .333, 149.03, .795, .500, 154.42,
"vaccine", .075, .817, 212.46, .035, .863, 206.30,
"mountaineering", .650, .776, 163.29, .617, .789, 147.06,
"game_show", .252, .513, 72.55, .088, .417, 71.27,
"introduction", .055, .803, 278.95, .074, .871, 275.46,
"possessions", .983, .954, 323.81, .995, .931, 308.64
)ANOVA Reproduction
extract_anova <- function(scenario) {
dat <- long |>
filter(.data$scenario == !!scenario)
fit <- afex::aov_ez(
id = "subject",
dv = "rating",
data = dat,
within = "stakes",
between = "polarity",
type = 3,
factorize = FALSE,
anova_table = list(correction = "GG", es = "pes")
)
tab <- as.data.frame(fit$anova_table)
tab$term <- rownames(tab)
get_term <- function(term, column) tab[tab$term == term, column][[1]]
tibble(
scenario = scenario,
anova_F_polarity = get_term("polarity", "F"),
anova_p_polarity = get_term("polarity", "Pr(>F)"),
anova_p_stakes = get_term("stakes", "Pr(>F)"),
anova_p_interaction = get_term("polarity:stakes", "Pr(>F)")
)
}
anova_results <- bind_rows(lapply(names(scenario_map), extract_anova))Optional GEE Reproduction
wald_test <- function(fit, idx) {
beta <- coef(fit)[idx]
vc <- stats::vcov(fit)[idx, idx, drop = FALSE]
wald <- as.numeric(t(beta) %*% solve(vc, beta))
tibble(
wald = wald,
df = length(idx),
p = pchisq(wald, df = length(idx), lower.tail = FALSE)
)
}
extract_gee <- function(scenario) {
if (!has_geepack) {
return(tibble(
scenario = scenario,
gee_wald_polarity = NA_real_,
gee_p_polarity = NA_real_,
gee_wald_stakes = NA_real_,
gee_p_stakes = NA_real_,
gee_wald_interaction = NA_real_,
gee_p_interaction = NA_real_
))
}
dat <- long |>
filter(.data$scenario == !!scenario) |>
arrange(subject, stake_index)
fit <- geepack::geeglm(
rating ~ polarity * stakes,
id = subject,
waves = stake_index,
data = dat,
family = gaussian,
corstr = "exchangeable"
)
coef_names <- names(coef(fit))
polarity <- wald_test(fit, which(coef_names == "polarity1"))
stakes <- wald_test(fit, grep("^stakes[0-9]+$", coef_names))
interaction <- wald_test(fit, grep("^polarity1:stakes[0-9]+$", coef_names))
tibble(
scenario = scenario,
gee_wald_polarity = polarity$wald,
gee_p_polarity = polarity$p,
gee_wald_stakes = stakes$wald,
gee_p_stakes = stakes$p,
gee_wald_interaction = interaction$wald,
gee_p_interaction = interaction$p
)
}
gee_results <- bind_rows(lapply(names(scenario_map), extract_gee))The GEE chunk is optional because geepack is not a core project dependency. If geepack is installed, the rows are explicitly sorted by subject before fitting; otherwise each row can be misread as a separate cluster.
Comparison
comparison <- paper_targets |>
left_join(anova_results, by = "scenario") |>
left_join(gee_results, by = "scenario") |>
mutate(
scenario_label = unname(scenario_labels[scenario]),
n_total = nrow(wide),
n_know = as.integer(sample_check$n[sample_check$polarity == "know"]),
n_doesnt_know = as.integer(sample_check$n[sample_check$polarity == "doesnt_know"]),
anova_p_stakes_diff = anova_p_stakes - paper_anova_p_stakes,
anova_p_interaction_diff = anova_p_interaction - paper_anova_p_interaction,
anova_F_polarity_diff = anova_F_polarity - paper_anova_F_polarity,
gee_p_stakes_diff = gee_p_stakes - paper_gee_p_stakes,
gee_p_interaction_diff = gee_p_interaction - paper_gee_p_interaction,
gee_wald_polarity_diff = gee_wald_polarity - paper_gee_wald_polarity,
note = if_else(
scenario == "vaccine" & !is.na(gee_wald_polarity),
"GEE polarity Wald is 207.30 here versus 206.30 in the paper; the same model reproduces the reported vaccine stakes and interaction tests.",
NA_character_
)
) |>
select(
scenario,
scenario_label,
n_total,
n_know,
n_doesnt_know,
anova_F_polarity,
paper_anova_F_polarity,
anova_F_polarity_diff,
anova_p_stakes,
paper_anova_p_stakes,
anova_p_stakes_diff,
anova_p_interaction,
paper_anova_p_interaction,
anova_p_interaction_diff,
gee_wald_polarity,
paper_gee_wald_polarity,
gee_wald_polarity_diff,
gee_p_stakes,
paper_gee_p_stakes,
gee_p_stakes_diff,
gee_p_interaction,
paper_gee_p_interaction,
gee_p_interaction_diff,
note
)
write.csv(comparison, output_csv, row.names = FALSE)
comparison |>
mutate(across(where(is.numeric), ~ round(.x, 6))) |>
knitr::kable()| scenario | scenario_label | n_total | n_know | n_doesnt_know | anova_F_polarity | paper_anova_F_polarity | anova_F_polarity_diff | anova_p_stakes | paper_anova_p_stakes | anova_p_stakes_diff | anova_p_interaction | paper_anova_p_interaction | anova_p_interaction_diff | gee_wald_polarity | paper_gee_wald_polarity | gee_wald_polarity_diff | gee_p_stakes | paper_gee_p_stakes | gee_p_stakes_diff | gee_p_interaction | paper_gee_p_interaction | gee_p_interaction_diff | note |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| paramedic | Paramedic | 97 | 55 | 42 | 149.02522 | 149.03 | -0.004776 | 0.812797 | 0.813 | -0.000203 | 0.332625 | 0.333 | -0.000375 | 154.42430 | 154.42 | 0.004295 | 0.794890 | 0.795 | -0.000110 | 0.500370 | 0.500 | 0.000370 | NA |
| vaccine | Vaccine | 97 | 55 | 42 | 212.46166 | 212.46 | 0.001664 | 0.074622 | 0.075 | -0.000378 | 0.817492 | 0.817 | 0.000492 | 207.29986 | 206.30 | 0.999860 | 0.035446 | 0.035 | 0.000446 | 0.863337 | 0.863 | 0.000337 | GEE polarity Wald is 207.30 here versus 206.30 in the paper; the same model reproduces the reported vaccine stakes and interaction tests. |
| mountaineering | Mountaineering | 97 | 55 | 42 | 163.28970 | 163.29 | -0.000301 | 0.650329 | 0.650 | 0.000329 | 0.776489 | 0.776 | 0.000489 | 147.05835 | 147.06 | -0.001646 | 0.616742 | 0.617 | -0.000258 | 0.789417 | 0.789 | 0.000417 | NA |
| game_show | Game show | 97 | 55 | 42 | 72.55354 | 72.55 | 0.003536 | 0.252202 | 0.252 | 0.000202 | 0.513212 | 0.513 | 0.000212 | 71.27339 | 71.27 | 0.003391 | 0.088380 | 0.088 | 0.000380 | 0.416847 | 0.417 | -0.000153 | NA |
| introduction | Introduction | 97 | 55 | 42 | 278.94579 | 278.95 | -0.004212 | 0.055354 | 0.055 | 0.000354 | 0.803079 | 0.803 | 0.000079 | 275.46393 | 275.46 | 0.003928 | 0.074269 | 0.074 | 0.000269 | 0.870854 | 0.871 | -0.000146 | NA |
| possessions | Possessions | 97 | 55 | 42 | 323.80959 | 323.81 | -0.000407 | 0.983404 | 0.983 | 0.000404 | 0.954026 | 0.954 | 0.000026 | 308.63648 | 308.64 | -0.003520 | 0.994664 | 0.995 | -0.000336 | 0.931035 | 0.931 | 0.000035 | NA |
The Greenhouse-Geisser corrected ANOVA results reproduce Appendix I to the reported rounding. When geepack is available, the linear GEE reproduction also recovers the reported stakes and interaction tests to rounding. The lone noticeable discrepancy is the vaccine GEE polarity Wald statistic: the recomputed value is 207.30, while Appendix I reports 206.30; the ANOVA polarity statistic and both vaccine GEE stakes terms match the paper.