Please log in to access this page.
Please log in to access this page.
francis2019stakesscalesskepticism
/data/papers/francis2019stakesscalesskepticism/analysis/original_analysis_check.qmd---
title: "Original analysis reproduction check: Francis et al. 2019"
format:
html:
toc: true
execute:
echo: true
warning: true
message: false
---
This document is a diagnostic check on the raw-data interpretation for
Experiment 1. It is intentionally separate from the meta-analysis effect-size
coding in `analysis/effect_sizes.qmd`.
The paper's Appendix I reports, for each evidence-fixed scenario, a `2 x 4`
mixed ANOVA on raw levels of agreement with polarity (`know`, `doesn't know`) as
a between-subjects factor and stakes (`one [low]`, `two`, `three`, `four
[high]`) as a within-subjects factor. The same model was also checked with a
linear GEE. For this original-analysis reproduction, negative-polarity
responses are not reverse-coded: the paper analyzed agreement with the
`doesn't know` prompts directly.
The final comparison table is written to `analysis/original_analysis_check.csv`.
## Setup
```{r}
paper_key <- "francis2019stakesscalesskepticism"
required_packages <- c("afex", "dplyr", "tidyr", "knitr")
missing_packages <- required_packages[
!vapply(required_packages, requireNamespace, logical(1), quietly = TRUE)
]
if (length(missing_packages) > 0) {
stop(
"Missing required package(s): ",
paste(missing_packages, collapse = ", "),
call. = FALSE
)
}
suppressPackageStartupMessages({
library(afex)
library(dplyr)
library(tidyr)
})
extra_r_lib <- Sys.getenv("FRANCIS_GEE_LIB", unset = "")
if (nzchar(extra_r_lib) && dir.exists(extra_r_lib)) {
.libPaths(c(extra_r_lib, .libPaths()))
}
has_geepack <- requireNamespace("geepack", quietly = TRUE)
candidate_paper_dirs <- c(
file.path("papers", paper_key),
"..",
"."
)
paper_dir <- candidate_paper_dirs[
vapply(
candidate_paper_dirs,
function(x) file.exists(file.path(x, "out", "external")),
logical(1)
)
][1]
if (is.na(paper_dir)) stop("Could not locate paper directory.", call. = FALSE)
paper_dir <- normalizePath(paper_dir, mustWork = TRUE)
external_dir <- file.path(paper_dir, "out", "external")
analysis_dir <- file.path(paper_dir, "analysis")
output_csv <- file.path(analysis_dir, "original_analysis_check.csv")
afex_options(correction_aov = "GG", type = 3)
options(contrasts = c("contr.sum", "contr.poly"))
```
## Data Preparation
```{r}
trim_names <- function(x) {
out <- trimws(x)
make.unique(out, sep = ".")
}
read_exp1 <- function(path, polarity) {
dat <- read.csv(
path,
check.names = FALSE,
stringsAsFactors = FALSE,
fill = TRUE
)
names(dat) <- trim_names(names(dat))
dat$polarity <- polarity
dat
}
exp1_dir <- file.path(external_dir, "Exp1_Evidence_Fixed_Design")
pos <- read_exp1(file.path(exp1_dir, "x1_EF_Pos_data_post_removal.csv"), "know")
neg <- read_exp1(file.path(exp1_dir, "x1_EF_Neg_data_post_removal.csv"), "doesnt_know")
scenario_map <- list(
paramedic = c("Paramedic Low", "Paramedic 1", "Paramedic 2", "Paramedic 3"),
vaccine = c("Vaccine Low", "Vaccine 1", "Vaccine 2", "Vaccine 3"),
mountaineering = c("Mountaineering Low", "Mountaineering 1", "Mountaineering 2", "Mountaineering 3"),
game_show = c("GameShow low", "GameShow 1", "GameShow 2", "GameShow 3"),
introduction = c("Intro Low", "Intro 1", "Intro 2", "Intro 3"),
possessions = c("Personal Val Low", "Personal Val 1", "Personal Val 2", "Personal Val 3")
)
scenario_labels <- c(
paramedic = "Paramedic",
vaccine = "Vaccine",
mountaineering = "Mountaineering",
game_show = "Game show",
introduction = "Introduction",
possessions = "Possessions"
)
all_response_cols <- unlist(scenario_map, use.names = FALSE)
wide <- bind_rows(pos, neg) |>
mutate(across(all_of(all_response_cols), ~ suppressWarnings(as.numeric(.x)))) |>
filter(if_all(all_of(all_response_cols), ~ !is.na(.x))) |>
mutate(
subject = row_number(),
polarity = factor(polarity, levels = c("know", "doesnt_know"))
)
sample_check <- wide |>
count(polarity, name = "n")
sample_check
```
The complete-case filter is across the 24 Experiment 1 scenario-by-stakes
response columns. This yields `N = 97`: 55 positive-polarity participants and
42 negative-polarity participants, matching the `F(1, 95)` denominator reported
in Appendix I.
```{r}
long <- bind_rows(lapply(names(scenario_map), function(scenario) {
cols <- scenario_map[[scenario]]
out <- wide[, c("subject", "polarity", cols)]
names(out) <- c("subject", "polarity", "one_low", "two", "three", "four_high")
pivot_longer(
out,
cols = c("one_low", "two", "three", "four_high"),
names_to = "stakes",
values_to = "rating"
) |>
mutate(
scenario = scenario,
stakes = factor(stakes, levels = c("one_low", "two", "three", "four_high")),
stake_index = as.integer(stakes)
)
}))
```
## Paper Targets
```{r}
paper_targets <- tibble::tribble(
~scenario, ~paper_anova_p_stakes, ~paper_anova_p_interaction, ~paper_anova_F_polarity, ~paper_gee_p_stakes, ~paper_gee_p_interaction, ~paper_gee_wald_polarity,
"paramedic", .813, .333, 149.03, .795, .500, 154.42,
"vaccine", .075, .817, 212.46, .035, .863, 206.30,
"mountaineering", .650, .776, 163.29, .617, .789, 147.06,
"game_show", .252, .513, 72.55, .088, .417, 71.27,
"introduction", .055, .803, 278.95, .074, .871, 275.46,
"possessions", .983, .954, 323.81, .995, .931, 308.64
)
```
## ANOVA Reproduction
```{r}
extract_anova <- function(scenario) {
dat <- long |>
filter(.data$scenario == !!scenario)
fit <- afex::aov_ez(
id = "subject",
dv = "rating",
data = dat,
within = "stakes",
between = "polarity",
type = 3,
factorize = FALSE,
anova_table = list(correction = "GG", es = "pes")
)
tab <- as.data.frame(fit$anova_table)
tab$term <- rownames(tab)
get_term <- function(term, column) tab[tab$term == term, column][[1]]
tibble(
scenario = scenario,
anova_F_polarity = get_term("polarity", "F"),
anova_p_polarity = get_term("polarity", "Pr(>F)"),
anova_p_stakes = get_term("stakes", "Pr(>F)"),
anova_p_interaction = get_term("polarity:stakes", "Pr(>F)")
)
}
anova_results <- bind_rows(lapply(names(scenario_map), extract_anova))
```
## Optional GEE Reproduction
```{r}
wald_test <- function(fit, idx) {
beta <- coef(fit)[idx]
vc <- stats::vcov(fit)[idx, idx, drop = FALSE]
wald <- as.numeric(t(beta) %*% solve(vc, beta))
tibble(
wald = wald,
df = length(idx),
p = pchisq(wald, df = length(idx), lower.tail = FALSE)
)
}
extract_gee <- function(scenario) {
if (!has_geepack) {
return(tibble(
scenario = scenario,
gee_wald_polarity = NA_real_,
gee_p_polarity = NA_real_,
gee_wald_stakes = NA_real_,
gee_p_stakes = NA_real_,
gee_wald_interaction = NA_real_,
gee_p_interaction = NA_real_
))
}
dat <- long |>
filter(.data$scenario == !!scenario) |>
arrange(subject, stake_index)
fit <- geepack::geeglm(
rating ~ polarity * stakes,
id = subject,
waves = stake_index,
data = dat,
family = gaussian,
corstr = "exchangeable"
)
coef_names <- names(coef(fit))
polarity <- wald_test(fit, which(coef_names == "polarity1"))
stakes <- wald_test(fit, grep("^stakes[0-9]+$", coef_names))
interaction <- wald_test(fit, grep("^polarity1:stakes[0-9]+$", coef_names))
tibble(
scenario = scenario,
gee_wald_polarity = polarity$wald,
gee_p_polarity = polarity$p,
gee_wald_stakes = stakes$wald,
gee_p_stakes = stakes$p,
gee_wald_interaction = interaction$wald,
gee_p_interaction = interaction$p
)
}
gee_results <- bind_rows(lapply(names(scenario_map), extract_gee))
```
The GEE chunk is optional because `geepack` is not a core project dependency.
If `geepack` is installed, the rows are explicitly sorted by `subject` before
fitting; otherwise each row can be misread as a separate cluster.
## Comparison
```{r}
comparison <- paper_targets |>
left_join(anova_results, by = "scenario") |>
left_join(gee_results, by = "scenario") |>
mutate(
scenario_label = unname(scenario_labels[scenario]),
n_total = nrow(wide),
n_know = as.integer(sample_check$n[sample_check$polarity == "know"]),
n_doesnt_know = as.integer(sample_check$n[sample_check$polarity == "doesnt_know"]),
anova_p_stakes_diff = anova_p_stakes - paper_anova_p_stakes,
anova_p_interaction_diff = anova_p_interaction - paper_anova_p_interaction,
anova_F_polarity_diff = anova_F_polarity - paper_anova_F_polarity,
gee_p_stakes_diff = gee_p_stakes - paper_gee_p_stakes,
gee_p_interaction_diff = gee_p_interaction - paper_gee_p_interaction,
gee_wald_polarity_diff = gee_wald_polarity - paper_gee_wald_polarity,
note = if_else(
scenario == "vaccine" & !is.na(gee_wald_polarity),
"GEE polarity Wald is 207.30 here versus 206.30 in the paper; the same model reproduces the reported vaccine stakes and interaction tests.",
NA_character_
)
) |>
select(
scenario,
scenario_label,
n_total,
n_know,
n_doesnt_know,
anova_F_polarity,
paper_anova_F_polarity,
anova_F_polarity_diff,
anova_p_stakes,
paper_anova_p_stakes,
anova_p_stakes_diff,
anova_p_interaction,
paper_anova_p_interaction,
anova_p_interaction_diff,
gee_wald_polarity,
paper_gee_wald_polarity,
gee_wald_polarity_diff,
gee_p_stakes,
paper_gee_p_stakes,
gee_p_stakes_diff,
gee_p_interaction,
paper_gee_p_interaction,
gee_p_interaction_diff,
note
)
write.csv(comparison, output_csv, row.names = FALSE)
comparison |>
mutate(across(where(is.numeric), ~ round(.x, 6))) |>
knitr::kable()
```
The Greenhouse-Geisser corrected ANOVA results reproduce Appendix I to the
reported rounding. When `geepack` is available, the linear GEE reproduction also
recovers the reported stakes and interaction tests to rounding. The lone
noticeable discrepancy is the vaccine GEE polarity Wald statistic: the
recomputed value is `207.30`, while Appendix I reports `206.30`; the ANOVA
polarity statistic and both vaccine GEE stakes terms match the paper.