shurakovndstakeseffectnew
/data/papers/shurakovndstakeseffectnew/analysis/effect_sizes.qmd---
title: "Effect size computations: shurakovndstakeseffectnew"
format:
html:
toc: true
execute:
echo: true
warning: true
message: false
---
Computes standardized mean differences (`d`) and sampling variances (`v`) for the
extraction YAML `papers/shurakovndstakeseffectnew/shurakovndstakeseffectnew.yaml`.
Also reproduces the Table 2 composite-score descriptives and Cohen's *d* from the
OSF data files in `papers/shurakovndstakeseffectnew/data/`, plus a full-valid
sample sensitivity analysis that keeps all valid responses surviving the paper's
stated exclusion rules.
## Inputs (from extraction)
```{r}
paper_key <- "shurakovndstakeseffectnew"
sign_convention <- "d = mean(low) - mean(high)"
effects <- list(
list(
study_id = 1,
effect_id = "s1_e1",
method_used = "between_reported_d_n",
n_low = 100,
n_high = 100,
mean_low = 5.32,
mean_high = 0.28,
reported_d = 1.06,
notes_on_assumptions = "Experiment 1 (first-person): d from Table 2 (Tukey HSD); n=100 per condition (first 100 valid responses per condition)."
),
list(
study_id = 2,
effect_id = "s2_e1",
method_used = "between_reported_d_n",
n_low = 100,
n_high = 100,
mean_low = 4.64,
mean_high = -0.94,
reported_d = 1.28,
notes_on_assumptions = "Experiment 1 (third-person): d from Table 2 (Tukey HSD); n=100 per condition (first 100 valid responses per condition)."
),
list(
study_id = 3,
effect_id = "s3_e1",
method_used = "between_reported_d_n",
n_low = 100,
n_high = 100,
mean_low = 5.36,
mean_high = 1.59,
reported_d = 0.94,
notes_on_assumptions = "Experiment 2 (modified design; first-person): d from Table 2 (Tukey HSD); n=100 per condition (first 100 valid responses per condition)."
)
)
```
## Computation (d and v)
```{r}
default_or <- function(x, default) {
if (is.null(x)) default else x
}
stop_if_missing <- function(x, name) {
if (is.na(x)) stop(sprintf("Missing required input: %s", name), call. = FALSE)
}
var_d_independent <- function(d, n_high, n_low) {
n <- n_high + n_low
(n / (n_high * n_low)) + (d^2 / (2 * (n - 2)))
}
infer_sign <- function(mean_low, mean_high) {
if (!is.na(mean_low) && !is.na(mean_high) && mean_low != mean_high) {
return(sign(mean_low - mean_high))
}
stop("Cannot infer sign from means", call. = FALSE)
}
compute_effect <- function(effect_inputs) {
study_id <- effect_inputs$study_id
effect_id <- effect_inputs$effect_id
method_used <- effect_inputs$method_used
n_high <- default_or(effect_inputs$n_high, NA_integer_)
n_low <- default_or(effect_inputs$n_low, NA_integer_)
mean_high <- default_or(effect_inputs$mean_high, NA_real_)
mean_low <- default_or(effect_inputs$mean_low, NA_real_)
reported_d <- default_or(effect_inputs$reported_d, NA_real_)
notes_on_assumptions <- default_or(effect_inputs$notes_on_assumptions, "")
if (method_used != "between_reported_d_n") {
stop(sprintf("Unknown method_used: %s", method_used), call. = FALSE)
}
stop_if_missing(reported_d, "reported_d")
stop_if_missing(n_high, "n_high")
stop_if_missing(n_low, "n_low")
sign_used <- infer_sign(mean_low, mean_high)
d <- sign_used * abs(reported_d)
v <- var_d_independent(d = d, n_high = n_high, n_low = n_low)
inputs_used <- paste(
c(
sprintf("method=%s", method_used),
sprintf("sign_convention=%s", sign_convention),
sprintf("n_low=%s", n_low),
sprintf("n_high=%s", n_high),
sprintf("mean_low=%s", mean_low),
sprintf("mean_high=%s", mean_high),
sprintf("reported_d=%s", reported_d)
),
collapse = ", "
)
data.frame(
paper_key = paper_key,
study_id = study_id,
effect_id = effect_id,
design = "Between-Subjects",
method_used = method_used,
inputs_used = inputs_used,
d = d,
v = v,
notes_on_assumptions = notes_on_assumptions,
stringsAsFactors = FALSE
)
}
audit <- do.call(rbind, lapply(effects, compute_effect))
audit
```
## Paste-ready YAML snippets
```{r}
for (i in seq_len(nrow(audit))) {
row <- audit[i, ]
cat(sprintf("\n# %s (study_id=%s)\n", row$effect_id, row$study_id))
cat(sprintf(
"effect_size:\n metric: SMD\n d: %.12f\n v: %.12f\n computed_from: reported_d\n needs_review: false\n notes: \"%s\"\n",
row$d,
row$v,
gsub("\"", "'", row$inputs_used)
))
}
```
## Reproduce Table 2 (OSF data check)
```{r}
read_csv_local <- function(path) {
read.csv(path, na.strings = c("", " "), check.names = FALSE)
}
composite_from_cols <- function(df, col_a, col_b, n_take = NULL) {
a <- suppressWarnings(as.numeric(df[[col_a]]))
b <- suppressWarnings(as.numeric(df[[col_b]]))
keep <- !is.na(a) & !is.na(b)
a <- a[keep]
b <- b[keep]
if (!is.null(n_take)) {
a <- head(a, n_take)
b <- head(b, n_take)
}
# In OSF data, A is coded 1="I do", 2="I don't". Convert to +1 / -1.
sign <- ifelse(a == 1, 1, ifelse(a == 2, -1, NA_real_))
if (any(is.na(sign))) stop(sprintf("Unexpected A values in %s", col_a), call. = FALSE)
sign * b
}
summarize_conditions <- function(df, mapping, n_take = NULL) {
composites <- list()
for (nm in names(mapping)) {
cols <- mapping[[nm]]
composites[[nm]] <- composite_from_cols(df, cols$a, cols$b, n_take = n_take)
}
summarize_composites(composites)
}
summarize_composites <- function(composites) {
rows <- list()
for (nm in names(composites)) {
comp <- composites[[nm]]
rows[[nm]] <- data.frame(
condition = nm,
n = length(comp),
mean = mean(comp),
sd = sd(comp),
stringsAsFactors = FALSE
)
}
do.call(rbind, rows)
}
exp1 <- read_csv_local("../data/Experiment_1_analized_responses.csv")
exp2_first100 <- read_csv_local("../data/Experiment_2_analyzed_responses.csv")
exp2_all <- read_csv_local("../data/Experiment_2_all_responses.csv")
exp1_first_map <- list(
Neutral = list(a = "BA_1_N_A", b = "BA_1_N_B"),
Stakes = list(a = "BA_1_S_A", b = "BA_1_S_B"),
Evidence = list(a = "BA_1_E_A", b = "BA_1_E_B")
)
exp1_third_map <- list(
Neutral = list(a = "BA_3_N_A", b = "BA_3_N_B"),
Stakes = list(a = "BA_3_S_A", b = "BA_3_S_B"),
Evidence = list(a = "BA_3_E_A", b = "BA_3_E_B")
)
exp2_mod_map <- list(
Neutral = list(a = "BA_1_N_A", b = "BA_1_N_B"),
Stakes = list(a = "BA_1_S_A", b = "BA_1_S_B"),
Evidence = list(a = "BA_1_E_A", b = "BA_1_E_B")
)
target_n <- 100
stats_e1_first <- summarize_conditions(exp1, exp1_first_map, n_take = target_n)
stats_e1_third <- summarize_conditions(exp1, exp1_third_map, n_take = target_n)
stats_e2_mod <- summarize_conditions(exp2_first100, exp2_mod_map, n_take = target_n)
pooled_sd <- function(n_high, n_low, sd_high, sd_low) {
sqrt(((n_high - 1) * sd_high^2 + (n_low - 1) * sd_low^2) / (n_high + n_low - 2))
}
cohen_d_from_group_stats <- function(stats, low = "Neutral", high = "Stakes") {
low_row <- stats[stats$condition == low, ]
high_row <- stats[stats$condition == high, ]
if (nrow(low_row) != 1 || nrow(high_row) != 1) stop("Missing condition rows", call. = FALSE)
(low_row$mean - high_row$mean) / pooled_sd(
n_high = high_row$n,
n_low = low_row$n,
sd_high = high_row$sd,
sd_low = low_row$sd
)
}
d_e1_first <- cohen_d_from_group_stats(stats_e1_first)
d_e1_third <- cohen_d_from_group_stats(stats_e1_third)
d_e2_mod <- cohen_d_from_group_stats(stats_e2_mod)
reported <- data.frame(
subset = c("Experiment 1 (first-person)", "Experiment 1 (third-person)", "Experiment 2 (modified design)"),
mean_neutral = c(5.32, 4.64, 5.36),
sd_neutral = c(2.93, 3.53, 2.63),
mean_stakes = c(0.28, -0.94, 1.59),
sd_stakes = c(5.32, 5.04, 5.06),
d_stakes_vs_neutral = c(1.06, 1.28, 0.94),
stringsAsFactors = FALSE
)
computed <- data.frame(
subset = c("Experiment 1 (first-person)", "Experiment 1 (third-person)", "Experiment 2 (modified design)"),
mean_neutral = c(stats_e1_first$mean[stats_e1_first$condition == "Neutral"], stats_e1_third$mean[stats_e1_third$condition == "Neutral"], stats_e2_mod$mean[stats_e2_mod$condition == "Neutral"]),
sd_neutral = c(stats_e1_first$sd[stats_e1_first$condition == "Neutral"], stats_e1_third$sd[stats_e1_third$condition == "Neutral"], stats_e2_mod$sd[stats_e2_mod$condition == "Neutral"]),
mean_stakes = c(stats_e1_first$mean[stats_e1_first$condition == "Stakes"], stats_e1_third$mean[stats_e1_third$condition == "Stakes"], stats_e2_mod$mean[stats_e2_mod$condition == "Stakes"]),
sd_stakes = c(stats_e1_first$sd[stats_e1_first$condition == "Stakes"], stats_e1_third$sd[stats_e1_third$condition == "Stakes"], stats_e2_mod$sd[stats_e2_mod$condition == "Stakes"]),
d_stakes_vs_neutral = c(d_e1_first, d_e1_third, d_e2_mod),
stringsAsFactors = FALSE
)
merge(reported, computed, by = "subset", suffixes = c("_reported", "_from_data"))
```
## Full-valid sensitivity analysis (OSF data)
The paper's confirmatory analyses used the first 100 valid responses per condition.
For meta-analytic sensitivity analysis, we can instead retain all valid responses
surviving the paper's stated exclusion rules:
- Experiment 1: all rows in `Experiment_1_analized_responses.csv` (already filtered
for failed attention checks).
- Experiment 2: rows in `Experiment_2_all_responses.csv` that both
endorsed the knowledge ascription (`Set-up = 1`) and passed the
condition-specific attention check (`*_AT = 2`).
```{r}
subset_exp2_full_valid <- function(df, prefix) {
keep <- df[["Set-up"]] == 1 & df[[paste0(prefix, "_AT")]] == 2
df[keep, , drop = FALSE]
}
full_valid_exp2_composites <- list(
Neutral = composite_from_cols(
subset_exp2_full_valid(exp2_all, "BA_1_N"),
"BA_1_N_A",
"BA_1_N_B"
),
Stakes = composite_from_cols(
subset_exp2_full_valid(exp2_all, "BA_1_S"),
"BA_1_S_A",
"BA_1_S_B"
),
Evidence = composite_from_cols(
subset_exp2_full_valid(exp2_all, "BA_1_E"),
"BA_1_E_A",
"BA_1_E_B"
)
)
stats_e1_first_full <- summarize_conditions(exp1, exp1_first_map)
stats_e1_third_full <- summarize_conditions(exp1, exp1_third_map)
stats_e2_mod_full <- summarize_composites(full_valid_exp2_composites)
full_valid_stats <- rbind(
cbind(subset = "Experiment 1 (first-person, full valid sample sensitivity)", stats_e1_first_full),
cbind(subset = "Experiment 1 (third-person, full valid sample sensitivity)", stats_e1_third_full),
cbind(subset = "Experiment 2 (modified design, full valid sample sensitivity)", stats_e2_mod_full)
)
full_valid_stats
```
## First-100 vs full-valid comparison
```{r}
build_effect_from_stats <- function(study_id, effect_id, stats, notes_on_assumptions) {
low_row <- stats[stats$condition == "Neutral", ]
high_row <- stats[stats$condition == "Stakes", ]
if (nrow(low_row) != 1 || nrow(high_row) != 1) stop("Missing condition rows", call. = FALSE)
d <- cohen_d_from_group_stats(stats)
v <- var_d_independent(d = d, n_high = high_row$n, n_low = low_row$n)
inputs_used <- paste(
c(
"method=between_group_stats",
sprintf("sign_convention=%s", sign_convention),
sprintf("n_low=%s", low_row$n),
sprintf("n_high=%s", high_row$n),
sprintf("mean_low=%s", low_row$mean),
sprintf("mean_high=%s", high_row$mean),
sprintf("sd_low=%s", low_row$sd),
sprintf("sd_high=%s", high_row$sd)
),
collapse = ", "
)
data.frame(
paper_key = paper_key,
study_id = study_id,
effect_id = effect_id,
design = "Between-Subjects",
method_used = "between_group_stats",
inputs_used = inputs_used,
d = d,
v = v,
notes_on_assumptions = notes_on_assumptions,
stringsAsFactors = FALSE
)
}
audit_full_valid <- do.call(
rbind,
list(
build_effect_from_stats(
study_id = 4,
effect_id = "s4_e1",
stats = stats_e1_first_full,
notes_on_assumptions = "Experiment 1 (first-person, full valid sample sensitivity): all valid OSF analyzed responses after the paper's attention-check exclusion."
),
build_effect_from_stats(
study_id = 5,
effect_id = "s5_e1",
stats = stats_e1_third_full,
notes_on_assumptions = "Experiment 1 (third-person, full valid sample sensitivity): all valid OSF analyzed responses after the paper's attention-check exclusion."
),
build_effect_from_stats(
study_id = 6,
effect_id = "s6_e1",
stats = stats_e2_mod_full,
notes_on_assumptions = "Experiment 2 (modified design, full valid sample sensitivity): all valid OSF raw responses after applying the paper's stated exclusion rules (knowledge endorsers only; condition-specific attention-check pass)."
)
)
)
audit_first100_data <- do.call(
rbind,
list(
build_effect_from_stats(
study_id = 1,
effect_id = "s1_e1_first100_data",
stats = stats_e1_first,
notes_on_assumptions = "Experiment 1 (first-person): first 100 valid OSF analyzed responses per condition."
),
build_effect_from_stats(
study_id = 2,
effect_id = "s2_e1_first100_data",
stats = stats_e1_third,
notes_on_assumptions = "Experiment 1 (third-person): first 100 valid OSF analyzed responses per condition."
),
build_effect_from_stats(
study_id = 3,
effect_id = "s3_e1_first100_data",
stats = stats_e2_mod,
notes_on_assumptions = "Experiment 2 (modified design): first 100 valid OSF responses per condition."
)
)
)
comparison <- data.frame(
subset = c("Experiment 1 (first-person)", "Experiment 1 (third-person)", "Experiment 2 (modified design)"),
n_low_first100 = c(
stats_e1_first$n[stats_e1_first$condition == "Neutral"],
stats_e1_third$n[stats_e1_third$condition == "Neutral"],
stats_e2_mod$n[stats_e2_mod$condition == "Neutral"]
),
n_high_first100 = c(
stats_e1_first$n[stats_e1_first$condition == "Stakes"],
stats_e1_third$n[stats_e1_third$condition == "Stakes"],
stats_e2_mod$n[stats_e2_mod$condition == "Stakes"]
),
d_first100 = audit_first100_data$d,
v_first100 = audit_first100_data$v,
n_low_full_valid = c(
stats_e1_first_full$n[stats_e1_first_full$condition == "Neutral"],
stats_e1_third_full$n[stats_e1_third_full$condition == "Neutral"],
stats_e2_mod_full$n[stats_e2_mod_full$condition == "Neutral"]
),
n_high_full_valid = c(
stats_e1_first_full$n[stats_e1_first_full$condition == "Stakes"],
stats_e1_third_full$n[stats_e1_third_full$condition == "Stakes"],
stats_e2_mod_full$n[stats_e2_mod_full$condition == "Stakes"]
),
d_full_valid = audit_full_valid$d,
v_full_valid = audit_full_valid$v,
stringsAsFactors = FALSE
)
comparison
```
## Paste-ready YAML snippets (full-valid sensitivity studies)
```{r}
for (i in seq_len(nrow(audit_full_valid))) {
row <- audit_full_valid[i, ]
cat(sprintf("\n# %s (study_id=%s)\n", row$effect_id, row$study_id))
cat(sprintf(
"effect_size:\n metric: SMD\n d: %.12f\n v: %.12f\n computed_from: groups\n needs_review: false\n notes: \"%s\"\n",
row$d,
row$v,
gsub("\"", "'", row$inputs_used)
))
}
```