turrindepistemiccontextualismidle
/data/papers/turrindepistemiccontextualismidle/analysis/effect_sizes.qmd---
title: "Effect size computations: turrindepistemiccontextualismidle"
format:
html:
toc: true
execute:
echo: true
warning: true
message: false
---
Computes standardized mean differences (`d`) and sampling variances (`v`) for the
extraction YAML `papers/turrindepistemiccontextualismidle/turrindepistemiccontextualismidle.yaml`.
This file was rebuilt to follow the current project template:
- Between-group effects with condition means/SDs and split Ns use `esc::esc_mean_sd`.
- Effects with reported `d` but no split Ns use the project fallback `between_reported_d_t_df`.
- Study 7 (Experiment 4, bank; evidence strength required) uses an analyst-authorized
equal-N assumption because the paper reports total `N = 99` but not split condition Ns.
The implemented assumption is `n_low = n_high = 99 / 2`.
## Inputs and methods
```{r}
paper_key <- "turrindepistemiccontextualismidle"
sign_convention <- "d = mean(low) - mean(high)"
effects <- list(
list(
study_id = 1,
effect_id = "s1_e1",
method_used = "between_groups",
n_low = 102,
n_high = 99,
mean_low = 5.02,
sd_low = 1.48,
mean_high = 4.36,
sd_high = 1.81,
reported_d = 0.41,
notes_on_assumptions = "Experiment 1 (bank): split Ns recovered from one-sample tests, Low t(101), High t(98)."
),
list(
study_id = 2,
effect_id = "s2_e1",
method_used = "between_groups",
n_low = 52,
n_high = 49,
mean_low = 4.44,
sd_low = 1.55,
mean_high = 5.41,
sd_high = 1.61,
reported_d = 0.61,
notes_on_assumptions = "Conceptual replication of Experiment 1 (flight): split Ns recovered from one-sample tests, Low t(51), High t(48)."
),
list(
study_id = 3,
effect_id = "s3_e1",
method_used = "between_groups",
n_low = 98,
n_high = 101,
mean_low = 4.58,
sd_low = 1.67,
mean_high = 4.49,
sd_high = 1.78,
reported_d = 0.06,
notes_on_assumptions = "Experiment 2 (bank; deferral): stakes held constant; map low=Yes ('I do know'), high=No ('I don't know'). Split Ns recovered from one-sample tests, Yes t(97), No t(100)."
),
list(
study_id = 4,
effect_id = "s4_e1",
method_used = "between_groups",
n_low = 50,
n_high = 51,
mean_low = 4.54,
sd_low = 1.73,
mean_high = 4.61,
sd_high = 1.98,
reported_d = 0.04,
notes_on_assumptions = "Conceptual replication of Experiment 2 (flight; deferral): stakes held constant; map low=Yes ('I do know'), high=No ('I don't know'). Split Ns recovered from one-sample tests, Yes t(49), No t(50)."
),
list(
study_id = 5,
effect_id = "s5_e1",
method_used = "between_groups",
n_low = 102,
n_high = 100,
mean_low = 4.79,
sd_low = 1.72,
mean_high = 4.64,
sd_high = 1.68,
reported_d = 0.09,
notes_on_assumptions = "Experiment 3 (bank): split Ns recovered from one-sample tests, Low t(101), High t(99)."
),
list(
study_id = 6,
effect_id = "s6_e1",
method_used = "between_groups",
n_low = 50,
n_high = 51,
mean_low = 4.90,
sd_low = 1.61,
mean_high = 4.76,
sd_high = 1.72,
reported_d = 0.08,
notes_on_assumptions = "Conceptual replication of Experiment 3 (flight): split Ns recovered from one-sample tests, Low t(49), High t(50)."
),
list(
study_id = 7,
effect_id = "s7_e1",
method_used = "between_groups",
n_low = 99 / 2,
n_high = 99 / 2,
mean_low = 8.31,
sd_low = 1.86,
mean_high = 8.62,
sd_high = 1.98,
t_value = -0.81,
df = 97,
imputed_flag = TRUE,
needs_sensitivity = TRUE,
notes_on_assumptions = "Experiment 4 (bank; evidence strength required): split Ns not reported; equal-N assumption authorized by analyst, n_low=n_high=99/2."
),
list(
study_id = 8,
effect_id = "s8_e1",
method_used = "between_reported_d_t_df",
reported_d = 0.34,
t_value = 1.69,
df = 98,
mean_low = 8.59,
mean_high = 9.08,
notes_on_assumptions = "Conceptual replication of Experiment 4 (flight): reported Cohen's d is unsigned; sign set from means using d = mean(low) - mean(high)."
)
)
legacy_effect_sizes <- data.frame(
study_id = c(1, 2, 3, 4, 5, 6, 7, 8),
effect_id = c("s1_e1", "s2_e1", "s3_e1", "s4_e1", "s5_e1", "s6_e1", "s7_e1", "s8_e1"),
previous_d = c(
0.399811605602,
-0.614166764737,
0.052122137662,
-0.037624978833,
0.088218811866,
0.084010249876,
NA_real_,
-0.34
),
previous_v = c(
0.020306563127,
0.041543987117,
0.020111966953,
0.039614992829,
0.019823377966,
0.039643488198,
NA_real_,
0.041064569211
)
)
```
## Shared helpers
```{r}
if (!requireNamespace("esc", quietly = TRUE)) {
stop("Package 'esc' is required for this analysis. Install with install.packages('esc').", call. = FALSE)
}
suppressPackageStartupMessages(library(esc))
default_or <- function(x, default) {
if (is.null(x)) default else x
}
stop_if_missing <- function(x, name) {
if (is.na(x)) stop(sprintf("Missing required input: %s", name), call. = FALSE)
}
# Exact small-sample correction factor used by metafor (.cmicalc).
hedges_correction <- function(df) {
ifelse(df <= 1, NA_real_, exp(lgamma(df / 2) - log(sqrt(df / 2)) - lgamma((df - 1) / 2)))
}
# Between-subjects: compute v when only reported d + t(df) are available.
var_d_between_from_d_t_df <- function(d, t_value, df) {
(d / t_value)^2 + (d^2) / (2 * df)
}
infer_sign <- function(mean_low, mean_high, sign_d) {
if (!is.na(mean_low) && !is.na(mean_high) && mean_low != mean_high) {
return(sign(mean_low - mean_high))
}
stop_if_missing(sign_d, "sign_d (cannot infer sign from means)")
if (!is.element(sign_d, c(-1, 1))) stop("sign_d must be +1 or -1", call. = FALSE)
sign_d
}
extract_esc <- function(x) {
list(
d = as.numeric(x$es),
v = as.numeric(x$var)
)
}
compute_with_esc <- function(fun, ...) {
d_obj <- fun(..., es.type = "d")
g_obj <- fun(..., es.type = "g")
d_out <- extract_esc(d_obj)
g_out <- extract_esc(g_obj)
list(d = d_out$d, v = d_out$v, g = g_out$d, v_g = g_out$v)
}
```
## Computation
```{r}
compute_effect <- function(effect_inputs) {
study_id <- effect_inputs$study_id
effect_id <- effect_inputs$effect_id
method_used <- effect_inputs$method_used
n_high <- default_or(effect_inputs$n_high, NA_real_)
n_low <- default_or(effect_inputs$n_low, NA_real_)
mean_high <- default_or(effect_inputs$mean_high, NA_real_)
mean_low <- default_or(effect_inputs$mean_low, NA_real_)
sd_high <- default_or(effect_inputs$sd_high, NA_real_)
sd_low <- default_or(effect_inputs$sd_low, NA_real_)
reported_d <- default_or(effect_inputs$reported_d, NA_real_)
t_value <- default_or(effect_inputs$t_value, NA_real_)
df <- default_or(effect_inputs$df, NA_real_)
sign_d <- default_or(effect_inputs$sign_d, NA_real_)
notes_on_assumptions <- default_or(effect_inputs$notes_on_assumptions, "")
imputed_flag <- default_or(effect_inputs$imputed_flag, FALSE)
needs_sensitivity <- default_or(effect_inputs$needs_sensitivity, FALSE)
d <- NA_real_
v <- NA_real_
g <- NA_real_
v_g <- NA_real_
computed_from_suggested <- NA_character_
if (method_used == "between_groups") {
computed_from_suggested <- "groups"
stop_if_missing(n_high, "n_high")
stop_if_missing(n_low, "n_low")
stop_if_missing(mean_high, "mean_high")
stop_if_missing(mean_low, "mean_low")
stop_if_missing(sd_high, "sd_high")
stop_if_missing(sd_low, "sd_low")
# esc_mean_sd computes grp1 - grp2; use grp1=low and grp2=high to match sign convention.
res <- compute_with_esc(
esc::esc_mean_sd,
grp1m = mean_low,
grp1sd = sd_low,
grp1n = n_low,
grp2m = mean_high,
grp2sd = sd_high,
grp2n = n_high
)
d <- res$d
v <- res$v
g <- res$g
v_g <- res$v_g
} else if (method_used == "between_reported_d_t_df") {
computed_from_suggested <- "reported_d"
stop_if_missing(reported_d, "reported_d")
stop_if_missing(t_value, "t_value")
stop_if_missing(df, "df")
sign_used <- infer_sign(mean_low, mean_high, sign_d)
d <- sign_used * abs(reported_d)
v <- var_d_between_from_d_t_df(d = d, t_value = abs(t_value), df = df)
J <- hedges_correction(df)
g <- J * d
v_g <- (J^2) * v
} else {
stop(sprintf("Unknown method_used: %s", method_used), call. = FALSE)
}
inputs_used <- paste(
c(
sprintf("method=%s", method_used),
sprintf("sign_convention=%s", sign_convention),
if (!is.na(n_low)) sprintf("n_low=%s", n_low) else NULL,
if (!is.na(n_high)) sprintf("n_high=%s", n_high) else NULL,
if (!is.na(mean_low)) sprintf("mean_low=%s", mean_low) else NULL,
if (!is.na(mean_high)) sprintf("mean_high=%s", mean_high) else NULL,
if (!is.na(sd_low)) sprintf("sd_low=%s", sd_low) else NULL,
if (!is.na(sd_high)) sprintf("sd_high=%s", sd_high) else NULL,
if (!is.na(df)) sprintf("df=%s", df) else NULL,
if (!is.na(t_value)) sprintf("t=%s", t_value) else NULL,
if (!is.na(reported_d)) sprintf("reported_d=%s", reported_d) else NULL,
if (!is.na(sign_d)) sprintf("sign_d=%s", sign_d) else NULL,
if (isTRUE(imputed_flag)) "imputed_equal_n=TRUE" else NULL
),
collapse = ", "
)
data.frame(
paper_key = paper_key,
study_id = study_id,
effect_id = effect_id,
design = "Between-Subjects",
method_used = method_used,
computed_from_suggested = computed_from_suggested,
inputs_used = inputs_used,
d = d,
v = v,
g = g,
v_g = v_g,
notes_on_assumptions = notes_on_assumptions,
imputed_flag = imputed_flag,
needs_sensitivity = needs_sensitivity,
stringsAsFactors = FALSE
)
}
audit <- do.call(rbind, lapply(effects, compute_effect))
audit
```
## Comparison With Previous Stored Values
```{r}
comparison <- merge(
audit[, c("study_id", "effect_id", "d", "v", "method_used", "imputed_flag")],
legacy_effect_sizes,
by = c("study_id", "effect_id"),
all.x = TRUE
)
comparison$delta_d <- comparison$d - comparison$previous_d
comparison$delta_v <- comparison$v - comparison$previous_v
comparison$abs_delta_d <- abs(comparison$delta_d)
comparison$abs_delta_v <- abs(comparison$delta_v)
comparison <- comparison[order(comparison$study_id), ]
comparison
```
## Study 7 Equal-N Sensitivity
The primary Study 7 estimate uses `n_low = n_high = 99 / 2`. The two possible
integer splits differ only by one participant. This table checks both directions.
```{r}
study7_sensitivity_inputs <- list(
list(split = "equal_fractional_49.5_49.5", n_low = 99 / 2, n_high = 99 / 2),
list(split = "low49_high50", n_low = 49, n_high = 50),
list(split = "low50_high49", n_low = 50, n_high = 49)
)
study7_sensitivity <- do.call(rbind, lapply(study7_sensitivity_inputs, function(x) {
res <- compute_with_esc(
esc::esc_mean_sd,
grp1m = 8.31,
grp1sd = 1.86,
grp1n = x$n_low,
grp2m = 8.62,
grp2sd = 1.98,
grp2n = x$n_high
)
data.frame(
split = x$split,
n_low = x$n_low,
n_high = x$n_high,
d = res$d,
v = res$v,
stringsAsFactors = FALSE
)
}))
study7_sensitivity$delta_d_from_primary <- study7_sensitivity$d - study7_sensitivity$d[1]
study7_sensitivity$delta_v_from_primary <- study7_sensitivity$v - study7_sensitivity$v[1]
study7_sensitivity
```
## Paste-ready YAML snippets
```{r}
for (i in seq_len(nrow(audit))) {
row <- audit[i, ]
cat(sprintf("\n# %s (study_id=%s)\n", row$effect_id, row$study_id))
cat(sprintf(
"effect_size:\n metric: SMD\n d: %.12f\n v: %.12f\n computed_from: %s\n needs_review: false\n notes: \"%s\"\n",
row$d,
row$v,
row$computed_from_suggested,
gsub("\"", "'", row$inputs_used)
))
}
```