turrietalndactionabilityjudgmentscause
/data/papers/turrietalndactionabilityjudgmentscause/analysis/effect_sizes.qmd---
title: "Effect size computations: turrietalndactionabilityjudgmentscause"
format:
html:
toc: true
execute:
echo: true
warning: true
message: false
---
Computes standardized mean differences (`d`) and sampling variances (`v`) for the
extraction YAML `papers/turrietalndactionabilityjudgmentscause/turrietalndactionabilityjudgmentscause.yaml`.
## Inputs and methods
```{r}
paper_key <- "turrietalndactionabilityjudgmentscause"
sign_convention <- "d = mean(low) - mean(high)"
effects <- list(
# Study 1: Experiment 1 (intelligence analyst; mTurk)
list(
study_id = 1,
effect_id = "s1_e1",
method_used = "between_reported_d_t_df",
mean_low = 4.53,
mean_high = 3.49,
t_value = 4.36,
df = 198,
reported_d = 0.62,
notes_on_assumptions = "Knowledge outcome. Paper reports d; per-condition split Ns not reported; v computed from reported d + t(df)."
),
# Study 2: Experiment 2 (coffee menu / pine nuts; mTurk)
list(
study_id = 2,
effect_id = "s2_e1",
method_used = "between_reported_d_t_df",
mean_low = 6.13,
mean_high = 6.19,
t_value = -0.38,
df = 203,
reported_d = 0.05,
notes_on_assumptions = "Knowledge outcome. Paper reports |d| (sign inferred from means); per-condition split Ns not reported; v computed from reported d + t(df)."
)
)
```
## Computation
```{r}
default_or <- function(x, default) {
if (is.null(x)) default else x
}
stop_if_missing <- function(x, name) {
if (is.na(x)) stop(sprintf("Missing required input: %s", name), call. = FALSE)
}
# Exact small-sample correction factor used by metafor (.cmicalc).
hedges_correction <- function(df) {
ifelse(df <= 1, NA_real_, exp(lgamma(df/2) - log(sqrt(df/2)) - lgamma((df - 1)/2)))
}
pooled_sd <- function(n_high, n_low, sd_high, sd_low) {
sqrt(((n_high - 1) * sd_high^2 + (n_low - 1) * sd_low^2) / (n_high + n_low - 2))
}
d_from_groups_independent <- function(n_high, n_low, mean_high, mean_low, sd_high, sd_low) {
s <- pooled_sd(n_high, n_low, sd_high, sd_low)
(mean_low - mean_high) / s
}
d_from_t_independent <- function(t_value, n_high, n_low) {
t_value * sqrt((n_high + n_low) / (n_high * n_low))
}
d_from_f_independent <- function(f_value, n_high, n_low) {
# Assumes a 2-group contrast where F = t^2
sqrt(f_value * (n_high + n_low) / (n_high * n_low))
}
d_from_chi2_independent <- function(chi2_value, n_high, n_low) {
sqrt(chi2_value * (n_high + n_low) / (n_high * n_low))
}
var_d_independent <- function(d, n_high, n_low) {
n <- n_high + n_low
(n / (n_high * n_low)) + (d^2 / (2 * (n - 2)))
}
# Between-subjects: compute v without split group Ns using reported d + t(df).
var_d_between_from_d_t_df <- function(d, t_value, df) {
(d / t_value)^2 + (d^2) / (2 * df)
}
# Within-subjects (SMCRP-style): standardize by pooled SD across occasions.
sd_pooled_within <- function(sd_low, sd_high) {
sqrt((sd_low^2 + sd_high^2) / 2)
}
d_within_smcrp <- function(mean_low, mean_high, sd_low, sd_high) {
(mean_low - mean_high) / sd_pooled_within(sd_low, sd_high)
}
# Recover within-person correlation r from paired t + means/SDs (when r is not reported).
r_from_paired_t <- function(mean_low, mean_high, sd_low, sd_high, t_value, n_total) {
mean_diff <- mean_low - mean_high
sd_diff <- abs(mean_diff) * sqrt(n_total) / abs(t_value)
(sd_low^2 + sd_high^2 - sd_diff^2) / (2 * sd_low * sd_high)
}
var_d_within_smcrp <- function(d, r, n_total) {
(2 * (1 - r) / n_total) + (d^2) * (1 + r^2) / (4 * n_total)
}
infer_sign <- function(mean_low, mean_high, sign_d) {
if (!is.na(mean_low) && !is.na(mean_high) && mean_low != mean_high) {
return(sign(mean_low - mean_high))
}
stop_if_missing(sign_d, "sign_d (cannot infer sign from means)")
if (!is.element(sign_d, c(-1, 1))) stop("sign_d must be +1 or -1", call. = FALSE)
sign_d
}
compute_effect <- function(effect_inputs) {
study_id <- effect_inputs$study_id
effect_id <- effect_inputs$effect_id
method_used <- effect_inputs$method_used
# Sign convention (matches extraction instructions).
sign_convention_local <- sign_convention
# Defaults (mirror the template).
n_high <- default_or(effect_inputs$n_high, NA_integer_)
n_low <- default_or(effect_inputs$n_low, NA_integer_)
n_total <- default_or(effect_inputs$n_total, NA_integer_)
mean_high <- default_or(effect_inputs$mean_high, NA_real_)
mean_low <- default_or(effect_inputs$mean_low, NA_real_)
sd_high <- default_or(effect_inputs$sd_high, NA_real_)
sd_low <- default_or(effect_inputs$sd_low, NA_real_)
t_value <- default_or(effect_inputs$t_value, NA_real_)
f_value <- default_or(effect_inputs$f_value, NA_real_)
chi2_value <- default_or(effect_inputs$chi2_value, NA_real_)
df <- default_or(effect_inputs$df, NA_real_)
reported_d <- default_or(effect_inputs$reported_d, NA_real_)
r_within <- default_or(effect_inputs$r_within, NA_real_)
sign_d <- default_or(effect_inputs$sign_d, NA_real_)
notes_on_assumptions <- default_or(effect_inputs$notes_on_assumptions, "")
imputed_flag <- default_or(effect_inputs$imputed_flag, FALSE)
needs_sensitivity <- default_or(effect_inputs$needs_sensitivity, TRUE)
# Compute d/v (and small-sample corrected g/v_g) based on method_used.
d <- NA_real_
v <- NA_real_
g <- NA_real_
v_g <- NA_real_
computed_from_suggested <- NA_character_
design_used <- if (startsWith(method_used, "between_")) "Between-Subjects" else if (startsWith(method_used, "within_")) "Within-Subjects" else NA_character_
if (method_used == "between_groups") {
computed_from_suggested <- "groups"
stop_if_missing(n_high, "n_high")
stop_if_missing(n_low, "n_low")
stop_if_missing(mean_high, "mean_high")
stop_if_missing(mean_low, "mean_low")
stop_if_missing(sd_high, "sd_high")
stop_if_missing(sd_low, "sd_low")
d <- d_from_groups_independent(n_high, n_low, mean_high, mean_low, sd_high, sd_low)
v <- var_d_independent(d, n_high, n_low)
df_used <- n_high + n_low - 2
J <- hedges_correction(df_used)
g <- J * d
v_g <- (J^2) * v
} else if (method_used == "between_t") {
computed_from_suggested <- "t_df"
stop_if_missing(n_high, "n_high")
stop_if_missing(n_low, "n_low")
stop_if_missing(t_value, "t_value")
sign_used <- infer_sign(mean_low, mean_high, sign_d)
d <- sign_used * abs(d_from_t_independent(t_value, n_high, n_low))
v <- var_d_independent(d, n_high, n_low)
df_used <- n_high + n_low - 2
J <- hedges_correction(df_used)
g <- J * d
v_g <- (J^2) * v
} else if (method_used == "between_f") {
computed_from_suggested <- "f_df"
stop_if_missing(n_high, "n_high")
stop_if_missing(n_low, "n_low")
stop_if_missing(f_value, "f_value")
sign_used <- infer_sign(mean_low, mean_high, sign_d)
d <- sign_used * abs(d_from_f_independent(f_value, n_high, n_low))
v <- var_d_independent(d, n_high, n_low)
df_used <- n_high + n_low - 2
J <- hedges_correction(df_used)
g <- J * d
v_g <- (J^2) * v
} else if (method_used == "between_chi2") {
computed_from_suggested <- "other"
stop_if_missing(n_high, "n_high")
stop_if_missing(n_low, "n_low")
stop_if_missing(chi2_value, "chi2_value")
sign_used <- infer_sign(mean_low, mean_high, sign_d)
d <- sign_used * abs(d_from_chi2_independent(chi2_value, n_high, n_low))
v <- var_d_independent(d, n_high, n_low)
df_used <- n_high + n_low - 2
J <- hedges_correction(df_used)
g <- J * d
v_g <- (J^2) * v
} else if (method_used == "between_reported_d_t_df") {
computed_from_suggested <- "reported_d"
stop_if_missing(reported_d, "reported_d")
stop_if_missing(t_value, "t_value")
stop_if_missing(df, "df")
sign_used <- infer_sign(mean_low, mean_high, sign_d)
d <- sign_used * abs(reported_d)
v <- var_d_between_from_d_t_df(d = d, t_value = abs(t_value), df = df)
J <- hedges_correction(df)
g <- J * d
v_g <- (J^2) * v
} else if (method_used == "within_smcrp_r") {
computed_from_suggested <- "groups"
stop_if_missing(n_total, "n_total")
stop_if_missing(mean_high, "mean_high")
stop_if_missing(mean_low, "mean_low")
stop_if_missing(sd_high, "sd_high")
stop_if_missing(sd_low, "sd_low")
stop_if_missing(r_within, "r_within")
if (abs(r_within) > 1) stop("r_within must be between -1 and 1", call. = FALSE)
d <- d_within_smcrp(mean_low, mean_high, sd_low, sd_high)
v <- var_d_within_smcrp(d = d, r = r_within, n_total = n_total)
df_used <- 2 * (n_total - 1) / (1 + r_within^2)
J <- hedges_correction(df_used)
g <- J * d
v_g <- (2 * (1 - r_within) / n_total) + (g^2) * (1 + r_within^2) / (4 * n_total)
} else if (method_used == "within_smcrp_t") {
computed_from_suggested <- "groups"
stop_if_missing(mean_high, "mean_high")
stop_if_missing(mean_low, "mean_low")
stop_if_missing(sd_high, "sd_high")
stop_if_missing(sd_low, "sd_low")
stop_if_missing(t_value, "t_value")
if (is.na(n_total)) {
stop_if_missing(df, "df (or provide n_total)")
n_total <- df + 1
}
if (!is.na(df) && df != (n_total - 1)) {
warning("For a paired t-test, df should equal n_total - 1; check inputs.")
}
r_est <- r_from_paired_t(mean_low, mean_high, sd_low, sd_high, t_value, n_total)
if (abs(r_est) > 1) stop(sprintf("Recovered r=%.4f outside [-1,1]; check inputs or provide r_within directly.", r_est), call. = FALSE)
d <- d_within_smcrp(mean_low, mean_high, sd_low, sd_high)
v <- var_d_within_smcrp(d = d, r = r_est, n_total = n_total)
df_used <- 2 * (n_total - 1) / (1 + r_est^2)
J <- hedges_correction(df_used)
g <- J * d
v_g <- (2 * (1 - r_est) / n_total) + (g^2) * (1 + r_est^2) / (4 * n_total)
} else {
stop(sprintf("Unknown method_used: %s", method_used), call. = FALSE)
}
inputs_used <- paste(
c(
sprintf("method_used=%s", method_used),
sprintf("sign_convention=%s", sign_convention_local),
if (!is.na(mean_low)) sprintf("mean_low=%.6f", mean_low) else NULL,
if (!is.na(mean_high)) sprintf("mean_high=%.6f", mean_high) else NULL,
if (!is.na(t_value)) sprintf("t=%.6f", t_value) else NULL,
if (!is.na(df)) sprintf("df=%.6f", df) else NULL,
if (!is.na(reported_d)) sprintf("reported_d=%.6f", reported_d) else NULL,
if (!is.na(n_low)) sprintf("n_low=%s", n_low) else NULL,
if (!is.na(n_high)) sprintf("n_high=%s", n_high) else NULL,
if (!is.na(n_total)) sprintf("n_total=%s", n_total) else NULL
),
collapse = ", "
)
data.frame(
paper_key = paper_key,
study_id = study_id,
effect_id = effect_id,
design = design_used,
method_used = method_used,
computed_from_suggested = computed_from_suggested,
inputs_used = inputs_used,
d = d,
v = v,
g = g,
v_g = v_g,
notes_on_assumptions = notes_on_assumptions,
imputed_flag = imputed_flag,
needs_sensitivity = needs_sensitivity,
stringsAsFactors = FALSE
)
}
audit <- do.call(rbind, lapply(effects, compute_effect))
audit
```
## Paste-ready YAML snippets
```{r}
for (i in seq_len(nrow(audit))) {
row <- audit[i, ]
cat(sprintf("\n# %s (study_id=%s)\n", row$effect_id, row$study_id))
cat(sprintf(
"effect_size:\n metric: SMD\n d: %.12f\n v: %.12f\n computed_from: %s\n needs_review: false\n notes: \"%s\"\n",
row$d,
row$v,
row$computed_from_suggested,
gsub("\"", "'", row$inputs_used)
))
}
```