sripadastanley2012empiricaltestsinterest
/data/papers/sripadastanley2012empiricaltestsinterest/analysis/effect_sizes.qmd
---
title: "Effect size computations: sripadastanley2012empiricaltestsinterest"
format:
  html:
    toc: true
execute:
  echo: true
  warning: true
  message: false
---

Computes standardized mean differences (`d`) and sampling variances (`v`) for the
extraction YAML `papers/sripadastanley2012empiricaltestsinterest/sripadastanley2012empiricaltestsinterest.yaml`.

## Inputs and methods

```{r}
paper_key <- "sripadastanley2012empiricaltestsinterest"

# Extraction sign convention:
# d = mean(low stakes) - mean(high stakes)
sign_convention <- "d = mean(low) - mean(high)"

# All effects are computed from independent-samples t-tests with n_low=n_high=50.
# Direction is set by sign_d=+1, justified by the paper's wording:
# "ratings ... weaker in the high stakes case compared to the low stakes condition"
# and "diminished ratings for knowledge ... in ... high stakes compared to ... low stakes".
effects <- list(
  # Study 1: Basic vignette (pine nuts)
  list(
    study_id = 1,
    effect_id = "s1_e1",
    method_used = "between_t",
    n_low = 50,
    n_high = 50,
    t_value = 1.98,
    df = 98,
    sign_d = 1,
    notes_on_assumptions = "Basic vignette; Q1 evidence-strength. Direction set by text: evidence weaker in high stakes than low."
  ),
  list(
    study_id = 1,
    effect_id = "s1_e2",
    method_used = "between_t",
    n_low = 50,
    n_high = 50,
    t_value = 0.25,
    df = 98,
    sign_d = 1,
    notes_on_assumptions = "Basic vignette; Q2 knowledge. Direction set by general description: diminished knowledge ratings in high stakes vs low."
  ),

  # Study 2: Implicit/Explicit vignette (pine nuts; low stakes left implicit)
  list(
    study_id = 2,
    effect_id = "s2_e1",
    method_used = "between_t",
    n_low = 50,
    n_high = 50,
    t_value = 2.29,
    df = 98,
    sign_d = 1,
    notes_on_assumptions = "Implicit/Explicit vignette; Q1 evidence-strength. Direction set by text: evidence weaker in high stakes than low."
  ),
  list(
    study_id = 2,
    effect_id = "s2_e2",
    method_used = "between_t",
    n_low = 50,
    n_high = 50,
    t_value = 3.43,
    df = 98,
    sign_d = 1,
    notes_on_assumptions = "Implicit/Explicit vignette; Q2 knowledge. Direction set by text: higher knowledge ratings in Implicit Low Stakes than Explicit High Stakes."
  ),

  # Study 3: Ignorant vignette (Mongolian pine nuts; protagonist unaware of stakes)
  list(
    study_id = 3,
    effect_id = "s3_e1",
    method_used = "between_t",
    n_low = 50,
    n_high = 50,
    t_value = 4.15,
    df = 98,
    sign_d = 1,
    notes_on_assumptions = "Ignorant vignette; Q1 evidence-strength. Direction set by text: evidence weaker in high stakes than low."
  ),
  list(
    study_id = 3,
    effect_id = "s3_e2",
    method_used = "between_t",
    n_low = 50,
    n_high = 50,
    t_value = 3.61,
    df = 98,
    sign_d = 1,
    notes_on_assumptions = "Ignorant vignette; Q2 knowledge. Direction set by general description: diminished knowledge ratings in high stakes vs low."
  )
)
```

## Computation

```{r}
default_or <- function(x, default) {
  if (is.null(x)) default else x
}

stop_if_missing <- function(x, name) {
  if (is.na(x)) stop(sprintf("Missing required input: %s", name), call. = FALSE)
}

# Exact small-sample correction factor used by metafor (.cmicalc).
hedges_correction <- function(df) {
  ifelse(df <= 1, NA_real_, exp(lgamma(df / 2) - log(sqrt(df / 2)) - lgamma((df - 1) / 2)))
}

d_from_t_independent <- function(t_value, n_high, n_low) {
  t_value * sqrt((n_high + n_low) / (n_high * n_low))
}

var_d_independent <- function(d, n_high, n_low) {
  n <- n_high + n_low
  (n / (n_high * n_low)) + (d^2 / (2 * (n - 2)))
}

infer_sign <- function(mean_low, mean_high, sign_d) {
  if (!is.na(mean_low) && !is.na(mean_high) && mean_low != mean_high) {
    return(sign(mean_low - mean_high))
  }
  stop_if_missing(sign_d, "sign_d (cannot infer sign from means)")
  if (!is.element(sign_d, c(-1, 1))) stop("sign_d must be +1 or -1", call. = FALSE)
  sign_d
}

compute_effect <- function(effect_inputs) {
  study_id <- effect_inputs$study_id
  effect_id <- effect_inputs$effect_id
  method_used <- effect_inputs$method_used

  n_high <- default_or(effect_inputs$n_high, NA_integer_)
  n_low <- default_or(effect_inputs$n_low, NA_integer_)
  mean_high <- default_or(effect_inputs$mean_high, NA_real_)
  mean_low <- default_or(effect_inputs$mean_low, NA_real_)
  t_value <- default_or(effect_inputs$t_value, NA_real_)
  df <- default_or(effect_inputs$df, NA_real_)
  sign_d <- default_or(effect_inputs$sign_d, NA_real_)
  notes_on_assumptions <- default_or(effect_inputs$notes_on_assumptions, "")

  if (method_used != "between_t") {
    stop(sprintf("This paper uses between_t only; got method_used=%s", method_used), call. = FALSE)
  }

  stop_if_missing(n_high, "n_high")
  stop_if_missing(n_low, "n_low")
  stop_if_missing(t_value, "t_value")

  sign_used <- infer_sign(mean_low, mean_high, sign_d)
  d <- sign_used * abs(d_from_t_independent(t_value, n_high, n_low))
  v <- var_d_independent(d, n_high, n_low)

  df_used <- n_high + n_low - 2
  J <- hedges_correction(df_used)
  g <- J * d
  v_g <- (J^2) * v

  inputs_used <- paste(
    c(
      sprintf("method=%s", method_used),
      sprintf("sign_convention=%s", sign_convention),
      sprintf("n_low=%s", n_low),
      sprintf("n_high=%s", n_high),
      sprintf("t=%s", t_value),
      sprintf("df=%s", df_used),
      sprintf("sign_d=%s", sign_d)
    ),
    collapse = ", "
  )

  data.frame(
    paper_key = paper_key,
    study_id = study_id,
    effect_id = effect_id,
    design = "Between-Subjects",
    method_used = method_used,
    computed_from_suggested = "t_df",
    inputs_used = inputs_used,
    d = d,
    v = v,
    g = g,
    v_g = v_g,
    notes_on_assumptions = notes_on_assumptions,
    stringsAsFactors = FALSE
  )
}

audit <- do.call(rbind, lapply(effects, compute_effect))
audit
```

## Paste-ready YAML snippets

```{r}
for (i in seq_len(nrow(audit))) {
  row <- audit[i, ]
  cat(sprintf("\n# %s (study_id=%s)\n", row$effect_id, row$study_id))
  cat(sprintf(
    "effect_size:\n  metric: SMD\n  d: %.12f\n  v: %.12f\n  computed_from: %s\n  needs_review: false\n  notes: \"%s\"\n",
    row$d,
    row$v,
    row$computed_from_suggested,
    gsub("\"", "'", row$inputs_used)
  ))
}
```