buckwalterschaffer2015knowledgestakesmistakes
/data/papers/buckwalterschaffer2015knowledgestakesmistakes/analysis/effect_sizes.qmd
---
title: "Effect size computations: buckwalterschaffer2015knowledgestakesmistakes"
format:
  html:
    toc: true
execute:
  echo: true
  warning: true
  message: false
---

Computes standardized mean differences (`d`) and sampling variances (`v`) for
`papers/buckwalterschaffer2015knowledgestakesmistakes/buckwalterschaffer2015knowledgestakesmistakes.yaml`.

All effects below use the analyst-authorized equal-cell assumption. For
Studies 2-7, this is an equal split within each reported two-condition stakes
contrast. For Study 1, the paper reports a 2x2 design with `N = 186`, so equal
allocation across the four cells implies an approximate cell size of `46.5`.

## Inputs and methods

```{r}
paper_key <- "buckwalterschaffer2015knowledgestakesmistakes"
sign_convention <- "d = mean(low) - mean(high)"

effects <- list(
  list(
    study_id = 1,
    effect_id = "s1_e1",
    method_used = "between_groups",
    n_low = 46.5,
    n_high = 46.5,
    mean_low = 2.11,
    sd_low = 1.00,
    mean_high = 4.61,
    sd_high = 2.76,
    notes_on_assumptions = "Typo know probe. Equal-cell approximation in 2x2 design: N=186 implies 46.5 per cell."
  ),
  list(
    study_id = 1,
    effect_id = "s1_e2",
    method_used = "between_groups",
    n_low = 46.5,
    n_high = 46.5,
    mean_low = 2.27,
    sd_low = 1.09,
    mean_high = 5.11,
    sd_high = 3.50,
    notes_on_assumptions = "Typo guess probe. Equal-cell approximation in 2x2 design: N=186 implies 46.5 per cell."
  ),
  list(
    study_id = 2,
    effect_id = "s2_e1",
    method_used = "between_groups",
    n_low = 40,
    n_high = 40,
    mean_low = 2.50,
    sd_low = 1.04,
    mean_high = 5.37,
    sd_high = 2.61,
    notes_on_assumptions = "Typo hope probe. Equal split assumed: n_low=n_high=40."
  ),
  list(
    study_id = 3,
    effect_id = "s3_e1",
    method_used = "between_groups",
    n_low = 50,
    n_high = 50,
    mean_low = 5.96,
    sd_low = 1.20,
    mean_high = 5.78,
    sd_high = 1.30,
    notes_on_assumptions = "Two reads knowledge probe. Equal split assumed: n_low=n_high=50."
  ),
  list(
    study_id = 4,
    effect_id = "s4_e1",
    method_used = "between_groups",
    n_low = 30,
    n_high = 30,
    mean_low = 5.73,
    sd_low = 1.26,
    mean_high = 5.63,
    sd_high = 1.25,
    notes_on_assumptions = "Two reads uncareful evidence probe. Equal split assumed: n_low=n_high=30."
  ),
  list(
    study_id = 4,
    effect_id = "s4_e2",
    method_used = "between_groups",
    n_low = 30,
    n_high = 30,
    mean_low = 5.43,
    sd_low = 1.33,
    mean_high = 5.50,
    sd_high = 1.38,
    notes_on_assumptions = "Two reads uncareful knowledge probe. Equal split assumed: n_low=n_high=30."
  ),
  list(
    study_id = 5,
    effect_id = "s5_e1",
    method_used = "between_groups",
    n_low = 50,
    n_high = 50,
    mean_low = 5.62,
    sd_low = 1.11,
    mean_high = 5.70,
    sd_high = 1.37,
    notes_on_assumptions = "Two reads knowledge probe with evidence preface. Equal split assumed: n_low=n_high=50."
  ),
  list(
    study_id = 5,
    effect_id = "s5_e2",
    method_used = "between_groups",
    n_low = 50,
    n_high = 50,
    mean_low = 5.86,
    sd_low = 1.07,
    mean_high = 5.88,
    sd_high = 1.24,
    notes_on_assumptions = "Two reads evidence probe with evidence preface. Equal split assumed: n_low=n_high=50. Means/SD route avoids the paper's inconsistent t/p pair."
  ),
  list(
    study_id = 6,
    effect_id = "s6_e1",
    method_used = "between_groups",
    n_low = 60,
    n_high = 60,
    mean_low = 3.57,
    sd_low = 2.04,
    mean_high = 3.43,
    sd_high = 2.06,
    notes_on_assumptions = "Two allergies knowledge probe. Equal split assumed: n_low=n_high=60."
  ),
  list(
    study_id = 6,
    effect_id = "s6_e2",
    method_used = "between_groups",
    n_low = 60,
    n_high = 60,
    mean_low = 3.77,
    sd_low = 1.91,
    mean_high = 3.67,
    sd_high = 1.71,
    notes_on_assumptions = "Two allergies evidence probe. Equal split assumed: n_low=n_high=60."
  ),
  list(
    study_id = 7,
    effect_id = "s7_e1",
    method_used = "between_groups",
    n_low = 60,
    n_high = 60,
    mean_low = 4.27,
    sd_low = 2.05,
    mean_high = 3.48,
    sd_high = 1.86,
    notes_on_assumptions = "Ignorant original knowledge probe. Equal split assumed within the original low/high pair: n_low=n_high=60."
  ),
  list(
    study_id = 7,
    effect_id = "s7_e2",
    method_used = "between_groups",
    n_low = 60,
    n_high = 60,
    mean_low = 4.57,
    sd_low = 1.80,
    mean_high = 3.55,
    sd_high = 1.70,
    notes_on_assumptions = "Ignorant original evidence probe. Equal split assumed within the original low/high pair: n_low=n_high=60."
  ),
  list(
    study_id = 7,
    effect_id = "s7_e3",
    method_used = "between_groups",
    n_low = 60,
    n_high = 60,
    mean_low = 3.55,
    sd_low = 1.99,
    mean_high = 3.60,
    sd_high = 2.03,
    notes_on_assumptions = "Ignorant salient knowledge probe. Equal split assumed within the salient low/high pair: n_low=n_high=60."
  ),
  list(
    study_id = 7,
    effect_id = "s7_e4",
    method_used = "between_groups",
    n_low = 60,
    n_high = 60,
    mean_low = 3.70,
    sd_low = 1.87,
    mean_high = 3.82,
    sd_high = 1.90,
    notes_on_assumptions = "Ignorant salient evidence probe. Equal split assumed within the salient low/high pair: n_low=n_high=60."
  )
)
```

## Computation

```{r}
if (!requireNamespace("esc", quietly = TRUE)) {
  stop("Package 'esc' is required for this template. Install with install.packages('esc').", call. = FALSE)
}
suppressPackageStartupMessages(library(esc))

stop_if_missing <- function(x, name) {
  if (is.na(x)) stop(sprintf("Missing required input: %s", name), call. = FALSE)
}

hedges_correction <- function(df) {
  ifelse(df <= 1, NA_real_, exp(lgamma(df / 2) - log(sqrt(df / 2)) - lgamma((df - 1) / 2)))
}

extract_esc <- function(x) {
  list(
    d = as.numeric(x$es),
    v = as.numeric(x$var)
  )
}

compute_with_esc <- function(fun, ...) {
  d_obj <- fun(..., es.type = "d")
  g_obj <- fun(..., es.type = "g")
  d_out <- extract_esc(d_obj)
  g_out <- extract_esc(g_obj)
  list(d = d_out$d, v = d_out$v, g = g_out$d, v_g = g_out$v)
}

compute_effect <- function(effect_inputs) {
  study_id <- effect_inputs$study_id
  effect_id <- effect_inputs$effect_id
  method_used <- effect_inputs$method_used
  n_high <- effect_inputs$n_high
  n_low <- effect_inputs$n_low
  mean_high <- effect_inputs$mean_high
  mean_low <- effect_inputs$mean_low
  sd_high <- effect_inputs$sd_high
  sd_low <- effect_inputs$sd_low
  notes_on_assumptions <- effect_inputs$notes_on_assumptions

  if (method_used != "between_groups") {
    stop(sprintf("This paper uses between_groups only; got method_used=%s", method_used), call. = FALSE)
  }

  stop_if_missing(n_high, "n_high")
  stop_if_missing(n_low, "n_low")
  stop_if_missing(mean_high, "mean_high")
  stop_if_missing(mean_low, "mean_low")
  stop_if_missing(sd_high, "sd_high")
  stop_if_missing(sd_low, "sd_low")

  res <- compute_with_esc(
    esc::esc_mean_sd,
    grp1m = mean_low,
    grp1sd = sd_low,
    grp1n = n_low,
    grp2m = mean_high,
    grp2sd = sd_high,
    grp2n = n_high
  )

  df_used <- n_high + n_low - 2
  J <- hedges_correction(df_used)

  inputs_used <- paste(
    c(
      sprintf("method=%s", method_used),
      sprintf("sign_convention=%s", sign_convention),
      sprintf("n_low=%s", n_low),
      sprintf("n_high=%s", n_high),
      sprintf("mean_low=%s", mean_low),
      sprintf("sd_low=%s", sd_low),
      sprintf("mean_high=%s", mean_high),
      sprintf("sd_high=%s", sd_high)
    ),
    collapse = ", "
  )

  data.frame(
    paper_key = paper_key,
    study_id = study_id,
    effect_id = effect_id,
    design = "Between-Subjects",
    method_used = method_used,
    computed_from_suggested = "groups",
    inputs_used = inputs_used,
    d = res$d,
    v = res$v,
    g = res$g,
    v_g = res$v_g,
    J = J,
    notes_on_assumptions = notes_on_assumptions,
    stringsAsFactors = FALSE
  )
}

audit <- do.call(rbind, lapply(effects, compute_effect))
audit
```

## Effect sections

```{r}
for (i in seq_len(nrow(audit))) {
  row <- audit[i, ]
  cat(sprintf("### %s (study_id=%s)\n\n", row$effect_id, row$study_id))
  cat("Inputs\n\n")
  cat("```text\n")
  cat(row$inputs_used)
  cat("\n```\n\n")
  cat("Assumptions\n\n")
  cat("```text\n")
  cat(row$notes_on_assumptions)
  cat("\n```\n\n")
  cat("Results\n\n")
  cat("```text\n")
  cat(sprintf("d = %.12f\n", row$d))
  cat(sprintf("v = %.12f\n", row$v))
  cat(sprintf("g = %.12f\n", row$g))
  cat(sprintf("v_g = %.12f\n", row$v_g))
  cat("```\n\n")
}
```

## Paste-ready YAML snippets

```{r}
for (i in seq_len(nrow(audit))) {
  row <- audit[i, ]
  notes <- paste(
    row$notes_on_assumptions,
    "Computed from reported means/SDs via esc::esc_mean_sd(...).",
    sep = " "
  )
  notes <- gsub("\"", "'", notes, fixed = TRUE)

  cat(sprintf("\n# %s (study_id=%s)\n", row$effect_id, row$study_id))
  cat(sprintf(
    "effect_size:\n  metric: SMD\n  d: %.12f\n  v: %.12f\n  computed_from: %s\n  needs_review: false\n  notes: \"%s\"\n",
    row$d,
    row$v,
    row$computed_from_suggested,
    notes
  ))
}
```