Effect size computations: buckwalter2010knowledgeisntclosed

Computes standardized mean differences (d) and sampling variances (v) for the extraction YAML papers/buckwalter2010knowledgeisntclosed/buckwalter2010knowledgeisntclosed.yaml.

Inputs and methods

paper_key <- "buckwalter2010knowledgeisntclosed"

sign_convention <- "d = mean(low) - mean(high)"

effects <- list(
  list(
    study_id = 1,
    effect_id = "s1_e1",
    method_used = "between_groups",
    n_low = 183,
    n_high = 181,
    mean_low = 3.83,
    mean_high = 3.71,
    sd_low = 1.065,
    sd_high = 1.108,
    notes_on_assumptions = "Table 1: Bank (low stakes) vs High Stakes (high stakes)."
  ),
  list(
    study_id = 1,
    effect_id = "s1_e2",
    method_used = "between_groups",
    n_low = 183,
    n_high = 180,
    mean_low = 3.83,
    mean_high = 3.64,
    sd_low = 1.065,
    sd_high = 1.102,
    notes_on_assumptions = "Table 1: Bank (low standards/no skeptical pressure) vs High Standards (skeptical pressure)."
  )
)

Computation

default_or <- function(x, default) {
  if (is.null(x)) default else x
}

stop_if_missing <- function(x, name) {
  if (is.na(x)) stop(sprintf("Missing required input: %s", name), call. = FALSE)
}

# Exact small-sample correction factor used by metafor (.cmicalc).
hedges_correction <- function(df) {
  ifelse(df <= 1, NA_real_, exp(lgamma(df / 2) - log(sqrt(df / 2)) - lgamma((df - 1) / 2)))
}

pooled_sd <- function(n_high, n_low, sd_high, sd_low) {
  sqrt(((n_high - 1) * sd_high^2 + (n_low - 1) * sd_low^2) / (n_high + n_low - 2))
}

d_from_groups_independent <- function(n_high, n_low, mean_high, mean_low, sd_high, sd_low) {
  s <- pooled_sd(n_high, n_low, sd_high, sd_low)
  (mean_low - mean_high) / s
}

var_d_independent <- function(d, n_high, n_low) {
  n <- n_high + n_low
  (n / (n_high * n_low)) + (d^2 / (2 * (n - 2)))
}

compute_effect <- function(effect_inputs) {
  study_id <- effect_inputs$study_id
  effect_id <- effect_inputs$effect_id
  method_used <- effect_inputs$method_used

  n_high <- default_or(effect_inputs$n_high, NA_integer_)
  n_low <- default_or(effect_inputs$n_low, NA_integer_)
  mean_high <- default_or(effect_inputs$mean_high, NA_real_)
  mean_low <- default_or(effect_inputs$mean_low, NA_real_)
  sd_high <- default_or(effect_inputs$sd_high, NA_real_)
  sd_low <- default_or(effect_inputs$sd_low, NA_real_)
  notes_on_assumptions <- default_or(effect_inputs$notes_on_assumptions, "")

  if (method_used != "between_groups") {
    stop(sprintf("This paper uses between_groups only; got method_used=%s", method_used), call. = FALSE)
  }

  stop_if_missing(n_high, "n_high")
  stop_if_missing(n_low, "n_low")
  stop_if_missing(mean_high, "mean_high")
  stop_if_missing(mean_low, "mean_low")
  stop_if_missing(sd_high, "sd_high")
  stop_if_missing(sd_low, "sd_low")

  d <- d_from_groups_independent(n_high, n_low, mean_high, mean_low, sd_high, sd_low)
  v <- var_d_independent(d, n_high, n_low)

  df_used <- n_high + n_low - 2
  J <- hedges_correction(df_used)
  g <- J * d
  v_g <- (J^2) * v

  inputs_used <- paste(
    c(
      sprintf("method=%s", method_used),
      sprintf("sign_convention=%s", sign_convention),
      sprintf("n_low=%s", n_low),
      sprintf("n_high=%s", n_high),
      sprintf("mean_low=%s", mean_low),
      sprintf("mean_high=%s", mean_high),
      sprintf("sd_low=%s", sd_low),
      sprintf("sd_high=%s", sd_high)
    ),
    collapse = ", "
  )

  data.frame(
    paper_key = paper_key,
    study_id = study_id,
    effect_id = effect_id,
    design = "Between-Subjects",
    method_used = method_used,
    computed_from_suggested = "groups",
    inputs_used = inputs_used,
    d = d,
    v = v,
    g = g,
    v_g = v_g,
    notes_on_assumptions = notes_on_assumptions,
    stringsAsFactors = FALSE
  )
}

audit <- do.call(rbind, lapply(effects, compute_effect))
audit
                          paper_key study_id effect_id           design
1 buckwalter2010knowledgeisntclosed        1     s1_e1 Between-Subjects
2 buckwalter2010knowledgeisntclosed        1     s1_e2 Between-Subjects
     method_used computed_from_suggested
1 between_groups                  groups
2 between_groups                  groups
                                                                                                                                           inputs_used
1 method=between_groups, sign_convention=d = mean(low) - mean(high), n_low=183, n_high=181, mean_low=3.83, mean_high=3.71, sd_low=1.065, sd_high=1.108
2 method=between_groups, sign_convention=d = mean(low) - mean(high), n_low=183, n_high=180, mean_low=3.83, mean_high=3.64, sd_low=1.065, sd_high=1.102
          d          v         g        v_g
1 0.1104368 0.01100619 0.1102078 0.01096059
2 0.1753570 0.01106263 0.1749923 0.01101667
                                                                         notes_on_assumptions
1                                    Table 1: Bank (low stakes) vs High Stakes (high stakes).
2 Table 1: Bank (low standards/no skeptical pressure) vs High Standards (skeptical pressure).

Paste-ready YAML snippets

for (i in seq_len(nrow(audit))) {
  row <- audit[i, ]
  cat(sprintf("\n# %s (study_id=%s)\n", row$effect_id, row$study_id))
  cat(sprintf(
    "effect_size:\n  metric: SMD\n  d: %.12f\n  v: %.12f\n  computed_from: %s\n  needs_review: false\n  notes: \"%s\"\n",
    row$d,
    row$v,
    row$computed_from_suggested,
    gsub("\"", "'", row$inputs_used)
  ))
}

# s1_e1 (study_id=1)
effect_size:
  metric: SMD
  d: 0.110436839422
  v: 0.011006188465
  computed_from: groups
  needs_review: false
  notes: "method=between_groups, sign_convention=d = mean(low) - mean(high), n_low=183, n_high=181, mean_low=3.83, mean_high=3.71, sd_low=1.065, sd_high=1.108"

# s1_e2 (study_id=1)
effect_size:
  metric: SMD
  d: 0.175356957931
  v: 0.011062626544
  computed_from: groups
  needs_review: false
  notes: "method=between_groups, sign_convention=d = mean(low) - mean(high), n_low=183, n_high=180, mean_low=3.83, mean_high=3.64, sd_low=1.065, sd_high=1.102"