pinillos2012knowledgeexperimentspracitcal
/data/papers/pinillos2012knowledgeexperimentspracitcal/analysis/effect_sizes.qmd
---
title: "Effect size computation: pinillos2012knowledgeexperimentspracitcal"
format:
  html:
    toc: true
execute:
  echo: true
  warning: true
  message: false
---

```{r}
library(esc)
library(metafor)
```

## Shared helpers

```{r}
# Abort with a uniform message when a required input is absent.
#
# Arguments:
#   x     Value to check (normally a length-one number).
#   name  Label for `x`, interpolated into the error message.
#
# Returns invisible NULL when `x` is present; otherwise raises
# "Missing required input: <name>".
stop_if_missing <- function(x, name) {
  # Treat NULL / zero-length values (e.g. a misspelled `inputs$field`) the
  # same as NA. A bare `if (is.na(x))` fails on them with an unrelated
  # "argument is of length zero" error instead of naming the input.
  if (length(x) == 0L || anyNA(x)) {
    stop(sprintf("Missing required input: %s", name), call. = FALSE)
  }
  invisible(NULL)
}

# Small-sample bias-correction factor J(df) in its exact gamma-function form:
#   J = exp(lgamma(df / 2) - log(sqrt(df / 2)) - lgamma((df - 1) / 2))
# (the original note attributes this expression to metafor's .cmicalc).
# Vectorized over `df`; entries with df <= 1 yield NA.
hedges_correction <- function(df) {
  # Kept as a function call inside ifelse() so the gamma expression is only
  # evaluated when at least one df is above 1.
  exact_j <- function(m) {
    half_m <- m / 2
    exp(lgamma(half_m) - log(sqrt(half_m)) - lgamma((m - 1) / 2))
  }
  undefined <- df <= 1
  ifelse(undefined, NA_real_, exact_j(df))
}

# Compute a standardized mean difference (d), its small-sample-corrected
# version (g), their variances, and an audit record for a single effect.
#
# Supported `method_used` values:
#   "between_rpb"   - converts |reported_r| to d and g with esc::esc_rpb(),
#                     then imposes the sign given by `sign_d`.
#   "within_smcc_t" - derives the uncorrected d from a paired t and N with
#                     metafor::escalc(measure = "SMCC"), then g = J * d.
#
# Arguments:
#   paper_key, study_id, effect_id
#                    Identifiers copied into the audit row.
#   method_used      One of the method strings above.
#   sign_convention  Free-text description recorded in `inputs_used`.
#   n_high, n_low    Group sizes (between_rpb); the known split is used only
#                    when both are supplied.
#   n_total          Total N; required for within_smcc_t, and for between_rpb
#                    whenever a group size is missing.
#   t_value          Paired t statistic (within_smcc_t).
#   reported_r       Reported r (between_rpb); only its magnitude is used.
#   sign_d           +1 or -1; sign applied to d and g (between_rpb).
#   notes_on_assumptions, imputed_flag, needs_sensitivity
#                    Copied unchanged into the audit row.
#
# Returns a list with
#   audit         data.frame holding identifiers, inputs, d, v, g, v_g.
#   yaml_snippet  multi-line YAML text intended to be printed with cat().
#
# Stops when a required input is missing, when `sign_d` is not +1/-1, or when
# `method_used` is not a recognized method string.
compute_effect_size <- function(
    paper_key,
    study_id,
    effect_id,
    method_used,
    sign_convention = "d = mean(low) - mean(high)",
    n_high = NA_real_,
    n_low = NA_real_,
    n_total = NA_real_,
    t_value = NA_real_,
    reported_r = NA_real_,
    sign_d = NA_real_,
    notes_on_assumptions = "",
    imputed_flag = FALSE,
    needs_sensitivity = TRUE
) {
  # Fail early with a readable message; startsWith() on NA would otherwise
  # surface as "missing value where TRUE/FALSE needed".
  if (!is.character(method_used) || length(method_used) != 1L ||
      is.na(method_used)) {
    stop("method_used must be a single non-missing string", call. = FALSE)
  }

  d <- NA_real_
  v <- NA_real_
  g <- NA_real_
  v_g <- NA_real_
  computed_from_suggested <- NA_character_

  # Design label is derived from the method prefix.
  design_used <- if (startsWith(method_used, "between_")) {
    "Between-Subjects"
  } else if (startsWith(method_used, "within_")) {
    "Within-Subjects"
  } else {
    NA_character_
  }

  if (method_used == "between_rpb") {
    computed_from_suggested <- "other"
    stop_if_missing(reported_r, "reported_r")
    stop_if_missing(sign_d, "sign_d")
    if (!is.element(sign_d, c(-1, 1))) {
      stop("sign_d must be +1 or -1", call. = FALSE)
    }

    # Preferred: use split group Ns when known.
    if (!is.na(n_high) && !is.na(n_low)) {
      d_out <- esc::esc_rpb(
        r = abs(reported_r), grp1n = n_low, grp2n = n_high, es.type = "d"
      )
      g_out <- esc::esc_rpb(
        r = abs(reported_r), grp1n = n_low, grp2n = n_high, es.type = "g"
      )
    } else {
      # Fallback requested for this paper: if only total N is available,
      # assume equal split (n_low = n_high = totaln / 2) via esc_rpb(totaln=...).
      stop_if_missing(n_total, "n_total")
      d_out <- esc::esc_rpb(r = abs(reported_r), totaln = n_total, es.type = "d")
      g_out <- esc::esc_rpb(r = abs(reported_r), totaln = n_total, es.type = "g")
      # Record the assumed group sizes so they show up in `inputs_used`.
      # NOTE(review): if exactly one group N was supplied it is kept as given
      # while the other is set to n_total / 2 - confirm this is intended.
      if (is.na(n_low)) n_low <- n_total / 2
      if (is.na(n_high)) n_high <- n_total / 2
    }

    # The conversion is run on |r|; direction comes solely from `sign_d`.
    d <- sign_d * abs(d_out$es)
    v <- d_out$var
    g <- sign_d * abs(g_out$es)
    v_g <- g_out$var
  } else if (method_used == "within_smcc_t") {
    computed_from_suggested <- "t_df"
    stop_if_missing(n_total, "n_total")
    stop_if_missing(t_value, "t_value")

    # escalc() bias-corrects SMCC by default. Ask for the uncorrected
    # estimate so that `d` is the plain standardized mean change and the
    # correction factor J below is applied exactly once when forming `g`.
    esc_out <- metafor::escalc(
      measure = "SMCC", ti = t_value, ni = n_total, correct = FALSE
    )
    d <- esc_out$yi
    v <- esc_out$vi

    df_used <- n_total - 1
    J <- hedges_correction(df_used)
    g <- J * d
    v_g <- (J^2) * v
  } else {
    stop(sprintf("Unknown method_used: %s", method_used), call. = FALSE)
  }

  # Human-readable record of every non-missing input that fed the estimate.
  inputs_used <- paste(
    c(
      sprintf("method=%s", method_used),
      sprintf("sign_convention=%s", sign_convention),
      if (!is.na(n_low)) sprintf("n_low=%s", n_low) else NULL,
      if (!is.na(n_high)) sprintf("n_high=%s", n_high) else NULL,
      if (!is.na(n_total)) sprintf("n_total=%s", n_total) else NULL,
      if (!is.na(t_value)) sprintf("t=%s", t_value) else NULL,
      if (!is.na(reported_r)) sprintf("reported_r=%s", reported_r) else NULL,
      if (!is.na(sign_d)) sprintf("sign_d=%s", sign_d) else NULL
    ),
    collapse = ", "
  )

  audit <- data.frame(
    paper_key = paper_key,
    study_id = study_id,
    effect_id = effect_id,
    design = design_used,
    method_used = method_used,
    computed_from_suggested = computed_from_suggested,
    inputs_used = inputs_used,
    d = d,
    v = v,
    g = g,
    v_g = v_g,
    notes_on_assumptions = notes_on_assumptions,
    imputed_flag = imputed_flag,
    needs_sensitivity = needs_sensitivity
  )

  # Real newline / quote escapes: the previous "\\n" and "\\\"" sequences made
  # cat() print literal backslashes on a single line instead of YAML.
  # Double quotes inside the notes are swapped for single quotes so the
  # quoted YAML scalar stays well-formed.
  yaml_snippet <- sprintf(
    "effect_size:\n  metric: SMD\n  d: %.12f\n  v: %.12f\n  computed_from: %s\n  needs_review: false\n  notes: \"%s\"\n",
    d, v, computed_from_suggested,
    gsub(pattern = "\"", replacement = "'", x = inputs_used)
  )

  list(audit = audit, yaml_snippet = yaml_snippet)
}
```

## Study 1: Typo (Between-Subjects)

### Effect s1_e1: Typo-Low vs Typo-High (Mann-Whitney r -> d)

```{r}
# Identifiers for effect s1_e1; the following chunk reads these globals.
effect_id <- "s1_e1"
study_id <- 1
paper_key <- "pinillos2012knowledgeexperimentspracitcal"

# Numeric inputs for the r -> d conversion (echoed below for the audit trail).
inputs <- list(n_low = 77, n_high = 67, reported_r = -0.56, sign_d = -1)
inputs
```

```{r}
# Convert the reported r to d/g using the known group sizes; numeric
# arguments are looked up by name inside `inputs`.
res_s1_e1 <- with(inputs, compute_effect_size(
  paper_key = paper_key,
  study_id = study_id,
  effect_id = effect_id,
  method_used = "between_rpb",
  n_low = n_low,
  n_high = n_high,
  reported_r = reported_r,
  sign_d = sign_d,
  notes_on_assumptions = "Computed with esc::esc_rpb from reported Mann-Whitney r using known split Ns."
))
res_s1_e1$audit
cat(res_s1_e1$yaml_snippet)
```

### Effect s1_e2: Typo-Low vs Ignorant-Typo-High (Mann-Whitney r -> d)

```{r}
# Identifiers for effect s1_e2; the following chunk reads these globals.
effect_id <- "s1_e2"
study_id <- 1
paper_key <- "pinillos2012knowledgeexperimentspracitcal"

# Numeric inputs for the r -> d conversion (echoed below for the audit trail).
inputs <- list(n_low = 77, n_high = 69, reported_r = -0.39, sign_d = -1)
inputs
```

```{r}
# Convert the reported r to d/g using the known group sizes; numeric
# arguments are looked up by name inside `inputs`.
res_s1_e2 <- with(inputs, compute_effect_size(
  paper_key = paper_key,
  study_id = study_id,
  effect_id = effect_id,
  method_used = "between_rpb",
  n_low = n_low,
  n_high = n_high,
  reported_r = reported_r,
  sign_d = sign_d,
  notes_on_assumptions = "Computed with esc::esc_rpb from reported Mann-Whitney r using known split Ns."
))
res_s1_e2$audit
cat(res_s1_e2$yaml_snippet)
```

## Study 1 follow-up: Belief fixed (Between-Subjects)

### Effect s2_e1: Belief-Typo-Low vs Belief-Typo-High (Mann-Whitney r -> d)

```{r}
# Identifiers for effect s2_e1; the following chunk reads these globals.
effect_id <- "s2_e1"
study_id <- 2
paper_key <- "pinillos2012knowledgeexperimentspracitcal"

# Only the total N is listed here (no per-group sizes).
inputs <- list(n_total = 78, reported_r = 0.34, sign_d = -1)
inputs
```

```{r}
# Convert the reported r to d/g from the total N only; numeric arguments are
# looked up by name inside `inputs`.
res_s2_e1 <- with(inputs, compute_effect_size(
  paper_key = paper_key,
  study_id = study_id,
  effect_id = effect_id,
  method_used = "between_rpb",
  n_total = n_total,
  reported_r = reported_r,
  sign_d = sign_d,
  notes_on_assumptions = "Computed with esc::esc_rpb from reported Mann-Whitney r. Split group Ns were not reported; equal split (n_low=n_high=totaln/2) assumed."
))
res_s2_e1$audit
cat(res_s2_e1$yaml_snippet)
```

## Study 2: Typo juxtaposed (Within-Subjects)

### Effect s3_e1: Juxtaposed Typo (paired t -> SMCC)

```{r}
# Identifiers for effect s3_e1; the following chunk reads these globals.
effect_id <- "s3_e1"
study_id <- 3
paper_key <- "pinillos2012knowledgeexperimentspracitcal"

# Paired-design inputs: total N and the t statistic.
inputs <- list(n_total = 95, t_value = -7.6)
inputs
```

```{r}
# Within-subjects effect from the paired t and N; numeric arguments are
# looked up by name inside `inputs`.
res_s3_e1 <- with(inputs, compute_effect_size(
  paper_key = paper_key,
  study_id = study_id,
  effect_id = effect_id,
  method_used = "within_smcc_t",
  n_total = n_total,
  t_value = t_value,
  notes_on_assumptions = "Use metafor::escalc(measure='SMCC') with paired t and n_total; within-subject fallback (no matching esc method for this project metric)."
))
res_s3_e1$audit
cat(res_s3_e1$yaml_snippet)
```

## Study 3: Counting contest knowledge prompt (Between-Subjects)

### Effect s4_e1: Count-Low-Knowledge vs Count-High-Knowledge (Mann-Whitney r -> d)

```{r}
# Identifiers for effect s4_e1; the following chunk reads these globals.
effect_id <- "s4_e1"
study_id <- 4
paper_key <- "pinillos2012knowledgeexperimentspracitcal"

# Only the total N is listed here (no per-group sizes).
inputs <- list(n_total = 128, reported_r = -0.40, sign_d = -1)
inputs
```

```{r}
# Convert the reported r to d/g from the total N only; numeric arguments are
# looked up by name inside `inputs`.
res_s4_e1 <- with(inputs, compute_effect_size(
  paper_key = paper_key,
  study_id = study_id,
  effect_id = effect_id,
  method_used = "between_rpb",
  n_total = n_total,
  reported_r = reported_r,
  sign_d = sign_d,
  notes_on_assumptions = "Computed with esc::esc_rpb from reported Mann-Whitney r. Split group Ns were not reported; equal split (n_low=n_high=totaln/2) assumed."
))
res_s4_e1$audit
cat(res_s4_e1$yaml_snippet)
```