grindrodetalndthirdpersonknowledge
/data/papers/grindrodetalndthirdpersonknowledge/analysis/effect_sizes.qmd
---
title: "Effect size computation: grindrodetalndthirdpersonknowledge"
format:
  html:
    toc: true
execute:
  echo: true
  warning: true
  message: false
---

Computes standardized mean differences (`d`) and sampling variances (`v`) for the
extraction YAML `papers/grindrodetalndthirdpersonknowledge/grindrodetalndthirdpersonknowledge.yaml`.
For negative-polarity knowledge-denial targets, raw truth-rating contrasts are
also reported, but the YAML stores reverse-coded `d_for_yaml` values so that
positive effects consistently indicate stronger low-context knowledge attribution.

## Data sources

- Experiment 1 primary first-block results: `papers/grindrodetalndthirdpersonknowledge/out/tables/camelot_stream_p15_t3.csv` (Table 7).
- Experiment 2 primary first-block results: `papers/grindrodetalndthirdpersonknowledge/out/tables/camelot_stream_p19_t4.csv` (Table 8, `When presented first`).
- Experiment 1 supplemental all-response results: `papers/grindrodetalndthirdpersonknowledge/out/tables/camelot_stream_p12_t2.csv` (Table 6).
- Experiment 2 supplemental all-response results: `papers/grindrodetalndthirdpersonknowledge/out/tables/camelot_stream_p19_t4.csv` (Table 8, Overall).
- Within-subject paired `t` tests: `papers/grindrodetalndthirdpersonknowledge/out/fulltext.md`.

## Shared helpers

```{r}
stop_if_missing <- function(x, name) {
  if (is.na(x)) stop(sprintf("Missing required input: %s", name), call. = FALSE)
}

# Exact small-sample correction factor used by metafor (.cmicalc).
hedges_correction <- function(df) {
  ifelse(df <= 1, NA_real_, exp(lgamma(df / 2) - log(sqrt(df / 2)) - lgamma((df - 1) / 2)))
}

pooled_sd <- function(n_high, n_low, sd_high, sd_low) {
  sqrt(((n_high - 1) * sd_high^2 + (n_low - 1) * sd_low^2) / (n_high + n_low - 2))
}

d_from_groups_independent <- function(n_high, n_low, mean_high, mean_low, sd_high, sd_low) {
  s <- pooled_sd(n_high, n_low, sd_high, sd_low)
  (mean_low - mean_high) / s
}

var_d_independent <- function(d, n_high, n_low) {
  n <- n_high + n_low
  (n / (n_high * n_low)) + (d^2 / (2 * (n - 2)))
}

# Project metric for within-subject effects: standardize by pooled SD across occasions.
sd_pooled_within <- function(sd_low, sd_high) {
  sqrt((sd_low^2 + sd_high^2) / 2)
}

d_within_smcrp <- function(mean_low, mean_high, sd_low, sd_high) {
  (mean_low - mean_high) / sd_pooled_within(sd_low, sd_high)
}

# Recover within-person correlation r from paired t + means/SDs.
r_from_paired_t <- function(mean_low, mean_high, sd_low, sd_high, t_value, n_total) {
  mean_diff <- mean_low - mean_high
  sd_diff <- abs(mean_diff) * sqrt(n_total) / abs(t_value)
  (sd_low^2 + sd_high^2 - sd_diff^2) / (2 * sd_low * sd_high)
}

var_d_within_smcrp <- function(d, r, n_total) {
  (2 * (1 - r) / n_total) + (d^2) * (1 + r^2) / (4 * n_total)
}

# The paper's reported paired-samples d values match the dz convention t / sqrt(n),
# reported as unsigned magnitudes.
paper_dz_signed <- function(mean_low, mean_high, t_value, n_total) {
  sign(mean_low - mean_high) * abs(t_value) / sqrt(n_total)
}

compute_effect_size_between_groups <- function(
    paper_key,
    study_id,
    effect_id,
    n_low,
    n_high,
    mean_low,
    mean_high,
    sd_low,
    sd_high,
    sign_convention = "d = mean(low) - mean(high)",
    notes_on_assumptions = ""
) {
  stop_if_missing(n_low, "n_low")
  stop_if_missing(n_high, "n_high")
  stop_if_missing(mean_low, "mean_low")
  stop_if_missing(mean_high, "mean_high")
  stop_if_missing(sd_low, "sd_low")
  stop_if_missing(sd_high, "sd_high")

  d <- d_from_groups_independent(
    n_high = n_high,
    n_low = n_low,
    mean_high = mean_high,
    mean_low = mean_low,
    sd_high = sd_high,
    sd_low = sd_low
  )
  v <- var_d_independent(d = d, n_high = n_high, n_low = n_low)

  df_used <- n_high + n_low - 2
  J <- hedges_correction(df_used)
  g <- J * d
  v_g <- (J^2) * v

  inputs_used <- sprintf(
    "method=between_groups, sign_convention=%s, n_low=%s, n_high=%s, mean_low=%s, mean_high=%s, sd_low=%s, sd_high=%s",
    sign_convention, n_low, n_high, mean_low, mean_high, sd_low, sd_high
  )

  audit <- data.frame(
    analysis_variant = "first_block_primary",
    paper_key = paper_key,
    study_id = study_id,
    effect_id = effect_id,
    design = "Between-Subjects",
    method_used = "between_groups",
    computed_from_suggested = "groups",
    inputs_used = inputs_used,
    d = d,
    v = v,
    g = g,
    v_g = v_g,
    notes_on_assumptions = notes_on_assumptions,
    stringsAsFactors = FALSE
  )

  yaml_snippet <- sprintf(
    "effect_size:\\n  metric: SMD\\n  d: %.12f\\n  v: %.12f\\n  computed_from: groups\\n  needs_review: false\\n  notes: \"%s\"\\n",
    d, v, gsub(pattern = "\"", replacement = "'", x = inputs_used)
  )

  list(audit = audit, yaml_snippet = yaml_snippet)
}

compute_effect_size_within <- function(
    paper_key,
    study_id,
    effect_id,
    n_total,
    df,
    mean_low,
    mean_high,
    sd_low,
    sd_high,
    t_value,
    reported_d_paper,
    sign_convention = "d = mean(low) - mean(high)",
    notes_on_assumptions = ""
) {
  stop_if_missing(n_total, "n_total")
  stop_if_missing(df, "df")
  stop_if_missing(mean_low, "mean_low")
  stop_if_missing(mean_high, "mean_high")
  stop_if_missing(sd_low, "sd_low")
  stop_if_missing(sd_high, "sd_high")
  stop_if_missing(t_value, "t_value")

  if (df != (n_total - 1)) {
    stop(sprintf("For paired t tests, expected df = n_total - 1 but got df=%s and n_total=%s.", df, n_total), call. = FALSE)
  }

  r_est <- r_from_paired_t(
    mean_low = mean_low,
    mean_high = mean_high,
    sd_low = sd_low,
    sd_high = sd_high,
    t_value = t_value,
    n_total = n_total
  )

  if (abs(r_est) > 1) {
    stop(sprintf("Recovered r=%.4f outside [-1,1]; check inputs.", r_est), call. = FALSE)
  }

  d <- d_within_smcrp(
    mean_low = mean_low,
    mean_high = mean_high,
    sd_low = sd_low,
    sd_high = sd_high
  )
  v <- var_d_within_smcrp(d = d, r = r_est, n_total = n_total)

  df_used <- 2 * (n_total - 1) / (1 + r_est^2)
  J <- hedges_correction(df_used)
  g <- J * d
  v_g <- (2 * (1 - r_est) / n_total) + (g^2) * (1 + r_est^2) / (4 * n_total)
  paper_d_signed <- paper_dz_signed(
    mean_low = mean_low,
    mean_high = mean_high,
    t_value = t_value,
    n_total = n_total
  )

  inputs_used <- sprintf(
    "method=within_smcrp_t, sign_convention=%s, n_total=%s, df=%s, mean_low=%s, mean_high=%s, sd_low=%s, sd_high=%s, t=%s",
    sign_convention, n_total, df, mean_low, mean_high, sd_low, sd_high, t_value
  )

  audit <- data.frame(
    analysis_variant = "all_responses_within_subject",
    paper_key = paper_key,
    study_id = study_id,
    effect_id = effect_id,
    design = "Within-Subjects",
    method_used = "within_smcrp_t",
    computed_from_suggested = "groups",
    inputs_used = inputs_used,
    r_est = r_est,
    d = d,
    v = v,
    g = g,
    v_g = v_g,
    paper_d_signed = paper_d_signed,
    paper_d_reported = reported_d_paper,
    paper_d_abs_diff = abs(abs(paper_d_signed) - reported_d_paper),
    notes_on_assumptions = notes_on_assumptions,
    stringsAsFactors = FALSE
  )

  list(audit = audit)
}
```

## Primary YAML computations: first-block between-subject estimates

```{r}
paper_key <- "grindrodetalndthirdpersonknowledge"
negative_polarity_effects <- c("s1_e2", "s1_e4", "s1_e6", "s1_e8", "s2_e2", "s2_e4")

add_yaml_direction <- function(audit) {
  audit$d_raw <- audit$d
  audit$reverse_negative_polarity <- audit$effect_id %in% negative_polarity_effects
  audit$d_for_yaml <- ifelse(audit$reverse_negative_polarity, -audit$d_raw, audit$d_raw)
  audit
}

effects_first_block <- data.frame(
  study_id = c(1, 1, 1, 1, 2, 2),
  effect_id = c("s1_e1", "s1_e2", "s1_e3", "s1_e4", "s2_e1", "s2_e2"),
  n_low = c(105, 121, 121, 105, 107, 102),
  mean_low = c(71.90, 48.36, 72.67, 43.27, 49.77, 52.91),
  sd_low = c(33.69, 41.17, 34.15, 40.94, 42.48, 41.29),
  n_high = c(107, 98, 98, 107, 96, 97),
  mean_high = c(36.83, 78.73, 63.23, 54.86, 41.66, 75.07),
  sd_high = c(38.49, 31.29, 35.56, 36.82, 40.22, 31.30),
  notes_on_assumptions = c(
    "Experiment 1, first-block between-subjects emulation: Knowledge-Neighbor, positive sentence (Table 7).",
    "Experiment 1, first-block between-subjects emulation: Knowledge-Neighbor, negative sentence (Table 7).",
    "Experiment 1, first-block between-subjects emulation: Knowledge-Sunshine, positive sentence (Table 7).",
    "Experiment 1, first-block between-subjects emulation: Knowledge-Sunshine, negative sentence (Table 7).",
    "Experiment 2, between-subjects emulation: Revised Neighbor, positive sentence (Table 8, 'When presented first').",
    "Experiment 2, between-subjects emulation: Revised Neighbor, negative sentence (Table 8, 'When presented first')."
  ),
  stringsAsFactors = FALSE
)
effects_first_block
```

```{r}
results_first_block <- lapply(seq_len(nrow(effects_first_block)), function(i) {
  row <- effects_first_block[i, ]
  compute_effect_size_between_groups(
    paper_key = paper_key,
    study_id = row$study_id,
    effect_id = row$effect_id,
    n_low = row$n_low,
    n_high = row$n_high,
    mean_low = row$mean_low,
    mean_high = row$mean_high,
    sd_low = row$sd_low,
    sd_high = row$sd_high,
    notes_on_assumptions = row$notes_on_assumptions
  )
})

audit_first_block <- do.call(rbind, lapply(results_first_block, `[[`, "audit"))
audit_first_block <- add_yaml_direction(audit_first_block)
audit_first_block
```

## Supplemental computations: all-response within-subject estimates

```{r}
effects_within <- data.frame(
  study_id = c(1, 1, 1, 1, 2, 2),
  effect_id = c("s1_e5", "s1_e6", "s1_e7", "s1_e8", "s2_e3", "s2_e4"),
  n_total = c(431, 431, 431, 431, 402, 402),
  df = c(430, 430, 430, 430, 401, 401),
  mean_low = c(54.38, 61.75, 65.42, 47.61, 59.95, 58.03),
  mean_high = c(37.45, 72.23, 54.37, 57.34, 50.88, 64.33),
  sd_low = c(39.90, 38.97, 35.64, 39.98, 40.10, 40.33),
  sd_high = c(38.22, 35.91, 37.31, 37.31, 41.71, 39.42),
  t_value = c(9.63, 6.24, 5.69, 6.11, 5.73, 3.74),
  reported_d_paper = c(0.46, 0.30, 0.27, 0.29, 0.29, 0.19),
  notes_on_assumptions = c(
    "Experiment 1, all responses within-subject: Knowledge-Neighbor, positive sentence (Table 6 + paired t test).",
    "Experiment 1, all responses within-subject: Knowledge-Neighbor, negative sentence (Table 6 + paired t test).",
    "Experiment 1, all responses within-subject: Knowledge-Sunshine, positive sentence (Table 6 + paired t test).",
    "Experiment 1, all responses within-subject: Knowledge-Sunshine, negative sentence (Table 6 + paired t test).",
    "Experiment 2, all responses within-subject: Revised Neighbor, positive sentence (Table 8 Overall + paired t test).",
    "Experiment 2, all responses within-subject: Revised Neighbor, negative sentence (Table 8 Overall + paired t test)."
  ),
  stringsAsFactors = FALSE
)
effects_within
```

```{r}
results_within <- lapply(seq_len(nrow(effects_within)), function(i) {
  row <- effects_within[i, ]
  compute_effect_size_within(
    paper_key = paper_key,
    study_id = row$study_id,
    effect_id = row$effect_id,
    n_total = row$n_total,
    df = row$df,
    mean_low = row$mean_low,
    mean_high = row$mean_high,
    sd_low = row$sd_low,
    sd_high = row$sd_high,
    t_value = row$t_value,
    reported_d_paper = row$reported_d_paper,
    notes_on_assumptions = row$notes_on_assumptions
  )
})

audit_within <- do.call(rbind, lapply(results_within, `[[`, "audit"))
audit_within <- add_yaml_direction(audit_within)
audit_within
```

## Comparison with paper-reported within-subject d

```{r}
comparison <- audit_within[, c(
  "effect_id",
  "d_raw",
  "d_for_yaml",
  "reverse_negative_polarity",
  "paper_d_signed",
  "paper_d_reported",
  "paper_d_abs_diff",
  "r_est"
)]
comparison

# Double-check that the paper's reported d values are recoverable from t/sqrt(n)
# up to normal reporting-rounding tolerance.
stopifnot(all(comparison$paper_d_abs_diff < 0.01))
```

## Paste-ready YAML snippets for the supplemental within-subject effects

```{r}
for (row_idx in seq_len(nrow(audit_within))) {
  row <- audit_within[row_idx, ]
  direction_note <- if (isTRUE(row$reverse_negative_polarity)) {
    sprintf(
      " Raw truth-rating d was %.12f; YAML stores the reverse-coded value to align with knowledge-attribution direction.",
      row$d_raw
    )
  } else {
    ""
  }
  cat(
    sprintf(
      "\n### %s\n\neffect_size:\n  metric: SMD\n  d: %.12f\n  v: %.12f\n  computed_from: groups\n  needs_review: false\n  notes: \"Computed in analysis/effect_sizes.qmd using within_smcrp_t (paired t used to recover within-person r);%s supplemental within-subject all-response effect retained alongside the primary first-block between-subject effect.\"\n",
      row$effect_id,
      row$d_for_yaml,
      row$v,
      direction_note
    )
  )
}
```

## Paste-ready YAML snippets for the primary first-block effects

```{r}
for (row_idx in seq_len(nrow(audit_first_block))) {
  row <- audit_first_block[row_idx, ]
  direction_note <- if (isTRUE(row$reverse_negative_polarity)) {
    sprintf(
      " Raw truth-rating d was %.12f; YAML stores the reverse-coded value to align with knowledge-attribution direction.",
      row$d_raw
    )
  } else {
    ""
  }
  cat(
    sprintf(
      "\n### %s\n\neffect_size:\n  metric: SMD\n  d: %.12f\n  v: %.12f\n  computed_from: groups\n  needs_review: false\n  notes: \"Computed from Table 7/Table 8 first-block group stats in analysis/effect_sizes.qmd (method=between_groups).%s\"\n",
      row$effect_id,
      row$d_for_yaml,
      row$v,
      direction_note
    )
  )
}
```