Please log in to access this page.
Please log in to access this page.
roseetal2019nothingstakeknowledge
/data/papers/roseetal2019nothingstakeknowledge/analysis/effect_sizes.qmd---
title: "Effect size computations: roseetal2019nothingstakeknowledge"
format:
  html:
    toc: true
execute:
  echo: true
  warning: true
  message: false
---
Computes standardized mean differences (`d`) and sampling variances (`v`) for the
extraction YAML `papers/roseetal2019nothingstakeknowledge/roseetal2019nothingstakeknowledge.yaml`.
The paper reports site-level `N` and Cramer's `V` for 2×2 (stakes × binary outcome) χ² tests.
Because the 2×2 cell counts are not reported, we use the *phi coefficient* conversion formulas
from the Campbell Collaboration Effect Size Calculator equations (Correlation / phi coefficient, §1.29).
Campbell notes that if 2×2 cell counts (or per-condition proportions) are available, logit/probit-based approximations are preferable; this is the best available option given the reporting in this paper.
Sign convention in extraction: `d = mean(low) - mean(high)`.
## Inputs
Tables (extracted from the paper):
- Knowledge Attribution: `../out/tables/tabula_stream_p10_t2.csv`
- Strict Knowledge Attribution: `../out/tables/tabula_stream_p14_t5.csv`
- Strict Knowledge Attribution (conditional on Knowledge Attribution): `../out/tables/tabula_stream_p18_t7.csv`
Effect direction (sign of `d`) was taken from the paper’s difference plots (difference = Low − High):
- Knowledge Attribution: Fig. 2
- Strict Knowledge Attribution: Fig. 5
- Strict Knowledge Attribution (conditional): Fig. 8
## Formula (Campbell, §1.29)
Let `r` be the phi correlation coefficient. For a 2×2 table, Cramer's `V = |phi|`, so we set `r = |V|` and apply the sign from the figures.
- `d = 2r / sqrt(1 - r^2)`
- `v_d = d^2 / chi^2`
- `chi^2 = r^2 * (n1 + n2) = r^2 * N`
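Substituting `d^2 = 4r^2 / (1 - r^2)` and `chi^2 = r^2 * N` cancels `r^2` and yields an equivalent expression that, unlike `d^2 / chi^2`, remains defined when `r = 0` (used below):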
- `v_d = 4 / (N * (1 - r^2))`
## Compute d and v
```{r}
# Key of the paper; copied onto every effect-size row produced below.
paper_key <- "roseetal2019nothingstakeknowledge"

# Direction in which every d in this document is signed.
sign_convention <- "d = mean(low) - mean(high)"

# Study sites, in the order used to number them.
sites <- c(
  "Mexico",
  "USA",
  "Brazil",
  "Bulgaria",
  "France",
  "Germany",
  "Italy",
  "Portugal",
  "Spain",
  "Switzerland",
  "UK",
  "Iran",
  "China",
  "Hong Kong",
  "Guangzhou China",
  "Mainland China",
  "Mongolia",
  "Japan",
  "India"
)
#' Read one extracted site-level results table.
#'
#' Keeps the rows whose `Sample` is one of the known `sites` and whose `N` is
#' present, and returns them sorted in the order of `sites`.
#'
#' @param path Path to a CSV file with the columns `Sample`, `N` and
#'   `Cramer's V` (a curly apostrophe in the header is accepted).
#' @return A data.frame with columns `site` (character), `n_total` (integer)
#'   and `v_cramers` (numeric).
read_table <- function(path) {
  df <- read.csv(path, na.strings = c("", " "), check.names = FALSE)
  # Normalize the curly apostrophe in "Cramer's V" (and any padding around
  # header cells) to simplify downstream use.
  names(df) <- trimws(gsub("\u2019", "'", names(df)))

  # Fail loudly here: a missing Sample/N column would otherwise yield an empty
  # result, and a missing Cramer's V column a cryptic data.frame() error.
  required <- c("Sample", "N", "Cramer's V")
  missing_cols <- setdiff(required, names(df))
  if (length(missing_cols) > 0) {
    stop(
      "Table '", path, "' is missing required column(s): ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  # Trim site labels so that padded cells (e.g. "USA ") still match `sites`.
  sample <- trimws(as.character(df[["Sample"]]))
  keep <- !is.na(df[["N"]]) & sample %in% sites
  df <- df[keep, , drop = FALSE]
  sample <- sample[keep]

  # Sort rows into the canonical site order.
  ord <- order(match(sample, sites))
  df <- df[ord, , drop = FALSE]

  data.frame(
    site = sample[ord],
    n_total = as.integer(df[["N"]]),
    v_cramers = as.numeric(df[["Cramer's V"]]),
    stringsAsFactors = FALSE
  )
}
#' Convert the magnitude of a phi coefficient into the magnitude of d.
#'
#' Implements `d = 2r / sqrt(1 - r^2)`.
#'
#' @param r_abs Numeric vector of absolute phi (Cramer's V) values.
#' @return Numeric vector of unsigned standardized mean differences.
phi_to_d_abs <- function(r_abs) {
  unexplained <- 1 - r_abs^2
  (2 * r_abs) / sqrt(unexplained)
}
#' Sampling variance of d when d was derived from a phi coefficient.
#'
#' Campbell, §1.29 gives `v_d = d^2 / chi^2` with `chi^2 = r^2 * N`; after
#' substitution this is `v_d = 4 / (N * (1 - r^2))`, which is what is computed.
#'
#' @param n_total Total sample size `N` of the 2x2 table.
#' @param r_abs Absolute phi (Cramer's V) value.
#' @return Numeric vector of sampling variances.
var_d_from_phi <- function(n_total, r_abs) {
  unexplained <- 1 - r_abs^2
  denominator <- n_total * unexplained
  4 / denominator
}
# Per-site effect directions (+1 / -1), one vector per outcome. Values are
# listed in the same order as `sites` and keyed by site name for lookup.
sign_e1 <- setNames(
  c(
    +1, +1, -1, +1, +1, +1, -1, -1, # Mexico .. Portugal
    +1, -1, +1, -1, -1, +1, -1,     # Spain .. Guangzhou China
    -1, +1, +1, +1                  # Mainland China .. India
  ),
  sites
)
sign_e2 <- setNames(
  c(
    +1, +1, -1, +1, +1, +1, +1, +1, # Mexico .. Portugal
    +1, +1, +1, -1, +1, +1, -1,     # Spain .. Guangzhou China
    -1, -1, +1, +1                  # Mainland China .. India
  ),
  sites
)
sign_e3 <- setNames(
  c(
    +1, +1, -1, +1, +1, +1, +1, +1, # Mexico .. Portugal
    +1, +1, +1, -1, +1, +1, +1,     # Spain .. Guangzhou China
    -1, -1, +1, -1                  # Mainland China .. India
  ),
  sites
)
#' Compute signed d and its sampling variance for every site in one table.
#'
#' Reads the table with `read_table()`, converts |Cramer's V| to |d| with
#' `phi_to_d_abs()`, applies the per-site sign, and computes the variance with
#' `var_d_from_phi()`.
#'
#' @param effect_suffix Suffix used to build `effect_id` (e.g. `"e1"`).
#' @param subgroup Label stored in the `subgroup` column.
#' @param fig_sign_source Figure number the signs were read from; only used to
#'   build the `sign_source` text.
#' @param table_path Path of the CSV passed to `read_table()`.
#' @param sign_map Numeric vector of +1/-1 named by site.
#' @return A data.frame with one row per site found in the table; row names
#'   are the default integer sequence.
compute_effects <- function(effect_suffix, subgroup, fig_sign_source, table_path, sign_map) {
  df <- read_table(table_path)

  # Look the signs up once and drop the names: a named vector would otherwise
  # become the data frame's row names, which rbind() later has to de-duplicate
  # into labels such as "Mexico1".
  site_sign <- unname(sign_map[df$site])
  if (anyNA(site_sign)) {
    stop(
      "No sign available for site(s): ",
      paste(df$site[is.na(site_sign)], collapse = ", "),
      call. = FALSE
    )
  }

  r_abs <- abs(df$v_cramers)
  d <- site_sign * phi_to_d_abs(r_abs)
  v <- var_d_from_phi(df$n_total, r_abs)

  # Position of each site in `sites` doubles as its study number.
  study_id <- match(df$site, sites)

  data.frame(
    paper_key = paper_key,
    study_id = study_id,
    effect_id = paste0("s", study_id, "_", effect_suffix),
    site = df$site,
    subgroup = subgroup,
    sign_convention = sign_convention,
    sign_source = paste0("Fig. ", fig_sign_source, " (difference Low - High)"),
    n_total = df$n_total,
    cramers_v = df$v_cramers,
    r_abs = r_abs,
    sign = site_sign,
    d = d,
    v = v,
    stringsAsFactors = FALSE
  )
}
# Outcome e1: signs taken from Fig. 2.
e1 <- compute_effects(
  "e1",
  "Knowledge Attribution",
  2,
  table_path = "../out/tables/tabula_stream_p10_t2.csv",
  sign_map = sign_e1
)

# Outcome e2: signs taken from Fig. 5.
e2 <- compute_effects(
  "e2",
  "Strict Knowledge Attribution",
  5,
  table_path = "../out/tables/tabula_stream_p14_t5.csv",
  sign_map = sign_e2
)

# Outcome e3: signs taken from Fig. 8.
e3 <- compute_effects(
  "e3",
  "Strict Knowledge Attribution (conditional)",
  8,
  table_path = "../out/tables/tabula_stream_p18_t7.csv",
  sign_map = sign_e3
)

# Stack the three outcomes into a single audit table and display it.
audit <- do.call(rbind, list(e1, e2, e3))
audit
```
## Quick sanity checks
```{r}
# Each table must contribute one row per site (19 sites), with no site
# appearing twice within an outcome.
stopifnot(nrow(e1) == 19, nrow(e2) == 19, nrow(e3) == 19)
stopifnot(anyDuplicated(audit$effect_id) == 0)
# is.finite() rejects NA/NaN as well as the +/-Inf produced when |V| = 1,
# which the weaker `!is.na(d)` / `v >= 0` checks would let through.
stopifnot(all(is.finite(audit$d)), all(is.finite(audit$v)), all(audit$v > 0))
# Ensure ordering matches the extraction YAML (study_id 1..19, then e1/e2/e3 per study).
audit$effect_order <- as.integer(sub(".*_e", "", audit$effect_id))
audit <- audit[order(audit$study_id, audit$effect_order), ]
audit[c("effect_id", "site", "subgroup", "n_total", "cramers_v", "sign", "d", "v")]
```