Please log in to access this page.
dinges2021knowledgeasymmetricloss
/data/papers/dinges2021knowledgeasymmetricloss/analysis/effect_sizes.qmd---
title: "Effect size computations: dinges2021knowledgeasymmetricloss"
format:
html:
toc: true
execute:
echo: true
warning: true
message: false
---
Computes standardized mean differences (`d`) and sampling variances (`v`) for the
extraction YAML `papers/dinges2021knowledgeasymmetricloss/dinges2021knowledgeasymmetricloss.yaml`.
Group summaries are derived from the OSF Qualtrics XML after applying the paper's
exclusion criteria, then converted to `d` and `v` with `esc::esc_mean_sd`,
following the current project template for between-groups effects.
## Data source
Raw data file: `papers/dinges2021knowledgeasymmetricloss/data/data_study_1.xml`
(Qualtrics XML; downloaded from OSF node `qknmh`).
## Load + clean (per paper exclusions)
```{r}
library(xml2)
paper_key <- "dinges2021knowledgeasymmetricloss"
file_data <- "../data/data_study_1.xml"
read_qualtrics_xml <- function(file) {
doc <- read_xml(file)
responses <- xml_find_all(doc, ".//Response")
get_text <- function(node, name) {
x <- xml_find_first(node, paste0("./", name))
if (inherits(x, "xml_missing")) return(NA_character_)
v <- xml_text(x)
if (!nzchar(v)) return(NA_character_)
v
}
rows <- lapply(responses, function(r) {
data.frame(
Finished = get_text(r, "Finished"),
Consent = get_text(r, "Consent"),
check = get_text(r, "check"),
Gender = get_text(r, "Gender"),
Age = get_text(r, "Age"),
low70 = get_text(r, "low70"),
high70 = get_text(r, "high70"),
low90 = get_text(r, "low90"),
high90 = get_text(r, "high90"),
stringsAsFactors = FALSE
)
})
df <- do.call(rbind, rows)
num_cols <- c("Finished", "Consent", "check", "Age", "low70", "high70", "low90", "high90")
for (col in num_cols) {
df[[col]] <- suppressWarnings(as.numeric(df[[col]]))
}
df
}
df_raw <- read_qualtrics_xml(file_data)
df <- subset(df_raw, Finished == 1 & Consent == 1)
# Apply exclusion criteria described in the paper:
# - attention check failures: check != 1
# - three extreme outliers: low-stakes-90% response 65; high-stakes-70% responses 50 and 70
df_included <- subset(df, check == 1)
df_included <- df_included[is.na(df_included$low70) | df_included$low70 != 65, ]
df_included <- df_included[is.na(df_included$high90) | !(df_included$high90 %in% c(50, 70)), ]
summ <- function(x) {
x <- x[!is.na(x)]
data.frame(
n = length(x),
mean = mean(x),
sd = sd(x)
)
}
# In the Qualtrics export, the variable suffixes are swapped relative to the
# paper's condition labels:
# - low70/high70 correspond to the 90%-certainty conditions
# - low90/high90 correspond to the 70%-certainty conditions
stats_ls90 <- summ(df_included$low70)
stats_hs90 <- summ(df_included$high70)
stats_ls70 <- summ(df_included$low90)
stats_hs70 <- summ(df_included$high90)
group_summaries <- rbind(
cbind(group_id = "LS90", stats_ls90),
cbind(group_id = "HS90", stats_hs90),
cbind(group_id = "LS70", stats_ls70),
cbind(group_id = "HS70", stats_hs70)
)
group_summaries
n_total_included <- sum(group_summaries$n)
n_total_included
```
## Inputs and methods
```{r}
sign_convention <- "d = mean(low) - mean(high)"
effects <- list(
list(
study_id = 1,
effect_id = "s1_e1",
method_used = "between_groups",
n_low = stats_ls90$n,
n_high = stats_hs90$n,
mean_low = stats_ls90$mean,
mean_high = stats_hs90$mean,
sd_low = stats_ls90$sd,
sd_high = stats_hs90$sd,
notes_on_assumptions = paste(
"Group summaries computed from OSF Qualtrics XML after excluding",
"attention-check failures and the three outliers described in the paper.",
"In the raw export, low70/high70 correspond to the paper's 90% condition."
)
),
list(
study_id = 1,
effect_id = "s1_e2",
method_used = "between_groups",
n_low = stats_ls70$n,
n_high = stats_hs70$n,
mean_low = stats_ls70$mean,
mean_high = stats_hs70$mean,
sd_low = stats_ls70$sd,
sd_high = stats_hs70$sd,
notes_on_assumptions = paste(
"Group summaries computed from OSF Qualtrics XML after excluding",
"attention-check failures and the three outliers described in the paper.",
"In the raw export, low90/high90 correspond to the paper's 70% condition."
)
)
)
```
## Shared helpers
```{r}
if (!requireNamespace("esc", quietly = TRUE)) {
stop("Package 'esc' is required for this analysis. Install with install.packages('esc').", call. = FALSE)
}
suppressPackageStartupMessages(library(esc))
default_or <- function(x, default) {
if (is.null(x)) default else x
}
stop_if_missing <- function(x, name) {
if (is.na(x)) stop(sprintf("Missing required input: %s", name), call. = FALSE)
}
extract_esc <- function(x) {
list(
d = as.numeric(x$es),
v = as.numeric(x$var)
)
}
compute_with_esc <- function(fun, ...) {
d_obj <- fun(..., es.type = "d")
g_obj <- fun(..., es.type = "g")
d_out <- extract_esc(d_obj)
g_out <- extract_esc(g_obj)
list(d = d_out$d, v = d_out$v, g = g_out$d, v_g = g_out$v)
}
```
## Computation
```{r}
compute_effect <- function(effect_inputs) {
study_id <- effect_inputs$study_id
effect_id <- effect_inputs$effect_id
method_used <- effect_inputs$method_used
n_high <- default_or(effect_inputs$n_high, NA_integer_)
n_low <- default_or(effect_inputs$n_low, NA_integer_)
mean_high <- default_or(effect_inputs$mean_high, NA_real_)
mean_low <- default_or(effect_inputs$mean_low, NA_real_)
sd_high <- default_or(effect_inputs$sd_high, NA_real_)
sd_low <- default_or(effect_inputs$sd_low, NA_real_)
notes_on_assumptions <- default_or(effect_inputs$notes_on_assumptions, "")
if (method_used != "between_groups") {
stop(sprintf("Unsupported method_used for this file: %s", method_used), call. = FALSE)
}
stop_if_missing(n_high, "n_high")
stop_if_missing(n_low, "n_low")
stop_if_missing(mean_high, "mean_high")
stop_if_missing(mean_low, "mean_low")
stop_if_missing(sd_high, "sd_high")
stop_if_missing(sd_low, "sd_low")
# esc_mean_sd computes grp1 - grp2; use grp1=low and grp2=high to match sign convention.
res <- compute_with_esc(
esc::esc_mean_sd,
grp1m = mean_low,
grp1sd = sd_low,
grp1n = n_low,
grp2m = mean_high,
grp2sd = sd_high,
grp2n = n_high
)
inputs_used <- paste(
c(
sprintf("method=%s", method_used),
sprintf("sign_convention=%s", sign_convention),
sprintf("n_low=%s", n_low),
sprintf("n_high=%s", n_high),
sprintf("mean_low=%s", mean_low),
sprintf("mean_high=%s", mean_high),
sprintf("sd_low=%s", sd_low),
sprintf("sd_high=%s", sd_high)
),
collapse = ", "
)
data.frame(
paper_key = paper_key,
study_id = study_id,
effect_id = effect_id,
design = "Between-Subjects",
method_used = method_used,
computed_from_suggested = "groups",
inputs_used = inputs_used,
d = res$d,
v = res$v,
g = res$g,
v_g = res$v_g,
notes_on_assumptions = notes_on_assumptions,
stringsAsFactors = FALSE
)
}
audit <- do.call(rbind, lapply(effects, compute_effect))
audit
```
## Paste-ready YAML snippets
```{r}
for (i in seq_len(nrow(audit))) {
row <- audit[i, ]
cat(sprintf("\n# %s (study_id=%s)\n", row$effect_id, row$study_id))
cat(sprintf(
"effect_size:\n metric: SMD\n d: %.12f\n v: %.12f\n computed_from: %s\n needs_review: false\n notes: \"%s\"\n",
row$d,
row$v,
row$computed_from_suggested,
gsub("\"", "'", row$inputs_used)
))
}
```