Effect size computations: dinges2021knowledgeasymmetricloss

Computes standardized mean differences (d) and sampling variances (v) for the extraction YAML papers/dinges2021knowledgeasymmetricloss/dinges2021knowledgeasymmetricloss.yaml.

Group summaries are derived from the OSF Qualtrics XML after applying the paper’s exclusion criteria, then converted to d and v with esc::esc_mean_sd, following the current project template for between-groups effects.

Data source

Raw data file: papers/dinges2021knowledgeasymmetricloss/data/data_study_1.xml (Qualtrics XML; downloaded from OSF node qknmh).

Load + clean (per paper exclusions)

library(xml2)

paper_key <- "dinges2021knowledgeasymmetricloss"
file_data <- "../data/data_study_1.xml"

read_qualtrics_xml <- function(file) {
  doc <- read_xml(file)
  responses <- xml_find_all(doc, ".//Response")

  get_text <- function(node, name) {
    x <- xml_find_first(node, paste0("./", name))
    if (inherits(x, "xml_missing")) return(NA_character_)
    v <- xml_text(x)
    if (!nzchar(v)) return(NA_character_)
    v
  }

  rows <- lapply(responses, function(r) {
    data.frame(
      Finished = get_text(r, "Finished"),
      Consent = get_text(r, "Consent"),
      check = get_text(r, "check"),
      Gender = get_text(r, "Gender"),
      Age = get_text(r, "Age"),
      low70 = get_text(r, "low70"),
      high70 = get_text(r, "high70"),
      low90 = get_text(r, "low90"),
      high90 = get_text(r, "high90"),
      stringsAsFactors = FALSE
    )
  })

  df <- do.call(rbind, rows)
  num_cols <- c("Finished", "Consent", "check", "Age", "low70", "high70", "low90", "high90")
  for (col in num_cols) {
    df[[col]] <- suppressWarnings(as.numeric(df[[col]]))
  }
  df
}

df_raw <- read_qualtrics_xml(file_data)
df <- subset(df_raw, Finished == 1 & Consent == 1)

# Apply exclusion criteria described in the paper:
# - attention check failures: check != 1
# - three extreme outliers: low-stakes-90% response 65; high-stakes-70% responses 50 and 70
df_included <- subset(df, check == 1)
df_included <- df_included[is.na(df_included$low70) | df_included$low70 != 65, ]
df_included <- df_included[is.na(df_included$high90) | !(df_included$high90 %in% c(50, 70)), ]

summ <- function(x) {
  x <- x[!is.na(x)]
  data.frame(
    n = length(x),
    mean = mean(x),
    sd = sd(x)
  )
}

# In the Qualtrics export, the variable suffixes are swapped relative to the
# paper's condition labels:
# - low70/high70 correspond to the 90%-certainty conditions
# - low90/high90 correspond to the 70%-certainty conditions
stats_ls90 <- summ(df_included$low70)
stats_hs90 <- summ(df_included$high70)
stats_ls70 <- summ(df_included$low90)
stats_hs70 <- summ(df_included$high90)

group_summaries <- rbind(
  cbind(group_id = "LS90", stats_ls90),
  cbind(group_id = "HS90", stats_hs90),
  cbind(group_id = "LS70", stats_ls70),
  cbind(group_id = "HS70", stats_hs70)
)
group_summaries
  group_id  n     mean        sd
1     LS90 26 2.346154 0.7452413
2     HS90 28 4.964286 3.4262834
3     LS70 28 2.035714 1.2317450
4     HS70 27 3.259259 2.1229985
n_total_included <- sum(group_summaries$n)
n_total_included
[1] 109

Inputs and methods

sign_convention <- "d = mean(low) - mean(high)"

effects <- list(
  list(
    study_id = 1,
    effect_id = "s1_e1",
    method_used = "between_groups",
    n_low = stats_ls90$n,
    n_high = stats_hs90$n,
    mean_low = stats_ls90$mean,
    mean_high = stats_hs90$mean,
    sd_low = stats_ls90$sd,
    sd_high = stats_hs90$sd,
    notes_on_assumptions = paste(
      "Group summaries computed from OSF Qualtrics XML after excluding",
      "attention-check failures and the three outliers described in the paper.",
      "In the raw export, low70/high70 correspond to the paper's 90% condition."
    )
  ),
  list(
    study_id = 1,
    effect_id = "s1_e2",
    method_used = "between_groups",
    n_low = stats_ls70$n,
    n_high = stats_hs70$n,
    mean_low = stats_ls70$mean,
    mean_high = stats_hs70$mean,
    sd_low = stats_ls70$sd,
    sd_high = stats_hs70$sd,
    notes_on_assumptions = paste(
      "Group summaries computed from OSF Qualtrics XML after excluding",
      "attention-check failures and the three outliers described in the paper.",
      "In the raw export, low90/high90 correspond to the paper's 70% condition."
    )
  )
)

Shared helpers

if (!requireNamespace("esc", quietly = TRUE)) {
  stop("Package 'esc' is required for this analysis. Install with install.packages('esc').", call. = FALSE)
}
suppressPackageStartupMessages(library(esc))

default_or <- function(x, default) {
  if (is.null(x)) default else x
}

stop_if_missing <- function(x, name) {
  if (is.na(x)) stop(sprintf("Missing required input: %s", name), call. = FALSE)
}

extract_esc <- function(x) {
  list(
    d = as.numeric(x$es),
    v = as.numeric(x$var)
  )
}

compute_with_esc <- function(fun, ...) {
  d_obj <- fun(..., es.type = "d")
  g_obj <- fun(..., es.type = "g")
  d_out <- extract_esc(d_obj)
  g_out <- extract_esc(g_obj)
  list(d = d_out$d, v = d_out$v, g = g_out$d, v_g = g_out$v)
}

Computation

compute_effect <- function(effect_inputs) {
  study_id <- effect_inputs$study_id
  effect_id <- effect_inputs$effect_id
  method_used <- effect_inputs$method_used

  n_high <- default_or(effect_inputs$n_high, NA_integer_)
  n_low <- default_or(effect_inputs$n_low, NA_integer_)
  mean_high <- default_or(effect_inputs$mean_high, NA_real_)
  mean_low <- default_or(effect_inputs$mean_low, NA_real_)
  sd_high <- default_or(effect_inputs$sd_high, NA_real_)
  sd_low <- default_or(effect_inputs$sd_low, NA_real_)
  notes_on_assumptions <- default_or(effect_inputs$notes_on_assumptions, "")

  if (method_used != "between_groups") {
    stop(sprintf("Unsupported method_used for this file: %s", method_used), call. = FALSE)
  }

  stop_if_missing(n_high, "n_high")
  stop_if_missing(n_low, "n_low")
  stop_if_missing(mean_high, "mean_high")
  stop_if_missing(mean_low, "mean_low")
  stop_if_missing(sd_high, "sd_high")
  stop_if_missing(sd_low, "sd_low")

  # esc_mean_sd computes grp1 - grp2; use grp1=low and grp2=high to match sign convention.
  res <- compute_with_esc(
    esc::esc_mean_sd,
    grp1m = mean_low,
    grp1sd = sd_low,
    grp1n = n_low,
    grp2m = mean_high,
    grp2sd = sd_high,
    grp2n = n_high
  )

  inputs_used <- paste(
    c(
      sprintf("method=%s", method_used),
      sprintf("sign_convention=%s", sign_convention),
      sprintf("n_low=%s", n_low),
      sprintf("n_high=%s", n_high),
      sprintf("mean_low=%s", mean_low),
      sprintf("mean_high=%s", mean_high),
      sprintf("sd_low=%s", sd_low),
      sprintf("sd_high=%s", sd_high)
    ),
    collapse = ", "
  )

  data.frame(
    paper_key = paper_key,
    study_id = study_id,
    effect_id = effect_id,
    design = "Between-Subjects",
    method_used = method_used,
    computed_from_suggested = "groups",
    inputs_used = inputs_used,
    d = res$d,
    v = res$v,
    g = res$g,
    v_g = res$v_g,
    notes_on_assumptions = notes_on_assumptions,
    stringsAsFactors = FALSE
  )
}

audit <- do.call(rbind, lapply(effects, compute_effect))
audit
                          paper_key study_id effect_id           design
1 dinges2021knowledgeasymmetricloss        1     s1_e1 Between-Subjects
2 dinges2021knowledgeasymmetricloss        1     s1_e2 Between-Subjects
     method_used computed_from_suggested
1 between_groups                  groups
2 between_groups                  groups
                                                                                                                                                                                        inputs_used
1 method=between_groups, sign_convention=d = mean(low) - mean(high), n_low=26, n_high=28, mean_low=2.34615384615385, mean_high=4.96428571428571, sd_low=0.745241313525099, sd_high=3.42628340763253
2  method=between_groups, sign_convention=d = mean(low) - mean(high), n_low=28, n_high=27, mean_low=2.03571428571429, mean_high=3.25925925925926, sd_low=1.23174500899974, sd_high=2.12299847082435
          d          v          g        v_g
1 -1.037955 0.08415128 -1.0229117 0.08415128
2 -0.708311 0.07731227 -0.6982402 0.07731227
                                                                                                                                                                                              notes_on_assumptions
1 Group summaries computed from OSF Qualtrics XML after excluding attention-check failures and the three outliers described in the paper. In the raw export, low70/high70 correspond to the paper's 90% condition.
2 Group summaries computed from OSF Qualtrics XML after excluding attention-check failures and the three outliers described in the paper. In the raw export, low90/high90 correspond to the paper's 70% condition.

Paste-ready YAML snippets

for (i in seq_len(nrow(audit))) {
  row <- audit[i, ]
  cat(sprintf("\n# %s (study_id=%s)\n", row$effect_id, row$study_id))
  cat(sprintf(
    "effect_size:\n  metric: SMD\n  d: %.12f\n  v: %.12f\n  computed_from: %s\n  needs_review: false\n  notes: \"%s\"\n",
    row$d,
    row$v,
    row$computed_from_suggested,
    gsub("\"", "'", row$inputs_used)
  ))
}

# s1_e1 (study_id=1)
effect_size:
  metric: SMD
  d: -1.037954558457
  v: 0.084151284041
  computed_from: groups
  needs_review: false
  notes: "method=between_groups, sign_convention=d = mean(low) - mean(high), n_low=26, n_high=28, mean_low=2.34615384615385, mean_high=4.96428571428571, sd_low=0.745241313525099, sd_high=3.42628340763253"

# s1_e2 (study_id=1)
effect_size:
  metric: SMD
  d: -0.708311022820
  v: 0.077312272797
  computed_from: groups
  needs_review: false
  notes: "method=between_groups, sign_convention=d = mean(low) - mean(high), n_low=28, n_high=27, mean_low=2.03571428571429, mean_high=3.25925925925926, sd_low=1.23174500899974, sd_high=2.12299847082435"