Effect size computation: shurakov2023trainflightretraction

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(esc)

Shared Helpers

stop_if_missing <- function(x, name) {
  if (is.na(x)) stop(sprintf("Missing required input: %s", name), call. = FALSE)
}

compute_effect_size <- function(
    paper_key,
    study_id,
    effect_id,
    method_used,
    sign_convention = "d = mean(low) - mean(high)",
    n_high = NA_integer_,
    n_low = NA_integer_,
    mean_high = NA_real_,
    mean_low = NA_real_,
    sd_high = NA_real_,
    sd_low = NA_real_,
    notes_on_assumptions = "",
    imputed_flag = FALSE,
    needs_sensitivity = TRUE
) {
  d <- NA_real_
  v <- NA_real_
  g <- NA_real_
  v_g <- NA_real_
  computed_from_suggested <- NA_character_

  if (method_used == "between_groups") {
    computed_from_suggested <- "groups"
    stop_if_missing(n_high, "n_high")
    stop_if_missing(n_low, "n_low")
    stop_if_missing(mean_high, "mean_high")
    stop_if_missing(mean_low, "mean_low")
    stop_if_missing(sd_high, "sd_high")
    stop_if_missing(sd_low, "sd_low")

    es_d <- esc::esc_mean_sd(
      grp1m = mean_low, grp1sd = sd_low, grp1n = n_low,
      grp2m = mean_high, grp2sd = sd_high, grp2n = n_high,
      es.type = "d"
    )
    es_g <- esc::esc_mean_sd(
      grp1m = mean_low, grp1sd = sd_low, grp1n = n_low,
      grp2m = mean_high, grp2sd = sd_high, grp2n = n_high,
      es.type = "g"
    )

    d <- as.numeric(es_d$es)
    v <- as.numeric(es_d$var)
    g <- as.numeric(es_g$es)
    v_g <- as.numeric(es_g$var)
  } else {
    stop(sprintf("Unknown method_used: %s", method_used), call. = FALSE)
  }

  inputs_used <- paste(
    c(
      sprintf("method=%s", method_used),
      sprintf("sign_convention=%s", sign_convention),
      sprintf("n_low=%s", n_low),
      sprintf("n_high=%s", n_high),
      sprintf("mean_low=%s", mean_low),
      sprintf("mean_high=%s", mean_high),
      sprintf("sd_low=%s", sd_low),
      sprintf("sd_high=%s", sd_high)
    ),
    collapse = ", "
  )

  audit <- data.frame(
    paper_key = paper_key,
    study_id = study_id,
    effect_id = effect_id,
    design = "Between-Subjects",
    method_used = method_used,
    computed_from_suggested = computed_from_suggested,
    inputs_used = inputs_used,
    d = d,
    v = v,
    g = g,
    v_g = v_g,
    notes_on_assumptions = notes_on_assumptions,
    imputed_flag = imputed_flag,
    needs_sensitivity = needs_sensitivity
  )

  yaml_snippet <- sprintf(
    "effect_size:\n  metric: SMD\n  d: %.12f\n  v: %.12f\n  computed_from: %s\n  needs_review: false\n  notes: \"%s\"\n",
    d, v, computed_from_suggested, gsub(pattern = "\"", replacement = "'", x = inputs_used)
  )

  list(audit = audit, yaml_snippet = yaml_snippet)
}

Source Data

The unpublished draft describes Experiment 3 as a Train/Flight extension of the retraction design. The effect-size computation below follows the author-sent Qualtrics exports data/Finished_Responses.csv and data/Unfinished_Responses.csv. Combining these files reproduces the manuscript’s reported Train/Flight Neutral and Stakes group descriptives more closely than the timestamped export alone. The first two Qualtrics metadata rows are removed by filtering to consenting responses with usable scenario data.

read_qualtrics <- function(file) {
  dat <- read.csv(
    file.path("../data", file),
    na.strings = c("", "NA"),
    check.names = FALSE
  )
  dat$source_file <- file
  dat
}

raw <- bind_rows(
  read_qualtrics("Finished_Responses.csv"),
  read_qualtrics("Unfinished_Responses.csv")
)

dat <- raw %>%
  filter(Consent == "I voluntarily agree to participate in this research study.")

data.frame(
  physical_rows_after_header = nrow(raw),
  consenting_rows = nrow(dat)
)

##   physical_rows_after_header consenting_rows
## 1                        370             365

Composite Scores

Composite scores follow the draft and author analysis script: I do is coded as +1, I don't as -1, multiplied by confidence from 1 to 7.

make_composite <- function(response, confidence) {
  response_sign <- case_when(
    response == "I do" ~ 1,
    response == "I don't" ~ -1,
    TRUE ~ NA_real_
  )
  response_sign * as.numeric(confidence)
}

extract_condition <- function(dat, prefix, scenario, condition, condition_label) {
  response_col <- paste0(prefix, "_", condition, "_A")
  confidence_col <- paste0(prefix, "_", condition, "_B_1")

  data.frame(
    scenario = scenario,
    condition = condition_label,
    response = dat[[response_col]],
    confidence = suppressWarnings(as.numeric(dat[[confidence_col]])),
    composite = make_composite(dat[[response_col]], dat[[confidence_col]])
  ) %>%
    filter(!is.na(composite))
}

train_positive <- "The guy knows that the train stops at Kensington"
train_negative <- "The guy does not know that the train stops at Kensington"
flight_positive <- "The woman knows that the flight takes 14 hours"
flight_negative <- "The woman does not know that the flight takes 14 hours"

train_assigned <- dat %>% filter(TR_SU %in% c(train_positive, train_negative))
flight_assigned <- dat %>% filter(FL_SU %in% c(flight_positive, flight_negative))

train_valid <- train_assigned %>%
  filter(TR_SU == train_positive, TR_AT == "Kensington")

flight_valid <- flight_assigned %>%
  filter(FL_SU == flight_positive, FL_AT == "Tokyo")

screening_counts <- data.frame(
  scenario = c("Train", "Flight"),
  assigned = c(nrow(train_assigned), nrow(flight_assigned)),
  initial_knowledge_endorsers = c(nrow(filter(train_assigned, TR_SU == train_positive)), nrow(filter(flight_assigned, FL_SU == flight_positive))),
  initial_knowledge_skeptics = c(nrow(filter(train_assigned, TR_SU == train_negative)), nrow(filter(flight_assigned, FL_SU == flight_negative))),
  valid_knowledge_endorsers = c(nrow(train_valid), nrow(flight_valid)),
  attention_failures_among_endorsers = c(
    nrow(filter(train_assigned, TR_SU == train_positive, is.na(TR_AT) | TR_AT != "Kensington")),
    nrow(filter(flight_assigned, FL_SU == flight_positive, is.na(FL_AT) | FL_AT != "Tokyo"))
  )
)

screening_counts

##   scenario assigned initial_knowledge_endorsers initial_knowledge_skeptics
## 1    Train      181                         161                         20
## 2   Flight      182                         178                          4
##   valid_knowledge_endorsers attention_failures_among_endorsers
## 1                       161                                  0
## 2                       177                                  1

composites <- bind_rows(
  extract_condition(train_valid, "TR", "Train", "N", "Neutral"),
  extract_condition(train_valid, "TR", "Train", "S", "Stakes"),
  extract_condition(train_valid, "TR", "Train", "E", "Evidence"),
  extract_condition(flight_valid, "FL", "Flight", "N", "Neutral"),
  extract_condition(flight_valid, "FL", "Flight", "S", "Stakes"),
  extract_condition(flight_valid, "FL", "Flight", "E", "Evidence")
)

group_stats <- composites %>%
  group_by(scenario, condition) %>%
  summarise(
    n = n(),
    retract_n = sum(response == "I don't"),
    retract_rate = retract_n / n,
    mean = mean(composite),
    sd = sd(composite),
    .groups = "drop"
  )

group_stats

## # A tibble: 6 × 7
##   scenario condition     n retract_n retract_rate  mean    sd
##   <chr>    <chr>     <int>     <int>        <dbl> <dbl> <dbl>
## 1 Flight   Evidence     59        49       0.831  -2.71  3.61
## 2 Flight   Neutral      59         3       0.0508  4.92  2.21
## 3 Flight   Stakes       59         7       0.119   4.42  3.13
## 4 Train    Evidence     54        34       0.630  -1.13  5.04
## 5 Train    Neutral      54         2       0.0370  5.33  2.17
## 6 Train    Stakes       53         4       0.0755  4.94  2.95

Effect Computations

Only the Stakes vs Neutral contrasts are extracted as primary stakes effects. The Evidence condition is retained above as a manipulation check and contextual comparison, but it is not the focal low-vs-high stakes comparison.

paper_key <- "shurakov2023trainflightretraction"
study_id <- 1

compute_from_stats <- function(effect_id, scenario_name) {
  low <- group_stats[group_stats$scenario == scenario_name & group_stats$condition == "Neutral", ]
  high <- group_stats[group_stats$scenario == scenario_name & group_stats$condition == "Stakes", ]

  compute_effect_size(
    paper_key = paper_key,
    study_id = study_id,
    effect_id = effect_id,
    method_used = "between_groups",
    n_high = high$n,
    n_low = low$n,
    mean_high = high$mean,
    mean_low = low$mean,
    sd_high = high$sd,
    sd_low = low$sd,
    notes_on_assumptions = "Computed from author-sent Finished and Unfinished Qualtrics exports after excluding non-consent, initial knowledge non-endorsers, and attention-check failures; effect size computed with esc::esc_mean_sd."
  )
}

res_s1_e1 <- compute_from_stats("s1_e1", "Train")
res_s1_e2 <- compute_from_stats("s1_e2", "Flight")

Effect s1_e1: Train, Stakes vs Neutral

res_s1_e1$audit

##                           paper_key study_id effect_id           design
## 1 shurakov2023trainflightretraction        1     s1_e1 Between-Subjects
##      method_used computed_from_suggested
## 1 between_groups                  groups
##                                                                                                                                                                                        inputs_used
## 1 method=between_groups, sign_convention=d = mean(low) - mean(high), n_low=54, n_high=53, mean_low=5.33333333333333, mean_high=4.94339622641509, sd_low=2.17186121381535, sd_high=2.95097828850303
##           d          v         g        v_g
## 1 0.1507176 0.03749259 0.1496385 0.03749259
##                                                                                                                                                                                          notes_on_assumptions
## 1 Computed from author-sent Finished and Unfinished Qualtrics exports after excluding non-consent, initial knowledge non-endorsers, and attention-check failures; effect size computed with esc::esc_mean_sd.
##   imputed_flag needs_sensitivity
## 1        FALSE              TRUE

cat(res_s1_e1$yaml_snippet)

## effect_size:
##   metric: SMD
##   d: 0.150717614261
##   v: 0.037492591641
##   computed_from: groups
##   needs_review: false
##   notes: "method=between_groups, sign_convention=d = mean(low) - mean(high), n_low=54, n_high=53, mean_low=5.33333333333333, mean_high=4.94339622641509, sd_low=2.17186121381535, sd_high=2.95097828850303"

Effect s1_e2: Flight, Stakes vs Neutral

res_s1_e2$audit

##                           paper_key study_id effect_id           design
## 1 shurakov2023trainflightretraction        1     s1_e2 Between-Subjects
##      method_used computed_from_suggested
## 1 between_groups                  groups
##                                                                                                                                                                                        inputs_used
## 1 method=between_groups, sign_convention=d = mean(low) - mean(high), n_low=59, n_high=59, mean_low=4.91525423728814, mean_high=4.42372881355932, sd_low=2.20726180495842, sd_high=3.13051383812741
##          d          v         g        v_g
## 1 0.181474 0.03403785 0.1802982 0.03403785
##                                                                                                                                                                                          notes_on_assumptions
## 1 Computed from author-sent Finished and Unfinished Qualtrics exports after excluding non-consent, initial knowledge non-endorsers, and attention-check failures; effect size computed with esc::esc_mean_sd.
##   imputed_flag needs_sensitivity
## 1        FALSE              TRUE

cat(res_s1_e2$yaml_snippet)

## effect_size:
##   metric: SMD
##   d: 0.181474047555
##   v: 0.034037850974
##   computed_from: groups
##   needs_review: false
##   notes: "method=between_groups, sign_convention=d = mean(low) - mean(high), n_low=59, n_high=59, mean_low=4.91525423728814, mean_high=4.42372881355932, sd_low=2.20726180495842, sd_high=3.13051383812741"

Audit Table

audits <- rbind(res_s1_e1$audit, res_s1_e2$audit)
audits

##                           paper_key study_id effect_id           design
## 1 shurakov2023trainflightretraction        1     s1_e1 Between-Subjects
## 2 shurakov2023trainflightretraction        1     s1_e2 Between-Subjects
##      method_used computed_from_suggested
## 1 between_groups                  groups
## 2 between_groups                  groups
##                                                                                                                                                                                        inputs_used
## 1 method=between_groups, sign_convention=d = mean(low) - mean(high), n_low=54, n_high=53, mean_low=5.33333333333333, mean_high=4.94339622641509, sd_low=2.17186121381535, sd_high=2.95097828850303
## 2 method=between_groups, sign_convention=d = mean(low) - mean(high), n_low=59, n_high=59, mean_low=4.91525423728814, mean_high=4.42372881355932, sd_low=2.20726180495842, sd_high=3.13051383812741
##           d          v         g        v_g
## 1 0.1507176 0.03749259 0.1496385 0.03749259
## 2 0.1814740 0.03403785 0.1802982 0.03403785
##                                                                                                                                                                                          notes_on_assumptions
## 1 Computed from author-sent Finished and Unfinished Qualtrics exports after excluding non-consent, initial knowledge non-endorsers, and attention-check failures; effect size computed with esc::esc_mean_sd.
## 2 Computed from author-sent Finished and Unfinished Qualtrics exports after excluding non-consent, initial knowledge non-endorsers, and attention-check failures; effect size computed with esc::esc_mean_sd.
##   imputed_flag needs_sensitivity
## 1        FALSE              TRUE
## 2        FALSE              TRUE