Why this vignette
This vignette validates samplyr on synthetic finite
populations with known truths. It uses two complementary layers:
- Deterministic checks on design metadata and invariants.
- Monte Carlo checks for estimator bias, standard-error calibration, and 95% CI coverage.
The goal is not to benchmark speed. The goal is to show statistical correctness.
Synthetic population with known properties
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
# Global seed for the population draw; each sampling replicate below gets
# its own explicit seed via execute(..., seed = ...).
set.seed(20260302)
# Build a synthetic finite population with stratum effects, PSU-level
# random effects, and a measure of size (MOS) that feeds both x and y.
#
# @param n_strata        Number of strata (labeled A, B, ...).
# @param psu_per_stratum Number of PSUs in each stratum.
# @param units_per_psu   Number of elementary units in each PSU.
# @return A tibble with one row per unit: id, stratum (factor),
#   psu (factor), psu_mos, x, y.
make_population <- function(
    n_strata = 6L,
    psu_per_stratum = 50L,
    units_per_psu = 30L) {
  n_psu <- n_strata * psu_per_stratum
  n_units <- n_psu * units_per_psu
  units_per_stratum <- psu_per_stratum * units_per_psu

  stratum_labels <- rep(LETTERS[seq_len(n_strata)], each = units_per_stratum)
  psu_labels <- rep(sprintf("PSU%03d", seq_len(n_psu)), each = units_per_psu)

  # PSU-level quantities, drawn once per PSU and expanded to units below.
  # The order of the random draws matches the original code so results are
  # reproducible under the same seed.
  psu_mos <- round(runif(n_psu, min = 80, max = 420))
  psu_re <- rnorm(n_psu, sd = 18)
  stratum_re <- rep(
    seq(-12, 16, length.out = n_strata),
    each = units_per_stratum
  )

  mos_by_unit <- rep(psu_mos, each = units_per_psu)
  x <- mos_by_unit + rnorm(n_units, sd = 10)
  y <- 200 + stratum_re + rep(psu_re, each = units_per_psu) + 0.08 * x +
    rnorm(n_units, sd = 20)

  tibble(
    id = seq_len(n_units),
    stratum = factor(stratum_labels),
    psu = factor(psu_labels),
    psu_mos = mos_by_unit,
    x = x,
    y = y
  )
}
# Draw the population once, then record the known finite-population truths
# that the Monte Carlo layer is judged against.
population <- make_population()

truth <- tibble(
  N = nrow(population),
  Y_total = sum(population$y),
  Y_mean = mean(population$y)
)
truth
#> # A tibble: 1 × 3
#> N Y_total Y_mean
#> <int> <dbl> <dbl>
#> 1  9000 2008269.   223.

Layer 1: deterministic checks
library(samplyr)
library(survey)
#> Loading required package: grid
#> Loading required package: Matrix
#> Loading required package: survival
#>
#> Attaching package: 'survey'
#> The following object is masked from 'package:graphics':
#>
#> dotchart
# Single-stage SRSWOR: 400 units from the whole population.
srs_design <- draw(sampling_design(), n = 400)
srs_sample <- execute(srs_design, population, seed = 1001)
# Stratified SRSWOR with proportional allocation across strata.
strat_design <- stratify_by(sampling_design(), stratum, alloc = "proportional")
strat_design <- draw(strat_design, n = 400)
strat_sample <- execute(strat_design, population, seed = 1002)
# Two-stage design: per stratum, 8 PSUs by Brewer PPS (with a certainty
# threshold on the MOS, exercised by deterministic check 4), then 6 SSUs
# by SRS within each selected PSU.
pps_design <- sampling_design() |>
  add_stage("PSU") |>
  stratify_by(stratum) |>
  cluster_by(psu)

pps_design <- pps_design |>
  draw(n = 8, method = "pps_brewer", mos = psu_mos, certainty_size = 390) |>
  add_stage("SSU") |>
  draw(n = 6)

pps_two_stage <- execute(pps_design, population, seed = 1003)
# Per-stratum population counts, joined to the realized weight sums of the
# stratified sample; under proportional allocation these must agree.
stratum_sizes <- count(population, stratum, name = "N_h")

strat_weight_by_stratum <- strat_sample |>
  summarise(weight_sum = sum(.weight), .by = stratum) |>
  left_join(stratum_sizes, by = "stratum")
# Deterministic design invariants. These are structural checks on the
# executed samples: if any fails, the design object is wrong before any
# estimation begins.
checks <- tibble(
  check = c(
    "SRS: sum(weights) = N",
    "Stratified SRS: sum_h weights_h = N_h",
    "Two-stage PPS: .weight = .weight_1 * .weight_2",
    "Two-stage PPS certainty: certainty PSUs have .weight_1 = 1"
  ),
  pass = c(
    # Horvitz-Thompson weights in an SRS must sum to the population size.
    isTRUE(all.equal(sum(srs_sample$.weight), truth$N)),
    # Within each stratum, weights must sum to the stratum size N_h.
    isTRUE(all(abs(strat_weight_by_stratum$weight_sum - strat_weight_by_stratum$N_h) < 1e-8)),
    # The overall weight must factor into the product of per-stage weights.
    isTRUE(all.equal(pps_two_stage$.weight, pps_two_stage$.weight_1 * pps_two_stage$.weight_2, tolerance = 1e-10)),
    # Certainty PSUs are selected with probability 1, so their first-stage
    # weight must be 1. Compare doubles with a tolerance, not exact `==`,
    # consistent with the other checks above.
    all(abs(pps_two_stage$.weight_1[pps_two_stage$.certainty_1] - 1) < 1e-10)
  )
)
checks
#> # A tibble: 4 × 2
#> check pass
#> <chr> <lgl>
#> 1 SRS: sum(weights) = N TRUE
#> 2 Stratified SRS: sum_h weights_h = N_h TRUE
#> 3 Two-stage PPS: .weight = .weight_1 * .weight_2 TRUE
#> 4 Two-stage PPS certainty: certainty PSUs have .weight_1 = 1 TRUE

These are structural, non-random checks. If they fail, the design object is wrong before analysis begins.
Layer 2: Monte Carlo validation
Below we repeatedly sample from the same finite population and
analyze with survey. For each design we report:
- rel_bias_pct: relative bias of \(\hat{\bar{Y}}\),
- se_ratio: empirical SD of estimates divided by mean estimated SE,
- coverage_95: fraction of 95% CIs covering the true finite-population mean.
This vignette fixes Monte Carlo replication at
R = 199.
# Run one Monte Carlo validation loop for a given sampling design.
#
# @param design_fn A function(population, seed) returning an executed
#   samplyr sample that as_svydesign() can convert.
# @param population The finite population to sample from.
# @param true_mean  Known finite-population mean of y.
# @param R          Number of Monte Carlo replicates.
# @param seed_init  Base seed; replicate i uses seed_init + i.
# @return A one-row tibble with bias, relative bias (%), empirical SD,
#   mean estimated SE, their ratio, and 95% CI coverage.
evaluate_design <- function(design_fn, population, true_mean, R = 199L, seed_init = 3000L) {
  estimates <- numeric(R)
  std_errors <- numeric(R)
  covered <- logical(R)

  for (rep_i in seq_len(R)) {
    # Distinct, reproducible seed per replicate.
    drawn <- design_fn(population, seed_init + rep_i)
    svy <- as_svydesign(drawn)
    fit <- svymean(~y, svy)

    estimates[rep_i] <- unname(coef(fit)[1])
    std_errors[rep_i] <- unname(SE(fit)[1])

    # confint() can warn for some designs; only coverage is recorded here.
    ci <- suppressWarnings(confint(fit, level = 0.95))
    covered[rep_i] <- ci[1, 1] <= true_mean && ci[1, 2] >= true_mean
  }

  empirical_sd <- sd(estimates)
  average_se <- mean(std_errors)

  tibble(
    R = R,
    bias = mean(estimates - true_mean),
    rel_bias_pct = 100 * mean(estimates - true_mean) / true_mean,
    emp_sd = empirical_sd,
    mean_se = average_se,
    se_ratio = empirical_sd / average_se,
    coverage_95 = mean(covered)
  )
}
# Monte Carlo standard error of an empirical coverage estimate: the
# binomial standard error of a proportion p estimated from R replicates.
mcse_coverage <- function(R, p = 0.95) {
  variance <- p * (1 - p) / R
  sqrt(variance)
}
# One SRSWOR replicate: n = 400 units drawn from pop.
design_srs <- function(pop, seed) {
  design <- draw(sampling_design(), n = 400, method = "srswor")
  execute(design, pop, seed = seed)
}
# One stratified-SRSWOR replicate with proportional allocation (n = 400).
design_strat <- function(pop, seed) {
  design <- sampling_design()
  design <- stratify_by(design, stratum, alloc = "proportional")
  design <- draw(design, n = 400, method = "srswor")
  execute(design, pop, seed = seed)
}
# One two-stage replicate: 8 PSUs per stratum by Brewer PPS, then 6 SSUs
# per selected PSU by SRSWOR.
# NOTE(review): unlike the deterministic-check design earlier in this
# vignette, this one does not set certainty_size — confirm that omission
# is intentional for the Monte Carlo layer.
design_two_stage <- function(pop, seed) {
  design <- sampling_design()
  design <- add_stage(design, "PSU")
  design <- stratify_by(design, stratum)
  design <- cluster_by(design, psu)
  design <- draw(design, n = 8, method = "pps_brewer", mos = psu_mos)
  design <- add_stage(design, "SSU")
  design <- draw(design, n = 6, method = "srswor")
  execute(design, pop, seed = seed)
}
# Number of Monte Carlo replicates; fixed so the vignette is fast and
# reproducible (see the closing note about rerunning with larger R).
R_mc <- 199L
R_mc
#> [1] 199
# Monte Carlo summaries for each core design, all judged against the same
# finite-population truth. Evaluation order matches the original code.
mc_srs <- evaluate_design(design_srs, population, truth$Y_mean, R = R_mc)
mc_strat <- evaluate_design(design_strat, population, truth$Y_mean, R = R_mc)
mc_pps <- evaluate_design(design_two_stage, population, truth$Y_mean, R = R_mc)

mc_results <- bind_rows(
  mutate(mc_srs, design = "SRSWOR"),
  mutate(mc_strat, design = "Stratified SRS"),
  mutate(mc_pps, design = "Two-stage PPS->SRS")
) |>
  select(design, everything())
mc_results
#> # A tibble: 3 × 8
#> design R bias rel_bias_pct emp_sd mean_se se_ratio coverage_95
#> <chr> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 SRSWOR 199 -0.0344 -0.0154 1.41 1.41 0.998 0.950
#> 2 Stratified SRS 199 -0.128 -0.0573 1.34 1.32 1.01 0.940
#> 3 Two-stage PPS-…   199  0.0816       0.0366   3.24    2.96     1.10       0.950

Interpreting the results
For this short validation, practical acceptance bands are:
- abs(rel_bias_pct) <= 2,
- 0.85 <= se_ratio <= 1.15,
- 0.90 <= coverage_95 <= 0.98.
Coverage has Monte Carlo uncertainty. At nominal 95% coverage, the
Monte Carlo standard error is approximately \(\sqrt{0.95 \times 0.05 / R}\). With
R = 199, this is about 1.5 percentage points.
# Apply the acceptance bands stated above to each design's MC summary.
acceptance <- mc_results |>
  mutate(
    pass_bias = abs(rel_bias_pct) <= 2,
    pass_se = between(se_ratio, 0.85, 1.15),
    pass_cov = between(coverage_95, 0.90, 0.98),
    pass_all = pass_bias & pass_se & pass_cov
  ) |>
  select(design, starts_with("pass_"))
acceptance
#> # A tibble: 3 × 5
#> design pass_bias pass_se pass_cov pass_all
#> <chr> <lgl> <lgl> <lgl> <lgl>
#> 1 SRSWOR TRUE TRUE TRUE TRUE
#> 2 Stratified SRS TRUE TRUE TRUE TRUE
#> 3 Two-stage PPS->SRS TRUE TRUE TRUE TRUE
# Report the Monte Carlo standard error of the coverage estimate at R_mc,
# so readers can judge how precise coverage_95 is.
tibble(
  R = R_mc,
  mcse_coverage_95 = mcse_coverage(R_mc, p = 0.95)
)
#> # A tibble: 1 × 2
#> R mcse_coverage_95
#> <int> <dbl>
#> 1   199           0.0154

If these checks hold across core designs, we have strong evidence that:
- samplyr design metadata is internally consistent,
- exported survey designs reproduce correct design-based inference on known finite populations.
In this vignette, R is fixed at 199 as a practical
default. If you need higher confidence when interpreting package
accuracy, rerun the same validation with a larger R.