Compute post-hoc power analysis for individual perturbation-gene pairs

This function performs post-hoc power analysis using detailed experimental information. Unlike compute_power_plan, this function accepts specific cell count assignments per gRNA (via cells_per_grna) and calculates power for individual perturbation-gene pairs. Also, we don't allow specifying multiple experimental designs. This is useful for analyzing completed experiments or evaluating specific experimental designs with known cell distributions, providing individual power estimates for each perturbation-gene pair as well as expected total discoveries.

Usage

compute_power_posthoc(
  discovery_pairs,
  cells_per_grna,
  baseline_expression_stats,
  control_group,
  fold_change_mean,
  fold_change_sd,
  num_total_cells = NULL,
  cutoff = NULL,
  n_nonzero_trt_thresh = 7L,
  n_nonzero_cntrl_thresh = 7L,
  side = "both",
  multiple_testing_method = "BH",
  multiple_testing_alpha = 0.1
)

Arguments

discovery_pairs: A data frame specifying which element-gene pairs to consider, with columns grna_target and response_id
cells_per_grna: A data frame specifying how many cells contain each gRNA, with columns grna_id, grna_target, and num_cells
baseline_expression_stats: A data frame specifying the baseline expression statistics for each gene, with columns response_id, expression_mean, and expression_size
control_group: A character string specifying the control group, either "complement" or "nt_cells"
fold_change_mean: A numeric value to use for mean effect size for all element-gene pairs
fold_change_sd: A numeric value to use for standard deviation of effect size for all element-gene pairs
num_total_cells: (Required only if control_group == "complement") A positive integer specifying the total number of cells in the experiment
cutoff: (Optional) A numeric value between 0 and 1 to use as the p-value cutoff
n_nonzero_trt_thresh: (Optional) An integer specifying the sceptre QC parameter of the same name; defaults to 7
n_nonzero_cntrl_thresh: (Optional) An integer specifying the sceptre QC parameter of the same name; defaults to 7
side: (Optional) A character string specifying the side of the test, either "left", "right", or "both"; defaults to "both"
multiple_testing_method: (Optional) A character string specifying the multiple testing correction method to use, either "BH" or "bonferroni"; defaults to "BH"
multiple_testing_alpha: (Optional) A numeric value between 0 and 1 specifying the alpha level for multiple testing correction; defaults to 0.1

Value

A list with two elements: individual_power (a data frame with columns grna_target, response_id, and power) and expected_num_discoveries (a numeric value)

Examples

## --- Toy perturb-seq dataset setup ---
# Total number of cells in the experiment
num_total_cells <- 1000L

# Number of cells receiving each gRNA
# (3 enhancers × 2 gRNAs each + 2 non-targeting gRNAs)
cells_per_grna <- data.frame(
  grna_id      = c("enh1_grna1","enh2_grna1","enh3_grna1","nt_grna1",
                   "enh1_grna2","enh2_grna2","enh3_grna2","nt_grna2"),
  grna_target  = c("enh1","enh2","enh3","non-targeting",
                   "enh1","enh2","enh3","non-targeting"),
  num_cells    = c(93L,113L,112L,104L,84L,104L,107L,105L),
  stringsAsFactors = FALSE
)

# Baseline expression statistics (negative binomial mean and size per gene)
baseline_expression_stats <- data.frame(
  response_id       = paste0("gene", 1:4),
  expression_mean   = c(2.002931, 12.326867, 4.014221, 1.460472),
  expression_size   = c(0.2967991, 8.3723191, 2.5988431, 2.6746265),
  stringsAsFactors = FALSE
)

# Discovery pairs: test 3 enhancers against 4 genes
discovery_pairs <- within(expand.grid(
  grna_target = c("enh1","enh2","enh3"),
  response_id = paste0("gene", 1:4),
  KEEP.OUT.ATTRS = FALSE, stringsAsFactors = FALSE
), { grna_target <- as.character(grna_target); response_id <- as.character(response_id) })

## --- Analysis choices ---
control_group <- "complement"          # use complement control group
side <- "left"                         # left-sided test
n_nonzero_trt_thresh   <- 7L            # min. nonzero counts in treatment
n_nonzero_cntrl_thresh <- 7L            # min. nonzero counts in control

## --- Power analysis parameters ---
fold_change_mean <- 0.85                # expected mean fold change
fold_change_sd   <- 0.13                # expected SD of fold change
cutoff <- 0.005                         # significance threshold (p-value cutoff)

## --- Run PerturbPlan posthoc power analysis ---
power_results <- compute_power_posthoc(
  num_total_cells = num_total_cells,
  cells_per_grna = cells_per_grna,
  baseline_expression_stats = baseline_expression_stats,
  discovery_pairs = discovery_pairs,
  control_group = control_group,
  side = side,
  n_nonzero_trt_thresh = n_nonzero_trt_thresh,
  n_nonzero_cntrl_thresh = n_nonzero_cntrl_thresh,
  fold_change_mean = fold_change_mean,
  fold_change_sd = fold_change_sd,
  cutoff = cutoff
)

# Inspect outputs
names(power_results)                    # available fields
#> [1] "individual_power"         "expected_num_discoveries"
power_results$expected_num_discoveries  # expected number of discoveries
#> [1] 4.8324
head(power_results$individual_power)    # power per enhancer-gene pair
#> # A tibble: 6 × 3
#>   grna_target response_id  power
#>   <chr>       <chr>        <dbl>
#> 1 enh1        gene1       0.0660
#> 2 enh2        gene1       0.0831
#> 3 enh3        gene1       0.0839
#> 4 enh1        gene2       0.718 
#> 5 enh2        gene2       0.744 
#> 6 enh3        gene2       0.745