Skip to contents

Internal function to load baseline expression and library parameters from the pilot datasets stored in the package data/ directory.

Usage

get_pilot_data_from_package(biological_system)

Arguments

biological_system

Character. The biological system name (e.g., "K562", "A549", "THP-1", "T_CD8", "iPSC")

Value

A list containing:

baseline_expression_stats

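Data frame of baseline gene expression statistics (in the K562 example below: columns response_id, relative_expression, and expression_size), or a list containing such a baseline_expression_stats data frame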

library_parameters

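List with UMI_per_cell and variation parameters

mapping_efficiency

Numeric. Mapping efficiency for the biological system, returned as a separate top-level element (e.g., about 0.711 for K562 in the example below)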

Examples

# Load pilot data for K562 cells
k562_data <- get_pilot_data_from_package("K562")

# View the structure
str(k562_data)
#> List of 3
#>  $ baseline_expression_stats:'data.frame':	19942 obs. of  3 variables:
#>   ..$ response_id        : chr [1:19942] "ENSG00000243485" "ENSG00000238009" "ENSG00000241860" "ENSG00000286448" ...
#>   ..$ relative_expression: num [1:19942] 1.50e-07 1.55e-06 7.33e-07 9.31e-07 7.43e-06 ...
#>   ..$ expression_size    : num [1:19942] 0.3556 2.4156 7.7329 0.0341 3.4513 ...
#>  $ library_parameters       :List of 2
#>   ..$ UMI_per_cell: num 59163
#>   ..$ variation   : num 0.397
#>  $ mapping_efficiency       : num 0.711

# Access baseline expression data
head(k562_data$baseline_expression_stats)
#>                     response_id relative_expression expression_size
#> ENSG00000243485 ENSG00000243485        1.503611e-07      0.35563916
#> ENSG00000238009 ENSG00000238009        1.554532e-06      2.41555115
#> ENSG00000241860 ENSG00000241860        7.325731e-07      7.73288757
#> ENSG00000286448 ENSG00000286448        9.312333e-07      0.03414057
#> ENSG00000237491 ENSG00000237491        7.428012e-06      3.45126152
#> ENSG00000228794 ENSG00000228794        2.774533e-05      4.99961853

# Access library parameters and the separate mapping efficiency element
k562_data$library_parameters
#> $UMI_per_cell
#> [1] 59163.3
#> 
#> $variation
#> [1] 0.3968175
#> 
cat("Mapping efficiency:", k562_data$mapping_efficiency)
#> Mapping efficiency: 0.7110861

# The mapping efficiency affects power calculations by determining
# what fraction of sequencing reads contribute to gene expression.
# Higher mapping efficiency means more effective sequencing depth.

# Compare mapping efficiency across cell types
a549_data <- get_pilot_data_from_package("A549")
cat("K562 mapping efficiency:", k562_data$mapping_efficiency)
#> K562 mapping efficiency: 0.7110861
cat("A549 mapping efficiency:", a549_data$mapping_efficiency)
#> A549 mapping efficiency: 0.7938657