Skip to article frontmatterSkip to article content
Site not loading correctly?

This may be due to an incorrect BASE_URL configuration. See the MyST Documentation for reference.

Setup assessment table

Cornell University

It’s not worthwhile to keep repeating this process for each size class, so we will do it once here and save it as a table, then read that table into each of the inversion size-assessment notebooks

library(dplyr)

Attaching package: 'dplyr'


The following objects are masked from 'package:stats':

    filter, lag


The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union


read_data <- function(size_treatment, depth){
    .depth <- paste("depth",depth, sep = "_")
    truthfile <- paste0("../../simulated_variants/setup_inversions/", size_treatment, "/inv.", size_treatment,".vcf")

    true_inversions <- read.table(truthfile, header = F)[,c(1,2,8)]
    true_inversions$V8 <- as.numeric(unlist(lapply(true_inversions$V8, function(X){gsub(".+END=", "", X)})))
    names(true_inversions) <- c("contig", "position_start", "position_end")
    true_inversions$sample <- "simulated_inversions"

    true_inversions$depth <- depth
    return(true_inversions)
}
inventory_to_assessment <- function(.size){
    inventory <- rbind(
        filter(read.table("../../inversion_simulations.inventory", header = T), size == .size),
        filter(read.table("../../inversion_simulations.pool.inventory", header = T), size == .size)
    )
    inventory <- rename(inventory, "simulated" = "present", "zygosity" = "state")

    inventory$sample <- gsub("pool", "11", inventory$sample)
    inventory$sample <- gsub("sample_", "", inventory$sample)
    inventory$sample <- as.integer(inventory$sample)
    inventory$zygosity[is.na(inventory$zygosity)] <- "hom"
    inventory$zygosity <- ifelse(inventory$zygosity == "het", "heterozygous", "homozygous")

    # add depths to performance
    # arrange() is for sanity-checking
    performance <- inventory
    performance$depth <- 0.5
    for(i in c(2, 5, 10, 20)){
        inventory$depth <- i
        performance <- rbind(performance, inventory)
    }
    performance <- arrange(performance, sample, contig, inversion)

    x <- Reduce(
        rbind,
        Map(
            function(i){read_data(.size, i)},
            c(0.5, 2, 5, 10, 20)
        )
    )
    x <- arrange(x, sample, contig, position_start, depth)

    truth <- group_by(x, contig, depth) %>% mutate(inversion = 1:n()) %>% ungroup()
    truth <- truth[, c(1,6,2,3,5)]

    # merge it all together
    performance <- merge(performance,truth, by = c("contig", "inversion", "depth"))
    performance$id <- as.integer(as.factor(performance$position_start))
    performance <- arrange(performance[, c(1,2,10,3,4:9)], contig, inversion, sample, depth)
    return (performance)
}
assess_tb <- Reduce(
    rbind,
    Map(
        inventory_to_assessment,
        c("small", "medium", "large", "xl")
    )
)
head(assess_tb)
Loading...

Finally, write the table to a file

write.table(assess_tb, file = "inversion.assessment", quote = F, row.names = F)