It’s not worthwhile to keep repeating this process for each size class, so we will do it once here and save it as a table, then read that table into each of the inversion size-assessment notebooks
library(dplyr)
Attaching package: 'dplyr'
The following objects are masked from 'package:stats':
filter, lag
The following objects are masked from 'package:base':
intersect, setdiff, setequal, union
read_data <- function(size_treatment, depth){
.depth <- paste("depth",depth, sep = "_")
truthfile <- paste0("../../simulated_variants/setup_inversions/", size_treatment, "/inv.", size_treatment,".vcf")
true_inversions <- read.table(truthfile, header = F)[,c(1,2,8)]
true_inversions$V8 <- as.numeric(unlist(lapply(true_inversions$V8, function(X){gsub(".+END=", "", X)})))
names(true_inversions) <- c("contig", "position_start", "position_end")
true_inversions$sample <- "simulated_inversions"
true_inversions$depth <- depth
return(true_inversions)
}inventory_to_assessment <- function(.size){
inventory <- rbind(
filter(read.table("../../inversion_simulations.inventory", header = T), size == .size),
filter(read.table("../../inversion_simulations.pool.inventory", header = T), size == .size)
)
inventory <- rename(inventory, "simulated" = "present", "zygosity" = "state")
inventory$sample <- gsub("pool", "11", inventory$sample)
inventory$sample <- gsub("sample_", "", inventory$sample)
inventory$sample <- as.integer(inventory$sample)
inventory$zygosity[is.na(inventory$zygosity)] <- "hom"
inventory$zygosity <- ifelse(inventory$zygosity == "het", "heterozygous", "homozygous")
# add depths to performance
# arrange() is for sanity-checking
performance <- inventory
performance$depth <- 0.5
for(i in c(2, 5, 10, 20)){
inventory$depth <- i
performance <- rbind(performance, inventory)
}
performance <- arrange(performance, sample, contig, inversion)
x <- Reduce(
rbind,
Map(
function(i){read_data(.size, i)},
c(0.5, 2, 5, 10, 20)
)
)
x <- arrange(x, sample, contig, position_start, depth)
truth <- group_by(x, contig, depth) %>% mutate(inversion = 1:n()) %>% ungroup()
truth <- truth[, c(1,6,2,3,5)]
# merge it all together
performance <- merge(performance,truth, by = c("contig", "inversion", "depth"))
performance$id <- as.integer(as.factor(performance$position_start))
performance <- arrange(performance[, c(1,2,10,3,4:9)], contig, inversion, sample, depth)
return (performance)
}assess_tb <- Reduce(
rbind,
Map(
inventory_to_assessment,
c("small", "medium", "large", "xl")
)
)
head(assess_tb)Loading...
Finally, write the table to a file
write.table(assess_tb, file = "inversion.assessment", quote = F, row.names = F)