Here, we visualize the inversions that were identified for the xl
treatment across the range of depths for the 90% threshold called variants. An extra consideration here is that with the deconvolution-during-concatenation step, Harpy was unable to call variants on the 10X and 20X data because there were too many molecules (more than the 96^4 possible with haplotagging).
library(ggplot2)
options(warn = -1)
read_data <- function(size_treatment, depth){
truthfile <- paste0("simulated_data/inversions/", size_treatment, "/inv.", size_treatment,".vcf")
samplesfile <- paste("simulated_data/called_sv/leviathan90", size_treatment, depth , "by_sample/inversions.bedpe", sep = "/")
poolfile <- paste("simulated_data/called_sv/leviathan90", size_treatment, depth, "by_pop/inversions.bedpe", sep = "/")
true_inversions <- read.table(truthfile, header = F)[,c(1,2,8)]
true_inversions$V8 <- as.numeric(unlist(lapply(true_inversions$V8, function(X){gsub(".+END=", "", X)})))
names(true_inversions) <- c("contig", "position_start", "position_end")
true_inversions$sample <- factor(c("simulated_inversions"))
sample_inversions <- read.table(
paste("simulated_data/called_sv/leviathan90", size_treatment, depth, "by_sample/inversions.bedpe", sep = "/"),
header = T
)[,1:4]
sample_inversions$sample <- as.factor(sample_inversions$sample)
if(file.exists(poolfile)){
pooled_inversions <- read.table(poolfile, header = T)
pooled_inversions <- pooled_inversions[,c("population","contig", "position_start", "position_end")]
names(pooled_inversions)[1] <- "sample"
if(nrow(pooled_inversions > 0)){
pooled_inversions$sample <- factor(paste("pooled", depth, size_treatment, sep = "_"))
}
} else {
# get an empty copy
pooled_inversions <- sample_inversions[0, ]
}
return(
rbind(true_inversions, sample_inversions, pooled_inversions)
)
}
plot_data <- function(data, size_treatment, depth){
axis_ticks <- c(paste0("sample_", sprintf("%02d", 1:10), ".", depth), "simulated_inversions", paste("pooled", depth, size_treatment, sep = "_"))
ggplot(
data,
aes(
x = position_start,
xend = position_end,
y = sample,
yend = sample,
color = sample
)
) +
scale_x_continuous(labels = scales::comma) +
scale_y_discrete(limits = axis_ticks) +
labs(title = "Called Inversions", subtitle = paste(size_treatment, depth), x = "genomic position (bp)", caption = "Simulated inversions (groundtruth) shown in top row") +
geom_segment(linewidth = 2) +
facet_wrap(~contig, ncol = 1, scales = "free_x") +
theme_light() +
theme(panel.grid.major.y = element_blank(), legend.position = "None")
}
Set a global variable .size
to "xl"
to avoid writing it each time
.size <- "xl"
0.5X depth¶
depth_05X <- read_data(.size, "05X")
tail(depth_05X)
Loading...
plot_data(depth_05X, .size, "05X")

2X Depth¶
depth_2X <- read_data(.size, "2X")
tail(depth_2X)
Loading...
plot_data(depth_2X, .size, "2X")

5X Depth¶
depth_5X <- read_data(.size, "5X")
tail(depth_5X)
Loading...
plot_data(depth_5X, .size, "5X")

10X Depth¶
depth_10X <- read_data(.size, "10X")
tail(depth_10X)
Loading...
plot_data(depth_10X, .size, "10X")

20X Depth¶
depth_20X <- read_data(.size, "20X")
tail(depth_20X)
Loading...
plot_data(depth_20X, .size, "20X")
