Skip to article frontmatter | Skip to article content

04 Linked-Read Inversion Detection

Here, we aggregate the visualizations of all size classes and depth treatments for linked-read simulations.

library(ggplot2)
library(ggpattern)
library(dplyr)
library(ggpubr)
Output

Attaching package: 'dplyr'


The following objects are masked from 'package:stats':

    filter, lag


The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union


Let’s read in the data from the outcomes of the individual size-class assessments and combine them into a single table. Since we’ll be comparing this to the long-read data, we should also add a column specifying that this is linked-read data and write the entire table to a single file.

# Read the four per-size-class assessment tables (small, medium, large, xl)
# and stack them row-wise, in that order, into a single data frame.
lr_inversions <- do.call(
    rbind,
    lapply(
        c(
            "assess_called_sv/small.sv.assessment",
            "assess_called_sv/medium.sv.assessment",
            "assess_called_sv/large.sv.assessment",
            "assess_called_sv/xl.sv.assessment"
        ),
        read.csv,
        header = TRUE
    )
)

# Tag every row so this table can be told apart once it is combined with
# data from another technology.
lr_inversions$technology <- "linkedread"

# The combined table is written only when the output file does not exist yet.
# NOTE(review): an existing file is never refreshed, so delete it by hand
# after re-running the per-size assessments if the combined file must change.
if (!file.exists("assess_called_sv/linkedread.sv.assessment")) {
    write.csv(
        lr_inversions,
        file = "assess_called_sv/linkedread.sv.assessment",
        row.names = FALSE,
        quote = FALSE
    )
}
head(lr_inversions)
Loading...

Single-Sample Detection

Let’s visualize what detection looked like across all treatments with respect to false/true positive/negative. Here, we facet rows across depths and show all the size treatments across columns.

Source
# Silence warnings and set the notebook figure size for this plot.
options(warn = -1, repr.plot.width = 20, repr.plot.height = 15)

# Y-axis entries "sample_01" ... "sample_10", used as both limits and breaks.
axis_ticks <- factor(sprintf("sample_%02d", 1:10))

# Tile plot of per-sample assessments: one tile per (inversion id, sample),
# filled by assessment and patterned by zygosity. Rows where sample == 11 are
# excluded here; plot_pools_matrix() plots those rows separately.
lr_inversions[lr_inversions$sample != 11, ] %>%
    ggplot(aes(y = sample, x = id, fill = assessment, pattern = zygosity)) +
        geom_tile_pattern(
            pattern_color = NA,
            color = "white",
            pattern_fill = "black",
            pattern_angle = 45,
            pattern_density = 0.5,
            pattern_spacing = 0.025,
            pattern_key_scale_factor = 1
        ) +
        theme_light() +
        labs(title = "By-Sample Inversion Detection", subtitle = "Inversions detected in individual samples, as a function of zygotic state.") +
        # Only these three assessment values are given a fill colour.
        scale_fill_manual(values = c("false negative" = "#eaf398ff", "true negative" = "grey70", "true positive" = "#90aed8")) +
        # Heterozygous tiles get a circle pattern; homozygous tiles stay plain.
        scale_pattern_manual(values = c("het" = "circle", "hom" = "none")) +
        scale_x_discrete(name = "Inversion") +
        scale_y_discrete(limits = axis_ticks, breaks = axis_ticks) +
        #coord_cartesian(xlim = c(1, max(lr_inversions$id)), expand = F) +
        coord_cartesian(expand = FALSE) +
        theme(
            panel.grid.major = element_blank(),
            panel.grid.minor = element_blank()
        ) +
        # Rows: depth. Columns: size class in a fixed small -> xl order, each
        # with its own x axis.
        facet_grid(depth ~ factor(size, levels = c("small", "medium", "large", "xl")), scales = "free_x")
plot without title

Pooled Sample Detection

Source
#' Plot the assessment tiles of sample 11 for one size treatment
#'
#' Keeps the rows of `data` where `sample == 11` and `size == size_treatment`,
#' appends a descriptive suffix to the contig names 2L, 2R, 3L and 3R, and
#' draws one tile per inversion filled by its assessment. Panels are faceted
#' by contig (columns) and depth (rows).
#'
#' @param data A data frame providing the columns `sample`, `size`, `contig`,
#'   `inversion`, `assessment` and `depth`.
#' @param size_treatment The value of `size` to keep (the notebook calls this
#'   with "small", "medium", "large" and "xl").
#' @return A ggplot object.
plot_pools_matrix <- function(data, size_treatment){
    # Named `pooled` rather than `.data` so the local does not shadow the
    # `.data` pronoun used by ggplot2/rlang.
    pooled <- data[data$sample == 11 & data$size == size_treatment,]

    # Contig name -> facet label. Applied in this order with gsub(), so a
    # contig value is relabelled wherever the name occurs as a substring.
    contig_labels <- c(
        "2L" = "2L (all homozygous)",
        "2R" = "2R (all heterozygous)",
        "3L" = "3L (inversions common)",
        "3R" = "3R (inversions rare)"
    )
    for (contig_name in names(contig_labels)) {
        pooled$contig <- gsub(contig_name, contig_labels[[contig_name]], pooled$contig)
    }

    ggplot(pooled, aes(y = 1, x = inversion, fill = assessment)) +
        geom_tile(color = "white", linewidth = 1.5) +
        theme_light() +
        #labs(title = "Pooled-Sample Detection", subtitle = paste(size_treatment, "inversions detected in sample-pooled data, as a function of inversion frequency in the population.")) +
        # Only these two assessment values are given a fill colour.
        scale_fill_manual(values = c("false negative" = "grey70", "true positive" = "#90aed8")) +
        # y is the constant 1 (a single row of tiles), so its axis text and
        # ticks are hidden below.
        scale_y_continuous(breaks = 1, name = "Assessment") +
        scale_x_discrete(name = "Inversion") +
        theme(
            panel.grid.major = element_blank(),
            panel.grid.minor = element_blank(),
            axis.text.y = element_blank(),
            axis.ticks.y = element_blank()
        ) +
        coord_cartesian(expand = FALSE) +
        facet_grid(cols = vars(contig), rows = vars(depth))
}
Source
# Silence warnings and set the notebook figure size for this plot.
options(warn = -1, repr.plot.width = 25, repr.plot.height = 10)

# One pooled-sample panel per size treatment, side by side in a single row
# with one shared legend on top. `label.x` places each panel label
# horizontally; the four values were chosen per label.
plot <- ggarrange(
    plot_pools_matrix(lr_inversions, "small"),
    plot_pools_matrix(lr_inversions, "medium"),
    plot_pools_matrix(lr_inversions, "large"),
    plot_pools_matrix(lr_inversions, "xl"),
    labels = c("small", "medium", "large", "xl"),
    nrow = 1,
    ncol = 4,
    common.legend = TRUE,
    # ggarrange() documents `font.label` as a list of font settings.
    font.label = list(face = "plain"),
    legend = "top",
    vjust = 0,
    label.x = c(0.42, 0.39, 0.41, 0.48)
)

# Add the overall figure title above the arranged panels.
annotate_figure(
    plot,
    top = text_grob(
        "Pooled-Sample Detection", color = "black", size = 20
    )
)
plot without title