# # Time Course Inspector: Shiny app for plotting time series data # Author: Maciej Dobrzynski # # This is the module of a Shiny web application. # Outlier identification, selection # UI-remove-outliers ---- modSelOutliersUI = function(id, label = "Outlier Selection") { ns <- NS(id) tagList( h4( "Remove outliers" ), fluidRow( column(2, numericInput(ns('numOutliersPerc'), label = '% of data', min = 0, max = 100, value = 0, step = 0.05, width = '100px') ), column(2, radioButtons(ns('rbOutliersType'), label = 'From', choices = c('top' = 'top', 'middle' = 'mid', 'bottom' = 'bot')) ), column(3, sliderInput(ns('slOutliersGapLen'), label = 'Remove tracks with gaps equal to or longer than', min = 1, max = 10, value = 1, step = 1) ), column(3, downloadButton(ns('downOutlierCSV'), label = 'CSV with outlier IDs'), htmlOutput(ns("txtOutliersPerc")) ) ) ) } # Server-remove-outliers ---- modSelOutliers = function(input, output, session, in.data) { # reactive counter to hold number of tracks before and after outlier removal nCellsCounter <- reactiveValues( nCellsOrig = 0, nCellsAfter = 0, nOutlierTpts = 0 ) # reactive vector with cell ids vOut = reactiveValues( id = NULL ) # Display number of tracks and outliers output$txtOutliersPerc <- renderText({ cat(file = stderr(), 'modSelOutliers: txtOutliersPerc\n') sprintf('%d total track(s)
%d outlier track(s)
%d outlier point(s)

', nCellsCounter[['nCellsOrig']], nCellsCounter[['nCellsOrig']] - nCellsCounter[['nCellsAfter']], nCellsCounter[['nOutlierTpts']]) }) # button for downloading CSV with ids of removed tracks output$downOutlierCSV <- downloadHandler( filename = FCSVOUTLIERS, content = function(file) { loc.dt = vOut[['id']] if (is.null(loc.dt)) return(NULL) else write.csv(unique(loc.dt[, (COLID), with = F]), file, row.names = FALSE, quote = F) } ) # Identify outliers and remove them from dt dtReturn = reactive({ cat(file = stderr(), 'modSelOutliers: dtReturn\n') loc.out = in.data() if (is.null(loc.out)) { return(NULL) } # Remove outliers if the slider with percentage of data is smaller than 100 if (input$numOutliersPerc < 100) { # store the number of trajectories before prunning nCellsCounter[['nCellsOrig']] = length(unique(loc.out[['id']])) # scale all points (independently per track) loc.out[, y.sc := scale(get(COLY))] # Identify outlier points # In the UI, user selectes percentage of data to remove from the bottom, middle, or top part. # loc.outpts stores outlier points switch(input$rbOutliersType, 'top' = {loc.outpts = loc.out[ y.sc > quantile(y.sc, 1 - input$numOutliersPerc * 0.01, na.rm = T)]}, 'mid' = {loc.outpts = loc.out[ y.sc < quantile(y.sc, input$numOutliersPerc * 0.005, na.rm = T) | y.sc > quantile(y.sc, 1 - input$numOutliersPerc * 0.005, na.rm = T)]}, 'bot' = {loc.outpts = loc.out[ y.sc < quantile(y.sc, input$numOutliersPerc * 0.01, na.rm = T)]} ) if (input$slOutliersGapLen > 1) { # remove tracks with gaps longer than the value set in slOutliersGapLen # shorter gaps are interpolated linearly # add index column per trajecory loc.out[, (COLIDX) := 1:.N, by = c(COLID)] # remove single outlier points (anti-join) # From: https://stackoverflow.com/a/46333620/1898713 loc.out = loc.out[!loc.outpts, on = names(loc.outpts)] # calculate diff on index column to see the length of gaps due to removed points # the value of that column corresponds to the gap length (hence the "-1") loc.out[, (COLIDXDIFF) := c(1, diff(get(COLIDX))) - 1, by = c(COLID)] # get track ids where the max gap is equal to or longer than the threshold loc.idgaps = loc.out[, max(get(COLIDXDIFF)), by = c(COLID)][V1 >= input$slOutliersGapLen, get(COLID)] # remove outlier tracks with gaps longer than the value set in slOutliersGapLen loc.out = loc.out[!(get(COLID) %in% unique(loc.idgaps))] # fill removed outliers with NA's loc.out = loc.out[setkeyv(loc.out[, .(seq(min(get(COLIDX), na.rm = T), max(get(COLIDX), na.rm = T), 1)), by = c(COLGR, COLID)], c(COLGR, COLID, 'V1'))] # interpolate gaps with NAs if( (COLPOSX %in% names(loc.out)) & (COLPOSY %in% names(loc.out)) ) s.cols = c(COLY, COLPOSX, COLPOSY) else s.cols = c(COLY) # Here, the missing part in interpolation of mid.in column (for highlighting trajectories) loc.out[, (s.cols) := lapply(.SD, na.interpolation), by = c(COLID), .SDcols = s.cols] # clean loc.out[, c(COLIDX, COLIDXDIFF) := NULL] } else { # remove outlier tracks with gaps of length 1 time point loc.out = loc.out[!(get(COLID) %in% unique(loc.outpts[[COLID]]))] } # clean loc.out[, y.sc := NULL] # count number of trajectories after removing outlier tracks nCellsCounter[['nCellsAfter']] = length(unique(loc.out[[COLID]])) # count number of outlier points nCellsCounter[['nOutlierTpts']] = length(loc.outpts[[COLID]]) # store a vector of outlier timepoints with the corresponding IDs vOut[['id']] = loc.outpts } # return cleaned dt return(loc.out) }) return(dtReturn) }