Commit e1b3f361 authored by dmattek's avatar dmattek

Added: outlier selection

parent c4fb7539
source('modules/auxfunc.R')
source('modules/selOutliers.R')
source('modules/downPlot.R')
source('modules/downCellIDsCls.R')
source('modules/downCSV.R')
source('modules/dispStats.R')
source('modules/dispTrackStats.R')
source('modules/trajPlot.R')
......
......@@ -24,6 +24,28 @@ PLOTFONTLEGEND = 12
# default number of facets in plots
PLOTNFACETDEFAULT = 3
# internal column names
COLRT = 'realtime'
COLY = 'y'
COLID = 'id'
COLIDUNI = 'trackObjectsLabelUni'
COLGR = 'group'
COLIN = 'mid.in'
COLOBJN = 'obj.num'
COLPOSX = 'pos.x'
COLPOSY = 'pos.y'
COLIDX = 'IDX'
COLIDXDIFF = 'IDXdiff'
# file names
FCSVOUTLIERS = 'outliers.csv'
FCSVTCCLEAN = 'tCoursesSelected_clean.csv'
FPDFTCMEAN = "tCoursesMeans.pdf"
FPDFTCSINGLE = "tCourses.pdf"
FPDFBOXAUC = 'boxplotAUC.pdf'
FPDFBOXTP = 'boxplotTP.pdf'
FPDFSCATTER = 'scatter.pdf'
# Colour definitions ----
rhg_cols <- c(
"#771C19",
......@@ -176,7 +198,7 @@ LOCcalcTrajCI = function(in.dt, in.col.meas, in.col.by = NULL, in.type = c('norm
#' @export
#' @import data.table
LOCgenTraj <- function(in.ntpts = 60, in.ntracks = 10, in.nfov = 6, in.nwells = 1, in.addna = NULL) {
LOCgenTraj <- function(in.ntpts = 60, in.ntracks = 10, in.nfov = 6, in.nwells = 1, in.addna = NULL, in.addout = NULL) {
x.rand.1 = c(rnorm(in.ntpts * in.ntracks * in.nfov * 1/3, 0.5, 0.1), rnorm(in.ntpts * in.ntracks * in.nfov * 1/3, 1, 0.2), rnorm(in.ntpts * in.ntracks * in.nfov * 1/3, 2, 0.5))
x.rand.2 = c(rnorm(in.ntpts * in.ntracks * in.nfov * 1/3, 0.25, 0.1), rnorm(in.ntpts * in.ntracks * in.nfov * 1/3, 0.5, 0.2), rnorm(in.ntpts * in.ntracks * in.nfov * 1/3, 1, 0.2))
......@@ -188,6 +210,13 @@ LOCgenTraj <- function(in.ntpts = 60, in.ntracks = 10, in.nfov = 6, in.nwells =
x.rand.2[round(runif(in.addna) * locTabLen)] = NA
}
# add outliers for testing
if (!is.null(in.addout)) {
locTabLen = length(x.rand.1)
x.rand.1[round(runif(in.addout) * locTabLen)] = 10
x.rand.2[round(runif(in.addout) * locTabLen)] = 10
}
x.arg = rep(seq(1, in.ntpts), in.ntracks * in.nfov)
dt.nuc = data.table(Metadata_Well = rep(LETTERS[1:in.nwells], each = in.ntpts * in.nfov * in.ntracks / in.nwells),
......@@ -225,7 +254,7 @@ LOCgenTraj <- function(in.ntpts = 60, in.ntracks = 10, in.nfov = 6, in.nwells =
LOCnormTraj = function(in.dt,
in.meas.col,
in.rt.col = 'RealTime',
in.rt.col = COLRT,
in.rt.min = 10,
in.rt.max = 20,
in.by.cols = NULL,
......
......@@ -17,7 +17,7 @@ modTrackStats = function(input, output, session,
ns <- session$ns
output$uiTabStats = renderUI({
cat(file = stderr(), 'UI uiTabStats\n')
cat(file = stderr(), 'modTrackStats: uiTabStats\n')
ns <- session$ns
if(input$chbTabStats) {
......@@ -33,7 +33,7 @@ modTrackStats = function(input, output, session,
# unused at the moment
calcStats = reactive({
cat(file = stderr(), 'tabBoxPlot: calsStats\n')
cat(file = stderr(), 'modTrackStats: calsStats\n')
loc.dt = in.data()
if (is.null(loc.dt))
......@@ -53,6 +53,9 @@ modTrackStats = function(input, output, session,
# Print number of tracks
output$txtNtracks = renderText({
cat(file = stderr(), 'modTrackStats: txtNtracks\n')
loc.dt = in.data()
loc.dt = in.data()
if (is.null(loc.dt))
......@@ -66,7 +69,7 @@ modTrackStats = function(input, output, session,
# Print a table with Track IDs assigned to multiple objects in a frame
output$outTabStats = DT::renderDataTable(server = FALSE, {
cat(file = stderr(), 'tabBoxPlot: outTabStats\n')
cat(file = stderr(), 'modTrackStats: outTabStats\n')
loc.dt = in.data()
if (is.null(loc.dt))
......
# RShiny module for downloading cellIDs with cluster numbers
# Use:
#
# Time Course Inspector: Shiny app for plotting time series data
# Author: Maciej Dobrzynski
#
# This is the module of a Shiny web application.
# Download CSV with data
#
# Usage:
# in ui.R
# downPlotUI('uniqueID', "your_label")
# downCSV('uniqueID', "your_label")
#
# in server.R
# callModule(downPlot, "uniqueID", 'fname.pdf', input_plot_to_save)
# callModule(downCSV, "uniqueID", 'fname.csv', input_data_to_save)
downCellClUI <- function(id, label = "Download Data") {
# UI-download-csv ----
downCsvUI <- function(id, label = "Download Data") {
ns <- NS(id)
tagList(
......@@ -17,7 +23,8 @@ downCellClUI <- function(id, label = "Download Data") {
)
}
downCellCl <- function(input, output, session, in.fname, in.data) {
# SERVER-download-csv ----
downCsv <- function(input, output, session, in.fname, in.data) {
output$downCellCl <- downloadHandler(
filename = function() {
......
#
# Time Course Inspector: Shiny app for plotting time series data
# Author: Maciej Dobrzynski
#
# This is the module of a Shiny web application.
# Outlier identification, selection
# UI-remove-outliers ----
modSelOutliersUI = function(id, label = "Outlier Selection") {
ns <- NS(id)
tagList(
h4(
"Remove outliers"
),
fluidRow(
column(2,
numericInput(ns('numOutliersPerc'),
label = '% of data',
min = 0,
max = 100,
value = 0,
step = 0.05, width = '100px')
),
column(2,
radioButtons(ns('rbOutliersType'),
label = 'From',
choices = c('top' = 'top', 'middle' = 'mid', 'bottom' = 'bot'))
),
column(3,
sliderInput(ns('slOutliersGapLen'),
label = 'Remove tracks with gaps equal to or longer than',
min = 1,
max = 10,
value = 1,
step = 1)
),
column(3,
downloadButton(ns('downOutlierCSV'), label = 'CSV with outlier IDs'),
htmlOutput(ns("txtOutliersPerc"))
)
)
)
}
# Server-remove-outliers ----
modSelOutliers = function(input, output, session, in.data) {
# reactive counter to hold number of tracks before and after outlier removal
nCellsCounter <- reactiveValues(
nCellsOrig = 0,
nCellsAfter = 0,
nOutlierTpts = 0
)
# reactive vector with cell ids
vOut = reactiveValues(
id = NULL
)
# Display number of tracks and outliers
output$txtOutliersPerc <- renderText({
cat(file = stderr(), 'modSelOutliers: txtOutliersPerc\n')
sprintf('<b>%d total tracks<br>%d outlier tracks<br>%d outlier points</b><br>',
nCellsCounter[['nCellsOrig']],
nCellsCounter[['nCellsOrig']] - nCellsCounter[['nCellsAfter']],
nCellsCounter[['nOutlierTpts']])
})
# button for downloading CSV with ids of removed tracks
output$downOutlierCSV <- downloadHandler(
filename = FCSVOUTLIERS,
content = function(file) {
loc.dt = vOut[['id']]
if (is.null(loc.dt))
return(NULL)
else
write.csv(unique(loc.dt[, (COLID), with = F]), file, row.names = FALSE, quote = F)
}
)
# Identify outliers and remove them from dt
dtReturn = reactive({
cat(file = stderr(), 'modSelOutliers: dtReturn\n')
loc.out = in.data()
if (is.null(loc.out)) {
return(NULL)
}
# Remove outliers if the slider with percentage of data is smaller than 100
if (input$numOutliersPerc < 100) {
# store the number of trajectories before prunning
nCellsCounter[['nCellsOrig']] = length(unique(loc.out[['id']]))
# scale all points (independently per track)
loc.out[, y.sc := scale(get(COLY))]
# Identify outlier points
# In the UI, user selectes percentage of data to remove from the bottom, middle, or top part.
# loc.outpts stores outlier points
switch(input$rbOutliersType,
'top' = {loc.outpts = loc.out[ y.sc > quantile(y.sc, 1 - input$numOutliersPerc * 0.01, na.rm = T)]},
'mid' = {loc.outpts = loc.out[ y.sc < quantile(y.sc, input$numOutliersPerc * 0.005, na.rm = T) |
y.sc > quantile(y.sc, 1 - input$numOutliersPerc * 0.005, na.rm = T)]},
'bot' = {loc.outpts = loc.out[ y.sc < quantile(y.sc, input$numOutliersPerc * 0.01, na.rm = T)]}
)
if (input$slOutliersGapLen > 1) {
# remove tracks with gaps longer than the value set in slOutliersGapLen
# shorter gaps are interpolated linearly
# add index column per trajecory
loc.out[, (COLIDX) := 1:.N, by = c(COLID)]
# remove single outlier points (anti-join)
# From: https://stackoverflow.com/a/46333620/1898713
loc.out = loc.out[!loc.outpts, on = names(loc.outpts)]
# calculate diff on index column to see the length of gaps due to removed points
# the value of that column corresponds to the gap length (hence the "-1")
loc.out[, (COLIDXDIFF) := c(1, diff(get(COLIDX))) - 1, by = c(COLID)]
# get track ids where the max gap is equal to or longer than the threshold
loc.idgaps = loc.out[, max(get(COLIDXDIFF)), by = c(COLID)][V1 >= input$slOutliersGapLen, get(COLID)]
# remove outlier tracks with gaps longer than the value set in slOutliersGapLen
loc.out = loc.out[!(get(COLID) %in% unique(loc.idgaps))]
# fill removed outliers with NA's
loc.out = loc.out[setkeyv(loc.out[, .(seq(min(get(COLIDX), na.rm = T), max(get(COLIDX), na.rm = T), 1)), by = c(COLGR, COLID)], c(COLGR, COLID, 'V1'))]
# interpolate gaps with NAs
if( (COLPOSX %in% names(loc.out)) & (COLPOSY %in% names(loc.out)) )
s.cols = c(COLY, COLPOSX, COLPOSY)
else
s.cols = c(COLY)
# Here, the missing part in interpolation of mid.in column (for highlighting trajectories)
loc.out[, (s.cols) := lapply(.SD, na.interpolation), by = c(COLID), .SDcols = s.cols]
# clean
loc.out[, c(COLIDX, COLIDXDIFF) := NULL]
} else {
# remove outlier tracks with gaps of length 1 time point
loc.out = loc.out[!(get(COLID) %in% unique(loc.outpts[[COLID]]))]
}
# clean
loc.out[, y.sc := NULL]
# count number of trajectories after removing outlier tracks
nCellsCounter[['nCellsAfter']] = length(unique(loc.out[[COLID]]))
# count number of outlier points
nCellsCounter[['nOutlierTpts']] = length(loc.outpts[[COLID]])
# store a vector of outlier timepoints with the corresponding IDs
vOut[['id']] = loc.outpts
}
# return cleaned dt
return(loc.out)
})
return(dtReturn)
}
\ No newline at end of file
#
# Time Course Inspector: Shiny app for plotting time series data
# Author: Maciej Dobrzynski
#
# This is the module of a Shiny web application.
# Hierarchical clustering
# UI
# UI ----
clustHierUI <- function(id, label = "Hierarchical CLustering") {
ns <- NS(id)
......@@ -173,7 +179,7 @@ clustHierUI <- function(id, label = "Hierarchical CLustering") {
)
}
# SERVER
# SERVER ----
clustHier <- function(input, output, session, in.data4clust, in.data4trajPlot, in.data4stimPlot) {
output$uiPlotHierClAss = renderUI({
......
......@@ -38,6 +38,15 @@ shinyServer(function(input, output, session) {
dataLoadTrajRem = isolate(input$inButLoadTrajRem),
dataLoadStim = isolate(input$inButLoadStim)
)
nCellsCounter <- reactiveValues(
nCellsOrig = 0,
nCellsAfterOutlierTrim = 0
)
myReactVals = reactiveValues(
outlierIDs = NULL
)
# UI-side-panel-data-load ----
......@@ -45,7 +54,7 @@ shinyServer(function(input, output, session) {
dataGen1 <- eventReactive(input$inDataGen1, {
cat("dataGen1\n")
return(LOCgenTraj(in.nwells = 3))
return(LOCgenTraj(in.nwells = 3, in.addout = 3))
})
# Load main data file
......@@ -343,7 +352,7 @@ shinyServer(function(input, output, session) {
})
# UI-side-panel-remove-outliers ----
# UI-main-tab-remove-outliers ----
output$uiSlOutliers = renderUI({
cat(file = stderr(), 'UI uiSlOutliers\n')
......@@ -362,7 +371,22 @@ shinyServer(function(input, output, session) {
}
})
output$uiTxtOutliers = renderUI({
cat(file = stderr(), 'UI uiTxtOutliers\n')
if (input$chBoutliers) {
htmlOutput(
'txtOutliersPerc'
)
}
})
output$txtOutliersPerc <- renderText({
sprintf('<b>#tracks: %d <br>#outliers: %d</b>',
nCellsCounter[['nCellsOrig']],
nCellsCounter[['nCellsOrig']] - nCellsCounter[['nCellsAfterOutlierTrim']]) })
# Processing-data ----
dataInBoth <- reactive({
......@@ -634,7 +658,7 @@ shinyServer(function(input, output, session) {
if (input$chBtrajInter) {
# here we fill missing data with NA's
loc.out = loc.out[setkey(loc.out[, .(seq(min(realtime, na.rm = T), max(realtime, na.rm = T), input$inSelTimeFreq)), by = .(group, id)], group, id, V1)]
loc.out = loc.out[setkeyv(loc.out[, .(seq(min(get(COLRT), na.rm = T), max(get(COLRT), na.rm = T), input$inSelTimeFreq)), by = c(COLGR, COLID)], c(COLGR, COLID, 'V1'))]
# x-check: print all rows with NA's
print('Rows with NAs:')
......@@ -643,11 +667,11 @@ shinyServer(function(input, output, session) {
# NA's may be already present in the dataset'.
# Interpolate (linear) them with na.interpolate as well
if(locPos)
s.cols = c('y', 'pos.x', 'pos.y')
s.cols = c(COLY, COLPOSX, COLPOSY)
else
s.cols = c('y')
s.cols = c(COLY)
loc.out[, (s.cols) := lapply(.SD, na.interpolation), by = id, .SDcols = s.cols]
loc.out[, (s.cols) := lapply(.SD, na.interpolation), by = c(COLID), .SDcols = s.cols]
# !!! Current issue with interpolation:
......@@ -665,7 +689,7 @@ shinyServer(function(input, output, session) {
## Trim x-axis (time)
if(input$chBtimeTrim) {
loc.out = loc.out[realtime >= input$slTimeTrim[[1]] & realtime <= input$slTimeTrim[[2]] ]
loc.out = loc.out[get(COLRT) >= input$slTimeTrim[[1]] & get(COLRT) <= input$slTimeTrim[[2]] ]
}
## Normalization
......@@ -673,8 +697,8 @@ shinyServer(function(input, output, session) {
if (input$chBnorm) {
loc.out = LOCnormTraj(
in.dt = loc.out,
in.meas.col = 'y',
in.rt.col = 'realtime',
in.meas.col = COLY,
in.rt.col = COLRT,
in.rt.min = input$slNormRtMinMax[1],
in.rt.max = input$slNormRtMinMax[2],
in.type = input$rBnormMeth,
......@@ -684,39 +708,14 @@ shinyServer(function(input, output, session) {
# Column with normalized data is renamed to the original name
# Further code assumes column name y produced by data4trajPlot
loc.out[, y := NULL]
setnames(loc.out, 'y.norm', 'y')
}
##### MOD HERE
## display number of filtered tracks in textUI: uiTxtOutliers
## How?
## 1. through reactive values?
## 2. through additional comumn to tag outliers?
# Remove outliers
# 1. Scale all points (independently per track)
# 2. Pick time points that exceed the bounds
# 3. Identify IDs of outliers
# 4. Select cells that don't have these IDs
cat('Ncells orig = ', length(unique(loc.out$id)), '\n')
if (input$chBoutliers) {
loc.out[, y.sc := scale(y)]
loc.tmp = loc.out[ y.sc < quantile(y.sc, (1 - input$slOutliersPerc * 0.01)*0.5, na.rm = T) |
y.sc > quantile(y.sc, 1 - (1 - input$slOutliersPerc * 0.01)*0.5, na.rm = T)]
loc.out = loc.out[!(id %in% unique(loc.tmp$id))]
loc.out[, y.sc := NULL]
loc.out[, get(COLY) := NULL]
setnames(loc.out, 'y.norm', COLY)
}
cat('Ncells trim = ', length(unique(loc.out$id)), '\n')
return(loc.out)
})
# prepare data for clustering
# return a matrix with:
# cells as columns
......@@ -724,7 +723,7 @@ shinyServer(function(input, output, session) {
data4clust <- reactive({
cat(file = stderr(), 'data4clust\n')
loc.dt = data4trajPlot()
loc.dt = data4trajPlotNoOut()
if (is.null(loc.dt))
return(NULL)
......@@ -768,9 +767,9 @@ shinyServer(function(input, output, session) {
# download data as prepared for plotting
# after all modification
output$downloadDataClean <- downloadHandler(
filename = 'tCoursesSelected_clean.csv',
filename = FCSVTCCLEAN,
content = function(file) {
write.csv(data4trajPlot(), file, row.names = FALSE)
write.csv(data4trajPlotNoOut(), file, row.names = FALSE)
}
)
......@@ -797,34 +796,37 @@ shinyServer(function(input, output, session) {
}
})
# Taking out outliers
data4trajPlotNoOut = callModule(modSelOutliers, 'returnOutlierIDs', data4trajPlot)
# Trajectory plotting - ribbon
callModule(modTrajRibbonPlot, 'modTrajRibbon',
in.data = data4trajPlot,
in.data = data4trajPlotNoOut,
in.data.stim = data4stimPlot,
in.fname = function() return( "tCoursesMeans.pdf"))
in.fname = function() return(FPDFTCMEAN))
###### Trajectory plotting - individual
# Trajectory plotting - individual
callModule(modTrajPlot, 'modTrajPlot',
in.data = data4trajPlot,
in.data = data4trajPlotNoOut,
in.data.stim = data4stimPlot,
in.fname = function() {return( "tCourses.pdf")})
in.fname = function() {return(FPDFTCSINGLE)})
# Tabs ----
###### AUC calculation and plotting
callModule(modAUCplot, 'tabAUC', data4trajPlot, in.fname = function() return('boxplotAUC.pdf'))
callModule(modAUCplot, 'tabAUC', data4trajPlotNoOut, in.fname = function() return(FPDFBOXAUC))
###### Box-plot
callModule(tabBoxPlot, 'tabBoxPlot', data4trajPlot, in.fname = function() return('boxplotTP.pdf'))
callModule(tabBoxPlot, 'tabBoxPlot', data4trajPlotNoOut, in.fname = function() return(FPDFBOXTP))
###### Scatter plot
callModule(tabScatterPlot, 'tabScatter', data4trajPlot, in.fname = function() return('scatter.pdf'))
callModule(tabScatterPlot, 'tabScatter', data4trajPlotNoOut, in.fname = function() return(FPDFSCATTER))
##### Hierarchical clustering
callModule(clustHier, 'tabClHier', data4clust, data4trajPlot, data4stimPlot)
callModule(clustHier, 'tabClHier', data4clust, data4trajPlotNoOut, data4stimPlot)
##### Sparse hierarchical clustering using sparcl
callModule(clustHierSpar, 'tabClHierSpar', data4clust, data4trajPlot, data4stimPlot)
callModule(clustHierSpar, 'tabClHierSpar', data4clust, data4trajPlotNoOut, data4stimPlot)
})
......@@ -101,9 +101,6 @@ shinyUI(fluidPage(
uiOutput('uiChBnormRobust'),
uiOutput('uiChBnormGroup'),
tags$hr(),
checkboxInput('chBoutliers', 'Remove outliers', FALSE),
uiOutput('uiSlOutliers'),
uiOutput("uiTxtOutliers"),
downloadButton('downloadDataClean', 'Download mod\'d data')
),
......@@ -115,7 +112,7 @@ shinyUI(fluidPage(
"Plot time series: means per group or individual"
),
br(),
modSelOutliersUI('returnOutlierIDs'),
tabsetPanel(
tabPanel("Means",
br(),
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment