In order to mitigate against the brute force attacks against Gitlab accounts, we are moving to all edu-ID Logins. We would like to remind you to link your account with your edu-id. Login will be possible only by edu-ID after November 30, 2021. Here you can find the instructions for linking your account.

If you don't have a SWITCH edu-ID, you can create one with this guide here

kind regards

This Server has been upgraded to GitLab release 14.2.6

Commit 03f7ea27 authored by Maciej Dobrzynski's avatar Maciej Dobrzynski
Browse files

Merge branch 'feature-outlier-new'

Implements new outlier selection method
parents 7c389fb2 8fc29800
......@@ -26,6 +26,7 @@ Following packages need to be installed in order to run the app locally:
* MASS
* robust
* pracma
* Hmisc
Install packages using `install.packages('name_of_the_package_from_the_list_above')` command in RStudio command line.
......@@ -36,27 +37,7 @@ install.packages(c("shiny", "shinyjs",
"dendextend", "dendextend", "RColorBrewer",
"sparcl", "dtw",
"imputeTS",
"MASS", "robust", "pracma"))
```
**Note**
As of July 2018, *sparcl* has been removed from CRAN. If you don't need sparse hierarchical clustering, you can simply comment the line `library(sparcl)` in `server.R` file or install it from the [archive](https://cran.r-project.org/web/packages/sparcl/index.html):
```
install_url('https://cran.r-project.org/src/contrib/Archive/sparcl/sparcl_1.0.3.tar.gz')
```
**Additionally**, a time series analysis package needs to be installed from [GitHub](https://github.com/dmattek/tca-package):
```
install_github("dmattek/tca-package")
```
The `install_url` and `install_github` functions are available in *devtools* package:
```
install.packages("devtools")
library(devtools)
"MASS", "robust", "pracma", "Hmisc"))
```
## Input file
......
source('modules/auxfunc.R')
source('modules/selOutliers.R')
source('modules/downPlot.R')
source('modules/downCellIDsCls.R')
source('modules/downCSV.R')
source('modules/dispStats.R')
source('modules/dispTrackStats.R')
source('modules/trajPlot.R')
......
......@@ -14,6 +14,10 @@ require(Hmisc) # for CI calculation
# Global parameters ----
# if true, additional output printed to R console
DEB = T
# font sizes in pts for plots
PLOTFONTBASE = 12
PLOTFONTAXISTEXT = 12
......@@ -24,6 +28,28 @@ PLOTFONTLEGEND = 12
# default number of facets in plots
PLOTNFACETDEFAULT = 3
# internal column names
COLRT = 'realtime'
COLY = 'y'
COLID = 'id'
COLIDUNI = 'trackObjectsLabelUni'
COLGR = 'group'
COLIN = 'mid.in'
COLOBJN = 'obj.num'
COLPOSX = 'pos.x'
COLPOSY = 'pos.y'
COLIDX = 'IDX'
COLIDXDIFF = 'IDXdiff'
# file names
FCSVOUTLIERS = 'outliers.csv'
FCSVTCCLEAN = 'tCoursesSelected_clean.csv'
FPDFTCMEAN = "tCoursesMeans.pdf"
FPDFTCSINGLE = "tCourses.pdf"
FPDFBOXAUC = 'boxplotAUC.pdf'
FPDFBOXTP = 'boxplotTP.pdf'
FPDFSCATTER = 'scatter.pdf'
# Colour definitions ----
rhg_cols <- c(
"#771C19",
......@@ -176,7 +202,7 @@ LOCcalcTrajCI = function(in.dt, in.col.meas, in.col.by = NULL, in.type = c('norm
#' @export
#' @import data.table
LOCgenTraj <- function(in.ntpts = 60, in.ntracks = 10, in.nfov = 6, in.nwells = 1, in.addna = NULL) {
LOCgenTraj <- function(in.ntpts = 60, in.ntracks = 10, in.nfov = 6, in.nwells = 1, in.addna = NULL, in.addout = NULL) {
x.rand.1 = c(rnorm(in.ntpts * in.ntracks * in.nfov * 1/3, 0.5, 0.1), rnorm(in.ntpts * in.ntracks * in.nfov * 1/3, 1, 0.2), rnorm(in.ntpts * in.ntracks * in.nfov * 1/3, 2, 0.5))
x.rand.2 = c(rnorm(in.ntpts * in.ntracks * in.nfov * 1/3, 0.25, 0.1), rnorm(in.ntpts * in.ntracks * in.nfov * 1/3, 0.5, 0.2), rnorm(in.ntpts * in.ntracks * in.nfov * 1/3, 1, 0.2))
......@@ -188,6 +214,13 @@ LOCgenTraj <- function(in.ntpts = 60, in.ntracks = 10, in.nfov = 6, in.nwells =
x.rand.2[round(runif(in.addna) * locTabLen)] = NA
}
# add outliers for testing
if (!is.null(in.addout)) {
locTabLen = length(x.rand.1)
x.rand.1[round(runif(in.addout) * locTabLen)] = 10
x.rand.2[round(runif(in.addout) * locTabLen)] = 10
}
x.arg = rep(seq(1, in.ntpts), in.ntracks * in.nfov)
dt.nuc = data.table(Metadata_Well = rep(LETTERS[1:in.nwells], each = in.ntpts * in.nfov * in.ntracks / in.nwells),
......@@ -225,7 +258,7 @@ LOCgenTraj <- function(in.ntpts = 60, in.ntracks = 10, in.nfov = 6, in.nwells =
LOCnormTraj = function(in.dt,
in.meas.col,
in.rt.col = 'RealTime',
in.rt.col = COLRT,
in.rt.min = 10,
in.rt.max = 20,
in.by.cols = NULL,
......
......@@ -17,7 +17,7 @@ modTrackStats = function(input, output, session,
ns <- session$ns
output$uiTabStats = renderUI({
cat(file = stderr(), 'UI uiTabStats\n')
cat(file = stderr(), 'modTrackStats: uiTabStats\n')
ns <- session$ns
if(input$chbTabStats) {
......@@ -33,7 +33,7 @@ modTrackStats = function(input, output, session,
# unused at the moment
calcStats = reactive({
cat(file = stderr(), 'tabBoxPlot: calsStats\n')
cat(file = stderr(), 'modTrackStats: calsStats\n')
loc.dt = in.data()
if (is.null(loc.dt))
......@@ -53,6 +53,9 @@ modTrackStats = function(input, output, session,
# Print number of tracks
output$txtNtracks = renderText({
cat(file = stderr(), 'modTrackStats: txtNtracks\n')
loc.dt = in.data()
loc.dt = in.data()
if (is.null(loc.dt))
......@@ -66,7 +69,7 @@ modTrackStats = function(input, output, session,
# Print a table with Track IDs assigned to multiple objects in a frame
output$outTabStats = DT::renderDataTable(server = FALSE, {
cat(file = stderr(), 'tabBoxPlot: outTabStats\n')
cat(file = stderr(), 'modTrackStats: outTabStats\n')
loc.dt = in.data()
if (is.null(loc.dt))
......
# RShiny module for downloading cellIDs with cluster numbers
# Use:
#
# Time Course Inspector: Shiny app for plotting time series data
# Author: Maciej Dobrzynski
#
# This is the module of a Shiny web application.
# Download CSV with data
#
# Usage:
# in ui.R
# downPlotUI('uniqueID', "your_label")
# downCSV('uniqueID', "your_label")
#
# in server.R
# callModule(downPlot, "uniqueID", 'fname.pdf', input_plot_to_save)
# callModule(downCSV, "uniqueID", 'fname.csv', input_data_to_save)
downCellClUI <- function(id, label = "Download Data") {
# UI-download-csv ----
downCsvUI <- function(id, label = "Download Data") {
ns <- NS(id)
tagList(
......@@ -17,7 +23,8 @@ downCellClUI <- function(id, label = "Download Data") {
)
}
downCellCl <- function(input, output, session, in.fname, in.data) {
# SERVER-download-csv ----
downCsv <- function(input, output, session, in.fname, in.data) {
output$downCellCl <- downloadHandler(
filename = function() {
......
#
# Time Course Inspector: Shiny app for plotting time series data
# Author: Maciej Dobrzynski
#
# This is the module of a Shiny web application.
# Outlier identification, selection
# UI-remove-outliers ----
modSelOutliersUI = function(id, label = "Outlier Selection") {
ns <- NS(id)
tagList(
h4(
"Remove outliers"
),
fluidRow(
column(2,
numericInput(ns('numOutliersPerc'),
label = '% of data',
min = 0,
max = 100,
value = 0,
step = 0.05, width = '100px'),
checkboxInput(ns('chBtrajInter'), 'Interpolate gaps?', value = F)
),
column(2,
radioButtons(ns('rbOutliersType'),
label = 'From',
choices = c('top' = 'top', 'middle' = 'mid', 'bottom' = 'bot'))
),
column(3,
sliderInput(ns('slOutliersGapLen'),
label = 'Remove tracks with gaps equal to or longer than',
min = 1,
max = 10,
value = 1,
step = 1)
),
column(3,
downloadButton(ns('downOutlierCSV'), label = 'CSV with outlier IDs'),
htmlOutput(ns("txtOutliersPerc"))
)
)
)
}
# Server-remove-outliers ----
modSelOutliers = function(input, output, session, in.data) {
# reactive counter to hold number of tracks before and after outlier removal
nCellsCounter <- reactiveValues(
nCellsOrig = 0,
nCellsAfter = 0,
nOutlierTpts = 0
)
# reactive vector with cell ids
vOut = reactiveValues(
id = NULL
)
# Display number of tracks and outliers
output$txtOutliersPerc <- renderText({
cat(file = stdout(), 'modSelOutliers: txtOutliersPerc\n')
sprintf('<b>%d total track(s)<br>%d outlier track(s)<br>%d outlier point(s)</b><br>',
nCellsCounter[['nCellsOrig']],
nCellsCounter[['nCellsOrig']] - nCellsCounter[['nCellsAfter']],
nCellsCounter[['nOutlierTpts']])
})
# button for downloading CSV with ids of removed tracks
output$downOutlierCSV <- downloadHandler(
filename = FCSVOUTLIERS,
content = function(file) {
loc.dt = vOut[['id']]
if (is.null(loc.dt))
return(NULL)
else
write.csv(unique(loc.dt[, (COLID), with = F]), file, row.names = FALSE, quote = F)
}
)
# Identify outliers and remove them from dt
dtReturn = reactive({
cat(file = stdout(), 'modSelOutliers: dtReturn\n')
loc.out = in.data()
if (is.null(loc.out)) {
return(NULL)
}
# store the number of trajectories before prunning
nCellsCounter[['nCellsOrig']] = length(unique(loc.out[['id']]))
# Remove outliers if the field with percentage of data to remove is greater than 0
if (input$numOutliersPerc > 0) {
# scale all measurement points
loc.out[, y.sc := scale(get(COLY))]
# Identify outlier points
# In the UI, user selectes percentage of data to remove from the bottom, middle, or top part.
# loc.outpts stores outlier points
# warning: quantile type = 3: SAS definition: nearest even order statistic.
switch(input$rbOutliersType,
'top' = {loc.outpts = loc.out[ y.sc > quantile(y.sc, 1 - input$numOutliersPerc * 0.01, na.rm = T, type = 3)]},
'mid' = {loc.outpts = loc.out[ y.sc < quantile(y.sc, input$numOutliersPerc * 0.005, na.rm = T, type = 3) |
y.sc > quantile(y.sc, 1 - input$numOutliersPerc * 0.005, na.rm = T, type = 3)]},
'bot' = {loc.outpts = loc.out[ y.sc < quantile(y.sc, input$numOutliersPerc * 0.01, na.rm = T, type = 3)]}
)
if (DEB) {
cat(file = stdout(), 'selOutliers.dtReturn: Outlier points:\n')
print(loc.outpts)
}
if (input$slOutliersGapLen > 1) {
# remove tracks with gaps longer than the value set in slOutliersGapLen
# shorter gaps are interpolated linearly
# add index column per trajecory
loc.out[, (COLIDX) := 1:.N, by = c(COLID)]
# remove single outlier points (anti-join)
# From: https://stackoverflow.com/a/46333620/1898713
loc.out = loc.out[!loc.outpts, on = names(loc.outpts)]
# calculate diff on index column to see the length of gaps due to removed points
# the value of that column corresponds to the gap length (hence the "-1")
loc.out[, (COLIDXDIFF) := c(1, diff(get(COLIDX))) - 1, by = c(COLID)]
# get track ids where the max gap is equal to or longer than the threshold
loc.idgaps = loc.out[, max(get(COLIDXDIFF)), by = c(COLID)][V1 >= input$slOutliersGapLen, get(COLID)]
if (DEB) {
cat(file = stdout(), '\nselOutliers.dtReturn: Track IDs with max gap >= threshold:\n')
if (length(loc.idgaps) > 0)
print(loc.idgaps) else
cat("none\n")
}
# remove outlier tracks with gaps longer than the value set in slOutliersGapLen
if (length(loc.idgaps) > 0)
loc.out = loc.out[!(get(COLID) %in% unique(loc.idgaps))]
# clean
loc.out[, c(COLIDX, COLIDXDIFF) := NULL]
# interpolate gaps due to outliers
if (input$chBtrajInter) {
# fill removed outliers with NA's
setkeyv(loc.out, c(COLGR, COLID, COLRT))
loc.out = loc.out[setkeyv(loc.out[, .(seq(min(get(COLRT), na.rm = T), max(get(COLRT), na.rm = T), 1)), by = c(COLGR, COLID)], c(COLGR, COLID, 'V1'))]
# x-check: print all rows with NA's
if (DEB) {
cat(file = stdout(), '\nselOutliers.dtReturn: Rows with NAs to interpolate:\n')
print(loc.out[rowSums(is.na(loc.out)) > 0, ])
}
# NA's may be already present in the dataset'.
# Interpolate (linear) them with na.interpolate as well
if( (COLPOSX %in% names(loc.out)) & (COLPOSY %in% names(loc.out)) )
s.cols = c(COLY, COLPOSX, COLPOSY)
else
s.cols = c(COLY)
# Apparently the loop is faster than lapply+SDcols
for(col in s.cols) {
# Interpolated columns should be of type numeric (float)
# This is to ensure that interpolated columns are of porper type.
data.table::set(loc.out, j = col, value = as.numeric(loc.out[[col]]))
loc.out[, (col) := na.interpolation(get(col)), by = c(COLID)]
}
}
} else {
# remove outlier tracks with gaps of length 1 time point
# !(input$slOutliersGapLen > 1)
loc.out = loc.out[!(get(COLID) %in% unique(loc.outpts[[COLID]]))]
}
# clean
loc.out[, y.sc := NULL]
# store a vector of outlier timepoints with the corresponding IDs
vOut[['id']] = loc.outpts
} else {
# no outlier removal
# !(input$numOutliersPerc > 0)
loc.outpts = NULL
vOut = NULL
}
# count number of trajectories after removing outlier tracks
nCellsCounter[['nCellsAfter']] = length(unique(loc.out[[COLID]]))
# count number of outlier points
nCellsCounter[['nOutlierTpts']] = length(loc.outpts[[COLID]])
cat(sprintf("%d outlier tpts\n", nCellsCounter[['nOutlierTpts']]))
# return cleaned dt
if (nrow(loc.out) < 1)
return(NULL) else
return(loc.out)
})
return(dtReturn)
}
\ No newline at end of file
#
# Time Course Inspector: Shiny app for plotting time series data
# Author: Maciej Dobrzynski
#
# This is the module of a Shiny web application.
# Hierarchical clustering
# UI
# UI ----
clustHierUI <- function(id, label = "Hierarchical CLustering") {
ns <- NS(id)
......@@ -173,7 +179,7 @@ clustHierUI <- function(id, label = "Hierarchical CLustering") {
)
}
# SERVER
# SERVER ----
clustHier <- function(input, output, session, in.data4clust, in.data4trajPlot, in.data4stimPlot) {
output$uiPlotHierClAss = renderUI({
......
......@@ -29,7 +29,7 @@ tabScatterPlotUI <- function(id, label = "Comparing t-points") {
4,
uiOutput(ns('uiSelTptX')),
uiOutput(ns('uiSelTptY')),
checkboxInput(ns('chBfoldChange'), 'Y-axis displays difference between two t-points')
checkboxInput(ns('chBfoldChange'), 'Display difference between two t-points on Y-axis')
),
column(
4,
......
This diff is collapsed.
......@@ -53,7 +53,7 @@ shinyUI(fluidPage(
uiOutput('uiButLoadStim'),
tags$hr(),
checkboxInput('chBtrajInter', 'Interpolate NAs and missing data?', value = T),
checkboxInput('chBtrajInter', 'Interpolate NAs and missing data?', value = F),
helpPopup(
title = 'Interpolation of NAs and missing data',
content = help.text[3],
......@@ -62,7 +62,7 @@ shinyUI(fluidPage(
),
uiOutput('varSelTimeFreq'),
checkboxInput('chBtrackUni', 'Create unique TrackLabel', T),
checkboxInput('chBtrackUni', 'Create unique TrackLabel', F),
helpPopup(
title = 'Create unique cell ID',
content = help.text[2],
......@@ -73,7 +73,7 @@ shinyUI(fluidPage(
uiOutput('varSelTrackLabel'),
tags$hr(),
checkboxInput('chBgroup', 'Dataset contains grouping column (e.g. treatment, condition)', TRUE),
checkboxInput('chBgroup', 'Dataset contains grouping column (e.g. treatment, condition)', F),
uiOutput('varSelGroup'),
uiOutput('varSelTime'),
uiOutput('varSelMeas1'),
......@@ -101,9 +101,6 @@ shinyUI(fluidPage(
uiOutput('uiChBnormRobust'),
uiOutput('uiChBnormGroup'),
tags$hr(),
checkboxInput('chBoutliers', 'Remove outliers', FALSE),
uiOutput('uiSlOutliers'),
uiOutput("uiTxtOutliers"),
downloadButton('downloadDataClean', 'Download mod\'d data')
),
......@@ -115,7 +112,7 @@ shinyUI(fluidPage(
"Plot time series: means per group or individual"
),
br(),
modSelOutliersUI('returnOutlierIDs'),
tabsetPanel(
tabPanel("Means",
br(),
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment