Commit ea935f44 authored by dmattek's avatar dmattek

Major rev: added clustering

parent b64429ce
## Custom plotting
require(ggplot2)
rhg_cols <- c(
"#771C19",
"#AA3929",
......@@ -24,6 +26,33 @@ md_cols <- c(
"#238443"
)
s.cl.linkage = c("ward.D",
"ward.D2",
"single",
"complete",
"average",
"mcquitty",
"centroid")
s.cl.spar.linkage = c("average",
"complete",
"single",
"centroid")
s.cl.diss = c("euclidean", "maximum", "manhattan", "canberra", "binary", "minkowski")
s.cl.spar.diss = c("squared.distance","absolute.value")
l.col.pal = list(
"White-Orange-Red" = 'OrRd',
"Yellow-Orange-Red" = 'YlOrRd',
"Reds" = "Reds",
"Oranges" = "Oranges",
"Greens" = "Greens",
"Blues" = "Blues",
"Spectral" = 'Spectral'
)
myGgplotTraj = function(dt.arg,
x.arg,
y.arg,
......@@ -115,16 +144,16 @@ myGgplotTraj = function(dt.arg,
userDataGen <- function() {
cat(file=stderr(), 'userDataGen: in\n')
locNtp = 13
locNtp = 40
locNtracks = 5
locNsites = 4
locNwells = 2
dt.nuc = data.table(Metadata_Site = rep(1:locNsites, each = locNtp * locNtracks),
Metadata_Well = rep(1:locNwells, each = locNtp * locNsites * locNtracks / locNwells),
Metadata_Time = rep(1:locNtp, locNsites* locNtracks),
meas_MeanIntensity_cyto = rnorm(locNtp * locNtracks * locNsites, .5, 0.1),
meas_MeanIntensity_nuc = rnorm(locNtp * locNtracks * locNsites, .5, 0.1),
Metadata_RealTime = rep(1:locNtp, locNsites* locNtracks),
objCyto_Intensity_MeanIntensity_imErkCor = rnorm(locNtp * locNtracks * locNsites, .5, 0.1),
objNuc_Intensity_MeanIntensity_imErkCor = rnorm(locNtp * locNtracks * locNsites, .5, 0.1),
TrackLabel = rep(1:(locNtracks*locNsites), each = locNtp))
cat(colnames(dt.nuc))
......@@ -189,4 +218,22 @@ myNorm = function(in.dt,
loc.dt[, c('meas.md', 'meas.mad') := NULL]
return(loc.dt)
}
\ No newline at end of file
}
myGgplotTheme = theme_bw(base_size = 18, base_family = "Helvetica") +
theme(
panel.grid.minor = element_blank(),
panel.grid.major = element_blank(),
axis.line.x = element_line(color = "black", size = 0.25),
axis.line.y = element_line(color = "black", size = 0.25),
axis.text.x = element_text(size = 12, angle = 45, hjust = 1),
axis.text.y = element_text(size = 12),
strip.text.x = element_text(size = 14, face = "bold"),
strip.text.y = element_text(size = 14, face = "bold"),
strip.background = element_blank(),
legend.key = element_blank(),
legend.key.height = unit(1, "lines"),
legend.key.width = unit(2, "lines"),
legend.position = "right"
)
\ No newline at end of file
source('auxfunc.R')
\ No newline at end of file
......@@ -11,7 +11,13 @@ library(shiny)
library(shinyjs) #http://deanattali.com/shinyjs/
library(data.table)
library(ggplot2)
library(gplots) # for heatmap.2
library(plotly)
library(d3heatmap) # for interactive heatmap
library(dendextend) # for color_branches
library(RColorBrewer)
library(sparcl) # sparse hierarchical and k-means
library(scales) # for percentages on y scale
# increase file upload limit
options(shiny.maxRequestSize = 30 * 1024 ^ 2)
......@@ -183,6 +189,30 @@ shinyServer(function(input, output, session) {
}
})
# UI for trimming x-axis (time)
output$uiSlTimeTrim = renderUI({
cat(file = stderr(), 'UI uiSlTimeTrim\n')
if (input$chBtimeTrim) {
locTpts = getDataTpts()
if(is.null(locTpts))
return(NULL)
locRTmin = min(locTpts)
locRTmax = max(locTpts)
sliderInput(
'slTimeTrim',
label = 'Time range to include',
min = locRTmin,
max = locRTmax,
value = c(locRTmin, locRTmax),
step = 1
)
}
})
# UI for normalization
......@@ -215,7 +245,8 @@ shinyServer(function(input, output, session) {
label = 'Time range for norm.',
min = locRTmin,
max = locRTmax,
value = c(locRTmin, 0.1 * locRTmax)
value = c(locRTmin, 0.1 * locRTmax),
step = 1
)
}
})
......@@ -241,6 +272,24 @@ shinyServer(function(input, output, session) {
})
# UI for removing outliers
output$uiSlOutliers = renderUI({
cat(file = stderr(), 'UI uiSlOutliers\n')
if (input$chBoutliers) {
sliderInput(
'slOutliersPerc',
label = 'Percentage of middle data',
min = 90,
max = 100,
value = 99,
step = 0.1
)
}
})
####
## data processing
......@@ -337,8 +386,21 @@ shinyServer(function(input, output, session) {
return(unique(loc.dt$trackObjectsLabelUni))
})
# return all unique time points (real time)
# return all unique track object labels (created in dataMod)
# This will be used to display in UI for trajectory highlighting
getDataTrackObjLabUni_afterTrim <- reactive({
cat(file = stderr(), 'getDataTrackObjLabUni_afterTrim\n')
loc.dt = data4trajPlot()
if (is.null(loc.dt))
return(NULL)
else
return(unique(loc.dt$id))
})
# return all unique time points (real time)
# This will be used to display in UI for box-plot
# These timepoints are from the original dt and aren't affected by trimming of x-axis
getDataTpts <- reactive({
cat(file = stderr(), 'getDataTpts\n')
loc.dt = dataMod()
......@@ -349,6 +411,19 @@ shinyServer(function(input, output, session) {
return(unique(loc.dt[[input$inSelTime]]))
})
# return dt with cell IDs and their corresponding condition name
# The condition is the column defined by facet groupings
getDataCond <- reactive({
cat(file = stderr(), 'getDataCond\n')
loc.dt = data4trajPlot()
if (is.null(loc.dt))
return(NULL)
else
return(unique(loc.dt[, .(id, group)]))
})
# prepare data for plotting time courses
# returns dt with these columns:
......@@ -419,8 +494,16 @@ shinyServer(function(input, output, session) {
}
}
# remove NAs
loc.out = loc.out[complete.cases(loc.out)]
# Trim x-axis (time)
if(input$chBtimeTrim) {
loc.out = loc.out[realtime >= input$slTimeTrim[[1]] & realtime <= input$slTimeTrim[[2]] ]
}
# Normalization
# F-n myNorm adds additional column with .norm suffix
if (input$chBnorm) {
loc.out = myNorm(
in.dt = loc.out,
......@@ -433,19 +516,72 @@ shinyServer(function(input, output, session) {
in.by.cols = if(input$chBnormGroup %in% 'none') NULL else input$chBnormGroup
)
# Column with normalized data is renamed to the original name
# Further code assumes column name y produced by data4trajPlot
loc.out[, y := NULL]
setnames(loc.out, 'y.norm', 'y')
}
# remove rows with NA
return(loc.out[complete.cases(loc.out)])
# Remove outliers
# 1. Scale all points (independently per track)
# 2. Pick time points that exceed the bounds
# 3. Identify IDs of outliers
# 4. Select cells that don't have these IDs
cat('Ncells orig = ', length(unique(loc.out$id)), '\n')
if (input$chBoutliers) {
loc.out[, y.sc := scale(y)]
loc.tmp = loc.out[ y.sc < quantile(y.sc, (1 - input$slOutliersPerc * 0.01)*0.5) |
y.sc > quantile(y.sc, 1 - (1 - input$slOutliersPerc * 0.01)*0.5)]
loc.out = loc.out[!(id %in% unique(loc.tmp$id))]
loc.out[, y.sc := NULL]
}
cat('Ncells trim = ', length(unique(loc.out$id)), '\n')
return(loc.out)
})
# prepare data for plotting boxplots
# uses the same dt as for trajectory plotting
# returns dt with these columns:
data4boxPlot <- reactive({
cat(file = stderr(), 'data4trajPlot\n')
cat(file = stderr(), 'data4boxPlot\n')
loc.dt = data4trajPlot()
if (is.null(loc.dt))
return(NULL)
loc.out = loc.dt[realtime %in% input$inSelTpts]
})
# prepare data for clustering
# return a matrix with:
# cells as columns
# time points as rows
data4clust <- reactive({
cat(file = stderr(), 'data4clust\n')
loc.dt = data4trajPlot()
if (is.null(loc.dt))
return(NULL)
loc.out = dcast(loc.dt, id ~ realtime, value.var = 'y')
loc.rownames = loc.out$id
loc.out = as.matrix(loc.out[, -1])
rownames(loc.out) = loc.rownames
return(loc.out)
})
# prepare data for plotting timecourses facetted per cluster
# uses the same dt as for trajectory plotting
# returns dt with these columns:
data4hierSparTrajPlot <- reactive({
cat(file = stderr(), 'data4hierSparTrajPlot\n')
loc.dt = data4trajPlot()
if (is.null(loc.dt))
......@@ -454,6 +590,14 @@ shinyServer(function(input, output, session) {
loc.out = loc.dt[realtime %in% input$inSelTpts]
})
# get cell IDs with cluster assignments depending on dendrogram cut
getDataCl = function(in.dend, in.k) {
loc.dt.cl = data.table(id = getDataTrackObjLabUni_afterTrim(),
cl = cutree(as.dendrogram(in.dend), k = in.k))
}
####
## UI for trajectory plot
......@@ -478,13 +622,44 @@ shinyServer(function(input, output, session) {
output$uiPlotTraj = renderUI({
plotlyOutput(
"plotTraj",
"plotTrajPlotly",
width = paste0(input$inPlotTrajWidth, '%'),
height = paste0(input$inPlotTrajHeight, 'px')
)
})
output$plotTraj <- renderPlotly({
output$plotTrajPlotly <- renderPlotly({
# This is required to avoid
# "Warning: Error in <Anonymous>: cannot open file 'Rplots.pdf'"
# When running on a server. Based on:
# https://github.com/ropensci/plotly/issues/494
if (names(dev.cur()) != "null device")
dev.off()
pdf(NULL)
loc.p = plotTraj()
if(is.null(loc.p))
return(NULL)
return(plotly_build(loc.p))
})
# Trajectory plot - download pdf
output$downPlotTraj <- downloadHandler(
filename = 'tcourses.pdf',
content = function(file) {
ggsave(
file,
limitsize = FALSE,
plotTraj(),
width = input$inPlotTrajDownWidth,
height = input$inPlotTrajDownHeight
)
}
)
plotTraj <- function() {
cat(file = stderr(), 'plotTraj: in\n')
locBut = input$butPlotTraj
......@@ -496,33 +671,15 @@ shinyServer(function(input, output, session) {
loc.dt = isolate(data4trajPlot())
cat("plotScatter on to plot\n\n")
cat("plotTraj: on to plot\n\n")
if (is.null(loc.dt)) {
cat(file = stderr(), 'plotTraj: dt is NULL\n')
return(NULL)
}
cat(file = stderr(), 'plotTraj:dt not NULL\n')
# Normalization
# if (input$chBnorm) {
# loc.dt = myNorm(
# in.dt = loc.dt,
# in.meas.col = 'y',
# in.rt.col = 'realtime',
# in.rt.min = input$slNormRtMinMax[1],
# in.rt.max = input$slNormRtMinMax[2],
# in.type = input$rBnormMeth,
# in.robust = input$chBnormRobust,
# in.by.cols = if(input$chBnormGroup %in% 'none') NULL else input$chBnormGroup
# )
# #cat(input$slNormRtMinMax, '\n')
# loc.y.arg = 'y.norm'
# } else {
# loc.y.arg = 'y'
# }
cat(file = stderr(), 'plotTraj: dt not NULL\n')
# Future: change such that a column with colouring status is chosen by the user
# colour trajectories, if dataset contains mi.din column
# with filtering status of trajectory
......@@ -542,18 +699,9 @@ shinyServer(function(input, output, session) {
line.col.arg = loc.line.col.arg
)
# This is required to avoid
# "Warning: Error in <Anonymous>: cannot open file 'Rplots.pdf'"
# When running on a server. Based on:
# https://github.com/ropensci/plotly/issues/494
if (names(dev.cur()) != "null device")
dev.off()
pdf(NULL)
p.out.ly = plotly_build(p.out)
return(p.out.ly)
})
return(p.out)
}
####
......@@ -593,10 +741,6 @@ shinyServer(function(input, output, session) {
filename = 'boxplot.pdf',
content = function(file) {
cat(file = stderr(),
input$inPlotBoxWidth,
input$inPlotBoxHeight,
"\n")
ggsave(
file,
limitsize = FALSE,
......@@ -655,4 +799,700 @@ shinyServer(function(input, output, session) {
legend.position = input$selPlotBoxLegendPos
)
}
##### Hierarchical clustering
output$uiPlotHierClSel = renderUI({
if(input$chBPlotHierClSel) {
selectInput('inPlotHierClSel', 'Select clusters to display',
choices = seq(1, input$inPlotHierNclust, 1),
multiple = TRUE,
selected = 1)
}
})
output$uiPlotHierClDistClSel = renderUI({
if(input$chBPlotHierClDistSel) {
selectInput('inPlotHierClDistClSel', 'Select clusters to display',
choices = seq(1, input$inPlotHierNclust, 1),
multiple = TRUE,
selected = 1)
}
})
userFitDendHier <- reactive({
dm.t = data4clust()
if (is.null(dm.t)) {
return()
}
cl.dist = dist(dm.t, method = s.cl.diss[as.numeric(input$selectPlotHierDiss)])
cl.hc = hclust(cl.dist, method = s.cl.linkage[as.numeric(input$selectPlotHierLinkage)])
cl.lev = rev(row.names(dm.t))
dend <- as.dendrogram(cl.hc)
dend <- color_branches(dend, k = input$inPlotHierNclust)
return(dend)
})
# Function instead of reactive as per:
# http://stackoverflow.com/questions/26764481/downloading-png-from-shiny-r
# This function is used to plot and to downoad a pdf
plotHier <- function() {
loc.dm = data4clust()
if (is.null(loc.dm))
return(NULL)
loc.dend <- userFitDendHier()
if (is.null(loc.dend))
return(NULL)
if (input$inPlotHierRevPalette)
my_palette <-
rev(colorRampPalette(brewer.pal(9, input$selectPlotHierPalette))(n = 99))
else
my_palette <-
colorRampPalette(brewer.pal(9, input$selectPlotHierPalette))(n = 99)
col_labels <- get_leaves_branches_col(loc.dend)
col_labels <- col_labels[order(order.dendrogram(loc.dend))]
if (input$selectPlotHierDend) {
assign("var.tmp.1", loc.dend)
var.tmp.2 = "row"
} else {
assign("var.tmp.1", FALSE)
var.tmp.2 = "none"
}
#cat(loc.dm, '\n')
#cat(var.tmp.1, '\n')
cat(var.tmp.2, '\n')
loc.p = heatmap.2(
loc.dm,
Colv = "NA",
Rowv = var.tmp.1,
srtCol = 90,
dendrogram = var.tmp.2,
trace = "none",
key = input$selectPlotHierKey,
margins = c(input$inPlotHierMarginX, input$inPlotHierMarginY),
col = my_palette,
na.col = grey(input$inPlotHierNAcolor),
denscol = "black",
density.info = "density",
RowSideColors = col_labels,
colRow = col_labels,
# sepcolor = grey(input$inPlotHierGridColor),
# colsep = 1:ncol(loc.dm),
# rowsep = 1:nrow(loc.dm),
cexRow = input$inPlotHierFontX,
cexCol = input$inPlotHierFontY,
main = paste(
"Distance measure: ",
s.cl.diss[as.numeric(input$selectPlotHierDiss)],
"\nLinkage method: ",
s.cl.linkage[as.numeric(input$selectPlotHierLinkage)]
)
)
return(loc.p)
}
plotHierTraj <- function(){
cat(file = stderr(), 'plotHierTraj: in\n')
loc.dt = isolate(data4trajPlot())
cat("plotHierTraj: on to plot\n\n")
if (is.null(loc.dt)) {
cat(file = stderr(), 'plotHierTraj: dt is NULL\n')
return(NULL)
}
cat(file = stderr(), 'plotHierTraj: dt not NULL\n')
# get cellIDs with cluster assignments based on dendrogram cut
loc.dt.cl = getDataCl(userFitDendHier(), isolate(input$inPlotHierNclust))
loc.dt = merge(loc.dt, loc.dt.cl, by = 'id')
# display only selected clusters
if(isolate(input$chBPlotHierClSel))
loc.dt = loc.dt[cl %in% isolate(input$inPlotHierClSel)]
# Future: change such that a column with colouring status is chosen by the user
# colour trajectories, if dataset contains mi.din column
# with filtering status of trajectory
if (sum(names(loc.dt) %in% 'mid.in') > 0)
loc.line.col.arg = 'mid.in'
else
loc.line.col.arg = NULL
p.out = myGgplotTraj(
dt.arg = loc.dt,
x.arg = 'realtime',
y.arg = 'y',
group.arg = "id",
facet.arg = 'cl',
facet.ncol.arg = input$inPlotTrajFacetNcol,
xlab.arg = 'Time (min)',
line.col.arg = loc.line.col.arg
)
return(p.out)
}
# Barplot with distribution of clusters across conditions
plotHierClDist = function() {
cat(file = stderr(), 'plotClDist: in\n')
# get cell IDs with cluster assignments depending on dendrogram cut
loc.dend <- isolate(userFitDendHier())
if (is.null(loc.dend)) {
cat(file = stderr(), 'plotClDist: loc.dend is NULL\n')
return(NULL)
}
loc.dt.cl = data.table(id = getDataTrackObjLabUni_afterTrim(),
cl = cutree(as.dendrogram(loc.dend), k = input$inPlotHierNclust))
loc.dt.gr = isolate(getDataCond())
if (is.null(loc.dt.gr)) {
cat(file = stderr(), 'plotClDist: loc.dt.gr is NULL\n')
return(NULL)
}
loc.dt = merge(loc.dt.cl, loc.dt.gr, by = 'id')
# display only selected clusters
if(isolate(input$chBPlotHierClSel))
loc.dt = loc.dt[cl %in% isolate(input$inPlotHierClSel)]
loc.dt.aggr = loc.dt[, .(nCells = .N), by = .(group, cl)]
p.out = ggplot(loc.dt.aggr, aes(x = group, y = nCells)) +
geom_bar(aes(fill = as.factor(cl)), stat = 'identity', position = 'fill') +
scale_y_continuous(labels = percent) +
ylab("percentage of cells\n") +
xlab("") +
scale_fill_discrete(name = "Cluster no.") +
myGgplotTheme
return(p.out)
}
# Hierarchical - display heatmap
getPlotHierHeatMapHeight <- function() {
return (input$inPlotHierHeatMapHeight)
}
getPlotHierTrajHeight <- function() {
return (input$inPlotHierTrajHeight)
}
output$outPlotHier <- renderPlot({
locBut = input$butPlotHierHeatMap
if (locBut == 0) {
cat(file = stderr(), 'outPlotHier: Go button not pressed\n')
return(NULL)
}
plotHier()
}, height = getPlotHierHeatMapHeight)
# Hierarchical - display timecourses plot
output$outPlotHierTraj <- renderPlot({
locBut = input$butPlotHierTraj
if (locBut == 0) {
cat(file = stderr(), 'outPlotHierTraj: Go button not pressed\n')
return(NULL)
}
plotHierTraj()
})
# Hierarchical - display bar plot
output$outPlotHierClDist <- renderPlot({
locBut = input$butPlotHierClDist
if (locBut == 0) {
cat(file = stderr(), 'outPlotClDist: Go button not pressed\n')
return(NULL)
}
plotHierClDist()
})
# Hierarchical - Heat Map - download pdf
output$downPlotHier <- downloadHandler(
filename = function() {
paste0('clust_hierch_heatMap_', s.cl.spar.linkage[as.numeric(input$selectPlotHierLinkage)], '.pdf')
},
content = function(file) {
pdf(
file,
width = input$inPlotHierWidth,
height = input$inPlotHierHeight
)
plotHier()
dev.off()