In order to mitigate against the brute force attacks against Gitlab accounts, we are moving to all edu-ID Logins. We would like to remind you to link your account with your edu-id. Login will be possible only by edu-ID after November 30, 2021. Here you can find the instructions for linking your account.

If you don't have a SWITCH edu-ID, you can create one with this guide here

kind regards

This Server has been upgraded to GitLab release 14.2.6

Commit 9bb17940 authored by dmattek's avatar dmattek
Browse files

Big changes

parent c7c0bb8d
......@@ -89,16 +89,58 @@ help.text = c(
#####
## Function for clustering
## Functions for clustering
# get cell IDs with cluster assignments depending on dendrogram cut
getDataCl = function(in.dend, in.k, in.ids) {
# Return a dt with cell IDs and corresponding cluster assignments depending on dendrogram cut (in.k)
# This one works wth dist & hclust pair
# For sparse hierarchical clustering use getDataClSpar
# Arguments:
# in.dend - dendrogram; usually output from as.dendrogram(hclust(distance_matrix))
# in.k - level at which dendrogram should be cut
getDataCl = function(in.dend, in.k) {
cat(file = stderr(), 'getDataCl \n')
loc.dt.cl = data.table(id = in.ids,
cl = cutree(as.dendrogram(in.dend), k = in.k))
loc.m = dendextend::cutree(in.dend, in.k, order_clusters_as_data = TRUE)
#print(loc.m)
# The result of cutree containes named vector with names being cell id's
# THIS WON'T WORK with sparse hierarchical clustering because there, the dendrogram doesn't have original id's
loc.dt.cl = data.table(id = names(loc.m),
cl = loc.m)
cat('===============\ndataCl:\n')
print(loc.dt.cl)
return(loc.dt.cl)
}
# Return a dt with cell IDs and corresponding cluster assignments depending on dendrogram cut (in.k)
# This one works with sparse hierarchical clustering!
# Arguments:
# in.dend - dendrogram; usually output from as.dendrogram(hclust(distance_matrix))
# in.k - level at which dendrogram should be cut
# in.id - vector of cell id's
getDataClSpar = function(in.dend, in.k, in.id) {
cat(file = stderr(), 'getDataClSpar \n')
loc.m = dendextend::cutree(in.dend, in.k, order_clusters_as_data = TRUE)
#print(loc.m)
# The result of cutree containes named vector with names being cell id's
# THIS WON'T WORK with sparse hierarchical clustering because there, the dendrogram doesn't have original id's
loc.dt.cl = data.table(id = in.id,
cl = loc.m)
cat('===============\ndataCl:\n')
print(loc.dt.cl)
return(loc.dt.cl)
}
# prepares a table with cluster numbers in 1st column and colour assignments in 2nd column
# the number of rows is determined by dendrogram cut
getClCol <- function(in.dend, in.k) {
......@@ -266,13 +308,17 @@ userDataGen <- function() {
cat(file=stderr(), 'userDataGen: in\n')
locNtp = 40
locNtracks = 100
locNtracks = 10
locNsites = 4
locNwells = 1
x.rand.1 = c(rnorm(locNtp * locNtracks * locNsites * 0.5, .5, 0.1), rnorm(locNtp * locNtracks * locNsites * 0.5, 1, 0.2))
x.rand.2 = c(rnorm(locNtp * locNtracks * locNsites * 0.5, 0.25, 0.1), rnorm(locNtp * locNtracks * locNsites * 0.5, 0.5, 0.2))
# x.rand.3 = rep(rnorm(locNtracks, 2, 0.5), 1, each = locNtp)
# add NA's for testing
x.rand.1[c(10,20,30)] = NA
# x.rand.3 = rep(rnorm(locNtracks, 2, 0.5), 1, each = locNtp)
# x.rand.4 = rep(rnorm(locNtracks, 1, 0.1), 1, each = locNtp)
# x.arg = rep(seq(0, locNtp-1) / locNtp * 4 * pi, locNtracks * locNsites)
......@@ -526,7 +572,9 @@ myPlotHeatmap <- function(data.arg,
# rowsep = 1:nrow(loc.dm),
cexRow = font.row.arg,
cexCol = font.col.arg,
main = title.arg
main = title.arg,
symbreaks = FALSE,
symkey = FALSE
)
return(loc.p)
......
......@@ -75,6 +75,8 @@ tabBoxPlot = function(input, output, session, in.data, in.fname = 'boxplotTP.pdf
out.dt = loc.dt[realtime %in% input$inSelTpts]
loc.dt.aux = loc.dt[realtime %in% c(as.numeric(input$inSelTpts) - input$slFoldChTp)]
loc.y.prev = loc.dt.aux[, y]
print(nrow(loc.dt.aux))
print(nrow(out.dt))
out.dt[, y.prev := loc.y.prev]
print(out.dt)
......
......@@ -257,7 +257,7 @@ clustHier <- function(input, output, session, in.data4clust, in.data4trajPlot) {
cat(file = stderr(), 'data4trajPlotCl: dt not NULL\n')
# get cellIDs with cluster assignments based on dendrogram cut
loc.dt.cl = getDataCl(userFitDendHier(), input$inPlotHierNclust, getDataTrackObjLabUni_afterTrim())
loc.dt.cl = getDataCl(userFitDendHier(), input$inPlotHierNclust)
loc.dt = merge(loc.dt, loc.dt.cl, by = 'id')
# display only selected clusters
......@@ -277,7 +277,7 @@ clustHier <- function(input, output, session, in.data4clust, in.data4trajPlot) {
},
content = function(file) {
write.csv(x = getDataCl(userFitDendHier(), input$inPlotHierNclust, getDataTrackObjLabUni_afterTrim()), file = file, row.names = FALSE)
write.csv(x = getDataCl(userFitDendHier(), input$inPlotHierNclust), file = file, row.names = FALSE)
}
)
......@@ -292,9 +292,8 @@ clustHier <- function(input, output, session, in.data4clust, in.data4trajPlot) {
return(NULL)
}
loc.dt.cl = data.table(id = getDataTrackObjLabUni_afterTrim(),
cl = cutree(as.dendrogram(loc.dend), k = input$inPlotHierNclust))
# get cell id's with associated cluster numbers
loc.dt.cl = getDataCl(loc.dend, input$inPlotHierNclust)
# get cellIDs with condition name
loc.dt.gr = getDataCond()
......
......@@ -222,8 +222,9 @@ clustHierSpar <- function(input, output, session, in.data4clust, in.data4trajPlo
return()
}
#cat('rownames: ', rownames(dm.t), '\n')
cat('rownames: ', rownames(dm.t), '\n')
cat('=============\ndimensions:', dim(dm.t), '\n')
perm.out <- HierarchicalSparseCluster.permute(
dm.t,
wbounds = NULL,
......@@ -238,10 +239,16 @@ clustHierSpar <- function(input, output, session, in.data4clust, in.data4trajPlo
method = s.cl.spar.linkage[as.numeric(input$selectPlotHierSparLinkage)],
dissimilarity = s.cl.spar.diss[as.numeric(input$selectPlotHierSparDiss)]
)
cat('=============\nsparsehc:\n')
print(sparsehc$hc)
return(sparsehc)
})
# return dendrogram colour coded according to the cut level of the dendrogram
userFitDendHierSpar <- reactive({
sparsehc = userFitHierSpar()
if (is.null(sparsehc)) {
......@@ -249,6 +256,9 @@ clustHierSpar <- function(input, output, session, in.data4clust, in.data4trajPlo
}
dend <- as.dendrogram(sparsehc$hc)
cat('=============\ncutree:\n', dendextend::cutree(dend, input$inPlotHierSparNclust, order_clusters_as_data = TRUE), '\n')
dend <- color_branches(dend,
col = rainbow_hcl,
k = input$inPlotHierSparNclust)
......@@ -265,7 +275,10 @@ clustHierSpar <- function(input, output, session, in.data4clust, in.data4trajPlo
if (is.null(loc.dend))
return(NULL)
return(getClCol(loc.dend, input$inPlotHierSparNclust))
loc.cut = getClCol(loc.dend, input$inPlotHierSparNclust)
return(loc.cut)
})
......@@ -273,12 +286,12 @@ clustHierSpar <- function(input, output, session, in.data4clust, in.data4trajPlo
# This will be used to display in UI for trajectory highlighting
getDataTrackObjLabUni_afterTrim <- reactive({
cat(file = stderr(), 'getDataTrackObjLabUni_afterTrim\n')
loc.dt = in.data4trajPlot()
loc.dt = in.data4clust()
if (is.null(loc.dt))
return(NULL)
else
return(unique(loc.dt$id))
return(rownames(loc.dt))
})
# return dt with cell IDs and their corresponding condition name
......@@ -309,8 +322,17 @@ clustHierSpar <- function(input, output, session, in.data4clust, in.data4trajPlo
cat(file = stderr(), 'data4trajPlotClSpar: dt not NULL\n')
cat('rownames: ', rownames(in.data4clust()), '\n')
# get cellIDs with cluster assignments based on dendrogram cut
loc.dt.cl = getDataCl(userFitDendHierSpar(), input$inPlotHierSparNclust, getDataTrackObjLabUni_afterTrim())
loc.dt.cl = getDataClSpar(userFitDendHierSpar(), input$inPlotHierSparNclust, getDataTrackObjLabUni_afterTrim())
####
## PROBLEM!!!
## the dendrogram from sparse hier clust doesn't contain cellID's
## the following merge won't work...
## No idea how to solve it
loc.dt = merge(loc.dt, loc.dt.cl, by = 'id')
# display only selected clusters
......@@ -331,7 +353,7 @@ clustHierSpar <- function(input, output, session, in.data4clust, in.data4trajPlo
},
content = function(file) {
write.csv(x = getDataCl(userFitDendHierSpar(), input$inPlotHierSparNclust, getDataTrackObjLabUni_afterTrim()), file = file, row.names = FALSE)
write.csv(x = getDataClSpar(userFitDendHierSpar(), input$inPlotHierSparNclust, getDataTrackObjLabUni_afterTrim()), file = file, row.names = FALSE)
}
)
......@@ -340,15 +362,14 @@ clustHierSpar <- function(input, output, session, in.data4clust, in.data4trajPlo
cat(file = stderr(), 'data4clSparDistPlot: in\n')
# get cell IDs with cluster assignments depending on dendrogram cut
loc.dend <- userFitHierSpar()
loc.dend <- userFitDendHierSpar()
if (is.null(loc.dend)) {
cat(file = stderr(), 'plotClSparDist: loc.dend is NULL\n')
return(NULL)
}
loc.dt.cl = data.table(id = getDataTrackObjLabUni_afterTrim(),
cl = cutree(as.dendrogram(loc.dend$hc), k = input$inPlotHierSparNclust))
# get cell id's with associated cluster numbers
loc.dt.cl = getDataClSpar(loc.dend, input$inPlotHierSparNclust, getDataTrackObjLabUni_afterTrim())
# get cellIDs with condition name
loc.dt.gr = getDataCond()
......
......@@ -147,6 +147,9 @@ shinyServer(function(input, output, session) {
# In Coralie's case it's a combination of 3 columns called Stimulation_...
output$varSelGroup = renderUI({
cat(file = stderr(), 'UI varSelGroup\n')
if (input$chBgroup) {
locCols = getDataNucCols()
if (!is.null(locCols)) {
......@@ -166,7 +169,7 @@ shinyServer(function(input, output, session) {
multiple = TRUE
)
}
}
})
output$varSelSite = renderUI({
......@@ -445,7 +448,9 @@ shinyServer(function(input, output, session) {
cat(file = stderr(), 'dataMod: trajRem not NULL\n')
loc.dt.rem = dataLoadTrajRem()
loc.dt = loc.dt[!(trackObjectsLabelUni %in% loc.dt.rem$id)]
loc.dt = loc.dt[!(trackObjectsLabelUni %in% loc.dt.rem[[1]])]
}
return(loc.dt)
......@@ -506,11 +511,20 @@ shinyServer(function(input, output, session) {
# create expression for parsing
# creates a merged column based on other columns from input
# used for grouping of plot facets
if (input$chBgroup) {
if(length(input$inSelGroup) == 0)
return(NULL)
loc.s.gr = sprintf("paste(%s, sep=';')",
paste(input$inSelGroup, sep = '', collapse = ','))
} else {
# if no grouping required, fill 'group' column with 0
# because all the plotting relies on the presence of the group column
loc.s.gr = "paste('0')"
}
# column name with time
loc.s.rt = input$inSelTime
# Assign tracks selected for highlighting in UI
......@@ -548,9 +562,6 @@ shinyServer(function(input, output, session) {
mid.in = mid.in
)]
# add 3rd level with status of track selection
# to a column with trajectory filtering status
if (locBut) {
......@@ -581,15 +592,48 @@ shinyServer(function(input, output, session) {
}
}
# add XY location if present in the dataset
## Interpolate NA's and data points not included
# dt with a full span of realtime for every group and cell id (here it's already unique across entire dataset) combination
loc.dt.IdRt = CJ(id = loc.out[['id']],
realtime = loc.out[['realtime']],
unique = TRUE, sorted = TRUE )
# dt with all cell id's and their associated group names
loc.dt.GrId = loc.out[, .(group = first(group)), by = id]
# merge the 2 above to have all id~rt combinations with associated group names
loc.dt.GrIdRt = merge(loc.dt.IdRt, loc.dt.GrId, by = 'id')
# join with the original to expand it and create NA's for non-existing group-id-rt combinations
loc.out = merge(loc.dt.GrIdRt, loc.out, all.x = TRUE, by = c('group', 'id', 'realtime'))
# x-check: print all rows with NA's
print('Rows with NAs:')
print(loc.out[rowSums(is.na(loc.out)) > 0, ])
# Merge will create NA's where a realtime is missing.
# Also, NA's may be already present in the dataset'.
# Interpolate (linear) them with na.interpolate
if(locPos) {
s.cols = c('y', 'pos.x', 'pos.y')
loc.out[, (s.cols) := lapply(.SD, na.interpolation), by = id, .SDcols = s.cols]
}
else {
s.cols = c('y')
loc.out[, (s.cols) := lapply(.SD, na.interpolation), by = id, .SDcols = s.cols]
}
# remove NAs
# (doesn't make sense to remove here anyway;
# NA's are already removed in tCourseSelected.csv
# Such datapoints are missing, therefore they require interpolation.
# If a row of long-format dt is removed, an NA appears after casting anyway if that grid point is missing)
# Remove NAs in data4clust()
loc.out = loc.out[complete.cases(loc.out)]
# !!! Current issue with interpolation:
# The column mid.in is not taken into account.
# If a trajectory is selected in the UI,
# the mid.in column is added (if it doesn't already exist in the dataset),
# and for the interpolated point, it will still be NA. Not really an issue.
#
# Also, think about the current option of having mid.in column in the uploaded dataset.
# Keep it? Expand it?
# Create a UI filed for selecting the column with mid.in data.
# What to do with that column during interpolation (see above)
# Trim x-axis (time)
if(input$chBtimeTrim) {
......
......@@ -51,7 +51,9 @@ shinyUI(fluidPage(
),
uiOutput('varSelSite'),
uiOutput('varSelTrackLabel'),
tags$hr(),
checkboxInput('chBgroup', 'Dataset contains grouping column (e.g. treatment, condition)', TRUE),
uiOutput('varSelGroup'),
uiOutput('varSelTime'),
uiOutput('varSelMeas1'),
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment