## Custom plotting require(ggplot2) require(RColorBrewer) require(gplots) # for heatmap.2 require(grid) # for modifying grob rhg_cols <- c( "#771C19", "#AA3929", "#E25033", "#F27314", "#F8A31B", "#E2C59F", "#B6C5CC", "#8E9CA3", "#556670", "#000000" ) md_cols <- c( "#FFFFFF", "#F8A31B", "#F27314", "#E25033", "#AA3929", "#FFFFCC", "#C2E699", "#78C679", "#238443" ) s.cl.linkage = c("ward.D", "ward.D2", "single", "complete", "average", "mcquitty", "centroid") s.cl.spar.linkage = c("average", "complete", "single", "centroid") s.cl.diss = c("euclidean", "maximum", "manhattan", "canberra", "binary", "minkowski", "DTW") s.cl.spar.diss = c("squared.distance","absolute.value") # list of palettes for the heatmap l.col.pal = list( "White-Orange-Red" = 'OrRd', "Yellow-Orange-Red" = 'YlOrRd', "Reds" = "Reds", "Oranges" = "Oranges", "Greens" = "Greens", "Blues" = "Blues", "Spectral" = 'Spectral' ) # list of palettes for the dendrogram l.col.pal.dend = list( "Rainbow" = 'rainbow_hcl', "Sequential" = 'sequential_hcl', "Heat" = 'heat_hcl', "Terrain" = 'terrain_hcl', "Diverge HCL" = 'diverge_hcl', "Diverge HSV" = 'diverge_hsv' ) # Creates a popup with help text # From: https://gist.github.com/jcheng5/5913297 helpPopup <- function(title, content, placement=c('right', 'top', 'left', 'bottom'), trigger=c('click', 'hover', 'focus', 'manual')) { tagList( singleton( tags$head( tags$script("$(function() { $(\"[data-toggle='popover']\").popover(); })") ) ), tags$a( href = "#", class = "btn btn-mini", `data-toggle` = "popover", title = title, `data-content` = content, `data-animation` = TRUE, `data-placement` = match.arg(placement, several.ok=TRUE)[1], `data-trigger` = match.arg(trigger, several.ok=TRUE)[1], #tags$i(class="icon-question-sign") # changed based on http://stackoverflow.com/questions/30436013/info-bubble-text-in-a-shiny-interface icon("question") ) ) } help.text = c( 'Accepts CSV file with a column of cell IDs for removal. IDs should correspond to those used for plotting. Say, the main data file contains columns Metadata_Site and TrackLabel. These two columns should be then selected in UI to form a unique cell ID, e.g. 001_0001 where former part corresponds to Metadata_Site and the latter to TrackLabel.', 'Plotting and data processing requires a unique cell ID across entire dataset. A typical dataset from CellProfiler assigns unique cell ID (TrackLabel) within each field of view (Metadata_Site). Therefore, a unique ID is created by concatenating these two columns. If the dataset already contains a unique ID, UNcheck this box and select a single column only.', 'This option allows to interpolate NAs or missing data. Some rows in the input file might be missing because a particular time point might not had been acquired. This option, interpolates such missing points as well as points with NAs in the measurement column. When this option is checked, the interval of time column must be provided!', 'Accepts CSV file with 5 columns: grouping (e.g. condition), start and end time points of stimulation, start and end points of y-position, dummy column with id.' ) ##### ## Functions for clustering # Return a dt with cell IDs and corresponding cluster assignments depending on dendrogram cut (in.k) # This one works wth dist & hclust pair # For sparse hierarchical clustering use getDataClSpar # Arguments: # in.dend - dendrogram; usually output from as.dendrogram(hclust(distance_matrix)) # in.k - level at which dendrogram should be cut getDataCl = function(in.dend, in.k) { cat(file = stderr(), 'getDataCl \n') loc.m = dendextend::cutree(in.dend, in.k, order_clusters_as_data = TRUE) #print(loc.m) # The result of cutree containes named vector with names being cell id's # THIS WON'T WORK with sparse hierarchical clustering because there, the dendrogram doesn't have original id's loc.dt.cl = data.table(id = names(loc.m), cl = loc.m) #cat('===============\ndataCl:\n') #print(loc.dt.cl) return(loc.dt.cl) } # Return a dt with cell IDs and corresponding cluster assignments depending on dendrogram cut (in.k) # This one works with sparse hierarchical clustering! # Arguments: # in.dend - dendrogram; usually output from as.dendrogram(hclust(distance_matrix)) # in.k - level at which dendrogram should be cut # in.id - vector of cell id's getDataClSpar = function(in.dend, in.k, in.id) { cat(file = stderr(), 'getDataClSpar \n') loc.m = dendextend::cutree(in.dend, in.k, order_clusters_as_data = TRUE) #print(loc.m) # The result of cutree containes named vector with names being cell id's # THIS WON'T WORK with sparse hierarchical clustering because there, the dendrogram doesn't have original id's loc.dt.cl = data.table(id = in.id, cl = loc.m) #cat('===============\ndataCl:\n') #print(loc.dt.cl) return(loc.dt.cl) } # prepares a table with cluster numbers in 1st column and colour assignments in 2nd column # the number of rows is determined by dendrogram cut getClCol <- function(in.dend, in.k) { loc.col_labels <- get_leaves_branches_col(in.dend) loc.col_labels <- loc.col_labels[order(order.dendrogram(in.dend))] return(unique( data.table(cl.no = dendextend::cutree(in.dend, k = in.k, order_clusters_as_data = TRUE), cl.col = loc.col_labels))) } ##### ## Common plotting functions # Build Function to Return Element Text Object # From: https://stackoverflow.com/a/36979201/1898713 rotatedAxisElementText = function(angle, position='x', size = 12){ angle = angle[1]; position = position[1] positions = list(x=0, y=90, top=180, right=270) if(!position %in% names(positions)) stop(sprintf("'position' must be one of [%s]",paste(names(positions),collapse=", ")), call.=FALSE) if(!is.numeric(angle)) stop("'angle' must be numeric",call.=FALSE) rads = (-angle - positions[[ position ]])*pi/180 hjust = round((1 - sin(rads)))/2 vjust = round((1 + cos(rads)))/2 element_text(size = 12, angle = angle, vjust = vjust, hjust = hjust) } # default ggplot theme used in the app myGgplotTheme = theme_bw(base_size = 8, base_family = "Helvetica") + theme( panel.spacing = unit(1, "lines"), panel.grid.minor = element_blank(), panel.grid.major = element_blank(), panel.border = element_blank(), axis.line.x = element_line(color = "black", size = 0.25), axis.line.y = element_line(color = "black", size = 0.25), axis.text = element_text(size = 8), axis.title = element_text(size = 8), strip.text = element_text(size = 10, face = "bold"), strip.background = element_blank(), legend.key = element_blank(), legend.text = element_text(size = 8), legend.key.height = unit(1, "lines"), legend.key.width = unit(2, "lines"), legend.position = "top" ) # Plot individual time series LOCplotTraj = function(dt.arg, # input data table x.arg, # string with column name for x-axis y.arg, # string with column name for y-axis group.arg, # string with column name for grouping time series (typicaly cell ID) facet.arg, # string with column name for facetting facet.ncol.arg = 2, # default number of facet columns facet.color.arg = NULL, # vector with list of colours for adding colours to facet names (currently a horizontal line on top of the facet is drawn) line.col.arg = NULL, # string with column name for colouring time series (typically when individual time series are selected in UI) xlab.arg = NULL, # string with x-axis label ylab.arg = NULL, # string with y-axis label plotlab.arg = NULL, # string with plot label dt.stim.arg = NULL, # plotting additional dataset; typically to indicate stimulations (not fully implemented yet, not tested!) x.stim.arg = c('tstart', 'tend'), # column names in stimulation dt with x and xend parameters y.stim.arg = c('ystart', 'yend'), # column names in stimulation dt with y and yend parameters tfreq.arg = 1, ylim.arg = NULL, stim.bar.width.arg = 0.5, aux.label1 = NULL, # 1st point label; used for interactive plotting; displayed in the tooltip; typically used to display values of column holding x & y coordinates aux.label2 = NULL, aux.label3 = NULL, stat.arg = c('', 'mean', 'CI', 'SE')) { # match arguments for stat plotting loc.stat = match.arg(stat.arg, several.ok = TRUE) # aux.label12 are required for plotting XY positions in the tooltip of the interactive (plotly) graph p.tmp = ggplot(dt.arg, aes_string(x = x.arg, y = y.arg, group = group.arg, label = group.arg)) #, # label = aux.label1, # label2 = aux.label2, # label3 = aux.label3)) if (is.null(line.col.arg)) { p.tmp = p.tmp + geom_line(alpha = 0.25, size = 0.25) } else { p.tmp = p.tmp + geom_line(aes_string(colour = line.col.arg), alpha = 0.5, size = 0.5) + scale_color_manual(name = '', values =c("FALSE" = rhg_cols[7], "TRUE" = rhg_cols[3], "SELECTED" = 'green', "NOT SEL" = rhg_cols[7])) } # this is temporary solution for adding colour according to cluster number # use only when plotting traj from clustering! # a horizontal line is added at the top of data if (!is.null(facet.color.arg)) { loc.y.max = max(dt.arg[, c(y.arg), with = FALSE]) loc.dt.cl = data.table(xx = 1:length(facet.color.arg), yy = loc.y.max) setnames(loc.dt.cl, 'xx', facet.arg) # adjust facet.color.arg to plot p.tmp = p.tmp + geom_hline(data = loc.dt.cl, colour = facet.color.arg, yintercept = loc.y.max, size = 4) + scale_colour_manual(values = facet.color.arg, name = '') } if ('mean' %in% loc.stat) p.tmp = p.tmp + stat_summary( aes_string(y = y.arg, group = 1), fun.y = mean, colour = 'red', linetype = 'solid', size = 1, geom = "line", group = 1 ) if ('CI' %in% loc.stat) p.tmp = p.tmp + stat_summary( aes_string(y = y.arg, group = 1), fun.data = mean_cl_normal, colour = 'red', alpha = 0.25, geom = "ribbon", group = 1 ) if ('SE' %in% loc.stat) p.tmp = p.tmp + stat_summary( aes_string(y = y.arg, group = 1), fun.data = mean_se, colour = 'red', alpha = 0.25, geom = "ribbon", group = 1 ) p.tmp = p.tmp + facet_wrap(as.formula(paste("~", facet.arg)), ncol = facet.ncol.arg, scales = "free_x") # plot stimulation bars underneath time series # dt.stim.arg is read separately and should contain 4 columns with # xy positions of beginning and end of the bar if(!is.null(dt.stim.arg)) { p.tmp = p.tmp + geom_segment(data = dt.stim.arg, aes_string(x = x.stim.arg[1], xend = x.stim.arg[2], y = y.stim.arg[1], yend = y.stim.arg[2], group = 'group'), colour = rhg_cols[[3]], size = stim.bar.width.arg) } if (!is.null(ylim.arg)) p.tmp = p.tmp + coord_cartesian(ylim = ylim.arg) p.tmp = p.tmp + xlab(paste0(xlab.arg, "\n")) + ylab(paste0("\n", ylab.arg)) + ggtitle(plotlab.arg) + ggplotTheme() + theme(legend.position = "top") return(p.tmp) } # Plot average time series with CI together in one facet LOCplotTrajRibbon = function(dt.arg, # input data table x.arg, # string with column name for x-axis y.arg, # string with column name for y-axis group.arg = NULL, # string with column name for grouping time series (here, it's a column corresponding to grouping by condition) col.arg = NULL, # colour pallette for individual time series dt.stim.arg = NULL, # data table with stimulation pattern x.stim.arg = c('tstart', 'tend'), # column names in stimulation dt with x and xend parameters y.stim.arg = c('ystart', 'yend'), # column names in stimulation dt with y and yend parameters stim.bar.width.arg = 0.5, ribbon.lohi.arg = c('Lower', 'Upper'), ribbon.fill.arg = 'grey50', ribbon.alpha.arg = 0.5, xlab.arg = NULL, ylab.arg = NULL, plotlab.arg = NULL) { p.tmp = ggplot(dt.arg, aes_string(x = x.arg, group = group.arg)) + geom_ribbon(aes_string(ymin = ribbon.lohi.arg[1], ymax = ribbon.lohi.arg[2]), fill = ribbon.fill.arg, alpha = ribbon.alpha.arg) + geom_line(aes_string(y = y.arg, colour = group.arg)) # plot stimulation bars underneath time series # dt.stim.arg is read separately and should contain 4 columns with # xy positions of beginning and end of the bar if(!is.null(dt.stim.arg)) { p.tmp = p.tmp + geom_segment(data = dt.stim.arg, aes_string(x = x.stim.arg[1], xend = x.stim.arg[2], y = y.stim.arg[1], yend = y.stim.arg[2]), colour = rhg_cols[[3]], size = stim.bar.width.arg, group = 1) } if (is.null(col.arg)) { p.tmp = p.tmp + scale_color_discrete(name = '') } else { p.tmp = p.tmp + scale_colour_manual(values = col.arg, name = '') } if (!is.null(plotlab.arg)) p.tmp = p.tmp + ggtitle(plotlab.arg) p.tmp = p.tmp + xlab(xlab.arg) + ylab(ylab.arg) return(p.tmp) } # Plots a scatter plot with marginal histograms # Points are connected by a line (grouping by cellID) # # Assumes an input of data.table with # x, y - columns with x and y coordinates # id - a unique point identifier (here corresponds to cellID) # mid - a (0,1) column by which points are coloured (here corresponds to whether cells are within bounds) myGgplotScat = function(dt.arg, band.arg = NULL, facet.arg = NULL, facet.ncol.arg = 2, xlab.arg = NULL, ylab.arg = NULL, plotlab.arg = NULL, alpha.arg = 1, group.col.arg = NULL) { p.tmp = ggplot(dt.arg, aes(x = x, y = y)) if (is.null(group.col.arg)) { p.tmp = p.tmp + geom_point(alpha = alpha.arg, aes(group = id)) } else { p.tmp = p.tmp + geom_point(aes(colour = as.factor(get(group.col.arg)), group = id), alpha = alpha.arg) + geom_path(aes(colour = as.factor(get(group.col.arg)), group = id), alpha = alpha.arg) + scale_color_manual(name = group.col.arg, values =c("FALSE" = rhg_cols[7], "TRUE" = rhg_cols[3], "SELECTED" = 'green')) } if (is.null(band.arg)) p.tmp = p.tmp + stat_smooth( # method = function(formula, data, weights = weight) # rlm(formula, data, weights = weight, method = 'MM'), method = "lm", fullrange = FALSE, level = 0.95, colour = 'blue' ) else { p.tmp = p.tmp + geom_abline(slope = band.arg$a, intercept = band.arg$b) + geom_abline( slope = band.arg$a, intercept = band.arg$b + abs(band.arg$b)*band.arg$width, linetype = 'dashed' ) + geom_abline( slope = band.arg$a, intercept = band.arg$b - abs(band.arg$b)*band.arg$width, linetype = 'dashed' ) } if (!is.null(facet.arg)) { p.tmp = p.tmp + facet_wrap(as.formula(paste("~", facet.arg)), ncol = facet.ncol.arg) } if (!is.null(xlab.arg)) p.tmp = p.tmp + xlab(paste0(xlab.arg, "\n")) if (!is.null(ylab.arg)) p.tmp = p.tmp + ylab(paste0("\n", ylab.arg)) if (!is.null(plotlab.arg)) p.tmp = p.tmp + ggtitle(paste0(plotlab.arg, "\n")) p.tmp = p.tmp + ggplotTheme() + theme(legend.position = "none") # Marginal distributions don;t work with plotly... # if (is.null(facet.arg)) # ggExtra::ggMarginal(p.scat, type = "histogram", bins = 100) # else return(p.tmp) } myPlotHeatmap <- function(data.arg, dend.arg, palette.arg, palette.rev.arg = TRUE, dend.show.arg = TRUE, key.show.arg = TRUE, margin.x.arg = 5, margin.y.arg = 20, nacol.arg = 0.5, colCol.arg = NULL, labCol.arg = NULL, font.row.arg = 1, font.col.arg = 1, breaks.arg = NULL, title.arg = 'Clustering') { loc.n.colbreaks = 99 if (palette.rev.arg) my_palette <- rev(colorRampPalette(brewer.pal(9, palette.arg))(n = loc.n.colbreaks)) else my_palette <- colorRampPalette(brewer.pal(9, palette.arg))(n = loc.n.colbreaks) col_labels <- get_leaves_branches_col(dend.arg) col_labels <- col_labels[order(order.dendrogram(dend.arg))] if (dend.show.arg) { assign("var.tmp.1", dend.arg) var.tmp.2 = "row" } else { assign("var.tmp.1", FALSE) var.tmp.2 = "none" } loc.p = heatmap.2( data.arg, Colv = "NA", Rowv = var.tmp.1, srtCol = 90, dendrogram = var.tmp.2, trace = "none", key = key.show.arg, margins = c(margin.x.arg, margin.y.arg), col = my_palette, na.col = grey(nacol.arg), denscol = "black", density.info = "density", RowSideColors = col_labels, colRow = col_labels, colCol = colCol.arg, labCol = labCol.arg, # sepcolor = grey(input$inPlotHierGridColor), # colsep = 1:ncol(loc.dm), # rowsep = 1:nrow(loc.dm), cexRow = font.row.arg, cexCol = font.col.arg, main = title.arg, symbreaks = FALSE, symkey = FALSE, breaks = if (is.null(breaks.arg)) NULL else seq(breaks.arg[1], breaks.arg[2], length.out = loc.n.colbreaks+1) ) return(loc.p) }