auxfunc.R 39.5 KB
Newer Older
dmattek's avatar
dmattek committed
1 2 3 4
#
# Time Course Inspector: Shiny app for plotting time series data
# Author: Maciej Dobrzynski
#
dmattek's avatar
dmattek committed
5
# Auxilary functions & definitions of global constants
dmattek's avatar
dmattek committed
6 7 8
#


9 10 11 12 13
library(ggplot2)
library(RColorBrewer)
library(gplots) # for heatmap.2
library(grid) # for modifying grob
library(Hmisc) # for CI calculation
dmattek's avatar
dmattek committed
14

15 16

# Global parameters ----
17 18 19
# number of miliseconds to delay reactions to changes in the UI
# used to delay output from sliders
MILLIS = 1000
dmattek's avatar
dmattek committed
20

21 22 23
# Number of significant digits to display in table stats
SIGNIFDIGITSINTAB = 3

24 25 26
# if true, additional output printed to R console
DEB = T

27
# font sizes in pts for plots in the manuscript
Maciej Dobrzynski's avatar
Maciej Dobrzynski committed
28 29 30 31 32 33
# PLOTFONTBASE = 8
# PLOTFONTAXISTEXT = 8
# PLOTFONTAXISTITLE = 8
# PLOTFONTFACETSTRIP = 10
# PLOTFONTLEGEND = 8

dmattek's avatar
dmattek committed
34
# font sizes in pts for screen display
Maciej Dobrzynski's avatar
Maciej Dobrzynski committed
35 36 37 38 39 40 41 42 43 44 45 46 47
PLOTFONTBASE = 16
PLOTFONTAXISTEXT = 16
PLOTFONTAXISTITLE = 16
PLOTFONTFACETSTRIP = 20
PLOTFONTLEGEND = 16

# height (in pixels) of ribbon and single traj. plots
PLOTRIBBONHEIGHT = 500 # in pixels
PLOTTRAJHEIGHT = 500 # in pixels
PLOTPSDHEIGHT = 500 # in pixels
PLOTBOXHEIGHT = 500 # in pixels
PLOTSCATTERHEIGHT = 500 # in pixels
PLOTWIDTH = 85 # in percent
48 49

# default number of facets in plots
Maciej Dobrzynski's avatar
Maciej Dobrzynski committed
50
PLOTNFACETDEFAULT = 3
51

dmattek's avatar
dmattek committed
52
# internal column names
dmattek's avatar
dmattek committed
53
COLRT   = 'time'
dmattek's avatar
dmattek committed
54 55 56 57 58 59 60 61 62 63
COLY    = 'y'
COLID   = 'id'
COLIDUNI = 'trackObjectsLabelUni'
COLGR   = 'group'
COLIN   = 'mid.in'
COLOBJN = 'obj.num'
COLPOSX = 'pos.x'
COLPOSY = 'pos.y'
COLIDX = 'IDX'
COLIDXDIFF = 'IDXdiff'
dmattek's avatar
dmattek committed
64
COLCL = 'cl'
dmattek's avatar
dmattek committed
65 66 67 68 69 70

# file names
FCSVOUTLIERS = 'outliers.csv'
FCSVTCCLEAN  = 'tCoursesSelected_clean.csv'
FPDFTCMEAN   = "tCoursesMeans.pdf"
FPDFTCSINGLE = "tCourses.pdf"
71
FPDFTCPSD    = 'tCoursesPsd.pdf'
dmattek's avatar
dmattek committed
72 73 74 75
FPDFBOXAUC   = 'boxplotAUC.pdf'
FPDFBOXTP    = 'boxplotTP.pdf'
FPDFSCATTER  = 'scatter.pdf'

dmattek's avatar
dmattek committed
76
# Colour definitions ----
dmattek's avatar
dmattek committed
77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101
rhg_cols <- c(
  "#771C19",
  "#AA3929",
  "#E25033",
  "#F27314",
  "#F8A31B",
  "#E2C59F",
  "#B6C5CC",
  "#8E9CA3",
  "#556670",
  "#000000"
)

md_cols <- c(
  "#FFFFFF",
  "#F8A31B",
  "#F27314",
  "#E25033",
  "#AA3929",
  "#FFFFCC",
  "#C2E699",
  "#78C679",
  "#238443"
)

102
# list of palettes for the heatmap
dmattek's avatar
dmattek committed
103
l.col.pal = list(
dmattek's avatar
dmattek committed
104 105 106 107
  "Spectral" = 'Spectral',
  "Red-Yellow-Green" = 'RdYlGn',
  "Red-Yellow-Blue" = 'RdYlBu',
  "Greys" = "Greys",
dmattek's avatar
dmattek committed
108 109 110
  "Reds" = "Reds",
  "Oranges" = "Oranges",
  "Greens" = "Greens",
dmattek's avatar
dmattek committed
111
  "Blues" = "Blues"
dmattek's avatar
dmattek committed
112 113
)

114 115 116 117 118 119 120 121 122 123
# list of palettes for the dendrogram
l.col.pal.dend = list(
  "Rainbow" = 'rainbow_hcl',
  "Sequential" = 'sequential_hcl',
  "Heat" = 'heat_hcl',
  "Terrain" = 'terrain_hcl',
  "Diverge HCL" = 'diverge_hcl',
  "Diverge HSV" = 'diverge_hsv'
)

dmattek's avatar
dmattek committed
124 125 126 127 128 129 130 131 132 133 134
# list of palettes for the dendrogram
l.col.pal.dend.2 = list(
  "Colorblind 10" = 'Color Blind',
  "Tableau 10" = 'Tableau 10',
  "Tableau 20" = 'Tableau 20',
  "Classic 10" = "Classic 10",
  "Classic 20" = "Classic 20",
  "Traffic 9" = 'Traffic',
  "Seattle Grays 5" = 'Seattle Grays'
)

dmattek's avatar
dmattek committed
135
# Help text ----
dmattek's avatar
dmattek committed
136
helpText.server = c(
dmattek's avatar
dmattek committed
137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172
  alDataFormat =  paste0(
    "<p>Switch between long and wide formats of input data. ",
    "TCI accepts CSV or compressed CSV files (gz or bz2).</p>",
    "<p><b>Long format</b> - a row is a single data point and consecutive time series are arranged vertically. ",
    "Data file should contain at least 3 columns separated with a comma:</p>",
    "<li>Identifier of a time series</li>",
    "<li>Time points</li>",
    "<li>A time-varying variable</li>",
    "<br>",
    "<p><b>Wide format</b> - a row is a time series with columns as time points.",
    "At least 3 columns shuold be present:</p>",
    "<li>First two columns in wide format should contain grouping and track IDs</li>",
    "<li>A column with a time point. Headers of columns with time points need to be numeric</li>"
  ),
  inDataGen1 =   paste0(
    "Generate 3 groups with 20 random synthetic time series. ",
    "Every time series contains 101 time points. ",
    "Track IDs are unique across entire dataset."
  ),
  chBtrajRem =   paste0(
    "Load CSV file with a column of track IDs for removal. ",
    "IDs should correspond to those used for plotting."
  ),
  chBstim =      paste0(
    "Load CSV file with stimulation pattern. Should contain 5 columns: ",
    "grouping, start and end time points of stimulation, start and end of y-position, dummy column with ID."
  ),
  chBtrajInter = paste0(
    "Interpolate missing measurements indicated with NAs in the data file. ",
    "In addition, interpolate a row that is completely missing from the data. ",
    "The interval of the time column must be provided to know which rows are missing."
  ),
  chBtrackUni =  paste0(
    "If the track ID in the uploaded dataset is unique only within a group (e.g. an experimental condition), ",
    "make it unique by prepending other columns to the track ID (typically a grouping column)."
  ),
dmattek's avatar
dmattek committed
173 174 175 176 177 178 179 180 181
  chBgroup    = "Select columns to group data according to treatment, condition, etc.",
  inSelMath   = "Select math operation to perform on a single or two measurement columns,",
  chBtimeTrim = "Trim time for further processing.",
  chBnorm     = "Divide measurements by the mean/median or calculate z-score with respect to selected time span.",
  rBnormMeth  = "Fold-change or z-score with respect to selected time span.",
  slNormRtMinMax = "Normalise with respect to this time span.",
  chBnormRobust  = "Calculate fold-change and z-score using the median and Median Absolute Deviation, instead of the mean and standard deviation.",
  chBnormGroup   = "Normalise to mean/median of selected time calculated globally, per group, or for individual time series.",
  downloadDataClean = "Download all time series after modifications in this panel.",
182 183
  alertNAsPresent              = "NAs present in the measurement column. Consider interpolation.",
  alertNAsPresentLong2WideConv = "Missing rows. Consider interpolation.",
dmattek's avatar
dmattek committed
184
  alertWideMissesNumericTime = "Non-numeric headers of time columns. Data in wide format should have numeric column headers corresponding to time points.",
dmattek's avatar
dmattek committed
185
  alertWideTooFewColumns     = "Insufficient columns. Data in wide format should contain at least 3 columns: grouping, track ID, and a single time point."
186 187
)

dmattek's avatar
dmattek committed
188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212
# Functions for data processing ----
#' Calculate the mean and CI around time series
#'
#' @param in.dt Data table in long format
#' @param in.col.meas Name of the column with the measurement
#' @param in.col.by Column names for grouping (default NULL - no grouping). Typically, you want to use at least a column with time.
#' @param in.type Choice of normal approximation or boot-strapping
#' @param ... Other params passed to smean.cl.normal and smean.cl.boot; these include \code{conf.int} for the confidence level, \code{B} for the number of boot-strapping iterations.
#'
#' @return Datatable with columns: Mean, lower and upper CI, and grouping columns if provided.
#' @export
#' @import data.table
#' @import Hmisc
#'
#' @examples
#'
#'
#' # generate synthetic time series; 100 time points long, with 10 randomly placed NAs
#' dt.tmp = genTraj(100, 10, 6, 3, in.addna = 10)
#'
#' # calculate single stats from all time points
#' calcTrajCI(dt.tmp, 'objCyto_Intensity_MeanIntensity_imErkCor')
#'
#' # calculate the mean and CI along the time course
#' calcTrajCI(dt.tmp, 'objCyto_Intensity_MeanIntensity_imErkCor', 'Metadata_RealTime')
dmattek's avatar
dmattek committed
213 214 215 216 217
LOCcalcTrajCI = function(in.dt,
                         in.col.meas,
                         in.col.by = NULL,
                         in.type = c('normal', 'boot'),
                         ...) {
dmattek's avatar
dmattek committed
218 219 220
  in.type = match.arg(in.type)
  
  if (in.type %like% 'normal')
dmattek's avatar
dmattek committed
221 222 223 224 225
    loc.dt = in.dt[, as.list(smean.cl.normal(get(in.col.meas), ...)), by = in.col.by]
  else
    loc.dt = in.dt[, as.list(smean.cl.boot(get(in.col.meas), ...)), by = in.col.by]
  
  return(loc.dt)
dmattek's avatar
dmattek committed
226 227
}

228

229 230 231 232 233 234 235 236 237
#' Calculate standard error of the mean
#'
#' @param x Vector
#' @param na.rm Remove NAs; default = FALSE
#'
#' @return A scalar with the result
#' @export
#'
#' @examples
dmattek's avatar
dmattek committed
238 239
LOCstderr = function(x, na.rm = FALSE) {
  if (na.rm)
240 241
    x = na.omit(x)
  
dmattek's avatar
dmattek committed
242
  return(sqrt(var(x) / length(x)))
243 244
}

245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260
#' Calculate the power spectrum density for time-series
#'
#' @param in.dt Data table in long format
#' @param in.col.meas Name of the column with the measurement
#' @param in.col.id Name of the column with the unique series identifier
#' @param in.col.by Column names for grouping (default NULL - no grouping). PSD of individual trajectories will be averaged within a group.
#' @param in.method Name of the method for PSD estimation, must be one of c("pgram", "ar"). Default to "pgram*.
#' @param in.return.period Wheter to return densities though periods (1/frequencies) instead of frequencies.
#' @param ... Other paramters to pass to stats::spectrum()
#'
#' @return Datatable with columns: (frequency or period), spec (the density) and grouping column
#' @export
#' @import data.table
#'
#' @examples
LOCcalcPSD <- function(in.dt,
dmattek's avatar
dmattek committed
261 262 263 264 265 266 267
                       in.col.meas,
                       in.col.id,
                       in.col.by,
                       in.method = "pgram",
                       in.return.period = TRUE,
                       in.time.btwPoints = 1,
                       ...) {
268
  require(data.table)
269
  # Method "ar" returns $spec as matrix whereas "pgram" returns a vector, custom function to homogenze output format
dmattek's avatar
dmattek committed
270 271
  mySpectrum <- function(x, ...) {
    args_spec <- list(x = x, plot = FALSE)
272 273 274 275 276 277
    inargs <- list(...)
    args_spec[names(inargs)] <- inargs
    out <- do.call(spectrum, args_spec)
    out$spec <- as.vector(out$spec)
    return(out)
  }
dmattek's avatar
dmattek committed
278
  if (!in.method %in% c("pgram", "ar")) {
279 280
    stop('Method should be one of: c("pgram", "ar"')
  }
dmattek's avatar
dmattek committed
281 282
  dt_spec <-
    in.dt[, (mySpectrum(get(in.col.meas), plot = FALSE, method = in.method)[c("freq", "spec")]), by = in.col.id]
283 284 285
  dt_group <- in.dt[, .SD[1, get(in.col.by)], by = in.col.id]
  setnames(dt_group, "V1", in.col.by)
  dt_spec <- merge(dt_spec, dt_group, by = in.col.id)
dmattek's avatar
dmattek committed
286 287 288 289
  dt_agg <-
    dt_spec[, .(spec = mean(spec)), by = c(in.col.by, "freq")]
  if (in.return.period) {
    dt_agg[, period := 1 / freq]
290 291 292
    dt_agg[, freq := NULL]
    # Adjust period unit to go from frame unit  to time unit
    dt_agg[, period := period * in.time.btwPoints]
293
  } else {
dmattek's avatar
dmattek committed
294
    dt_agg[, freq := freq * (1 / in.time.btwPoints)]
295
    setnames(dt_agg, "freq", "frequency")
296 297 298 299 300
  }
  return(dt_agg)
}


301
#' Generate synthetic CellProfiler output with single-cell time series
dmattek's avatar
dmattek committed
302 303 304 305 306 307 308 309 310 311 312 313
#'
#' @param in.ntpts Number of time points (default 60)
#' @param in.ntracks Number of tracks per FOV (default 10)
#' @param in.nfov Number of FOV (default 6)
#' @param in.nwells Number of wells (default 1)
#' @param in.addna Number of NAs to add randomly in the data (default NULL)
#'
#' @return Data table with the follwoing columns: Metadata_Site, Metadata_Well, Metadata_RealTime, objCyto_Intensity_MeanIntensity_imErkCor (normal distributed),
#' objNuc_Intensity_MeanIntensity_imErkCor (normal distributed), objNuc_Location_X and objNuc_Location_Y (uniform ditributed), TrackLabel
#' @export
#' @import data.table

dmattek's avatar
dmattek committed
314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367
LOCgenTraj <-
  function(in.ntpts = 60,
           in.ntracks = 10,
           in.nfov = 6,
           in.nwells = 1,
           in.addna = NULL,
           in.addout = NULL) {
    x.rand.1 = c(
      rnorm(in.ntpts * in.ntracks * in.nfov * 1 / 3, 0.5, 0.1),
      rnorm(in.ntpts * in.ntracks * in.nfov * 1 / 3,   1, 0.2),
      rnorm(in.ntpts * in.ntracks * in.nfov * 1 / 3,  2, 0.5)
    )
    x.rand.2 = c(
      rnorm(in.ntpts * in.ntracks * in.nfov * 1 / 3, 0.25, 0.1),
      rnorm(in.ntpts * in.ntracks * in.nfov * 1 / 3, 0.5, 0.2),
      rnorm(in.ntpts * in.ntracks * in.nfov * 1 / 3, 1, 0.2)
    )
    
    # add NA's for testing
    if (!is.null(in.addna)) {
      locTabLen = length(x.rand.1)
      x.rand.1[round(runif(in.addna) * locTabLen)] = NA
      x.rand.2[round(runif(in.addna) * locTabLen)] = NA
    }
    
    # add outliers for testing
    if (!is.null(in.addout)) {
      locTabLen = length(x.rand.1)
      x.rand.1[round(runif(in.addout) * locTabLen)] = 5
      x.rand.2[round(runif(in.addout) * locTabLen)] = 5
    }
    
    x.arg = rep(seq(1, in.ntpts), in.ntracks * in.nfov)
    
    dt.nuc = data.table(
      well = rep(LETTERS[1:in.nwells], each = in.ntpts * in.nfov * in.ntracks / in.nwells),
      group = rep(1:in.nfov, each = in.ntpts * in.ntracks),
      time = x.arg,
      y1 = x.rand.1,
      y2  = x.rand.2,
      posx = runif(
        in.ntpts * in.ntracks * in.nfov,
        min = 0,
        max = 1
      ),
      posy = runif(
        in.ntpts * in.ntracks * in.nfov,
        min = 0,
        max = 1
      ),
      id = rep(1:(in.ntracks * in.nfov), each = in.ntpts)
    )
    
    return(dt.nuc)
dmattek's avatar
dmattek committed
368
  }
dmattek's avatar
dmattek committed
369

dmattek's avatar
dmattek committed
370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402
LOCgenTraj2 <-
  function(n_perGroup = 20,
           sd_noise = 0.01,
           sampleFreq = 0.2,
           endTime = 50)
  {
    # Function definition ----------------------------------
    sim_expodecay_lagged_stim <-
      function (n,
                noise,
                interval.stim = 5,
                lambda = 0.2,
                freq = 0.2,
                end = 40)
      {
        require(data.table)
        tvec <- seq(0, end, by = freq)
        stim_time <- seq(interval.stim, end, interval.stim)
        stim_time_matrix <-
          matrix(stim_time, nrow = length(stim_time),
                 ncol = n)
        noise_matrix <- abs(replicate(n, rnorm(
          n = length(stim_time),
          mean = 0,
          sd = noise
        )))
        stim_time_matrix <- stim_time_matrix + noise_matrix
        trajs <- matrix(0, nrow = length(tvec), ncol = n)
        for (col in 1:ncol(stim_time_matrix)) {
          for (row in 1:nrow(stim_time_matrix)) {
            index <- which(tvec >= stim_time_matrix[row, col])[1]
            trajs[index, col] <- 1
          }
403
        }
dmattek's avatar
dmattek committed
404 405 406 407 408 409
        decrease_factor <- exp(-lambda * freq)
        for (col in 1:ncol(trajs)) {
          for (row in 2:nrow(trajs)) {
            if (trajs[row, col] != 1) {
              trajs[row, col] <- trajs[row - 1, col] * decrease_factor
            }
410 411
          }
        }
dmattek's avatar
dmattek committed
412 413 414 415 416
        trajs <- as.data.table(trajs)
        trajs <- cbind(seq(0, end, by = freq), trajs)
        colnames(trajs)[1] <- "Time"
        trajs <- melt(trajs, id.vars = "Time")
        return(trajs)
417
      }
dmattek's avatar
dmattek committed
418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467
    
    
    # Dataset creation -----------------------------------------------
    dt1 <-
      sim_expodecay_lagged_stim(
        n = n_perGroup,
        noise = 0.75,
        interval.stim = 10,
        lambda = 0.4,
        freq = sampleFreq,
        end = endTime
      )
    dt2 <-
      sim_expodecay_lagged_stim(
        n = n_perGroup,
        noise = 0.75,
        interval.stim = 10,
        lambda = 0.1,
        freq = sampleFreq,
        end = endTime
      )
    dt3 <-
      sim_expodecay_lagged_stim(
        n = n_perGroup,
        noise = 0.75,
        interval.stim = 10,
        lambda = 0.4,
        freq = sampleFreq,
        end = endTime
      )
    dt3[, value := value / 3]
    
    dt1[, Group := "fastDecay"]
    dt2[, Group := "slowDecay"]
    dt3[, Group := "lowAmplitude"]
    
    dt <- rbindlist(list(dt1, dt2, dt3))
    dt[, ID := sprintf("%s_%02d", Group, as.integer(gsub('[A-Z]', '', variable)))]
    dt[, variable := NULL]
    dt[, Group := as.factor(Group)]
    
    dt[, value := value + runif(1, -0.1, 0.1), by = .(Group, ID)]
    noise_vec <- rnorm(n = nrow(dt), mean = 0, sd = sd_noise)
    dt[, value := value + noise_vec]
    
    setnames(dt, "value", "Meas")
    setcolorder(dt, c("Group", "ID", "Time", "Meas"))
    
    return(dt)
  }
468

dmattek's avatar
dmattek committed
469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490
#' Normalize Trajectory
#'
#' Returns original dt with an additional column with normalized quantity.
#' The column to be normalised is given by 'in.meas.col'.
#' The name of additional column is the same as in.meas.col but with ".norm" suffix added.
#' Normalisation is based on part of the trajectory;
#' this is defined by in.rt.min and max, and the column with time in.rt.col.#'
#'
#' @param in.dt Data table in long format
#' @param in.meas.col String with the column name to normalize
#' @param in.rt.col String with the colum name holding time
#' @param in.rt.min Lower bound for time period used for normalization
#' @param in.rt.max Upper bound for time period used for normalization
#' @param in.by.cols String vector with 'by' columns to calculate normalization per group; if NULL, no grouping is done
#' @param in.robust Whether robust measures should be used (median instead of mean, mad instead of sd); default TRUE
#' @param in.type Type of normalization: z.score or mean (i.e. fold change w.r.t. mean); default 'z-score'
#'
#' @return Returns original dt with an additional column with normalized quantity.
#' @export
#' @import data.table

LOCnormTraj = function(in.dt,
dmattek's avatar
dmattek committed
491 492 493 494 495 496 497
                       in.meas.col,
                       in.rt.col = COLRT,
                       in.rt.min = 10,
                       in.rt.max = 20,
                       in.by.cols = NULL,
                       in.robust = TRUE,
                       in.type = 'z.score') {
dmattek's avatar
dmattek committed
498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538
  loc.dt <-
    copy(in.dt) # copy so as not to alter original dt object w intermediate assignments
  
  if (is.null(in.by.cols)) {
    if (in.robust)
      loc.dt.pre.aggr = loc.dt[get(in.rt.col) >= in.rt.min &
                                 get(in.rt.col) <= in.rt.max, .(meas.md = median(get(in.meas.col), na.rm = TRUE),
                                                                meas.mad = mad(get(in.meas.col), na.rm = TRUE))]
    else
      loc.dt.pre.aggr = loc.dt[get(in.rt.col) >= in.rt.min &
                                 get(in.rt.col) <= in.rt.max, .(meas.md = mean(get(in.meas.col), na.rm = TRUE),
                                                                meas.mad = sd(get(in.meas.col), na.rm = TRUE))]
    
    loc.dt = cbind(loc.dt, loc.dt.pre.aggr)
  }  else {
    if (in.robust)
      loc.dt.pre.aggr = loc.dt[get(in.rt.col) >= in.rt.min &
                                 get(in.rt.col) <= in.rt.max, .(meas.md = median(get(in.meas.col), na.rm = TRUE),
                                                                meas.mad = mad(get(in.meas.col), na.rm = TRUE)), by = in.by.cols]
    else
      loc.dt.pre.aggr = loc.dt[get(in.rt.col) >= in.rt.min &
                                 get(in.rt.col) <= in.rt.max, .(meas.md = mean(get(in.meas.col), na.rm = TRUE),
                                                                meas.mad = sd(get(in.meas.col), na.rm = TRUE)), by = in.by.cols]
    
    loc.dt = merge(loc.dt, loc.dt.pre.aggr, by = in.by.cols)
  }
  
  
  if (in.type == 'z.score') {
    loc.dt[, meas.norm := (get(in.meas.col) - meas.md) / meas.mad]
  } else {
    loc.dt[, meas.norm := (get(in.meas.col) / meas.md)]
  }
  
  setnames(loc.dt, 'meas.norm', paste0(in.meas.col, '.norm'))
  
  loc.dt[, c('meas.md', 'meas.mad') := NULL]
  return(loc.dt)
}


539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642
# Cluster validation ----

#Customize factoextra functions to accept dissimilarity matrix from start. Otherwise can't use distance functions that are not in base R, like DTW.
# Inherit and adapt hcut function to take input from UI, used for fviz_clust

LOChcut <-
function(x,
         k = 2,
         isdiss = inherits(x, "dist"),
         hc_func = "hclust",
         hc_method = "average",
         hc_metric = "euclidean") {

    if (!inherits(x, "dist")) {
    stop("x must be a distance matrix")
  }
  return(
    factoextra::hcut(
      x = x,
      k = k,
      isdiss = TRUE,
      hc_func = hc_func,
      hc_method = hc_method,
      hc_metric = hc_metric
    )
  )
}

# Modified from factoextra::fviz_nbclust
# Allow (actually enforce) x to be a distance matrix; no GAP statistics for compatibility

LOCnbclust <-
  function (x,
            FUNcluster = LOChcut,
            method = c("silhouette", "wss"),
            k.max = 10,
            verbose = FALSE,
            barfill = "steelblue",
            barcolor = "steelblue",
            linecolor = "steelblue",
            print.summary = TRUE,
            ...)
  {
    set.seed(123)
    
    if (k.max < 2)
      stop("k.max must bet > = 2")
    
    method = match.arg(method)
    
    if (!inherits(x, c("dist")))
      stop("x should be an object of class dist")
    
    else if (is.null(FUNcluster))
      stop(
        "The argument FUNcluster is required. ",
        "Possible values are kmeans, pam, hcut, clara, ..."
      )
    
    else if (method %in% c("silhouette", "wss")) {
      diss <- x  # x IS ENFORCED TO BE A DISSIMILARITY MATRIX
      
      v <- rep(0, k.max)
      
      if (method == "silhouette") {
        loc.mainlab = "Optimal number of clusters from silhouette analysis"
        loc.ylab <- "Average silhouette width"
        for (i in 2:k.max) {
          clust <- FUNcluster(x, i, ...)
          v[i] <-
            factoextra:::.get_ave_sil_width(diss, clust$cluster)
        }
      }
      else if (method == "wss") {
        loc.mainlab = "Optimal number of clusters from within cluster sum of squares"
        
        loc.ylab <- "Total within cluster sum of squares"
        
        for (i in 1:k.max) {
          clust <- FUNcluster(x, i, ...)
          v[i] <- factoextra:::.get_withinSS(diss, clust$cluster)
        }
      }
      
      df <- data.frame(clusters = as.factor(1:k.max), y = v)
      
      p <- ggpubr::ggline(
        df,
        x = "clusters",
        y = "y",
        group = 1,
        color = linecolor,
        ylab = loc.ylab,
        xlab = "Number of clusters",
        main = loc.mainlab
      )
      
      if (method == "silhouette")
        p <- p + geom_vline(xintercept = which.max(v),
                            linetype = 2,
                            color = linecolor)
      return(p)
    }
  }
dmattek's avatar
dmattek committed
643

644
# Clustering ----
dmattek's avatar
dmattek committed
645 646 647 648 649 650 651 652 653

# Return a dt with cell IDs and corresponding cluster assignments depending on dendrogram cut (in.k)
# This one works wth dist & hclust pair
# For sparse hierarchical clustering use getDataClSpar
# Arguments:
# in.dend  - dendrogram; usually output from as.dendrogram(hclust(distance_matrix))
# in.k - level at which dendrogram should be cut

getDataCl = function(in.dend, in.k) {
dmattek's avatar
dmattek committed
654 655
  cat(file = stderr(), 'getDataCl \n')
  
dmattek's avatar
dmattek committed
656
  loc.clAssign = dendextend::cutree(in.dend, in.k, order_clusters_as_data = TRUE, )
dmattek's avatar
dmattek committed
657 658 659 660
  #print(loc.m)
  
  # The result of cutree containes named vector with names being cell id's
  # THIS WON'T WORK with sparse hierarchical clustering because there, the dendrogram doesn't have original id's
dmattek's avatar
dmattek committed
661 662 663
  loc.dt.clAssign = as.data.table(loc.clAssign, keep.rownames = T)
  setnames(loc.dt.clAssign, c(COLID, COLCL))
  
dmattek's avatar
dmattek committed
664
  
665 666
  #cat('===============\ndataCl:\n')
  #print(loc.dt.cl)
dmattek's avatar
dmattek committed
667
  return(loc.dt.clAssign)
dmattek's avatar
dmattek committed
668 669
}

dmattek's avatar
dmattek committed
670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688

# Return a dt with cell IDs and corresponding cluster assignments depending on dendrogram cut (in.k)
# This one works with sparse hierarchical clustering!
# Arguments:
# in.dend  - dendrogram; usually output from as.dendrogram(hclust(distance_matrix))
# in.k - level at which dendrogram should be cut
# in.id - vector of cell id's

getDataClSpar = function(in.dend, in.k, in.id) {
  cat(file = stderr(), 'getDataClSpar \n')
  
  loc.m = dendextend::cutree(in.dend, in.k, order_clusters_as_data = TRUE)
  #print(loc.m)
  
  # The result of cutree containes named vector with names being cell id's
  # THIS WON'T WORK with sparse hierarchical clustering because there, the dendrogram doesn't have original id's
  loc.dt.cl = data.table(id = in.id,
                         cl = loc.m)
  
689 690
  #cat('===============\ndataCl:\n')
  #print(loc.dt.cl)
dmattek's avatar
dmattek committed
691 692 693 694 695
  return(loc.dt.cl)
}



dmattek's avatar
dmattek committed
696 697 698 699 700 701 702
# prepares a table with cluster numbers in 1st column and colour assignments in 2nd column
# the number of rows is determined by dendrogram cut
getClCol <- function(in.dend, in.k) {
  loc.col_labels <- get_leaves_branches_col(in.dend)
  loc.col_labels <- loc.col_labels[order(order.dendrogram(in.dend))]
  
  return(unique(
dmattek's avatar
dmattek committed
703 704 705 706 707
    data.table(
      cl.no = dendextend::cutree(in.dend, k = in.k, order_clusters_as_data = TRUE),
      cl.col = loc.col_labels
    )
  ))
dmattek's avatar
dmattek committed
708 709
}

dmattek's avatar
dmattek committed
710
# Custom plotting functions ----
dmattek's avatar
dmattek committed
711

dmattek's avatar
dmattek committed
712 713 714 715 716 717 718 719 720 721 722 723 724 725 726

#' Custom ggPlot theme based on theme_bw
#'
#' @param in.font.base
#' @param in.font.axis.text
#' @param in.font.axis.title
#' @param in.font.strip
#' @param in.font.legend
#'
#' @return
#' @export
#'
#' @examples
#'
LOCggplotTheme = function(in.font.base = 12,
dmattek's avatar
dmattek committed
727 728 729 730
                          in.font.axis.text = 12,
                          in.font.axis.title = 12,
                          in.font.strip = 14,
                          in.font.legend = 12) {
dmattek's avatar
dmattek committed
731 732 733 734 735 736 737 738 739 740 741 742 743 744 745
  loc.theme =
    theme_bw(base_size = in.font.base, base_family = "Helvetica") +
    theme(
      panel.spacing = unit(1, "lines"),
      panel.grid.minor = element_blank(),
      panel.grid.major = element_blank(),
      panel.border = element_blank(),
      axis.line = element_line(color = "black", size = 0.25),
      axis.text = element_text(size = in.font.axis.text),
      axis.title = element_text(size = in.font.axis.title),
      strip.text = element_text(size = in.font.strip, face = "bold"),
      strip.background = element_blank(),
      legend.key = element_blank(),
      legend.text = element_text(size = in.font.legend),
      legend.key.height = unit(1, "lines"),
dmattek's avatar
dmattek committed
746 747
      legend.key.width = unit(2, "lines")
    )
dmattek's avatar
dmattek committed
748 749 750 751
  
  return(loc.theme)
}

dmattek's avatar
dmattek committed
752 753
# Build Function to Return Element Text Object
# From: https://stackoverflow.com/a/36979201/1898713
dmattek's avatar
dmattek committed
754 755 756 757 758
LOCrotatedAxisElementText = function(angle,
                                     position = 'x',
                                     size = 12) {
  angle     = angle[1]
  
dmattek's avatar
dmattek committed
759
  position  = position[1]
dmattek's avatar
dmattek committed
760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779
  positions = list(
    x = 0,
    y = 90,
    top = 180,
    right = 270
  )
  if (!position %in% names(positions))
    stop(sprintf("'position' must be one of [%s]", paste(names(positions), collapse =
                                                           ", ")), call. = FALSE)
  if (!is.numeric(angle))
    stop("'angle' must be numeric", call. = FALSE)
  rads = (-angle - positions[[position]]) * pi / 180
  hjust = round((1 - sin(rads))) / 2
  vjust = round((1 + cos(rads))) / 2
  element_text(
    size = size,
    angle = angle,
    vjust = vjust,
    hjust = hjust
  )
dmattek's avatar
dmattek committed
780 781
}

782
# Plot individual time series
dmattek's avatar
dmattek committed
783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823
LOCplotTraj = function(dt.arg,
                       # input data table
                       x.arg,
                       # string with column name for x-axis
                       y.arg,
                       # string with column name for y-axis
                       group.arg,
                       # string with column name for grouping time series (typicaly cell ID)
                       facet.arg,
                       # string with column name for facetting
                       facet.ncol.arg = 2,
                       # default number of facet columns
                       facet.color.arg = NULL,
                       # vector with list of colours for adding colours to facet names (currently a horizontal line on top of the facet is drawn)
                       line.col.arg = NULL,
                       # string with column name for colouring time series (typically when individual time series are selected in UI)
                       xlab.arg = NULL,
                       # string with x-axis label
                       ylab.arg = NULL,
                       # string with y-axis label
                       plotlab.arg = NULL,
                       # string with plot label
                       dt.stim.arg = NULL,
                       # plotting additional dataset; typically to indicate stimulations (not fully implemented yet, not tested!)
                       x.stim.arg = c('tstart', 'tend'),
                       # column names in stimulation dt with x and xend parameters
                       y.stim.arg = c('ystart', 'yend'),
                       # column names in stimulation dt with y and yend parameters
                       tfreq.arg = 1,
                       # unused
                       xlim.arg = NULL,
                       # limits of x-axis; for visualisation only, not trimmimng data
                       ylim.arg = NULL,
                       # limits of y-axis; for visualisation only, not trimmimng data
                       stim.bar.width.arg = 0.5,
                       # width of the stimulation line; plotted under time series
                       aux.label1 = NULL,
                       # 1st point label; used for interactive plotting; displayed in the tooltip; typically used to display values of column holding x & y coordinates
                       aux.label2 = NULL,
                       aux.label3 = NULL,
                       stat.arg = c('', 'mean', 'CI', 'SE')) {
dmattek's avatar
dmattek committed
824 825
  # match arguments for stat plotting
  loc.stat = match.arg(stat.arg, several.ok = TRUE)
dmattek's avatar
dmattek committed
826
  
dmattek's avatar
dmattek committed
827 828
  
  # aux.label12 are required for plotting XY positions in the tooltip of the interactive (plotly) graph
dmattek's avatar
dmattek committed
829
  p.tmp = ggplot(dt.arg,
dmattek's avatar
dmattek committed
830 831 832 833 834 835
                 aes_string(
                   x = x.arg,
                   y = y.arg,
                   group = group.arg,
                   label = group.arg
                 ))
836 837 838 839
  #,
  #                          label  = aux.label1,
  #                          label2 = aux.label2,
  #                          label3 = aux.label3))
dmattek's avatar
dmattek committed
840
  
dmattek's avatar
dmattek committed
841 842
  if (is.null(line.col.arg)) {
    p.tmp = p.tmp +
dmattek's avatar
dmattek committed
843 844
      geom_line(alpha = 0.25,
                size = 0.25)
dmattek's avatar
dmattek committed
845 846
  }
  else {
dmattek's avatar
dmattek committed
847 848 849 850 851 852 853 854 855 856 857 858 859
    p.tmp = p.tmp +
      geom_line(aes_string(colour = line.col.arg),
                alpha = 0.5,
                size = 0.5) +
      scale_color_manual(
        name = '',
        values = c(
          "FALSE" = rhg_cols[7],
          "TRUE" = rhg_cols[3],
          "SELECTED" = 'green',
          "NOT SEL" = rhg_cols[7]
        )
      )
dmattek's avatar
dmattek committed
860
  }
dmattek's avatar
dmattek committed
861
  
dmattek's avatar
Mod:  
dmattek committed
862 863 864 865 866 867 868 869
  # this is temporary solution for adding colour according to cluster number
  # use only when plotting traj from clustering!
  # a horizontal line is added at the top of data
  if (!is.null(facet.color.arg)) {
    loc.y.max = max(dt.arg[, c(y.arg), with = FALSE])
    loc.dt.cl = data.table(xx = 1:length(facet.color.arg), yy = loc.y.max)
    setnames(loc.dt.cl, 'xx', facet.arg)
    
dmattek's avatar
dmattek committed
870 871
    # adjust facet.color.arg to plot
    
dmattek's avatar
Mod:  
dmattek committed
872
    p.tmp = p.tmp +
dmattek's avatar
dmattek committed
873 874 875 876 877 878
      geom_hline(
        data = loc.dt.cl,
        colour = facet.color.arg,
        yintercept = loc.y.max,
        size = 4
      ) +
dmattek's avatar
Mod:  
dmattek committed
879 880 881
      scale_colour_manual(values = facet.color.arg,
                          name = '')
  }
dmattek's avatar
dmattek committed
882
  
dmattek's avatar
dmattek committed
883
  if ('mean' %in% loc.stat)
dmattek's avatar
dmattek committed
884
    p.tmp = p.tmp +
dmattek's avatar
dmattek committed
885 886
    stat_summary(
      aes_string(y = y.arg, group = 1),
dmattek's avatar
dmattek committed
887
      fun.y = mean,
dmattek's avatar
dmattek committed
888
      na.rm = T,
dmattek's avatar
dmattek committed
889
      colour = 'red',
dmattek's avatar
dmattek committed
890 891 892 893
      linetype = 'solid',
      size = 1,
      geom = "line",
      group = 1
dmattek's avatar
dmattek committed
894
    )
dmattek's avatar
dmattek committed
895
  
dmattek's avatar
dmattek committed
896
  if ('CI' %in% loc.stat)
dmattek's avatar
dmattek committed
897
    p.tmp = p.tmp +
dmattek's avatar
dmattek committed
898 899 900
    stat_summary(
      aes_string(y = y.arg, group = 1),
      fun.data = mean_cl_normal,
dmattek's avatar
dmattek committed
901
      na.rm = T,
dmattek's avatar
dmattek committed
902
      colour = 'red',
dmattek's avatar
Mod:  
dmattek committed
903
      alpha = 0.25,
dmattek's avatar
dmattek committed
904 905 906 907 908
      geom = "ribbon",
      group = 1
    )
  
  if ('SE' %in% loc.stat)
dmattek's avatar
dmattek committed
909
    p.tmp = p.tmp +
dmattek's avatar
dmattek committed
910 911 912
    stat_summary(
      aes_string(y = y.arg, group = 1),
      fun.data = mean_se,
dmattek's avatar
dmattek committed
913
      na.rm = T,
dmattek's avatar
dmattek committed
914
      colour = 'red',
dmattek's avatar
Mod:  
dmattek committed
915
      alpha = 0.25,
dmattek's avatar
dmattek committed
916 917 918 919 920 921
      geom = "ribbon",
      group = 1
    )
  
  
  
dmattek's avatar
dmattek committed
922
  p.tmp = p.tmp +
dmattek's avatar
dmattek committed
923 924 925
    facet_wrap(as.formula(paste("~", facet.arg)),
               ncol = facet.ncol.arg,
               scales = "free_x")
dmattek's avatar
dmattek committed
926
  
927 928 929
  # plot stimulation bars underneath time series
  # dt.stim.arg is read separately and should contain 4 columns with
  # xy positions of beginning and end of the bar
dmattek's avatar
dmattek committed
930 931 932 933 934 935 936 937 938 939 940 941 942
  if (!is.null(dt.stim.arg)) {
    p.tmp = p.tmp + geom_segment(
      data = dt.stim.arg,
      aes_string(
        x = x.stim.arg[1],
        xend = x.stim.arg[2],
        y = y.stim.arg[1],
        yend = y.stim.arg[2],
        group = 'group'
      ),
      colour = rhg_cols[[3]],
      size = stim.bar.width.arg
    )
dmattek's avatar
dmattek committed
943 944
  }
  
dmattek's avatar
dmattek committed
945
  p.tmp = p.tmp + coord_cartesian(xlim = xlim.arg, ylim = ylim.arg)
946
  
dmattek's avatar
dmattek committed
947
  p.tmp = p.tmp +
dmattek's avatar
dmattek committed
948 949 950
    xlab(paste0(xlab.arg, "\n")) +
    ylab(paste0("\n", ylab.arg)) +
    ggtitle(plotlab.arg) +
dmattek's avatar
dmattek committed
951 952 953 954 955 956 957
    LOCggplotTheme(
      in.font.base = PLOTFONTBASE,
      in.font.axis.text = PLOTFONTAXISTEXT,
      in.font.axis.title = PLOTFONTAXISTITLE,
      in.font.strip = PLOTFONTFACETSTRIP,
      in.font.legend = PLOTFONTLEGEND
    ) +
958
    theme(legend.position = "top")
dmattek's avatar
dmattek committed
959
  
dmattek's avatar
Mod:  
dmattek committed
960
  return(p.tmp)
dmattek's avatar
dmattek committed
961 962
}

963
# Plot average time series with CI together in one facet
dmattek's avatar
dmattek committed
964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991
LOCplotTrajRibbon = function(dt.arg,
                             # input data table
                             x.arg,
                             # string with column name for x-axis
                             y.arg,
                             # string with column name for y-axis
                             group.arg = NULL,
                             # string with column name for grouping time series (here, it's a column corresponding to grouping by condition)
                             col.arg = NULL,
                             # colour pallette for individual time series
                             dt.stim.arg = NULL,
                             # data table with stimulation pattern
                             x.stim.arg = c('tstart', 'tend'),
                             # column names in stimulation dt with x and xend parameters
                             y.stim.arg = c('ystart', 'yend'),
                             # column names in stimulation dt with y and yend parameters
                             stim.bar.width.arg = 0.5,
                             xlim.arg = NULL,
                             # limits of x-axis; for visualisation only, not trimmimng data
                             ylim.arg = NULL,
                             # limits of y-axis; for visualisation only, not trimmimng data
                             ribbon.lohi.arg = c('Lower', 'Upper'),
                             # column names containing lower and upper bound for plotting the ribbon, e.g. for CI; set to NULL to avoid plotting the ribbon
                             ribbon.fill.arg = 'grey50',
                             ribbon.alpha.arg = 0.5,
                             xlab.arg = NULL,
                             ylab.arg = NULL,
                             plotlab.arg = NULL) {
992 993 994
  p.tmp = ggplot(dt.arg, aes_string(x = x.arg, group = group.arg))
  
  if (!is.null(ribbon.lohi.arg))
dmattek's avatar
dmattek committed
995 996 997 998 999 1000
    p.tmp = p.tmp +
      geom_ribbon(
        aes_string(ymin = ribbon.lohi.arg[1], ymax = ribbon.lohi.arg[2]),
        fill = ribbon.fill.arg,
        alpha = ribbon.alpha.arg
      )
1001 1002
  
  p.tmp = p.tmp + geom_line(aes_string(y = y.arg, colour = group.arg))
1003
  
dmattek's avatar
dmattek committed
1004
  
1005 1006 1007
  # plot stimulation bars underneath time series
  # dt.stim.arg is read separately and should contain 4 columns with
  # xy positions of beginning and end of the bar
dmattek's avatar
dmattek committed
1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020
  if (!is.null(dt.stim.arg)) {
    p.tmp = p.tmp + geom_segment(
      data = dt.stim.arg,
      aes_string(
        x = x.stim.arg[1],
        xend = x.stim.arg[2],
        y = y.stim.arg[1],
        yend = y.stim.arg[2]
      ),
      colour = rhg_cols[[3]],
      size = stim.bar.width.arg,
      group = 1
    )
1021
  }
dmattek's avatar
dmattek committed
1022
  
dmattek's avatar
dmattek committed
1023
  p.tmp = p.tmp + coord_cartesian(xlim = xlim.arg, ylim = ylim.arg)
1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040
  
  if (is.null(col.arg)) {
    p.tmp = p.tmp +
      scale_color_discrete(name = '')
  } else {
    p.tmp = p.tmp +
      scale_colour_manual(values = col.arg, name = '')
  }
  
  if (!is.null(plotlab.arg))
    p.tmp = p.tmp + ggtitle(plotlab.arg)
  
  p.tmp = p.tmp +
    xlab(xlab.arg) +
    ylab(ylab.arg)
  
  return(p.tmp)
1041 1042
}

1043
# Plot average power spectrum density per facet
dmattek's avatar
dmattek committed
1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054
LOCplotPSD <- function(dt.arg,
                       # input data table
                       x.arg,
                       # string with column name for x-axis
                       y.arg,
                       # string with column name for y-axis
                       group.arg = NULL,
                       # string with column name for grouping time series (here, it's a column corresponding to grouping by condition)
                       xlab.arg = x.arg,
                       ylab.arg = y.arg,
                       facet.color.arg = NULL) {
majpark21's avatar
majpark21 committed
1055
  require(ggplot2)
dmattek's avatar
dmattek committed
1056 1057 1058 1059
  if (length(setdiff(c(x.arg, y.arg, group.arg), colnames(dt.arg))) > 0) {
    stop(paste("Missing columns in dt.arg: ", setdiff(
      c(x.arg, y.arg, group.arg), colnames(dt.arg)
    )))
majpark21's avatar
majpark21 committed
1060
  }
dmattek's avatar
dmattek committed
1061
  p.tmp <- ggplot(dt.arg, aes_string(x = x.arg, y = y.arg)) +
majpark21's avatar
majpark21 committed
1062
    geom_line() +
dmattek's avatar
dmattek committed
1063 1064 1065
    geom_rug(sides = "b",
             alpha = 1,
             color = "lightblue") +
majpark21's avatar
majpark21 committed
1066 1067
    facet_wrap(group.arg) +
    labs(x = xlab.arg, y = ylab.arg)
1068
  
1069 1070 1071 1072 1073 1074 1075
  if (!is.null(facet.color.arg)) {
    loc.y.max = max(dt.arg[, c(y.arg), with = FALSE])
    loc.dt.cl = data.table(xx = 1:length(facet.color.arg), yy = loc.y.max)
    setnames(loc.dt.cl, 'xx', group.arg)
    
    # adjust facet.color.arg to plot
    
1076
    p.tmp = p.tmp +
dmattek's avatar
dmattek committed
1077 1078 1079 1080 1081 1082
      geom_hline(
        data = loc.dt.cl,
        colour = facet.color.arg,
        yintercept = loc.y.max,
        size = 4
      ) +
1083 1084
      scale_colour_manual(values = facet.color.arg,
                          name = '')
1085 1086
  }
  
majpark21's avatar
majpark21 committed
1087 1088
  return(p.tmp)
}
1089

dmattek's avatar
dmattek committed
1090 1091 1092
#' Plot a scatter plot with an optional linear regression
#'
#' @param dt.arg input of data.table with 2 columns with x and y coordinates
dmattek's avatar
dmattek committed
1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110
#' @param facet.arg
#' @param facet.ncol.arg
#' @param xlab.arg
#' @param ylab.arg
#' @param plotlab.arg
#' @param alpha.arg
#' @param trend.arg
#' @param ci.arg

LOCggplotScat = function(dt.arg,
                         facet.arg = NULL,
                         facet.ncol.arg = 2,
                         xlab.arg = NULL,
                         ylab.arg = NULL,
                         plotlab.arg = NULL,
                         alpha.arg = 1,
                         trend.arg = T,
                         ci.arg = 0.95) {
dmattek's avatar
dmattek committed
1111
  p.tmp = ggplot(dt.arg, aes(x = x, y = y, label = id)) +
dmattek's avatar
dmattek committed
1112
    geom_point(alpha = alpha.arg)
dmattek's avatar
dmattek committed
1113
  
dmattek's avatar
dmattek committed
1114
  if (trend.arg) {
dmattek's avatar
dmattek committed
1115 1116
    p.tmp = p.tmp +
      stat_smooth(
dmattek's avatar
dmattek committed
1117
        method = "lm",
dmattek's avatar
dmattek committed
1118
        fullrange = FALSE,
dmattek's avatar
dmattek committed
1119
        level = ci.arg,
dmattek's avatar
dmattek committed
1120 1121 1122
        colour = 'blue'
      )
  }
dmattek's avatar
dmattek committed
1123
  
dmattek's avatar
dmattek committed
1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143
  if (!is.null(facet.arg)) {
    p.tmp = p.tmp +
      facet_wrap(as.formula(paste("~", facet.arg)),
                 ncol = facet.ncol.arg)
    
  }
  
  if (!is.null(xlab.arg))
    p.tmp = p.tmp +
      xlab(paste0(xlab.arg, "\n"))
  
  if (!is.null(ylab.arg))
    p.tmp = p.tmp +
      ylab(paste0("\n", ylab.arg))
  
  if (!is.null(plotlab.arg))
    p.tmp = p.tmp +
      ggtitle(paste0(plotlab.arg, "\n"))
  
  p.tmp = p.tmp +
dmattek's avatar
dmattek committed
1144 1145 1146 1147 1148 1149 1150
    LOCggplotTheme(
      in.font.base = PLOTFONTBASE,
      in.font.axis.text = PLOTFONTAXISTEXT,
      in.font.axis.title = PLOTFONTAXISTITLE,
      in.font.strip = PLOTFONTFACETSTRIP,
      in.font.legend = PLOTFONTLEGEND
    ) +
1151
    theme(legend.position = "none")
dmattek's avatar
dmattek committed
1152
  
dmattek's avatar
dmattek committed
1153 1154
  return(p.tmp)
}
dmattek's avatar
dmattek committed
1155

1156

dmattek's avatar
dmattek committed
1157
LOCplotHeatmap <- function(data.arg,
dmattek's avatar
dmattek committed
1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171
                           dend.arg,
                           palette.arg,
                           palette.rev.arg = TRUE,
                           dend.show.arg = TRUE,
                           key.show.arg = TRUE,
                           margin.x.arg = 5,
                           margin.y.arg = 20,
                           nacol.arg = 0.5,
                           colCol.arg = NULL,
                           labCol.arg = NULL,
                           font.row.arg = 1,
                           font.col.arg = 1,
                           breaks.arg = NULL,
                           title.arg = 'Clustering') {
1172 1173
  loc.n.colbreaks = 99
  
dmattek's avatar
Mod:  
dmattek committed
1174 1175
  if (palette.rev.arg)
    my_palette <-
dmattek's avatar
dmattek committed
1176
      rev(colorRampPalette(brewer.pal(9, palette.arg))(n = loc.n.colbreaks))
dmattek's avatar
Mod:  
dmattek committed
1177 1178
  else
    my_palette <-
dmattek's avatar
dmattek committed
1179
      colorRampPalette(brewer.pal(9, palette.arg))(n = loc.n.colbreaks)
dmattek's avatar
Mod:  
dmattek committed
1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214
  
  
  col_labels <- get_leaves_branches_col(dend.arg)
  col_labels <- col_labels[order(order.dendrogram(dend.arg))]
  
  if (dend.show.arg) {
    assign("var.tmp.1", dend.arg)
    var.tmp.2 = "row"
  } else {
    assign("var.tmp.1", FALSE)
    var.tmp.2 = "none"
  }
  
  loc.p = heatmap.2(
    data.arg,
    Colv = "NA",
    Rowv = var.tmp.1,
    srtCol = 90,
    dendrogram = var.tmp.2,
    trace = "none",
    key = key.show.arg,
    margins = c(margin.x.arg, margin.y.arg),
    col = my_palette,
    na.col = grey(nacol.arg),
    denscol = "black",
    density.info = "density",
    RowSideColors = col_labels,
    colRow = col_labels,
    colCol = colCol.arg,
    labCol = labCol.arg,
    #      sepcolor = grey(input$inPlotHierGridColor),
    #      colsep = 1:ncol(loc.dm),
    #      rowsep = 1:nrow(loc.dm),
    cexRow = font.row.arg,
    cexCol = font.col.arg,
dmattek's avatar
dmattek committed
1215 1216
    main = title.arg,
    symbreaks = FALSE,
1217
    symkey = FALSE,
dmattek's avatar
dmattek committed
1218 1219 1220 1221
    breaks = if (is.null(breaks.arg))
      NULL
    else
      seq(breaks.arg[1], breaks.arg[2], length.out = loc.n.colbreaks + 1)
dmattek's avatar
Mod:  
dmattek committed
1222 1223 1224 1225
  )
  
  return(loc.p)
}