auxfunc.R 44.6 KB
Newer Older
dmattek's avatar
dmattek committed
1 2 3 4
#
# Time Course Inspector: Shiny app for plotting time series data
# Author: Maciej Dobrzynski
#
dmattek's avatar
dmattek committed
5
# Auxilary functions & definitions of global constants
dmattek's avatar
dmattek committed
6 7 8
#


9 10 11 12 13
library(ggplot2)
library(RColorBrewer)
library(gplots) # for heatmap.2
library(grid) # for modifying grob
library(Hmisc) # for CI calculation
dmattek's avatar
dmattek committed
14

15 16

# Global parameters ----
17 18 19
# number of miliseconds to delay reactions to changes in the UI
# used to delay output from sliders
MILLIS = 1000
dmattek's avatar
dmattek committed
20

21 22 23
# Number of significant digits to display in table stats
SIGNIFDIGITSINTAB = 3

24 25 26
# if true, additional output printed to R console
DEB = T

27
# font sizes in pts for plots in the manuscript
Maciej Dobrzynski's avatar
Maciej Dobrzynski committed
28 29 30 31 32 33
# PLOTFONTBASE = 8
# PLOTFONTAXISTEXT = 8
# PLOTFONTAXISTITLE = 8
# PLOTFONTFACETSTRIP = 10
# PLOTFONTLEGEND = 8

dmattek's avatar
dmattek committed
34
# font sizes in pts for screen display
Maciej Dobrzynski's avatar
Maciej Dobrzynski committed
35 36 37 38 39 40 41 42 43 44 45 46 47
PLOTFONTBASE = 16
PLOTFONTAXISTEXT = 16
PLOTFONTAXISTITLE = 16
PLOTFONTFACETSTRIP = 20
PLOTFONTLEGEND = 16

# height (in pixels) of ribbon and single traj. plots
PLOTRIBBONHEIGHT = 500 # in pixels
PLOTTRAJHEIGHT = 500 # in pixels
PLOTPSDHEIGHT = 500 # in pixels
PLOTBOXHEIGHT = 500 # in pixels
PLOTSCATTERHEIGHT = 500 # in pixels
PLOTWIDTH = 85 # in percent
48 49

# default number of facets in plots
Maciej Dobrzynski's avatar
Maciej Dobrzynski committed
50
PLOTNFACETDEFAULT = 3
51

dmattek's avatar
dmattek committed
52
# internal column names
dmattek's avatar
dmattek committed
53
COLRT   = 'time'
dmattek's avatar
dmattek committed
54 55 56 57 58 59 60 61 62 63
COLY    = 'y'
COLID   = 'id'
COLIDUNI = 'trackObjectsLabelUni'
COLGR   = 'group'
COLIN   = 'mid.in'
COLOBJN = 'obj.num'
COLPOSX = 'pos.x'
COLPOSY = 'pos.y'
COLIDX = 'IDX'
COLIDXDIFF = 'IDXdiff'
dmattek's avatar
dmattek committed
64
COLCL = 'cl'
65
COLNTRAJ = "nCells"
dmattek's avatar
dmattek committed
66 67 68

# file names
FCSVOUTLIERS = 'outliers.csv'
majpark21's avatar
majpark21 committed
69
FCSVTCCLEAN  = 'tCoursesProcessed.csv'
dmattek's avatar
dmattek committed
70 71
FPDFTCMEAN   = "tCoursesMeans.pdf"
FPDFTCSINGLE = "tCourses.pdf"
72
FPDFTCPSD    = 'tCoursesPsd.pdf'
majpark21's avatar
majpark21 committed
73 74
FPDFBOXAUC   = 'distributionAUC.pdf'
FPDFBOXTP    = 'distributionTP.pdf'
dmattek's avatar
dmattek committed
75 76
FPDFSCATTER  = 'scatter.pdf'

dmattek's avatar
dmattek committed
77
# Colour definitions ----
dmattek's avatar
dmattek committed
78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102
rhg_cols <- c(
  "#771C19",
  "#AA3929",
  "#E25033",
  "#F27314",
  "#F8A31B",
  "#E2C59F",
  "#B6C5CC",
  "#8E9CA3",
  "#556670",
  "#000000"
)

md_cols <- c(
  "#FFFFFF",
  "#F8A31B",
  "#F27314",
  "#E25033",
  "#AA3929",
  "#FFFFCC",
  "#C2E699",
  "#78C679",
  "#238443"
)

103
# list of palettes for the heatmap
dmattek's avatar
dmattek committed
104
l.col.pal = list(
dmattek's avatar
dmattek committed
105 106 107 108
  "Spectral" = 'Spectral',
  "Red-Yellow-Green" = 'RdYlGn',
  "Red-Yellow-Blue" = 'RdYlBu',
  "Greys" = "Greys",
dmattek's avatar
dmattek committed
109 110 111
  "Reds" = "Reds",
  "Oranges" = "Oranges",
  "Greens" = "Greens",
dmattek's avatar
dmattek committed
112
  "Blues" = "Blues"
dmattek's avatar
dmattek committed
113 114
)

115 116 117 118 119 120 121 122 123 124
# list of palettes for the dendrogram
l.col.pal.dend = list(
  "Rainbow" = 'rainbow_hcl',
  "Sequential" = 'sequential_hcl',
  "Heat" = 'heat_hcl',
  "Terrain" = 'terrain_hcl',
  "Diverge HCL" = 'diverge_hcl',
  "Diverge HSV" = 'diverge_hsv'
)

dmattek's avatar
dmattek committed
125 126 127 128 129 130 131 132 133 134 135
# list of palettes for the dendrogram
l.col.pal.dend.2 = list(
  "Colorblind 10" = 'Color Blind',
  "Tableau 10" = 'Tableau 10',
  "Tableau 20" = 'Tableau 20',
  "Classic 10" = "Classic 10",
  "Classic 20" = "Classic 20",
  "Traffic 9" = 'Traffic',
  "Seattle Grays 5" = 'Seattle Grays'
)

dmattek's avatar
dmattek committed
136
# Help text ----
dmattek's avatar
dmattek committed
137
helpText.server = c(
dmattek's avatar
dmattek committed
138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173
  alDataFormat =  paste0(
    "<p>Switch between long and wide formats of input data. ",
    "TCI accepts CSV or compressed CSV files (gz or bz2).</p>",
    "<p><b>Long format</b> - a row is a single data point and consecutive time series are arranged vertically. ",
    "Data file should contain at least 3 columns separated with a comma:</p>",
    "<li>Identifier of a time series</li>",
    "<li>Time points</li>",
    "<li>A time-varying variable</li>",
    "<br>",
    "<p><b>Wide format</b> - a row is a time series with columns as time points.",
    "At least 3 columns shuold be present:</p>",
    "<li>First two columns in wide format should contain grouping and track IDs</li>",
    "<li>A column with a time point. Headers of columns with time points need to be numeric</li>"
  ),
  inDataGen1 =   paste0(
    "Generate 3 groups with 20 random synthetic time series. ",
    "Every time series contains 101 time points. ",
    "Track IDs are unique across entire dataset."
  ),
  chBtrajRem =   paste0(
    "Load CSV file with a column of track IDs for removal. ",
    "IDs should correspond to those used for plotting."
  ),
  chBstim =      paste0(
    "Load CSV file with stimulation pattern. Should contain 5 columns: ",
    "grouping, start and end time points of stimulation, start and end of y-position, dummy column with ID."
  ),
  chBtrajInter = paste0(
    "Interpolate missing measurements indicated with NAs in the data file. ",
    "In addition, interpolate a row that is completely missing from the data. ",
    "The interval of the time column must be provided to know which rows are missing."
  ),
  chBtrackUni =  paste0(
    "If the track ID in the uploaded dataset is unique only within a group (e.g. an experimental condition), ",
    "make it unique by prepending other columns to the track ID (typically a grouping column)."
  ),
dmattek's avatar
dmattek committed
174 175 176 177 178 179 180 181 182
  chBgroup    = "Select columns to group data according to treatment, condition, etc.",
  inSelMath   = "Select math operation to perform on a single or two measurement columns,",
  chBtimeTrim = "Trim time for further processing.",
  chBnorm     = "Divide measurements by the mean/median or calculate z-score with respect to selected time span.",
  rBnormMeth  = "Fold-change or z-score with respect to selected time span.",
  slNormRtMinMax = "Normalise with respect to this time span.",
  chBnormRobust  = "Calculate fold-change and z-score using the median and Median Absolute Deviation, instead of the mean and standard deviation.",
  chBnormGroup   = "Normalise to mean/median of selected time calculated globally, per group, or for individual time series.",
  downloadDataClean = "Download all time series after modifications in this panel.",
183 184
  alertNAsPresent              = "NAs present in the measurement column. Consider interpolation.",
  alertNAsPresentLong2WideConv = "Missing rows. Consider interpolation.",
185
  alertTimeFreq0 = "The interval between 2 time points has to be greater than 0.",
dmattek's avatar
dmattek committed
186
  alertWideMissesNumericTime = "Non-numeric headers of time columns. Data in wide format should have numeric column headers corresponding to time points.",
dmattek's avatar
dmattek committed
187
  alertWideTooFewColumns     = "Insufficient columns. Data in wide format should contain at least 3 columns: grouping, track ID, and a single time point."
188 189
)

dmattek's avatar
dmattek committed
190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214
# Functions for data processing ----
#' Calculate the mean and CI around time series
#'
#' @param in.dt Data table in long format
#' @param in.col.meas Name of the column with the measurement
#' @param in.col.by Column names for grouping (default NULL - no grouping). Typically, you want to use at least a column with time.
#' @param in.type Choice of normal approximation or boot-strapping
#' @param ... Other params passed to smean.cl.normal and smean.cl.boot; these include \code{conf.int} for the confidence level, \code{B} for the number of boot-strapping iterations.
#'
#' @return Datatable with columns: Mean, lower and upper CI, and grouping columns if provided.
#' @export
#' @import data.table
#' @import Hmisc
#'
#' @examples
#'
#'
#' # generate synthetic time series; 100 time points long, with 10 randomly placed NAs
#' dt.tmp = genTraj(100, 10, 6, 3, in.addna = 10)
#'
#' # calculate single stats from all time points
#' calcTrajCI(dt.tmp, 'objCyto_Intensity_MeanIntensity_imErkCor')
#'
#' # calculate the mean and CI along the time course
#' calcTrajCI(dt.tmp, 'objCyto_Intensity_MeanIntensity_imErkCor', 'Metadata_RealTime')
dmattek's avatar
dmattek committed
215 216 217 218 219
LOCcalcTrajCI = function(in.dt,
                         in.col.meas,
                         in.col.by = NULL,
                         in.type = c('normal', 'boot'),
                         ...) {
dmattek's avatar
dmattek committed
220 221 222
  in.type = match.arg(in.type)
  
  if (in.type %like% 'normal')
dmattek's avatar
dmattek committed
223 224 225 226 227
    loc.dt = in.dt[, as.list(smean.cl.normal(get(in.col.meas), ...)), by = in.col.by]
  else
    loc.dt = in.dt[, as.list(smean.cl.boot(get(in.col.meas), ...)), by = in.col.by]
  
  return(loc.dt)
dmattek's avatar
dmattek committed
228 229
}

230

231 232 233 234 235 236 237 238 239
#' Calculate standard error of the mean
#'
#' @param x Vector
#' @param na.rm Remove NAs; default = FALSE
#'
#' @return A scalar with the result
#' @export
#'
#' @examples
dmattek's avatar
dmattek committed
240 241
LOCstderr = function(x, na.rm = FALSE) {
  if (na.rm)
242 243
    x = na.omit(x)
  
dmattek's avatar
dmattek committed
244
  return(sqrt(var(x) / length(x)))
245 246
}

247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262
#' Calculate the power spectrum density for time-series
#'
#' @param in.dt Data table in long format
#' @param in.col.meas Name of the column with the measurement
#' @param in.col.id Name of the column with the unique series identifier
#' @param in.col.by Column names for grouping (default NULL - no grouping). PSD of individual trajectories will be averaged within a group.
#' @param in.method Name of the method for PSD estimation, must be one of c("pgram", "ar"). Default to "pgram*.
#' @param in.return.period Wheter to return densities though periods (1/frequencies) instead of frequencies.
#' @param ... Other paramters to pass to stats::spectrum()
#'
#' @return Datatable with columns: (frequency or period), spec (the density) and grouping column
#' @export
#' @import data.table
#'
#' @examples
LOCcalcPSD <- function(in.dt,
dmattek's avatar
dmattek committed
263 264 265 266 267 268 269
                       in.col.meas,
                       in.col.id,
                       in.col.by,
                       in.method = "pgram",
                       in.return.period = TRUE,
                       in.time.btwPoints = 1,
                       ...) {
270
  require(data.table)
271
  # Method "ar" returns $spec as matrix whereas "pgram" returns a vector, custom function to homogenze output format
dmattek's avatar
dmattek committed
272 273
  mySpectrum <- function(x, ...) {
    args_spec <- list(x = x, plot = FALSE)
274 275 276 277 278 279
    inargs <- list(...)
    args_spec[names(inargs)] <- inargs
    out <- do.call(spectrum, args_spec)
    out$spec <- as.vector(out$spec)
    return(out)
  }
dmattek's avatar
dmattek committed
280
  if (!in.method %in% c("pgram", "ar")) {
281 282
    stop('Method should be one of: c("pgram", "ar"')
  }
dmattek's avatar
dmattek committed
283 284
  dt_spec <-
    in.dt[, (mySpectrum(get(in.col.meas), plot = FALSE, method = in.method)[c("freq", "spec")]), by = in.col.id]
285 286 287
  dt_group <- in.dt[, .SD[1, get(in.col.by)], by = in.col.id]
  setnames(dt_group, "V1", in.col.by)
  dt_spec <- merge(dt_spec, dt_group, by = in.col.id)
dmattek's avatar
dmattek committed
288 289 290 291
  dt_agg <-
    dt_spec[, .(spec = mean(spec)), by = c(in.col.by, "freq")]
  if (in.return.period) {
    dt_agg[, period := 1 / freq]
292 293 294
    dt_agg[, freq := NULL]
    # Adjust period unit to go from frame unit  to time unit
    dt_agg[, period := period * in.time.btwPoints]
295
  } else {
dmattek's avatar
dmattek committed
296
    dt_agg[, freq := freq * (1 / in.time.btwPoints)]
297
    setnames(dt_agg, "freq", "frequency")
298 299 300 301 302
  }
  return(dt_agg)
}


303
#' Generate synthetic CellProfiler output with single-cell time series
dmattek's avatar
dmattek committed
304 305 306 307 308 309 310 311 312 313 314 315
#'
#' @param in.ntpts Number of time points (default 60)
#' @param in.ntracks Number of tracks per FOV (default 10)
#' @param in.nfov Number of FOV (default 6)
#' @param in.nwells Number of wells (default 1)
#' @param in.addna Number of NAs to add randomly in the data (default NULL)
#'
#' @return Data table with the follwoing columns: Metadata_Site, Metadata_Well, Metadata_RealTime, objCyto_Intensity_MeanIntensity_imErkCor (normal distributed),
#' objNuc_Intensity_MeanIntensity_imErkCor (normal distributed), objNuc_Location_X and objNuc_Location_Y (uniform ditributed), TrackLabel
#' @export
#' @import data.table

dmattek's avatar
dmattek committed
316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369
LOCgenTraj <-
  function(in.ntpts = 60,
           in.ntracks = 10,
           in.nfov = 6,
           in.nwells = 1,
           in.addna = NULL,
           in.addout = NULL) {
    x.rand.1 = c(
      rnorm(in.ntpts * in.ntracks * in.nfov * 1 / 3, 0.5, 0.1),
      rnorm(in.ntpts * in.ntracks * in.nfov * 1 / 3,   1, 0.2),
      rnorm(in.ntpts * in.ntracks * in.nfov * 1 / 3,  2, 0.5)
    )
    x.rand.2 = c(
      rnorm(in.ntpts * in.ntracks * in.nfov * 1 / 3, 0.25, 0.1),
      rnorm(in.ntpts * in.ntracks * in.nfov * 1 / 3, 0.5, 0.2),
      rnorm(in.ntpts * in.ntracks * in.nfov * 1 / 3, 1, 0.2)
    )
    
    # add NA's for testing
    if (!is.null(in.addna)) {
      locTabLen = length(x.rand.1)
      x.rand.1[round(runif(in.addna) * locTabLen)] = NA
      x.rand.2[round(runif(in.addna) * locTabLen)] = NA
    }
    
    # add outliers for testing
    if (!is.null(in.addout)) {
      locTabLen = length(x.rand.1)
      x.rand.1[round(runif(in.addout) * locTabLen)] = 5
      x.rand.2[round(runif(in.addout) * locTabLen)] = 5
    }
    
    x.arg = rep(seq(1, in.ntpts), in.ntracks * in.nfov)
    
    dt.nuc = data.table(
      well = rep(LETTERS[1:in.nwells], each = in.ntpts * in.nfov * in.ntracks / in.nwells),
      group = rep(1:in.nfov, each = in.ntpts * in.ntracks),
      time = x.arg,
      y1 = x.rand.1,
      y2  = x.rand.2,
      posx = runif(
        in.ntpts * in.ntracks * in.nfov,
        min = 0,
        max = 1
      ),
      posy = runif(
        in.ntpts * in.ntracks * in.nfov,
        min = 0,
        max = 1
      ),
      id = rep(1:(in.ntracks * in.nfov), each = in.ntpts)
    )
    
    return(dt.nuc)
dmattek's avatar
dmattek committed
370
  }
dmattek's avatar
dmattek committed
371

dmattek's avatar
dmattek committed
372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404
LOCgenTraj2 <-
  function(n_perGroup = 20,
           sd_noise = 0.01,
           sampleFreq = 0.2,
           endTime = 50)
  {
    # Function definition ----------------------------------
    sim_expodecay_lagged_stim <-
      function (n,
                noise,
                interval.stim = 5,
                lambda = 0.2,
                freq = 0.2,
                end = 40)
      {
        require(data.table)
        tvec <- seq(0, end, by = freq)
        stim_time <- seq(interval.stim, end, interval.stim)
        stim_time_matrix <-
          matrix(stim_time, nrow = length(stim_time),
                 ncol = n)
        noise_matrix <- abs(replicate(n, rnorm(
          n = length(stim_time),
          mean = 0,
          sd = noise
        )))
        stim_time_matrix <- stim_time_matrix + noise_matrix
        trajs <- matrix(0, nrow = length(tvec), ncol = n)
        for (col in 1:ncol(stim_time_matrix)) {
          for (row in 1:nrow(stim_time_matrix)) {
            index <- which(tvec >= stim_time_matrix[row, col])[1]
            trajs[index, col] <- 1
          }
405
        }
dmattek's avatar
dmattek committed
406 407 408 409 410 411
        decrease_factor <- exp(-lambda * freq)
        for (col in 1:ncol(trajs)) {
          for (row in 2:nrow(trajs)) {
            if (trajs[row, col] != 1) {
              trajs[row, col] <- trajs[row - 1, col] * decrease_factor
            }
412 413
          }
        }
dmattek's avatar
dmattek committed
414 415 416 417 418
        trajs <- as.data.table(trajs)
        trajs <- cbind(seq(0, end, by = freq), trajs)
        colnames(trajs)[1] <- "Time"
        trajs <- melt(trajs, id.vars = "Time")
        return(trajs)
419
      }
dmattek's avatar
dmattek committed
420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469
    
    
    # Dataset creation -----------------------------------------------
    dt1 <-
      sim_expodecay_lagged_stim(
        n = n_perGroup,
        noise = 0.75,
        interval.stim = 10,
        lambda = 0.4,
        freq = sampleFreq,
        end = endTime
      )
    dt2 <-
      sim_expodecay_lagged_stim(
        n = n_perGroup,
        noise = 0.75,
        interval.stim = 10,
        lambda = 0.1,
        freq = sampleFreq,
        end = endTime
      )
    dt3 <-
      sim_expodecay_lagged_stim(
        n = n_perGroup,
        noise = 0.75,
        interval.stim = 10,
        lambda = 0.4,
        freq = sampleFreq,
        end = endTime
      )
    dt3[, value := value / 3]
    
    dt1[, Group := "fastDecay"]
    dt2[, Group := "slowDecay"]
    dt3[, Group := "lowAmplitude"]
    
    dt <- rbindlist(list(dt1, dt2, dt3))
    dt[, ID := sprintf("%s_%02d", Group, as.integer(gsub('[A-Z]', '', variable)))]
    dt[, variable := NULL]
    dt[, Group := as.factor(Group)]
    
    dt[, value := value + runif(1, -0.1, 0.1), by = .(Group, ID)]
    noise_vec <- rnorm(n = nrow(dt), mean = 0, sd = sd_noise)
    dt[, value := value + noise_vec]
    
    setnames(dt, "value", "Meas")
    setcolorder(dt, c("Group", "ID", "Time", "Meas"))
    
    return(dt)
  }
470

dmattek's avatar
dmattek committed
471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492
#' Normalize Trajectory
#'
#' Returns original dt with an additional column with normalized quantity.
#' The column to be normalised is given by 'in.meas.col'.
#' The name of additional column is the same as in.meas.col but with ".norm" suffix added.
#' Normalisation is based on part of the trajectory;
#' this is defined by in.rt.min and max, and the column with time in.rt.col.#'
#'
#' @param in.dt Data table in long format
#' @param in.meas.col String with the column name to normalize
#' @param in.rt.col String with the colum name holding time
#' @param in.rt.min Lower bound for time period used for normalization
#' @param in.rt.max Upper bound for time period used for normalization
#' @param in.by.cols String vector with 'by' columns to calculate normalization per group; if NULL, no grouping is done
#' @param in.robust Whether robust measures should be used (median instead of mean, mad instead of sd); default TRUE
#' @param in.type Type of normalization: z.score or mean (i.e. fold change w.r.t. mean); default 'z-score'
#'
#' @return Returns original dt with an additional column with normalized quantity.
#' @export
#' @import data.table

LOCnormTraj = function(in.dt,
dmattek's avatar
dmattek committed
493 494 495 496 497 498 499
                       in.meas.col,
                       in.rt.col = COLRT,
                       in.rt.min = 10,
                       in.rt.max = 20,
                       in.by.cols = NULL,
                       in.robust = TRUE,
                       in.type = 'z.score') {
dmattek's avatar
dmattek committed
500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540
  loc.dt <-
    copy(in.dt) # copy so as not to alter original dt object w intermediate assignments
  
  if (is.null(in.by.cols)) {
    if (in.robust)
      loc.dt.pre.aggr = loc.dt[get(in.rt.col) >= in.rt.min &
                                 get(in.rt.col) <= in.rt.max, .(meas.md = median(get(in.meas.col), na.rm = TRUE),
                                                                meas.mad = mad(get(in.meas.col), na.rm = TRUE))]
    else
      loc.dt.pre.aggr = loc.dt[get(in.rt.col) >= in.rt.min &
                                 get(in.rt.col) <= in.rt.max, .(meas.md = mean(get(in.meas.col), na.rm = TRUE),
                                                                meas.mad = sd(get(in.meas.col), na.rm = TRUE))]
    
    loc.dt = cbind(loc.dt, loc.dt.pre.aggr)
  }  else {
    if (in.robust)
      loc.dt.pre.aggr = loc.dt[get(in.rt.col) >= in.rt.min &
                                 get(in.rt.col) <= in.rt.max, .(meas.md = median(get(in.meas.col), na.rm = TRUE),
                                                                meas.mad = mad(get(in.meas.col), na.rm = TRUE)), by = in.by.cols]
    else
      loc.dt.pre.aggr = loc.dt[get(in.rt.col) >= in.rt.min &
                                 get(in.rt.col) <= in.rt.max, .(meas.md = mean(get(in.meas.col), na.rm = TRUE),
                                                                meas.mad = sd(get(in.meas.col), na.rm = TRUE)), by = in.by.cols]
    
    loc.dt = merge(loc.dt, loc.dt.pre.aggr, by = in.by.cols)
  }
  
  
  if (in.type == 'z.score') {
    loc.dt[, meas.norm := (get(in.meas.col) - meas.md) / meas.mad]
  } else {
    loc.dt[, meas.norm := (get(in.meas.col) / meas.md)]
  }
  
  setnames(loc.dt, 'meas.norm', paste0(in.meas.col, '.norm'))
  
  loc.dt[, c('meas.md', 'meas.mad') := NULL]
  return(loc.dt)
}


541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645
#' Interpolate missing rows in time series
#'
#' @param inDT Data.table in long format with time series
#' @param inColGr Name of the grouipng column
#' @param inColID Name of the column with unique time series IDs
#' @param inColT Name of the column with time
#' @param inColY Name of the column(s) with variables to interpolate
#' @param inTfreq Interval between two time points
#' @param inDeb Debugging, extended output
#'
#' @return Data.table with interpolated missing time points
#' @export
#'
#' @examples
LOCinterpolate = function(inDT, inColGr, inColID, inColT, inColY, inTfreq = 1, inDeb = F) {
  
  if(is.null(inDT))
    return(NULL)
  else
    loc.out = inDT
  
  # Stretch time series by every time series' min/max time
  # Gaps filled with NA's
  setkeyv(loc.out, c(inColGr, inColID, inColT))
  loc.out = loc.out[setkeyv(loc.out[, 
                                    .(seq(min(get(inColT), na.rm = T), 
                                          max(get(inColT), na.rm = T), 
                                          inTfreq)), 
                                    by = c(inColGr, inColID)], c(inColGr, inColID, 'V1'))]
  
  # x-check: print all rows with NA's
  if (inDeb) {
    cat(file = stdout(), '\nLOCinterpolate: Rows with NAs to interpolate:\n')
    print(loc.out[rowSums(is.na(loc.out)) > 0, ])
  }
  
  # Apparently the loop is faster than lapply+SDcols
  for(col in inColY) {
    if(inDeb)
      cat(file = stdout(), sprintf("Interpolating NAs in column: %s\n", col))
    
    # Interpolated columns should be of type numeric (float)
    # This is to ensure that interpolated columns are of porper type.
    data.table::set(loc.out, j = col, value = as.numeric(loc.out[[col]]))
    
    loc.out[, (col) := na_interpolation(get(col)), by = c(inColID)]        
  }
  
  return(loc.out)
}

#' Remove outlier time points and/or tracks depdending on maximum permissible gap length due to outliers
#'
#' @param inDT Data.table in long format with main dataset
#' @param inDTout Data.table in long format with rows of inDT that include outlier time points
#' @param inColID Name of the column with unique time series IDs
#' @param inGapLen Length of the maximum allowed gap. Tracks with gaps longer than threshold will be removed. Shorter gaps will be interpolated
#' @param inDeb Debugging, extended output
#'
#' @return Data.table with time points and/or time series removed
#' @export
#'
#' @examples
LOCremoveOutTracks = function(inDT, inDTout, inColID, inGapLen = 0, inDeb = F) {
  
  if(is.null(inDT))
    return(NULL)
  else
    loc.out = inDT
  
  # add index column per trajecory
  loc.out[, myColIdx := 1:.N, by = c(inColID)]
  
  # remove single outlier points (anti-join)
  # From: https://stackoverflow.com/a/46333620/1898713
  loc.out = loc.out[!inDTout, on = names(inDTout)]
  
  # calculate diff on index column to see the length of gaps due to removed points
  # the value of that column corresponds to the gap length (hence the "-1")
  loc.out[, 
          myColIdxDiff := c(1, diff(myColIdx)) - 1, 
          by = c(inColID)]
  
  # get track ids where the max gap is longer than the threshold
  loc.idgaps = loc.out[, 
                       max(myColIdxDiff), 
                       by = c(inColID)][V1 > inGapLen, get(inColID)]
  
  if (inDeb) {
    cat(file = stdout(), sprintf('\nLOCremoveTracks: Track IDs with max gap >= %d:\n', inGapLen))
    if (length(loc.idgaps) > 0)
      print(loc.idgaps) else
        cat("none\n")
  }
  
  # remove outlier tracks with gaps longer than the value set in slOutliersGapLen
  if (length(loc.idgaps) > 0)
    loc.out = loc.out[!(get(inColID) %in% unique(loc.idgaps))]
  
  # clean
  loc.out[, `:=`(myColIdx = NULL, myColIdxDiff = NULL)]
  
  return(loc.out)
}

646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741
# Cluster validation ----

#Customize factoextra functions to accept dissimilarity matrix from start. Otherwise can't use distance functions that are not in base R, like DTW.
# Inherit and adapt hcut function to take input from UI, used for fviz_clust

LOChcut <-
function(x,
         k = 2,
         isdiss = inherits(x, "dist"),
         hc_func = "hclust",
         hc_method = "average",
         hc_metric = "euclidean") {

    if (!inherits(x, "dist")) {
    stop("x must be a distance matrix")
  }
  return(
    factoextra::hcut(
      x = x,
      k = k,
      isdiss = TRUE,
      hc_func = hc_func,
      hc_method = hc_method,
      hc_metric = hc_metric
    )
  )
}

# Modified from factoextra::fviz_nbclust
# Allow (actually enforce) x to be a distance matrix; no GAP statistics for compatibility

LOCnbclust <-
  function (x,
            FUNcluster = LOChcut,
            method = c("silhouette", "wss"),
            k.max = 10,
            verbose = FALSE,
            barfill = "steelblue",
            barcolor = "steelblue",
            linecolor = "steelblue",
            print.summary = TRUE,
            ...)
  {
    set.seed(123)
    
    if (k.max < 2)
      stop("k.max must bet > = 2")
    
    method = match.arg(method)
    
    if (!inherits(x, c("dist")))
      stop("x should be an object of class dist")
    
    else if (is.null(FUNcluster))
      stop(
        "The argument FUNcluster is required. ",
        "Possible values are kmeans, pam, hcut, clara, ..."
      )
    
    else if (method %in% c("silhouette", "wss")) {
      diss <- x  # x IS ENFORCED TO BE A DISSIMILARITY MATRIX
      
      v <- rep(0, k.max)
      
      if (method == "silhouette") {
        loc.mainlab = "Optimal number of clusters from silhouette analysis"
        loc.ylab <- "Average silhouette width"
        for (i in 2:k.max) {
          clust <- FUNcluster(x, i, ...)
          v[i] <-
            factoextra:::.get_ave_sil_width(diss, clust$cluster)
        }
      }
      else if (method == "wss") {
        loc.mainlab = "Optimal number of clusters from within cluster sum of squares"
        
        loc.ylab <- "Total within cluster sum of squares"
        
        for (i in 1:k.max) {
          clust <- FUNcluster(x, i, ...)
          v[i] <- factoextra:::.get_withinSS(diss, clust$cluster)
        }
      }
      
      df <- data.frame(clusters = as.factor(1:k.max), y = v)
      
      p <- ggpubr::ggline(
        df,
        x = "clusters",
        y = "y",
        group = 1,
        color = linecolor,
        ylab = loc.ylab,
        xlab = "Number of clusters",
        main = loc.mainlab
      )
742

743 744 745
      return(p)
    }
  }
dmattek's avatar
dmattek committed
746

747
# Clustering ----
dmattek's avatar
dmattek committed
748 749 750 751 752 753 754 755 756

# Return a dt with cell IDs and corresponding cluster assignments depending on dendrogram cut (in.k)
# This one works wth dist & hclust pair
# For sparse hierarchical clustering use getDataClSpar
# Arguments:
# in.dend  - dendrogram; usually output from as.dendrogram(hclust(distance_matrix))
# in.k - level at which dendrogram should be cut

getDataCl = function(in.dend, in.k) {
dmattek's avatar
dmattek committed
757 758
  cat(file = stderr(), 'getDataCl \n')
  
dmattek's avatar
dmattek committed
759
  loc.clAssign = dendextend::cutree(in.dend, in.k, order_clusters_as_data = TRUE, )
dmattek's avatar
dmattek committed
760 761 762 763
  #print(loc.m)
  
  # The result of cutree containes named vector with names being cell id's
  # THIS WON'T WORK with sparse hierarchical clustering because there, the dendrogram doesn't have original id's
dmattek's avatar
dmattek committed
764 765 766
  loc.dt.clAssign = as.data.table(loc.clAssign, keep.rownames = T)
  setnames(loc.dt.clAssign, c(COLID, COLCL))
  
dmattek's avatar
dmattek committed
767
  
768 769
  #cat('===============\ndataCl:\n')
  #print(loc.dt.cl)
dmattek's avatar
dmattek committed
770
  return(loc.dt.clAssign)
dmattek's avatar
dmattek committed
771 772
}

dmattek's avatar
dmattek committed
773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791

# Return a dt with cell IDs and corresponding cluster assignments depending on dendrogram cut (in.k)
# This one works with sparse hierarchical clustering!
# Arguments:
# in.dend  - dendrogram; usually output from as.dendrogram(hclust(distance_matrix))
# in.k - level at which dendrogram should be cut
# in.id - vector of cell id's

getDataClSpar = function(in.dend, in.k, in.id) {
  cat(file = stderr(), 'getDataClSpar \n')
  
  loc.m = dendextend::cutree(in.dend, in.k, order_clusters_as_data = TRUE)
  #print(loc.m)
  
  # The result of cutree containes named vector with names being cell id's
  # THIS WON'T WORK with sparse hierarchical clustering because there, the dendrogram doesn't have original id's
  loc.dt.cl = data.table(id = in.id,
                         cl = loc.m)
  
792 793
  #cat('===============\ndataCl:\n')
  #print(loc.dt.cl)
dmattek's avatar
dmattek committed
794 795 796 797 798
  return(loc.dt.cl)
}



799 800 801 802 803 804 805
# Returns a table with 2 columns:
# - gr.no - group numbers, e.g. cluster,
# - gr.col - color assignments.
# 
# The number of rows is determined by dendrogram cut, parameter in.k.
# Colours are obtained from the dendrogram, parameter in.dend, using dendextend::get_leaves_branches_col
LOCgetClCol <- function(in.dend, in.k) {
806
  loc.col_labels <- dendextend::get_leaves_branches_col(in.dend)
dmattek's avatar
dmattek committed
807 808 809
  loc.col_labels <- loc.col_labels[order(order.dendrogram(in.dend))]
  
  return(unique(
dmattek's avatar
dmattek committed
810
    data.table(
811 812
      gr.no = dendextend::cutree(in.dend, k = in.k, order_clusters_as_data = TRUE),
      gr.col = loc.col_labels
dmattek's avatar
dmattek committed
813 814
    )
  ))
dmattek's avatar
dmattek committed
815 816
}

817

dmattek's avatar
dmattek committed
818
# Custom plotting functions ----
dmattek's avatar
dmattek committed
819

dmattek's avatar
dmattek committed
820 821 822 823 824 825 826 827 828 829 830 831 832 833 834

#' Custom ggPlot theme based on theme_bw
#'
#' @param in.font.base
#' @param in.font.axis.text
#' @param in.font.axis.title
#' @param in.font.strip
#' @param in.font.legend
#'
#' @return
#' @export
#'
#' @examples
#'
LOCggplotTheme = function(in.font.base = 12,
dmattek's avatar
dmattek committed
835 836 837 838
                          in.font.axis.text = 12,
                          in.font.axis.title = 12,
                          in.font.strip = 14,
                          in.font.legend = 12) {
dmattek's avatar
dmattek committed
839 840 841 842 843 844 845 846 847 848 849 850 851 852 853
  loc.theme =
    theme_bw(base_size = in.font.base, base_family = "Helvetica") +
    theme(
      panel.spacing = unit(1, "lines"),
      panel.grid.minor = element_blank(),
      panel.grid.major = element_blank(),
      panel.border = element_blank(),
      axis.line = element_line(color = "black", size = 0.25),
      axis.text = element_text(size = in.font.axis.text),
      axis.title = element_text(size = in.font.axis.title),
      strip.text = element_text(size = in.font.strip, face = "bold"),
      strip.background = element_blank(),
      legend.key = element_blank(),
      legend.text = element_text(size = in.font.legend),
      legend.key.height = unit(1, "lines"),
dmattek's avatar
dmattek committed
854 855
      legend.key.width = unit(2, "lines")
    )
dmattek's avatar
dmattek committed
856 857 858 859
  
  return(loc.theme)
}

dmattek's avatar
dmattek committed
860 861
# Build Function to Return Element Text Object
# From: https://stackoverflow.com/a/36979201/1898713
dmattek's avatar
dmattek committed
862 863 864 865 866
LOCrotatedAxisElementText = function(angle,
                                     position = 'x',
                                     size = 12) {
  angle     = angle[1]
  
dmattek's avatar
dmattek committed
867
  position  = position[1]
dmattek's avatar
dmattek committed
868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887
  positions = list(
    x = 0,
    y = 90,
    top = 180,
    right = 270
  )
  if (!position %in% names(positions))
    stop(sprintf("'position' must be one of [%s]", paste(names(positions), collapse =
                                                           ", ")), call. = FALSE)
  if (!is.numeric(angle))
    stop("'angle' must be numeric", call. = FALSE)
  rads = (-angle - positions[[position]]) * pi / 180
  hjust = round((1 - sin(rads))) / 2
  vjust = round((1 + cos(rads))) / 2
  element_text(
    size = size,
    angle = angle,
    vjust = vjust,
    hjust = hjust
  )
dmattek's avatar
dmattek committed
888 889
}

890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919
#' Return recycled tableau palette
#'
#' Cycle through a tableau palette (e.g. "Color Blind") and return repeated 
#' colours depending on the required number of colours
#'
#' @param inPalName Name of the tableau colour palette, e.g. "Color Blind"
#' @param inNcolors Number of required colours, default 10
#'
#' @return A vector with a requested number of colors
#' @export
#'
#' @examples
#' # The Color Blind palette has only 10 colors; here the 11th will be recycled
#' LOCreturnTableauPalette("Color Blind", 11)
LOCreturnTableauPalette = function(inPalName, inNcolors = 10) {
  
  # get the max N of colours in the palette
  loc.max.col = attr(ggthemes::tableau_color_pal(inPalName), "max_n")
  
  # get all colours in the palette
  loc.col = ggthemes::tableau_color_pal(inPalName)(n = loc.max.col)
  
  # repeat the full palette for the required number of colours
  loc.col = rep(loc.col, ((inNcolors-1) %/% loc.max.col) + 1)
  
  # return only the required number of colurs
  return(loc.col[1:inNcolors])
}


920
# Plot individual time series
dmattek's avatar
dmattek committed
921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961
LOCplotTraj = function(dt.arg,
                       # input data table
                       x.arg,
                       # string with column name for x-axis
                       y.arg,
                       # string with column name for y-axis
                       group.arg,
                       # string with column name for grouping time series (typicaly cell ID)
                       facet.arg,
                       # string with column name for facetting
                       facet.ncol.arg = 2,
                       # default number of facet columns
                       facet.color.arg = NULL,
                       # vector with list of colours for adding colours to facet names (currently a horizontal line on top of the facet is drawn)
                       line.col.arg = NULL,
                       # string with column name for colouring time series (typically when individual time series are selected in UI)
                       xlab.arg = NULL,
                       # string with x-axis label
                       ylab.arg = NULL,
                       # string with y-axis label
                       plotlab.arg = NULL,
                       # string with plot label
                       dt.stim.arg = NULL,
                       # plotting additional dataset; typically to indicate stimulations (not fully implemented yet, not tested!)
                       x.stim.arg = c('tstart', 'tend'),
                       # column names in stimulation dt with x and xend parameters
                       y.stim.arg = c('ystart', 'yend'),
                       # column names in stimulation dt with y and yend parameters
                       tfreq.arg = 1,
                       # unused
                       xlim.arg = NULL,
                       # limits of x-axis; for visualisation only, not trimmimng data
                       ylim.arg = NULL,
                       # limits of y-axis; for visualisation only, not trimmimng data
                       stim.bar.width.arg = 0.5,
                       # width of the stimulation line; plotted under time series
                       aux.label1 = NULL,
                       # 1st point label; used for interactive plotting; displayed in the tooltip; typically used to display values of column holding x & y coordinates
                       aux.label2 = NULL,
                       aux.label3 = NULL,
                       stat.arg = c('', 'mean', 'CI', 'SE')) {
dmattek's avatar
dmattek committed
962 963
  # match arguments for stat plotting
  loc.stat = match.arg(stat.arg, several.ok = TRUE)
dmattek's avatar
dmattek committed
964
  
dmattek's avatar
dmattek committed
965 966
  
  # aux.label12 are required for plotting XY positions in the tooltip of the interactive (plotly) graph
dmattek's avatar
dmattek committed
967
  p.tmp = ggplot(dt.arg,
dmattek's avatar
dmattek committed
968 969 970 971 972 973
                 aes_string(
                   x = x.arg,
                   y = y.arg,
                   group = group.arg,
                   label = group.arg
                 ))
974 975 976 977
  #,
  #                          label  = aux.label1,
  #                          label2 = aux.label2,
  #                          label3 = aux.label3))
dmattek's avatar
dmattek committed
978
  
dmattek's avatar
dmattek committed
979 980
  if (is.null(line.col.arg)) {
    p.tmp = p.tmp +
dmattek's avatar
dmattek committed
981 982
      geom_line(alpha = 0.25,
                size = 0.25)
dmattek's avatar
dmattek committed
983 984
  }
  else {
dmattek's avatar
dmattek committed
985 986 987 988 989 990 991 992 993 994 995 996 997
    p.tmp = p.tmp +
      geom_line(aes_string(colour = line.col.arg),
                alpha = 0.5,
                size = 0.5) +
      scale_color_manual(
        name = '',
        values = c(
          "FALSE" = rhg_cols[7],
          "TRUE" = rhg_cols[3],
          "SELECTED" = 'green',
          "NOT SEL" = rhg_cols[7]
        )
      )
dmattek's avatar
dmattek committed
998
  }
dmattek's avatar
dmattek committed
999
  
dmattek's avatar
Mod:  
dmattek committed
1000 1001 1002 1003 1004 1005 1006 1007
  # this is temporary solution for adding colour according to cluster number
  # use only when plotting traj from clustering!
  # a horizontal line is added at the top of data
  if (!is.null(facet.color.arg)) {
    loc.y.max = max(dt.arg[, c(y.arg), with = FALSE])
    loc.dt.cl = data.table(xx = 1:length(facet.color.arg), yy = loc.y.max)
    setnames(loc.dt.cl, 'xx', facet.arg)
    
dmattek's avatar
dmattek committed
1008 1009
    # adjust facet.color.arg to plot
    
dmattek's avatar
Mod:  
dmattek committed
1010
    p.tmp = p.tmp +
dmattek's avatar
dmattek committed
1011 1012 1013 1014 1015 1016
      geom_hline(
        data = loc.dt.cl,
        colour = facet.color.arg,
        yintercept = loc.y.max,
        size = 4
      ) +
dmattek's avatar
Mod:  
dmattek committed
1017 1018 1019
      scale_colour_manual(values = facet.color.arg,
                          name = '')
  }
dmattek's avatar
dmattek committed
1020
  
dmattek's avatar
dmattek committed
1021
  if ('mean' %in% loc.stat)
dmattek's avatar
dmattek committed
1022
    p.tmp = p.tmp +
dmattek's avatar
dmattek committed
1023 1024
    stat_summary(
      aes_string(y = y.arg, group = 1),
dmattek's avatar
dmattek committed
1025
      fun.y = mean,
dmattek's avatar
dmattek committed
1026
      na.rm = T,
dmattek's avatar
dmattek committed
1027
      colour = 'red',
dmattek's avatar
dmattek committed
1028 1029 1030 1031
      linetype = 'solid',
      size = 1,
      geom = "line",
      group = 1
dmattek's avatar
dmattek committed
1032
    )
dmattek's avatar
dmattek committed
1033
  
dmattek's avatar
dmattek committed
1034
  if ('CI' %in% loc.stat)
dmattek's avatar
dmattek committed
1035
    p.tmp = p.tmp +
dmattek's avatar
dmattek committed
1036 1037 1038
    stat_summary(
      aes_string(y = y.arg, group = 1),
      fun.data = mean_cl_normal,
dmattek's avatar
dmattek committed
1039
      na.rm = T,
dmattek's avatar
dmattek committed
1040
      colour = 'red',
dmattek's avatar
Mod:  
dmattek committed
1041
      alpha = 0.25,
dmattek's avatar
dmattek committed
1042 1043 1044 1045 1046
      geom = "ribbon",
      group = 1
    )
  
  if ('SE' %in% loc.stat)
dmattek's avatar
dmattek committed
1047
    p.tmp = p.tmp +
dmattek's avatar
dmattek committed
1048 1049 1050
    stat_summary(
      aes_string(y = y.arg, group = 1),
      fun.data = mean_se,
dmattek's avatar
dmattek committed
1051
      na.rm = T,
dmattek's avatar
dmattek committed
1052
      colour = 'red',
dmattek's avatar
Mod:  
dmattek committed
1053
      alpha = 0.25,
dmattek's avatar
dmattek committed
1054 1055 1056 1057 1058 1059
      geom = "ribbon",
      group = 1
    )
  
  
  
dmattek's avatar
dmattek committed
1060
  p.tmp = p.tmp +
dmattek's avatar
dmattek committed
1061 1062 1063
    facet_wrap(as.formula(paste("~", facet.arg)),
               ncol = facet.ncol.arg,
               scales = "free_x")
dmattek's avatar
dmattek committed
1064
  
1065 1066 1067
  # plot stimulation bars underneath time series
  # dt.stim.arg is read separately and should contain 4 columns with
  # xy positions of beginning and end of the bar
dmattek's avatar
dmattek committed
1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080
  if (!is.null(dt.stim.arg)) {
    p.tmp = p.tmp + geom_segment(
      data = dt.stim.arg,
      aes_string(
        x = x.stim.arg[1],
        xend = x.stim.arg[2],
        y = y.stim.arg[1],
        yend = y.stim.arg[2],
        group = 'group'
      ),
      colour = rhg_cols[[3]],
      size = stim.bar.width.arg
    )
dmattek's avatar
dmattek committed
1081 1082
  }
  
dmattek's avatar
dmattek committed
1083
  p.tmp = p.tmp + coord_cartesian(xlim = xlim.arg, ylim = ylim.arg)
1084
  
dmattek's avatar
dmattek committed
1085
  p.tmp = p.tmp +
dmattek's avatar
dmattek committed
1086 1087 1088
    xlab(paste0(xlab.arg, "\n")) +
    ylab(paste0("\n", ylab.arg)) +
    ggtitle(plotlab.arg) +
dmattek's avatar
dmattek committed
1089 1090 1091 1092 1093 1094 1095
    LOCggplotTheme(
      in.font.base = PLOTFONTBASE,
      in.font.axis.text = PLOTFONTAXISTEXT,
      in.font.axis.title = PLOTFONTAXISTITLE,
      in.font.strip = PLOTFONTFACETSTRIP,
      in.font.legend = PLOTFONTLEGEND
    ) +
1096
    theme(legend.position = "top")
dmattek's avatar
dmattek committed
1097
  
dmattek's avatar
Mod:  
dmattek committed
1098
  return(p.tmp)
dmattek's avatar
dmattek committed
1099 1100
}

1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126


#' Plot average time series with CI together in one facet
#'
#' @param dt.arg Data.table with aggregated time series in long format
#' @param x.arg String with column name for x-axis
#' @param y.arg String with column name for y-axis
#' @param group.arg String with column name for grouping time series (e.g. a column with grouping by condition)
#' @param col.arg Colour pallette for individual time series
#' @param dt.stim.arg Data.table with stimulation segments to plot under time series
#' @param x.stim.arg Column names in stimulation dt with x and xend parameters, default c('tstart', 'tend')
#' @param y.stim.arg Column names in stimulation dt with y and yend parameters, default c('ystart', 'yend')
#' @param stim.bar.width.arg Width of the stimulation segment, default 0.5
#' @param xlim.arg Limits of x-axis; for visualisation only, not trimmimng data
#' @param ylim.arg Limits of y-axis; for visualisation only, not trimmimng data
#' @param ribbon.lohi.arg Column names containing lower and upper bound for plotting the ribbon, e.g. for CI; default c('Lower', 'Upper'); set to NULL to avoid plotting the ribbon
#' @param ribbon.fill.arg Color to fill the ribbon, default 'grey50'
#' @param ribbon.alpha.arg Transparency of the ribbon, default 0.5
#' @param xlab.arg X-axis label
#' @param ylab.arg Y-axis label
#' @param plotlab.arg Plot label
#'
#' @return Ggplot object
#' @export
#'
#' @examples
dmattek's avatar
dmattek committed
1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143
LOCplotTrajRibbon = function(dt.arg,
                             x.arg,
                             y.arg,
                             group.arg = NULL,
                             col.arg = NULL,
                             dt.stim.arg = NULL,
                             x.stim.arg = c('tstart', 'tend'),
                             y.stim.arg = c('ystart', 'yend'),
                             stim.bar.width.arg = 0.5,
                             xlim.arg = NULL,
                             ylim.arg = NULL,
                             ribbon.lohi.arg = c('Lower', 'Upper'),
                             ribbon.fill.arg = 'grey50',
                             ribbon.alpha.arg = 0.5,
                             xlab.arg = NULL,
                             ylab.arg = NULL,
                             plotlab.arg = NULL) {
1144
  
1145 1146 1147
  p.tmp = ggplot(dt.arg, aes_string(x = x.arg, group = group.arg))
  
  if (!is.null(ribbon.lohi.arg))
dmattek's avatar
dmattek committed
1148 1149 1150 1151 1152 1153
    p.tmp = p.tmp +
      geom_ribbon(
        aes_string(ymin = ribbon.lohi.arg[1], ymax = ribbon.lohi.arg[2]),
        fill = ribbon.fill.arg,
        alpha = ribbon.alpha.arg
      )
1154
  
1155
  p.tmp = p.tmp + geom_line(aes_string(y = y.arg, colour = group.arg), size = 1.25)
1156
  
dmattek's avatar
dmattek committed
1157
  
1158 1159 1160
  # plot stimulation bars underneath time series
  # dt.stim.arg is read separately and should contain 4 columns with
  # xy positions of beginning and end of the bar
dmattek's avatar
dmattek committed
1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173
  if (!is.null(dt.stim.arg)) {
    p.tmp = p.tmp + geom_segment(
      data = dt.stim.arg,
      aes_string(
        x = x.stim.arg[1],
        xend = x.stim.arg[2],
        y = y.stim.arg[1],
        yend = y.stim.arg[2]
      ),
      colour = rhg_cols[[3]],
      size = stim.bar.width.arg,
      group = 1
    )
1174
  }
dmattek's avatar
dmattek committed
1175
  
dmattek's avatar
dmattek committed
1176
  p.tmp = p.tmp + coord_cartesian(xlim = xlim.arg, ylim = ylim.arg)
1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193
  
  if (is.null(col.arg)) {
    p.tmp = p.tmp +
      scale_color_discrete(name = '')
  } else {
    p.tmp = p.tmp +
      scale_colour_manual(values = col.arg, name = '')
  }
  
  if (!is.null(plotlab.arg))
    p.tmp = p.tmp + ggtitle(plotlab.arg)
  
  p.tmp = p.tmp +
    xlab(xlab.arg) +
    ylab(ylab.arg)
  
  return(p.tmp)
1194 1195
}

1196
# Plot average power spectrum density per facet
dmattek's avatar
dmattek committed
1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207
LOCplotPSD <- function(dt.arg,
                       # input data table
                       x.arg,
                       # string with column name for x-axis
                       y.arg,
                       # string with column name for y-axis
                       group.arg = NULL,
                       # string with column name for grouping time series (here, it's a column corresponding to grouping by condition)
                       xlab.arg = x.arg,
                       ylab.arg = y.arg,
                       facet.color.arg = NULL) {
majpark21's avatar
majpark21 committed
1208
  require(ggplot2)
dmattek's avatar
dmattek committed
1209 1210 1211 1212
  if (length(setdiff(c(x.arg, y.arg, group.arg), colnames(dt.arg))) > 0) {
    stop(paste("Missing columns in dt.arg: ", setdiff(
      c(x.arg, y.arg, group.arg), colnames(dt.arg)
    )))
majpark21's avatar
majpark21 committed
1213
  }
dmattek's avatar
dmattek committed
1214
  p.tmp <- ggplot(dt.arg, aes_string(x = x.arg, y = y.arg)) +
majpark21's avatar
majpark21 committed
1215
    geom_line() +
dmattek's avatar
dmattek committed
1216 1217 1218
    geom_rug(sides = "b",
             alpha = 1,
             color = "lightblue") +
majpark21's avatar
majpark21 committed
1219 1220
    facet_wrap(group.arg) +
    labs(x = xlab.arg, y = ylab.arg)
1221
  
1222 1223 1224 1225 1226 1227 1228
  if (!is.null(facet.color.arg)) {
    loc.y.max = max(dt.arg[, c(y.arg), with = FALSE])
    loc.dt.cl = data.table(xx = 1:length(facet.color.arg), yy = loc.y.max)
    setnames(loc.dt.cl, 'xx', group.arg)
    
    # adjust facet.color.arg to plot
    
1229
    p.tmp = p.tmp +
dmattek's avatar
dmattek committed
1230 1231 1232 1233 1234 1235
      geom_hline(
        data = loc.dt.cl,
        colour = facet.color.arg,
        yintercept = loc.y.max,
        size = 4
      ) +
1236 1237
      scale_colour_manual(values = facet.color.arg,
                          name = '')
1238 1239
  }
  
majpark21's avatar
majpark21 committed
1240 1241
  return(p.tmp)
}
1242

dmattek's avatar
dmattek committed
1243 1244 1245
#' Plot a scatter plot with an optional linear regression
#'
#' @param dt.arg input of data.table with 2 columns with x and y coordinates
dmattek's avatar
dmattek committed
1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263
#' @param facet.arg
#' @param facet.ncol.arg
#' @param xlab.arg
#' @param ylab.arg
#' @param plotlab.arg
#' @param alpha.arg
#' @param trend.arg
#' @param ci.arg

LOCggplotScat = function(dt.arg,
                         facet.arg = NULL,
                         facet.ncol.arg = 2,
                         xlab.arg = NULL,
                         ylab.arg = NULL,
                         plotlab.arg = NULL,
                         alpha.arg = 1,
                         trend.arg = T,
                         ci.arg = 0.95) {
dmattek's avatar
dmattek committed
1264
  p.tmp = ggplot(dt.arg, aes(x = x, y = y, label = id)) +
dmattek's avatar
dmattek committed
1265
    geom_point(alpha = alpha.arg)
dmattek's avatar
dmattek committed
1266
  
dmattek's avatar
dmattek committed
1267
  if (trend.arg) {
dmattek's avatar
dmattek committed
1268 1269
    p.tmp = p.tmp +
      stat_smooth(
dmattek's avatar
dmattek committed
1270
        method = "lm",
dmattek's avatar
dmattek committed
1271
        fullrange = FALSE,
dmattek's avatar
dmattek committed
1272
        level = ci.arg,
dmattek's avatar
dmattek committed
1273 1274 1275
        colour = 'blue'
      )
  }
dmattek's avatar
dmattek committed
1276
  
dmattek's avatar
dmattek committed
1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296
  if (!is.null(facet.arg)) {
    p.tmp = p.tmp +
      facet_wrap(as.formula(paste("~", facet.arg)),
                 ncol = facet.ncol.arg)
    
  }
  
  if (!is.null(xlab.arg))
    p.tmp = p.tmp +
      xlab(paste0(xlab.arg, "\n"))
  
  if (!is.null(ylab.arg))
    p.tmp = p.tmp +
      ylab(paste0("\n", ylab.arg))
  
  if (!is.null(plotlab.arg))
    p.tmp = p.tmp +
      ggtitle(paste0(plotlab.arg, "\n"))
  
  p.tmp = p.tmp +
dmattek's avatar
dmattek committed
1297 1298 1299 1300 1301 1302 1303
    LOCggplotTheme(
      in.font.base = PLOTFONTBASE,
      in.font.axis.text = PLOTFONTAXISTEXT,
      in.font.axis.title = PLOTFONTAXISTITLE,
      in.font.strip = PLOTFONTFACETSTRIP,
      in.font.legend = PLOTFONTLEGEND
    ) +
1304
    theme(legend.position = "none")
dmattek's avatar
dmattek committed
1305
  
dmattek's avatar
dmattek committed
1306 1307
  return(p.tmp)
}
dmattek's avatar
dmattek committed
1308

1309

dmattek's avatar
dmattek committed
1310
LOCplotHeatmap <- function(data.arg,
dmattek's avatar
dmattek committed
1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324
                           dend.arg,
                           palette.arg,
                           palette.rev.arg = TRUE,
                           dend.show.arg = TRUE,
                           key.show.arg = TRUE,
                           margin.x.arg = 5,
                           margin.y.arg = 20,
                           nacol.arg = 0.5,
                           colCol.arg = NULL,
                           labCol.arg = NULL,
                           font.row.arg = 1,
                           font.col.arg = 1,
                           breaks.arg = NULL,
                           title.arg = 'Clustering') {
1325 1326
  loc.n.colbreaks = 99
  
dmattek's avatar
Mod:  
dmattek committed
1327 1328
  if (palette.rev.arg)
    my_palette <-
dmattek's avatar
dmattek committed
1329
      rev(colorRampPalette(brewer.pal(9, palette.arg))(n = loc.n.colbreaks))
dmattek's avatar
Mod:  
dmattek committed
1330 1331
  else
    my_palette <-
dmattek's avatar
dmattek committed
1332
      colorRampPalette(brewer.pal(9, palette.arg))(n = loc.n.colbreaks)
dmattek's avatar
Mod:  
dmattek committed
1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367
  
  
  col_labels <- get_leaves_branches_col(dend.arg)
  col_labels <- col_labels[order(order.dendrogram(dend.arg))]
  
  if (dend.show.arg) {
    assign("var.tmp.1", dend.arg)
    var.tmp.2 = "row"
  } else {
    assign("var.tmp.1", FALSE)
    var.tmp.2 = "none"
  }
  
  loc.p = heatmap.2(
    data.arg,
    Colv = "NA",
    Rowv = var.tmp.1,
    srtCol = 90,
    dendrogram = var.tmp.2,
    trace = "none",
    key = key.show.arg,
    margins = c(margin.x.arg, margin.y.arg),
    col = my_palette,
    na.col = grey(nacol.arg),
    denscol = "black",
    density.info = "density",
    RowSideColors = col_labels,
    colRow = col_labels,
    colCol = colCol.arg,
    labCol = labCol.arg,
    #      sepcolor = grey(input$inPlotHierGridColor),
    #      colsep = 1:ncol(loc.dm),
    #      rowsep = 1:nrow(loc.dm),
    cexRow = font.row.arg,
    cexCol = font.col.arg,
dmattek's avatar
dmattek committed
1368 1369
    main = title.arg,
    symbreaks = FALSE,
1370
    symkey = FALSE,
dmattek's avatar
dmattek committed
1371 1372 1373 1374
    breaks = if (is.null(breaks.arg))
      NULL
    else
      seq(breaks.arg[1], breaks.arg[2], length.out = loc.n.colbreaks + 1)
dmattek's avatar
Mod:  
dmattek committed
1375 1376 1377 1378
  )
  
  return(loc.p)
}