Server has been upgraded to GitLab release 13.9.6

Commit f68c2305 authored by dmattek

Code reformatting

parent 7daed4c5
......@@ -129,30 +129,42 @@ l.col.pal.dend.2 = list(
# Help text ----
helpText.server = c(
alDataFormat = paste0("<p>Switch between long and wide formats of input data. ",
"TCI accepts CSV or compressed CSV files (gz or bz2).</p>",
"<p><b>Long format</b> - a row is a single data point and consecutive time series are arranged vertically. ",
"Data file should contain at least 3 columns separated with a comma:</p>",
"<li>Identifier of a time series</li>",
"<li>Time points</li>",
"<li>A time-varying variable</li>",
"<br>",
"<p><b>Wide format</b> - a row is a time series with columns as time points.",
"At least 3 columns shuold be present:</p>",
"<li>First two columns in wide format should contain grouping and track IDs</li>",
"<li>A column with a time point. Headers of columns with time points need to be numeric</li>"),
inDataGen1 = paste0("Generate 3 groups with 20 random synthetic time series. ",
"Every time series contains 101 time points. ",
"Track IDs are unique across entire dataset."),
chBtrajRem = paste0("Load CSV file with a column of track IDs for removal. ",
"IDs should correspond to those used for plotting."),
chBstim = paste0("Load CSV file with stimulation pattern. Should contain 5 columns: ",
"grouping, start and end time points of stimulation, start and end of y-position, dummy column with ID."),
chBtrajInter = paste0("Interpolate missing measurements indicated with NAs in the data file. ",
"In addition, interpolate a row that is completely missing from the data. ",
"The interval of the time column must be provided to know which rows are missing."),
chBtrackUni = paste0("If the track ID in the uploaded dataset is unique only within a group (e.g. an experimental condition), ",
"make it unique by prepending other columns to the track ID (typically a grouping column)."),
alDataFormat = paste0(
"<p>Switch between long and wide formats of input data. ",
"TCI accepts CSV or compressed CSV files (gz or bz2).</p>",
"<p><b>Long format</b> - a row is a single data point and consecutive time series are arranged vertically. ",
"Data file should contain at least 3 columns separated with a comma:</p>",
"<li>Identifier of a time series</li>",
"<li>Time points</li>",
"<li>A time-varying variable</li>",
"<br>",
"<p><b>Wide format</b> - a row is a time series with columns as time points.",
"At least 3 columns should be present:</p>",
"<li>First two columns in wide format should contain grouping and track IDs</li>",
"<li>A column with a time point. Headers of columns with time points need to be numeric</li>"
),
inDataGen1 = paste0(
"Generate 3 groups with 20 random synthetic time series. ",
"Every time series contains 101 time points. ",
"Track IDs are unique across entire dataset."
),
chBtrajRem = paste0(
"Load CSV file with a column of track IDs for removal. ",
"IDs should correspond to those used for plotting."
),
chBstim = paste0(
"Load CSV file with stimulation pattern. Should contain 5 columns: ",
"grouping, start and end time points of stimulation, start and end of y-position, dummy column with ID."
),
chBtrajInter = paste0(
"Interpolate missing measurements indicated with NAs in the data file. ",
"In addition, interpolate a row that is completely missing from the data. ",
"The interval of the time column must be provided to know which rows are missing."
),
chBtrackUni = paste0(
"If the track ID in the uploaded dataset is unique only within a group (e.g. an experimental condition), ",
"make it unique by prepending other columns to the track ID (typically a grouping column)."
),
chBgroup = "Select columns to group data according to treatment, condition, etc.",
inSelMath = "Select math operation to perform on a single or two measurement columns,",
chBtimeTrim = "Trim time for further processing.",
......@@ -192,14 +204,19 @@ helpText.server = c(
#'
#' # calculate the mean and CI along the time course
#' calcTrajCI(dt.tmp, 'objCyto_Intensity_MeanIntensity_imErkCor', 'Metadata_RealTime')
LOCcalcTrajCI = function(in.dt,
                         in.col.meas,
                         in.col.by = NULL,
                         in.type = c('normal', 'boot'),
                         ...) {
  # Mean and confidence interval of a measurement column, optionally per
  # group (in.col.by). 'normal' -> Hmisc::smean.cl.normal,
  # 'boot' -> Hmisc::smean.cl.boot; extra arguments in ... are forwarded.
  # Errors (via match.arg) if in.type is not one of the listed choices.
  in.type = match.arg(in.type)

  # match.arg guarantees an exact value, so a plain equality test suffices
  # (no need for data.table's %like% pattern matching on a scalar).
  if (in.type == 'normal') {
    loc.dt = in.dt[, as.list(smean.cl.normal(get(in.col.meas), ...)), by = in.col.by]
  } else {
    loc.dt = in.dt[, as.list(smean.cl.boot(get(in.col.meas), ...)), by = in.col.by]
  }

  return(loc.dt)
}
......@@ -212,11 +229,11 @@ LOCcalcTrajCI = function(in.dt, in.col.meas, in.col.by = NULL, in.type = c('norm
#' @export
#'
#' @examples
LOCstderr = function(x, na.rm = FALSE) {
  # Standard error of the mean: sqrt(var(x) / n).
  # With na.rm = FALSE, any NA in x propagates to the result (via var()).
  if (na.rm) {
    x = na.omit(x)
  }

  return(sqrt(var(x) / length(x)))
}
#' Calculate the power spectrum density for time-series
......@@ -235,38 +252,40 @@ LOCstderr = function(x, na.rm=FALSE) {
#'
#' @examples
LOCcalcPSD <- function(in.dt,
                       in.col.meas,
                       in.col.id,
                       in.col.by,
                       in.method = "pgram",
                       in.return.period = TRUE,
                       in.time.btwPoints = 1,
                       ...) {
  # Power spectrum density per time series (in.col.id), averaged per group
  # (in.col.by). Returns a data.table keyed by group with either a 'period'
  # or a 'frequency' column depending on in.return.period.
  require(data.table)

  # Method "ar" returns $spec as a matrix whereas "pgram" returns a vector;
  # this wrapper homogenizes the output format.
  mySpectrum <- function(x, ...) {
    args_spec <- list(x = x, plot = FALSE)
    inargs <- list(...)
    args_spec[names(inargs)] <- inargs
    out <- do.call(spectrum, args_spec)
    out$spec <- as.vector(out$spec)
    return(out)
  }

  if (!in.method %in% c("pgram", "ar")) {
    # Fixed: error message previously lacked the closing parenthesis.
    stop('Method should be one of: c("pgram", "ar")')
  }

  # Spectrum per individual time series.
  dt_spec <-
    in.dt[, (mySpectrum(get(in.col.meas), plot = FALSE, method = in.method)[c("freq", "spec")]), by = in.col.id]

  # Recover the grouping value of each series (first row per id).
  dt_group <- in.dt[, .SD[1, get(in.col.by)], by = in.col.id]
  setnames(dt_group, "V1", in.col.by)
  dt_spec <- merge(dt_spec, dt_group, by = in.col.id)

  # Average spectra within each group at every frequency.
  dt_agg <-
    dt_spec[, .(spec = mean(spec)), by = c(in.col.by, "freq")]

  if (in.return.period) {
    dt_agg[, period := 1 / freq]
    dt_agg[, freq := NULL]
    # Adjust period unit to go from frame unit to time unit.
    dt_agg[, period := period * in.time.btwPoints]
  } else {
    dt_agg[, freq := freq * (1 / in.time.btwPoints)]
    setnames(dt_agg, "freq", "frequency")
  }

  return(dt_agg)
}
......@@ -286,133 +305,160 @@ LOCcalcPSD <- function(in.dt,
#' @export
#' @import data.table
LOCgenTraj <-
  function(in.ntpts = 60,
           in.ntracks = 10,
           in.nfov = 6,
           in.nwells = 1,
           in.addna = NULL,
           in.addout = NULL) {
    # Synthetic measurements: three equal thirds drawn from normal
    # distributions with increasing mean/sd, one value per
    # (time point, track, FOV) combination.
    x.rand.1 = c(
      rnorm(in.ntpts * in.ntracks * in.nfov * 1 / 3, 0.5, 0.1),
      rnorm(in.ntpts * in.ntracks * in.nfov * 1 / 3, 1, 0.2),
      rnorm(in.ntpts * in.ntracks * in.nfov * 1 / 3, 2, 0.5)
    )
    x.rand.2 = c(
      rnorm(in.ntpts * in.ntracks * in.nfov * 1 / 3, 0.25, 0.1),
      rnorm(in.ntpts * in.ntracks * in.nfov * 1 / 3, 0.5, 0.2),
      rnorm(in.ntpts * in.ntracks * in.nfov * 1 / 3, 1, 0.2)
    )

    # Add NA's at random positions for testing.
    # NOTE(review): round(runif(n) * len) can yield index 0, which is a
    # silent no-op in R, so slightly fewer than in.addna NAs may actually
    # be inserted — confirm whether that is acceptable.
    if (!is.null(in.addna)) {
      locTabLen = length(x.rand.1)
      x.rand.1[round(runif(in.addna) * locTabLen)] = NA
      x.rand.2[round(runif(in.addna) * locTabLen)] = NA
    }

    # Add outliers (fixed value 5) at random positions for testing.
    if (!is.null(in.addout)) {
      locTabLen = length(x.rand.1)
      x.rand.1[round(runif(in.addout) * locTabLen)] = 5
      x.rand.2[round(runif(in.addout) * locTabLen)] = 5
    }

    # Repeating time vector, one full course per track per FOV.
    x.arg = rep(seq(1, in.ntpts), in.ntracks * in.nfov)

    dt.nuc = data.table(
      well = rep(LETTERS[1:in.nwells], each = in.ntpts * in.nfov * in.ntracks / in.nwells),
      group = rep(1:in.nfov, each = in.ntpts * in.ntracks),
      time = x.arg,
      y1 = x.rand.1,
      y2 = x.rand.2,
      posx = runif(
        in.ntpts * in.ntracks * in.nfov,
        min = 0,
        max = 1
      ),
      posy = runif(
        in.ntpts * in.ntracks * in.nfov,
        min = 0,
        max = 1
      ),
      id = rep(1:(in.ntracks * in.nfov), each = in.ntpts)
    )

    return(dt.nuc)
  }
LOCgenTraj2 <-
  function(n_perGroup = 20,
           sd_noise = 0.01,
           sampleFreq = 0.2,
           endTime = 50) {
    # Simulate n time series of an exponentially decaying response to
    # periodic stimulation, with per-trajectory jitter of the stimulation
    # times. Returns a long-format data.table: Time | variable | value.
    sim_expodecay_lagged_stim <-
      function(n,
               noise,
               interval.stim = 5,
               lambda = 0.2,
               freq = 0.2,
               end = 40) {
        require(data.table)

        tvec <- seq(0, end, by = freq)
        stim_time <- seq(interval.stim, end, interval.stim)

        # One column per trajectory; jitter each stimulation time with
        # half-normal noise so responses are lagged per trajectory.
        stim_time_matrix <-
          matrix(stim_time, nrow = length(stim_time),
                 ncol = n)
        noise_matrix <- abs(replicate(n, rnorm(
          n = length(stim_time),
          mean = 0,
          sd = noise
        )))
        stim_time_matrix <- stim_time_matrix + noise_matrix

        # Mark the first time point at/after each stimulation with a 1.
        # NOTE(review): which(...)[1] is NA when a jittered stimulation
        # time exceeds max(tvec); the assignment would then error —
        # confirm parameter ranges keep stimulation times within [0, end].
        trajs <- matrix(0, nrow = length(tvec), ncol = n)
        for (col in 1:ncol(stim_time_matrix)) {
          for (row in 1:nrow(stim_time_matrix)) {
            index <- which(tvec >= stim_time_matrix[row, col])[1]
            trajs[index, col] <- 1
          }
        }

        # Exponential decay between stimulations.
        decrease_factor <- exp(-lambda * freq)
        for (col in 1:ncol(trajs)) {
          for (row in 2:nrow(trajs)) {
            if (trajs[row, col] != 1) {
              trajs[row, col] <- trajs[row - 1, col] * decrease_factor
            }
          }
        }

        # Convert to long format with an explicit Time column.
        trajs <- as.data.table(trajs)
        trajs <- cbind(seq(0, end, by = freq), trajs)
        colnames(trajs)[1] <- "Time"
        trajs <- melt(trajs, id.vars = "Time")
        return(trajs)
      }

    # Dataset creation -----------------------------------------------
    dt1 <-
      sim_expodecay_lagged_stim(
        n = n_perGroup,
        noise = 0.75,
        interval.stim = 10,
        lambda = 0.4,
        freq = sampleFreq,
        end = endTime
      )
    dt2 <-
      sim_expodecay_lagged_stim(
        n = n_perGroup,
        noise = 0.75,
        interval.stim = 10,
        lambda = 0.1,
        freq = sampleFreq,
        end = endTime
      )
    dt3 <-
      sim_expodecay_lagged_stim(
        n = n_perGroup,
        noise = 0.75,
        interval.stim = 10,
        lambda = 0.4,
        freq = sampleFreq,
        end = endTime
      )

    # Third group: same decay rate as dt1 but a third of the amplitude.
    dt3[, value := value / 3]

    dt1[, Group := "fastDecay"]
    dt2[, Group := "slowDecay"]
    dt3[, Group := "lowAmplitude"]

    dt <- rbindlist(list(dt1, dt2, dt3))

    # Unique track IDs: group name + per-group trajectory number
    # (number extracted from the melt() column name, e.g. "V12" -> 12).
    dt[, ID := sprintf("%s_%02d", Group, as.integer(gsub('[A-Z]', '', variable)))]
    dt[, variable := NULL]
    dt[, Group := as.factor(Group)]

    # Per-trajectory vertical offset, then pointwise Gaussian noise.
    dt[, value := value + runif(1, -0.1, 0.1), by = .(Group, ID)]
    noise_vec <- rnorm(n = nrow(dt), mean = 0, sd = sd_noise)
    dt[, value := value + noise_vec]

    setnames(dt, "value", "Meas")
    setcolorder(dt, c("Group", "ID", "Time", "Meas"))

    return(dt)
  }
#' Normalize Trajectory
#'
......@@ -436,13 +482,13 @@ LOCgenTraj2 <- function(n_perGroup = 20, sd_noise = 0.01, sampleFreq = 0.2, endT
#' @import data.table
LOCnormTraj = function(in.dt,
in.meas.col,
in.rt.col = COLRT,
in.rt.min = 10,
in.rt.max = 20,
in.by.cols = NULL,
in.robust = TRUE,
in.type = 'z.score') {
in.meas.col,
in.rt.col = COLRT,
in.rt.min = 10,
in.rt.max = 20,
in.by.cols = NULL,
in.robust = TRUE,
in.type = 'z.score') {
loc.dt <-
copy(in.dt) # copy so as not to alter original dt object w intermediate assignments
......@@ -540,67 +586,115 @@ getDataClSpar = function(in.dend, in.k, in.id) {
# prepares a table with cluster numbers in 1st column and colour assignments in 2nd column
# the number of rows is determined by dendrogram cut
getClCol <- function(in.dend, in.k) {
  # Table of cluster numbers (1st column) and colour assignments
  # (2nd column); the number of rows is determined by the dendrogram cut.
  # Leaf branch colours, reordered back to the original data order.
  loc.col_labels <- get_leaves_branches_col(in.dend)
  loc.col_labels <- loc.col_labels[order(order.dendrogram(in.dend))]

  # One unique row per cluster: its number and its colour.
  return(unique(
    data.table(
      cl.no = dendextend::cutree(in.dend, k = in.k, order_clusters_as_data = TRUE),
      cl.col = loc.col_labels
    )
  ))
}
# Cluster validation ----
#Customize factoextra functions to accept dissimilarity matrix from start. Otherwise can't use distance functions that are not in base R, like DTW.
# Inherit and adapt hcut function to take input from UI, used for fviz_clust
LOChcut <-
  function(x,
           k = 2,
           isdiss = inherits(x, "dist"),
           hc_func = "hclust",
           hc_method = "average",
           hc_metric = "euclidean") {
    # Adapter around factoextra::hcut that only accepts a precomputed
    # dissimilarity matrix; this allows distance measures not available
    # in base R (e.g. DTW) to be used with fviz_clust.
    if (!inherits(x, "dist")) {
      stop("x must be a distance matrix")
    }

    # isdiss is forced TRUE downstream; the parameter is kept only for
    # signature compatibility with factoextra::hcut.
    return(
      factoextra::hcut(
        x = x,
        k = k,
        isdiss = TRUE,
        hc_func = hc_func,
        hc_method = hc_method,
        hc_metric = hc_metric
      )
    )
  }
# Modified from factoextra::fviz_nbclust
# Allow (actually enforce) x to be a distance matrix; no GAP statistics for compatibility
LOCnbclust <- function (x, FUNcluster = LOChcut, method = c("silhouette", "wss"), k.max = 10, verbose = FALSE,
barfill = "steelblue", barcolor = "steelblue", linecolor = "steelblue",
print.summary = TRUE, ...)
{
set.seed(123)
if (k.max < 2)
stop("k.max must bet > = 2")
method = match.arg(method)
if (!inherits(x, c("dist")))
stop("x should be an object of class dist")
else if (is.null(FUNcluster))
stop("The argument FUNcluster is required. ", "Possible values are kmeans, pam, hcut, clara, ...")
else if (method %in% c("silhouette", "wss")) {
diss <- x # x IS ENFORCED TO BE A DISSIMILARITY MATRIX
v <- rep(0, k.max)
if (method == "silhouette") {
for (i in 2:k.max) {
clust <- FUNcluster(x, i, ...)
v[i] <- factoextra:::.get_ave_sil_width(diss, clust$cluster)
LOCnbclust <-
function (x,
FUNcluster = LOChcut,
method = c("silhouette", "wss"),
k.max = 10,
verbose = FALSE,
barfill = "steelblue",
barcolor = "steelblue",
linecolor = "steelblue",
print.summary = TRUE,
...)
{
set.seed(123)
if (k.max < 2)
stop("k.max must be >= 2")
method = match.arg(method)
if (!inherits(x, c("dist")))
stop("x should be an object of class dist")
else if (is.null(FUNcluster))
stop(
"The argument FUNcluster is required. ",
"Possible values are kmeans, pam, hcut, clara, ..."
)
else if (method %in% c("silhouette", "wss")) {
diss <- x # x IS ENFORCED TO BE A DISSIMILARITY MATRIX
v <- rep(0, k.max)
if (method == "silhouette") {
loc.mainlab = "Optimal number of clusters from silhouette analysis"
loc.ylab <- "Average silhouette width"
for (i in 2:k.max) {
clust <- FUNcluster(x, i, ...)
v[i] <-
factoextra:::.get_ave_sil_width(diss, clust$cluster)
}
}
}
else if (method == "wss") {
for (i in 1:k.max) {
clust <- FUNcluster(x, i, ...)
v[i] <- factoextra:::.get_withinSS(diss, clust$cluster)
else if (method == "wss") {
loc.mainlab = "Optimal number of clusters from within cluster sum of squares"
loc.ylab <- "Total within cluster sum of squares"
for (i in</