Commit fd26e47c authored by Mauro Gwerder's avatar Mauro Gwerder

Init commit

parents
.Rproj.user
.Rhistory
.RData
.Ruserdata
downPlotInput <- function(id, label = "DownPlot") {
ns <- NS(id)
fluidRow(
downloadButton(ns('downPlot'), 'Download Overview Plot')
)
}
downPlot <- function(input, output, session, in.plot) {
ns <- session$ns
output$downPlot <- downloadHandler(
filename = "downloadPlot.png",
content = function(file) {
png(file)
print(in.plot())
dev.off()
})
}
require(reshape2)
require(gplots)
require(RColorBrewer)
require(dendextend)
source('~/UNIBE 6. Semester/Bachelor project/Pertz/rolling_window_loop.R')
# heatmap function ----
heatmap.outl <- function(data, # should be DT
dist.method = "euclidian", # method of distance measurement
hclust.method = "single", # method of clustering
col_in = "Greens", # color palette for colorRampPalette()
plot = "heatmap",# if a heatmap of only the dendrogram should be plotted
in.list,
trim.pos = 1 # input that determines at which branching event the tree is cut, starting with the first event at 1
) {
l.cols <- in.list
data.loc <- data #converts data to local environment
# creates local matrix: rows = id numbers, cols = timepoints
mat.loc <- acast(data.loc, data.loc[[l.cols$id]] ~ data.loc[[l.cols$time]], value.var = l.cols$meas)
distfun.loc <- function(x) dist(x, method = dist.method) # function for changing the distance-measurement method
hclustfun.loc <- function(x) hclust(x, method = hclust.method) #function for changing the hclustering method
palette.loc <- colorRampPalette(brewer.pal(9, rev(col_in)))(n = 99) #color palette. package: RColorBrewer
hc.rows <- hclust(dist(mat.loc, method = dist.method), method = hclust.method) #stores information about branching heights
hc.height <- rev(hc.rows$height)[trim.pos + 1] # here we get the height output of the first branching event
ct.loc <- cutree(hclust(dist(mat.loc, method = dist.method), method = hclust.method), h = hc.height) # contains information about grouping after treecutting
ct.uniq <- length(unique(ct.loc)) # to see how many groups there are
dend <- as.dendrogram(hclust(dist(mat.loc, method = dist.method), method = hclust.method), h = hc.height) #dendrogram is needed to customize the colours
d1 <- color_branches(dend, k = ct.uniq, col = c(rep(2,(ct.uniq-1)),1)) # ct.uniq can be used to differ between outlier groups.
col_labels <- get_leaves_branches_col(d1)
col_labels <- col_labels[order(order.dendrogram(d1))] # label ordering for heatmap.2 function
if (plot == "heatmap") { # plots heatmap
heatmap.2(mat.loc,
Rowv = d1,
Colv = FALSE,
dendrogram = "row",
trace = "none",
distfun = distfun.loc,
hclustfun = hclustfun.loc,
col = palette.loc,
colRow = col_labels
)
} else {
if (plot == "dendrogram") {
# plots only the dendrogram
plot(d1)
}
}
return (names(which(ct.loc > 1))) #returns position of detected trajectories
}
heatmapforVisual <- function(data, # should be DT
dist.method = "euclidian", # method of distance measurement
hclust.method = "single", # method of clustering
col_in = "heat.colors", # color palette for colorRampPalette()
plot = "heatmap", # if a heatmap of only the dendrogram should be plotted
breaks.in = 16, # how many breaks
in.list,
break.man = F, # T if breaks should be assigned manually. used for fitting breaks to custom color palette
break.thresh, #
trim.pos = 1 # input that determines at which branching event the tree is cut, starting with the first event at 1
) {
l.cols <- in.list
data.loc <- data #converts data to local environment
# creates local matrix: rows = id numbers, cols = timepoints
mat.loc <- acast(data.loc, data.loc[[l.cols$id]] ~ data.loc[[l.cols$time]], value.var = l.cols$meas)
distfun.loc <- function(x) dist(x, method = dist.method) # function for changing the distance-measurement method
hclustfun.loc <- function(x) hclust(x, method = hclust.method) #function for changing the hclustering method
if(break.man){
# Fits breaks to normal data such that a difference between outliers and normal data can be made
breaks.out <- c(max(mat.loc[which(mat.loc < break.thresh + 0.1)]) * (seq(0, 1, 1 / breaks.in)), break.thresh + 0.2)
} else {
breaks.out <- breaks.in
}
hc.rows <- hclust(dist(mat.loc, method = dist.method), method = hclust.method) #stores information about branching heights
hc.height <- rev(hc.rows$height)[trim.pos + 1] # here we get the height output of the first branching event
ct.loc <- cutree(hclust(dist(mat.loc, method = dist.method), method = hclust.method), h = hc.height) # contains information about grouping after treecutting
ct.uniq <- length(unique(ct.loc)) # to see how many groups there are
dend <- as.dendrogram(hclust(dist(mat.loc, method = dist.method), method = hclust.method), h = hc.height) #dendrogram is needed to customize the colours
d1 <- color_branches(dend, k = ct.uniq, col = c(rep(2,(ct.uniq-1)),1)) # ct.uniq can be used to differ between outlier groups.
col_labels <- get_leaves_branches_col(d1)
col_labels <- col_labels[order(order.dendrogram(d1))] # label ordering for heatmap.2 function
if (plot == "heatmap") {
# plots heatmap
heatmap.2(mat.loc,
Rowv = TRUE,
Colv = FALSE,
dendrogram = "row",
trace = "none",
distfun = distfun.loc,
hclustfun = hclustfun.loc,
col = col_in,
breaks = breaks.out,
colRow = col_labels
)
} else {
if (plot == "dendrogram") {
# plots only the dendrogram
plot(d1)
}
}
return (as.numeric(names(which(ct.loc > 1)))) #returns position of detected trajectories
}
require(imputeTS)
interpolComplete <- function(data, method = "linear") {
if (method == "linear") {
interpData <- na.interpolation(data, option = "linear")
}
if (method == "spline") {
interpData <- na.interpolation(data, option = "spline")
}
if (method == "stine") {
interpData <- na.interpolation(data, option = "stine")
}
if (method == "mean") {
interpData <- na.mean(data, option = "mean")
}
if (method == "median") {
interpData <- na.mean(data, option = "median")
}
if (method == "mode") {
interpData <- na.mean(data, option = "mode")
}
if (method == "Kalman") {
interpData <- na.kalman(data)
}
if (method == "Last Observation Carried Forward") {
interpData <- na.locf(data, option = "locf")
}
if (method == "Next Observation Carried Backward") {
interpData <- na.locf(data, option = "nocb")
}
if (method == "Simple Moving Average") {
interpData <- na.ma(data, weighting = "simple")
}
if (method == "Linear Weighted Moving Average") {
interpData <- na.ma(data, weighting = "linear")
}
if (method == "Exponential Weighted Moving Average") {
interpData <- na.ma(data, weighting = "exponential")
}
if (method == "Seasonally Decomposed Missing Value Imputation") {
interpData <- na.seadec(data)
}
if (method == "random sample") {
interpData <- na.random(data)
}
return(interpData)
}
#'
#' Outlier Detection App: Application to identify outliers in time-series data.
#' Author: Mauro Gwerder
#'
#' The Module "HierClust" is clustering the data hierarchically and creates a
#' heatmap (using "heatmap.2" from the package "gplots") for visualization.
#' Detected outlier-containing trajectories can be verified and kicked out one
#' by one by the researcher.
#'
HierClusterInput <- function(id,label = "HierClust"){
# gives all labels into a namespace (can I say it like that?)
ns <- NS(id)
fluidRow(
# plots the heatmap for an overview of the clustering
box(title = "Heatmap",
plotOutput(ns("plot.heat"), height = "2000px"),
# If there was a mistake with clustering or removing, this will reset the loaded dataset.
actionButton(ns("b.reset"),"Reset Data"),
#downPlotInput(ns("downPlot")),
downloadButton(ns('downPlot'), 'Download Overview Plot'),
width = 7),
# Selectable options for the function "heatmap.2"
box(title = "Options for heatmap", selectInput(ns("sel.plot"), "heatmap of dendrogram?",
choices = c("heatmap","dendrogram"), selected = "heatmap"), # select plot output
selectInput(ns("sel.hclust"), "select clustering method",
choices = c("single", "ward.D2", "complete", "average", "median"),
selected = "single"),
selectInput(ns("sel.dist"), "select distance method",
choices = c("euclidean", "maximum", "manhattan", "canberra" ,"binary", "minkowski"),
selected = "euclidean"),
selectInput(ns("sel.col"), "select colour scheme",
# need to still change this to maciej's "l.col.pal.dend" palette options
choices = c("Greens", "Spectral", "RdYlGn", "BrBG", "Greys"), selected = "Spectral"),
br(),
#returns a .csv file with all the trajectories that were removed
downloadButton(ns("downOutl"), "print removed trajectories as .csv"),
width = 4),
# Plots single trajectories for verification.
box(title = "Trajectories", plotOutput(ns("plot.traj"), height = 250),
actionButton(ns("b.prevtraj"), "previous trajectory"),
actionButton(ns("b.nxtraj"),"next trajectory"),
actionButton(ns("b.remove.left"), "remove trajectories (left)"),
actionButton(ns("b.remove.right"),"remove trajectories (right)"),
width = 7)
)
}
HierCluster <- function(input, output, session, in.data) {
ns <- session$ns
# Reactive values: "Rval$trajStatus" stores a vector of flags for keeping track of which
# trajectories were already removed.
# All reactive values with "counter" in their name are used to keep track
# of which trajectory should be displayed for verification.
Rval <- reactiveValues(trajStatus = NULL,
countRemoveL = isolate(input$b.remove.left),
countRemoveR = isolate(input$b.remove.right),
countPrev = isolate(input$b.prevtraj),
countNxt = isolate(input$b.nxtraj),
countReset = isolate(input$b.reset),
count = 0)
# Conversion of the universal column names into a list. Needed for the "heatmap.outliers" function
l.cols <- list()
l.cols$id <- 'ID'
l.cols$time <- 'TIME'
l.cols$meas <- 'MEAS'
l.cols$fov <- 'FOV'
# This reactive environment keeps track of which trajectories are selected for verification in "plot.traj".
# It also resets this selection whenever the reset button or the remove button are pressed.
# Inside the if-statements for "reset" and "remove", there are some deliberate side effects.
# I decided to do it this way because these side-effects are changing a reactive value (trajStatus) that cannot
# be returned using a reactive. This is an alternative to observeEvent() and is combined with the selection-
# reset.
branchCount <- reactive({
ns <- session$ns
cat("branchCount\n")
InPrevTraj <- input$b.prevtraj
InNxtTraj <- input$b.nxtraj
InRemoveL <- input$b.remove.left
InRemoveR <- input$b.remove.right
InResetTraj <- input$b.reset
InTrajStatus <- Rval$trajStatus
InCount <- Rval$count
# checks if and which button was pressed most recently and affects the object "count" respectively.
if (InNxtTraj != isolate(Rval$countNxt)) {
Rval$count <- InCount + 1
cat("branchCount if InNxtTraj\n")
Rval$countNxt <- InNxtTraj
} else if (InPrevTraj != isolate(Rval$countPrev)) {
Rval$count <- InCount - 1
cat("branchCount if InPrevTraj\n")
Rval$countPrev <- InPrevTraj
} else if (InRemoveL != isolate(Rval$countRemoveL)) {
Rval$count <- 0
cat("branchCount if InRemoveL\n")
# converts selected trajectories (marked inside the vector with a "1") to removed
# trajectories (marked with a "2")
InTrajStatus[which(InTrajStatus %in% 1)] <- 2
Rval$trajStatus <- InTrajStatus
Rval$countRemoveL <- InRemoveL
} else if (InRemoveR != isolate(Rval$countRemoveR)) {
Rval$count <- 1
cat("branchCount if InRemoveR\n")
# converts selected trajectories (marked inside the vector with a "0") to removed
# trajectories (marked with a "2")
InTrajStatus[which(InTrajStatus %in% 0)] <- 2
Rval$trajStatus <- InTrajStatus
Rval$countRemoveR <- InRemoveR
} else if (InResetTraj != isolate(Rval$countReset)) {
Rval$count <- 0
cat("branchCount if InResetTraj\n")
Rval$countReset <- InResetTraj
# causes the vector with information about removed and selected trajectories to reset.
Rval$trajStatus <- rep(0, length(Rval$trajStatus))
}
OutCount <- Rval$count
# because negative counts do not make sense for trajectory selection, these values will be
# converted to "0".
if (OutCount < 0)
OutCount <- 0
return(OutCount)
})
IDnames <- reactive({
ns <- session$ns
cat("calling IDnames\n")
dm.in <- in.data()
InTrajStatus <- Rval$trajStatus
if(is.null(dm.in))
return(NULL)
l.id <- list()
l.id$complete <- as.vector(dm.in[, unique(ID)])
l.id$active <- l.id$complete[which(InTrajStatus %in% c(0,1))]
l.id$outl <- l.id$complete[which(InTrajStatus %in% 1)]
l.id$update <- l.id$complete[which(InTrajStatus %in% 0)]
l.id$remove <- l.id$complete[which(InTrajStatus %in% 2)]
return(l.id)
})
# This reactive will return IDs of selected trajectories. The selection will be determined by trimming the
# dendrogram step by step. The reactive "branchCount" will determine on which branching the tree will be
# trimmed.
hierCut <- reactive({
ns <- session$ns
cat("calling hierCut\n")
dm.in <- in.data()
InBranchCount <- branchCount()
InIDnames <- IDnames()
InHclustSel <- input$sel.hclust
InDistSel <- input$sel.dist
InTrajStatus <- Rval$trajStatus
if (is.null(dm.in)) {
return(NULL)
}
# As a side effect, this if-statement checks whether the tree isn't cut at all and if any data was removed.
# If this is the case, the vector stored in "Rval$trajStatus" can be initialised.
if(InBranchCount == 0 && is.null(InTrajStatus[which(InTrajStatus %in% 2)])){
cat("checkCut\n")
InTrajStatus <- rep(0, length(dm.in[, unique(ID)]))
Rval$trajStatus <- InTrajStatus
return(dm.in)
}
# Only IDs that weren't removed (So trajectories that have either the flag "0" or "1" in "Rval$trajStatus")
# can be returned.
currentValues <- which(InTrajStatus %in% c(0, 1))
dm.cut <- dm.in[ID %in% InIDnames$active]
cat("active ID names: ", InIDnames$active, "\n")
dm.out <- heatmap.outl(dm.cut, plot = F, trim.pos = InBranchCount, in.list = l.cols, dist.method = InDistSel,
hclust.method = InHclustSel)
return(dm.out)
})
# Removed Data will be excluded in this reactive.
groupedData <- reactive({
ns <- session$ns
cat("updating groupvector\n")
dm.in <- in.data()
cutreeNames <- hierCut()
InIDnames <- IDnames()
InTrajStatus <- Rval$trajStatus
if (is.null(dm.in))
return(NULL)
# This reactive will only return something if Rval$trajStatus was already initialised in "hierCut()"
if (is.null(InTrajStatus))
return(NULL)
cutreeValues <- which(InIDnames$complete %in% cutreeNames)
oldValues <- which(InTrajStatus %in% c(0, 1))
newValues <- oldValues[which(oldValues %in% cutreeValues)]
emptyValues <- oldValues[-which(oldValues %in% cutreeValues)]
# as a side effect, "Rval$trajStatus" will be overwritten with the updated selection
InTrajStatus[newValues] <- 1
InTrajStatus[emptyValues] <- 0
Rval$trajStatus <- InTrajStatus
dm.ID <- InIDnames$active[which(InTrajStatus %in% c(0,1))]
dm.out <- dm.in[ID %in% dm.ID]
return(dm.out)
})
IDnamesRemove <- reactive({
ns <- session$ns
cat("IDnamesRemove\n")
InIDnames <- IDnames()
InTrajStatus <- Rval$trajStatus
if(is.null(InIDnames))
return(NULL)
selGroup <- InTrajStatus[InTrajStatus %in% 2]
selGroupRemove <- which(selGroup %in% 2)
IDnamesRemove <- InIDnames$remove
return(IDnamesRemove)
})
output$downOutl <- downloadHandler(
filename = "Removed_Trajectories.csv",
content = function(file) {
write.csv(x = IDnamesRemove(), file = file, row.names = FALSE)
})
# plots a clustered heatmap with a coloured dendrogram which gives information about the selected
# trajectories.
# Function instead of reactive as per:
# http://stackoverflow.com/questions/26764481/downloading-png-from-shiny-r
plotHierHeat <- function() {
ns <- session$ns
cat("hierHeatMap\n")
InPlotSel <- input$sel.plot
InHclustSel <- input$sel.hclust
InDistSel <- input$sel.dist
InColourSel <- input$sel.col
InBranchCount <- branchCount()
dm.in <- groupedData()
if (is.null(dm.in))
return(NULL)
dm.out <- heatmap.outl(dm.in, plot = InPlotSel,
dist.method = InDistSel,
hclust.method = InHclustSel,
col_in = InColourSel,
trim.pos = InBranchCount,
in.list = l.cols )
return(dm.out)
}
output$downPlot <- downloadHandler(
filename = "downloadPlot.pdf",
content = function(file){
pdf(file,
width = 8.5,
height = 7)
print(plotHierHeat())
dev.off()
})
# Returns two plots (using gridExtra): plot1 will return the selected trajectories for verification.
# The second plot will return all the remaining plots (not sure if needed). They are divided using
# their flags stored in "Rval$trajStatus" as a vector.
plotHierTraj <- reactive({
ns <- session$ns
cat("hierPlot\n")
dm.in <- groupedData()
InIDlist <- IDnames()
InTrajStatus <- Rval$trajStatus
if (is.null(dm.in))
return(NULL)
# Excludes removed trajectories from the vector, such that the vector and the datatable have the same length.
if(is.null(InIDlist$update))
return(NULL)
dm.outliers <- dm.in[ID %in% InIDlist$outl]
dm.update <- dm.in[ID %in% InIDlist$update]
# plots selected trajectory
plot1 <- ggplot(dm.outliers, aes(x = TIME, y = MEAS, group = ID)) +
ggtitle(paste("traj", dm.outliers[, unique(ID)])) +
geom_line()
# plots all the remaining trajectories
plot2 <- ggplot(dm.update, aes(x = TIME, y = MEAS, group = ID)) +
geom_line()
# arranges plots side to side
dm.out <- grid.arrange(plot1, plot2, ncol = 2)
return(dm.out)
})
output$plot.heat <- renderPlot({
plotHierHeat()
})
output$plot.traj <- renderPlot({
plotHierTraj()
})
#callModule(downPlot, "downPlot", in.plot = plotHierHeat)
}
#'
#' Outlier Detection App: Application to identify outliers in time-series data.
#' Author: Mauro Gwerder
#'
#' In this main file, the input data is processed for feeding it into the modules.
#' UI & Server are combined in this file
#'
#' package requirements:
library(shiny)
library(shinydashboard)
library(shinyjs) # http://deanattali.com/shinyjs/
library(ggplot2)
library(data.table)
library(gplots) # for heatmap.2
library(dendextend) # for colouring dendrogram branches
library(RColorBrewer) # colour palettes for heatmap.2
library(gridExtra) # presentation of several plots in a grid
library(imputeTS) # for interpolating NAs
options(shiny.maxRequestSize = 400 * 1024 ^ 2)
source("~/UNIBE 6. Semester/Bachelor project/Pertz/hierclust/hierclustfunction.R")
source("~/UNIBE 6. Semester/Bachelor project/Pertz/functions/interpolCompleteFunc.R")
source("~/UNIBE 6. Semester/Bachelor project/Pertz/reactiveAPP/reacRollWinMODULE.R")
source("~/UNIBE 6. Semester/Bachelor project/Pertz/reactiveAPP/reacHierClustMODULE.R")
source("~/UNIBE 6. Semester/Bachelor project/Pertz/reactiveAPP/downPlotModule.R")
ui <- dashboardPage( # starts shiny in dashboard
dashboardHeader(
# Application title
title ="Outlier Detection"
),
dashboardSidebar(
# dashboard-equivalent to "tabs" in normal shiny
sidebarMenu(
# Every item stands for one tab. "Rolling Window" and "hierarchical clustering" will show outputs of the
# correspondent modules, whereas "Generate synthetic Data" & "Load Data" represent dropdown-menus that load
# or generate datasets.
menuItem("Rolling Window", tabName = "rollwindow", icon = icon("windows")),
menuItem("hierarchical clustering", tabName = "hiercluster", icon = icon("tree")),
menuItem("Generate synthetic Data", tabName = "synDataOpt", icon = icon("random"), # tab for synthetic Data options
sliderInput("slider.syn", "amount of outliers", 0, 30, 1),