From ac46afa97611d499e1d7867b684628050152738a Mon Sep 17 00:00:00 2001 From: dmattek Date: Fri, 4 Oct 2019 23:45:49 +0200 Subject: [PATCH] Initial commit --- modules/tabClValid.R | 344 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 344 insertions(+) create mode 100644 modules/tabClValid.R diff --git a/modules/tabClValid.R b/modules/tabClValid.R new file mode 100644 index 0000000..e9a66e2 --- /dev/null +++ b/modules/tabClValid.R @@ -0,0 +1,344 @@ +# +# Time Course Inspector: Shiny app for plotting time series data +# Author: Maciej Dobrzynski +# +# This module is a tab for hierarchical clustering (base R hclust + dist) + +helpText.clValid = c(alertNAsPresentDTW = paste0("NAs present. DTW cannot calculate the distance. ", + "NAs and missing data can be interpolated by activating the option in the left panel. ", + "If outlier points were removed, activate \"Interpolate gaps\" or ", + "decrease the threshold for maximum allowed gap length. ", + "The latter will result in entire trajectories with outliers being removed."), + alertNAsPresent = paste0("NAs present. The selected distance measure will work with missing data, ", + "however caution is recommended. NAs and missing data can be interpolated by activating the option in the left panel. ", + "If outlier points were removed, activate \"Interpolate gaps\" or ", + "decrease the threshold for maximum allowed gap length. ", + "The latter will result in entire trajectories with outliers being removed."), + alLearnMore = paste0("

Clustering is an unsupervised machine learning method for partitioning ", + "dataset into a set of groups or clusters. The procedure will return clusters ", + "even if the data does not contain any! ", + "Therefore, it’s necessary to ", + "assess clustering tendency before the analysis, and ", + "validate the quality of the result after clustering.

", + "

Relative validation, evaluates the clustering structure ", + "by varying different parameter values for the same algorithm ", + "(e.g. varying the number of clusters k). Typically used for ", + "determining the optimal number of clusters.

", + "

Internal validation, uses the internal information of the clustering process ", + "to evaluate the goodness of a clustering structure without reference to external information. ", + "It can be also used for estimating the number of clusters and the appropriate clustering algorithm ", + "without any external data.

", + "

External validation, compares the results of a cluster analysis ", + "to an externally known result, such as externally provided class labels. ", + "Since we know the “true” cluster number in advance, ", + "this approach is mainly used for selecting the right clustering algorithm for a specific dataset.

", + "

Stability validation, is a special version of internal validation. ", + "It evaluates the consistency of a clustering result by comparing it with the clusters obtained ", + "after each column is removed, one at a time.

"), + outPlotWss = "Weighted squared sum...", + outPlotSilhAvg = "Average...", + outPlotTree = "Dendrogram...", + outPlotSilhForCut = "Silhouette plot at dendrogram cut...") + + +# UI ---- +clustValidUI <- function(id, label = "Validation") { + ns <- NS(id) + + tagList( + h4('Cluster validation'), + actionLink(ns("alLearnMore"), "Learn more"), + br(), + br(), + fluidRow( + column(3, + selectInput( + ns("selectDiss"), + label = ("Dissimilarity measure"), + choices = list("Euclidean" = "euclidean", + "Manhattan" = "manhattan", + "Maximum" = "maximum", + "Canberra" = "canberra", + "DTW" = "DTW"), + selected = 1 + ), + bsAlert("alertAnchorClHierNAsPresent") + ), + column(3, + selectInput( + ns("selectLinkage"), + label = ("Linkage method"), + choices = list( + "Average" = "average", + "Complete" = "complete", + "Single" = "single", + "Centroid" = "centroid", + "Ward" = "ward.D", + "Ward D2" = "ward.D2", + "McQuitty" = "mcquitty" + ), + selected = 2 + ) + ) + ), + + br(), + tabsetPanel( + tabPanel("Relative", + br(), + fluidRow( + column(2, + actionButton(ns('butPlotRel'), 'Validate!') + ), + column(6, + sliderInput( + ns('slClValidMaxClust'), + 'Maximum number of clusters to validate', + min = 2, + max = 20, + value = 10, + step = 1, + ticks = TRUE, + round = TRUE + ) + ) + ), + br(), + withSpinner(plotOutput(ns('outPlotSilhAvg'))), + bsTooltip(ns('outPlotSilhAvg'), helpText.clValid[["outPlotSilhAvg"]], + placement = "top", trigger = "hover", options = NULL), + br(), + withSpinner(plotOutput(ns('outPlotWss'))), + bsTooltip(ns('outPlotWss'), helpText.clValid[["outPlotWss"]], + placement = "top", trigger = "hover", options = NULL) + + ), + tabPanel("Internal", + br(), + fluidRow( + column(2, + actionButton(ns('butPlotInt'), 'Validate!') + ), + column(6, + sliderInput( + ns('slClValidNclust'), + 'Number of dendrogram branches to cut', + min = 2, + max = 20, + value = 1, + step = 1, + ticks = TRUE, + round = TRUE + ) + ) + ), + br(), + withSpinner(plotOutput(ns('outPlotTree'))), + bsTooltip(ns('outPlotTree'), helpText.clValid[["outPlotTree"]], + placement = "top", trigger = "hover", options = NULL), + br(), + withSpinner(plotOutput(ns('outPlotSilhForCut'))), + bsTooltip(ns('outPlotSilhForCut'), helpText.clValid[["outPlotSilhForCut"]], + placement = "top", trigger = "hover", options = NULL) + ) + ) + ) +} + +# SERVER ---- +clustValid <- function(input, output, session, in.data4clust) { + + ns = session$ns + + # calculate distance matrix for further clustering + # time series arranged in rows with columns corresponding to time points + userFitDistHier <- reactive({ + cat(file = stderr(), 'clustValid:userFitDistHier \n') + + loc.dm = in.data4clust() + + if (is.null(loc.dm)) { + return(NULL) + } + + # Throw some warnings if NAs present in the dataset. + # DTW cannot compute distance when NA's are preset. + # Other distance measures can be calculated but caution is required with interpretation. + if(sum(is.na(loc.dm)) > 0) { + if (input$selectPlotHierDiss == "DTW") { + createAlert(session, "alertAnchorClHierNAsPresent", "alertNAsPresentDTW", title = "Error", + content = helpText.clHier[["alertNAsPresentDTW"]], + append = FALSE, + style = "danger") + return(NULL) + } else { + createAlert(session, "alertAnchorClHierNAsPresent", "alertNAsPresent", title = "Warning", + content = helpText.clHier[["alertNAsPresent"]], + append = FALSE, + style = "warning") + closeAlert(session, 'alertNAsPresentDTW') + } + } else { + closeAlert(session, 'alertNAsPresentDTW') + closeAlert(session, 'alertNAsPresent') + } + + # calculate distance matrix + + return(dist(loc.dm, method = input$selectPlotHierDiss)) + }) + + + calcDendCut = reactive({ + cat(file = stderr(), 'clustValid:calcDendCut \n') + + loc.dmdist = userFitDistHier() + + if (is.null(loc.dmdist)) { + return(NULL) + } + + return(LOChcut(x = loc.dmdist, + k = input$slClValidNclust, + hc_func = "hclust", + hc_method = input$selectLinkage, hc_metric = input$selectDiss)) + }) + + # Function instead of reactive as per: + # http://stackoverflow.com/questions/26764481/downloading-png-from-shiny-r + # This function is used to plot and to downoad a pdf + + # plot average silhouette + plotSilhAvg <- function() { + + loc.dmdist = userFitDistHier() + + if (is.null(loc.dmdist)) { + return(NULL) + } + + loc.p = LOCnbclust(x = loc.dmdist, + FUNcluster = LOChcut, + method = "silhouette", + verbose = TRUE, + k.max = input$slClValidMaxClust, + hc_metric = input$selectDiss, + hc_method = input$selectLinkage) + return(loc.p) + } + + # plot Ws + plotWss <- function() { + + loc.dmdist = userFitDistHier() + + if (is.null(loc.dmdist)) { + return(NULL) + } + + loc.p = LOCnbclust(x = loc.dmdist, + FUNcluster = LOChcut, + method = "wss", + verbose = TRUE, + k.max = input$slClValidMaxClust, + hc_metric = input$selectDiss, + hc_method = input$selectLinkage) + + return(loc.p) + } + + + # plot dendrogram tree + plotTree <- function() { + + loc.dend = calcDendCut() + + if (is.null(loc.dend)) { + return(NULL) + } + + loc.p = factoextra::fviz_dend(x = loc.dend, k = input$slClValidNclust) + + return(loc.p) + } + + # plot silhouetts for a particular dendrogram cut + plotSilhForCut <- function() { + + loc.dmdist = userFitDistHier() + loc.dend = LOChcut(x = loc.dmdist, + k = input$slClValidNclust, + hc_func = "hclust", + hc_method = input$selectLinkage, hc_metric = input$selectDiss) + + if (is.null(loc.dend)) { + return(NULL) + } + + loc.p = factoextra::fviz_silhouette(sil.obj = loc.dend, print.summary = FALSE) + + return(loc.p) + } + + # Display silhouette + output$outPlotSilhAvg <- renderPlot({ + locBut = input$butPlotRel + + if (locBut == 0) { + cat(file = stderr(), 'outPlotSilhAvg: Go button not pressed\n') + + return(NULL) + } + + plotSilhAvg() + }) + + + # Display wss + output$outPlotWss <- renderPlot({ + locBut = input$butPlotRel + + if (locBut == 0) { + cat(file = stderr(), 'outPlotWss: Go button not pressed\n') + + return(NULL) + } + + plotWss() + }) + + # Display tree + output$outPlotTree <- renderPlot({ + locBut = input$butPlotInt + + if (locBut == 0) { + cat(file = stderr(), 'outPlotTree: Go button not pressed\n') + + return(NULL) + } + + plotTree() + }) + + # Display silhouette for a dendrogram cut + output$outPlotSilhForCut <- renderPlot({ + locBut = input$butPlotInt + + if (locBut == 0) { + cat(file = stderr(), 'outPlotSilhForCut: Go button not pressed\n') + + return(NULL) + } + + plotSilhForCut() + }) + + # Pop-overs ---- + addPopover(session, + ns("alLearnMore"), + title = "Classes of cluster validation", + content = helpText.clValid[["alLearnMore"]], + trigger = "click") +} + + -- GitLab