# # Time Course Inspector: Shiny app for plotting time series data # Author: Maciej Dobrzynski # # This module is a tab for hierarchical clustering (base R hclust + dist) helpText.clValid = c(alertClValidNAsPresent = paste0("NAs present. The selected distance measure will work, ", "however PCA will not be avaliable."), alertClValidNAsPresentDTW = paste0("NAs present. DTW distance measure will NOT work."), alLearnMore = paste0("

Clustering ", "is an unsupervised machine learning method for partitioning ", "dataset into a set of groups or clusters. The procedure will return clusters ", "even if the data does not contain any! ", "Therefore, it’s necessary to ", "assess clustering tendency before the analysis, and ", "validate the quality of the result after clustering.

" ), alLearnMoreRel = paste0("

Determine the optimal number of clusters by inspecting ", "the average silhouette width and the total within cluster sum of squares (WSS) ", "for a range of cluster numbers.

", "

Silhouette analysis estimates the average distance between clusters. ", "Larger silhouette widths indicate better.

", "

WSS evaluates the compactness of clusters. ", "Compact clusters achieve low WSS values. ", "Look for the knee in the plot of WSS as function of cluster numbers.

"), alLearnMoreInt = paste0("

Evaluate the goodness of a clustering structure by inspecting ", "principle components, the dendrogram, ", "and the silhouette for a given number of clusters.

", "

Each point in the scatter plot of 2 principle components corresponds to a single time series. ", "Points are coloured by cluster numbers. Compact, well separated clusters ", "indicate good partitioning.

", "

The height of dendrogram branches indicates how well clusters are separated.

", "

The silhouette plot displays how close each time series in one cluster ", "is to time series in the neighboring clusters. ", "A large positive silhouette (Si) indicates time series that are well clustered.", "A negative Si indicates time series that are closer to ", "a neighboring cluster, and are placed in the wrong cluster.

") ) # UI ---- clustValidUI <- function(id, label = "Validation") { ns <- NS(id) tagList( h4( "Cluster validation using ", a("factoextra", href="https://cran.r-project.org/web/packages/factoextra/", title="External link") ), actionLink(ns("alLearnMore"), "Learn more"), br(), br(), fluidRow( column(4, selectInput( ns("selectDiss"), label = ("Dissimilarity measure"), choices = list("Euclidean" = "euclidean", "Manhattan" = "manhattan", "Maximum" = "maximum", "Canberra" = "canberra", "DTW" = "DTW"), selected = "euclidean" ), bsAlert("alertAnchorClValidNAsPresent") ), column(4, selectInput( ns("selectLinkage"), label = ("Linkage method"), choices = list( "Average" = "average", "Complete" = "complete", "Single" = "single", "Centroid" = "centroid", "Ward" = "ward.D", "Ward D2" = "ward.D2", "McQuitty" = "mcquitty" ), selected = "average" ) ) ), br(), tabsetPanel( tabPanel("Relative", br(), p("Determine and visualise the optimal number of clusters. ", actionLink(ns("alLearnMoreRel"), "Learn more")), fluidRow( column(2, actionButton(ns('butPlotRel'), 'Validate!') ), column(6, sliderInput( ns('slClValidMaxClust'), 'Maximum number of clusters to consider', min = 2, max = 20, value = 10, step = 1, ticks = TRUE, round = TRUE ) ) ), br(), withSpinner(plotOutput(ns('outPlotSilhAvg'))), br(), withSpinner(plotOutput(ns('outPlotWss'))) ), tabPanel("Internal", br(), p("Validate a given data partitioning. ", actionLink(ns("alLearnMoreInt"), "Learn more")), fluidRow( column(2, actionButton(ns('butPlotInt'), 'Validate!') ), column(6, sliderInput( ns('slClValidNclust'), 'Number of clusters to evaluate', min = 2, max = 20, value = 1, step = 1, ticks = TRUE, round = TRUE ) ) ), br(), withSpinner(plotOutput(ns('outPlotTree'))), br(), withSpinner(plotOutput(ns('outPlotSilhForCut'))), br(), withSpinner(plotOutput(ns('outPlotClPCA'))) ) ) ) } # SERVER ---- clustValid <- function(input, output, session, in.dataWide) { ns = session$ns # Return the number of clusters from the slider # and delay by a constant in milliseconds defined in auxfunc.R returnNclust = reactive({ return(input$slClValidNclust) }) %>% debounce(MILLIS) # Return max number of clusters from the slider # and delay by a constant in milliseconds defined in auxfunc.R returnMaxNclust = reactive({ return(input$slClValidMaxClust) }) %>% debounce(MILLIS) # calculate distance matrix for further clustering # time series arranged in rows with columns corresponding to time points calcDist <- reactive({ cat(file = stderr(), 'clustValid:calcDist \n') loc.dm = in.dataWide() if (is.null(loc.dm)) { return(NULL) } # Throw some warnings if NAs present in the dataset. # DTW cannot compute distance when NA's are preset. # Other distance measures can be calculated but caution is required with interpretation. # NAs in the wide format can result from explicit NAs in the measurment column or # from missing rows that cause NAs to appear when convertinf from long to wide (dcast) if(sum(is.na(loc.dm)) > 0) { if (input$selectDiss == "DTW") { createAlert(session, "alertAnchorClValidNAsPresent", "alertClValidNAsPresentDTW", title = "Error", content = helpText.clValid[["alertClValidNAsPresentDTW"]], append = FALSE, style = "danger") closeAlert(session, 'alertClValidNAsPresent') return(NULL) } else { createAlert(session, "alertAnchorClValidNAsPresent", "alertClValidNAsPresent", title = "Warning", content = helpText.clValid[["alertClValidNAsPresent"]], append = FALSE, style = "warning") closeAlert(session, 'alertClValidNAsPresentDTW') } } else { closeAlert(session, 'alertClValidNAsPresentDTW') closeAlert(session, 'alertClValidNAsPresent') } # calculate distance matrix return(proxy::dist(loc.dm, method = input$selectDiss)) }) # calculate dendrogram for a chosen number of clusters and the linkage method calcDendCut = reactive({ cat(file = stderr(), 'clustValid:calcDendCut \n') loc.dist = calcDist() if (is.null(loc.dist)) { return(NULL) } return(factoextra::hcut(x = loc.dist, k = returnNclust(), FUNcluster = "hclust", hc_method = input$selectLinkage, graph = FALSE)) }) # Plotting ---- # Function instead of reactive as per: # http://stackoverflow.com/questions/26764481/downloading-png-from-shiny-r # This function is used to plot and to downoad a pdf # plot average silhouette plotSilhAvg <- function() { cat(file = stderr(), 'plotSilhAvg: in\n') # make the f-n dependent on the button click locBut = input$butPlotRel # Check if required data exists # Thanks to isolate all mods in the left panel are delayed # until clicking the Plot button loc.dist = isolate(calcDist()) validate( need(!is.null(loc.dist), "Nothing to plot. Load data first!") ) loc.p = LOCnbclust(loc.dist, method = "silhouette", k.max = returnMaxNclust(), hc_metric = input$selectDiss, hc_method = input$selectLinkage) + xlab("Number of clusters") + ylab("Average silhouette width") + ggtitle("Optimal number of clusters from silhouette analysis") + LOCggplotTheme(in.font.base = PLOTFONTBASE, in.font.axis.text = PLOTFONTAXISTEXT, in.font.axis.title = PLOTFONTAXISTITLE, in.font.strip = PLOTFONTFACETSTRIP, in.font.legend = PLOTFONTLEGEND) return(loc.p) } # plot Ws plotWss <- function() { cat(file = stderr(), 'plotWss: in\n') # make the f-n dependent on the button click locBut = input$butPlotRel # Check if required data exists # Thanks to isolate all mods in the left panel are delayed # until clicking the Plot button loc.dist = isolate(calcDist()) validate( need(!is.null(loc.dist), "Nothing to plot. Load data first!") ) loc.p = LOCnbclust(loc.dist, method = "wss", k.max = returnMaxNclust(), hc_metric = input$selectDiss, hc_method = input$selectLinkage) + xlab("Number of clusters") + ylab("Total within cluster sum of squares") + ggtitle("Within cluster sum of squares for different cluster numbers") + LOCggplotTheme(in.font.base = PLOTFONTBASE, in.font.axis.text = PLOTFONTAXISTEXT, in.font.axis.title = PLOTFONTAXISTITLE, in.font.strip = PLOTFONTFACETSTRIP, in.font.legend = PLOTFONTLEGEND) return(loc.p) } # plot dendrogram tree plotTree <- function() { cat(file = stderr(), 'plotTree: in\n') # make the f-n dependent on the button click locBut = input$butPlotInt # Check if required data exists # Thanks to isolate all mods in the left panel are delayed # until clicking the Plot button loc.part = isolate(calcDendCut()) validate( need(!is.null(loc.part), "Nothing to plot. Load data first!") ) loc.p = factoextra::fviz_dend(loc.part, show_labels = F, rect = T, xlab = "Time series", main = "Dendrogram") + LOCggplotTheme(in.font.base = PLOTFONTBASE, in.font.axis.text = PLOTFONTAXISTEXT, in.font.axis.title = PLOTFONTAXISTITLE, in.font.strip = PLOTFONTFACETSTRIP, in.font.legend = PLOTFONTLEGEND) return(loc.p) } # PCA visualization of partitioning methods plotClPCA <- function() { cat(file = stderr(), 'plotTree: in\n') # make the f-n dependent on the button click locBut = input$butPlotInt # Check if required data exists # Thanks to isolate all mods in the left panel are delayed # until clicking the Plot button loc.part = isolate(calcDendCut()) loc.dm = in.dataWide() print(sum(is.na(loc.dm))) validate( need(!is.null(loc.part), "Nothing to plot. Load data first!"), need(!is.null(loc.dm), "Nothing to plot. Load data first!"), need(sum(is.na(loc.dm)), "Cannot calculate PCA in the presence of missing data and/or NAs.") ) if (sum(is.na(loc.dm)) > 0) return(NULL) loc.p = factoextra::fviz_cluster(loc.part, data = loc.dm, geom = "point", elipse.type = "convex", main = "Principle components" )+ LOCggplotTheme(in.font.base = PLOTFONTBASE, in.font.axis.text = PLOTFONTAXISTEXT, in.font.axis.title = PLOTFONTAXISTITLE, in.font.strip = PLOTFONTFACETSTRIP, in.font.legend = PLOTFONTLEGEND) return(loc.p) } # plot silhouetts for a particular dendrogram cut plotSilhForCut <- function() { cat(file = stderr(), 'plotSilhForCut: in\n') # make the f-n dependent on the button click locBut = input$butPlotInt # Check if required data exists # Thanks to isolate all mods in the left panel are delayed # until clicking the Plot button loc.part = isolate(calcDendCut()) validate( need(!is.null(loc.part), "Nothing to plot. Load data first!") ) loc.p = factoextra::fviz_silhouette(loc.part, print.summary = FALSE, main = "Silhouette") + xlab("Time series") + LOCggplotTheme(in.font.base = PLOTFONTBASE, in.font.axis.text = PLOTFONTAXISTEXT, in.font.axis.title = PLOTFONTAXISTITLE, in.font.strip = PLOTFONTFACETSTRIP, in.font.legend = PLOTFONTLEGEND) + theme(axis.text.x = element_blank()) return(loc.p) } # Plot rendering ---- # Display silhouette output$outPlotSilhAvg <- renderPlot({ loc.p = plotSilhAvg() if(is.null(loc.p)) return(NULL) return(loc.p) }) # Display wss output$outPlotWss <- renderPlot({ loc.p = plotWss() if(is.null(loc.p)) return(NULL) return(loc.p) }) # Display PCA of clustering output$outPlotClPCA <- renderPlot({ # This is required to avoid # "Warning: Error in : cannot open file 'Rplots.pdf'" # When running on a server. Based on: # https://github.com/ropensci/plotly/issues/494 # if (names(dev.cur()) != "null device") # dev.off() # pdf(NULL) loc.p = plotClPCA() if(is.null(loc.p)) return(NULL) return(loc.p) }) # Display tree output$outPlotTree <- renderPlot({ # This is required to avoid # "Warning: Error in : cannot open file 'Rplots.pdf'" # When running on a server. Based on: # https://github.com/ropensci/plotly/issues/494 # if (names(dev.cur()) != "null device") # dev.off() # pdf(NULL) loc.p = plotTree() if(is.null(loc.p)) return(NULL) return(loc.p) }) # Display silhouette for a dendrogram cut output$outPlotSilhForCut <- renderPlot({ # This is required to avoid # "Warning: Error in : cannot open file 'Rplots.pdf'" # When running on a server. Based on: # https://github.com/ropensci/plotly/issues/494 # if (names(dev.cur()) != "null device") # dev.off() # pdf(NULL) loc.p = plotSilhForCut() if(is.null(loc.p)) return(NULL) return(loc.p) }) # Pop-overs ---- addPopover(session, ns("alLearnMore"), title = "Classes of cluster validation", content = helpText.clValid[["alLearnMore"]], trigger = "click") addPopover(session, ns("alLearnMoreRel"), title = "Relative validation", content = helpText.clValid[["alLearnMoreRel"]], trigger = "click") addPopover(session, ns("alLearnMoreInt"), title = "Internal validation", content = helpText.clValid[["alLearnMoreInt"]], trigger = "click") }