Commit 4bfa5183 authored by dmattek's avatar dmattek
Browse files

Help text and plot formatting

parent 2fbcf987
...@@ -14,31 +14,31 @@ helpText.clValid = c(alertNAsPresentDTW = paste0("NAs present. DTW cannot calcul ...@@ -14,31 +14,31 @@ helpText.clValid = c(alertNAsPresentDTW = paste0("NAs present. DTW cannot calcul
"If outlier points were removed, activate \"Interpolate gaps\" or ", "If outlier points were removed, activate \"Interpolate gaps\" or ",
"decrease the threshold for maximum allowed gap length. ", "decrease the threshold for maximum allowed gap length. ",
"The latter will result in entire trajectories with outliers being removed."), "The latter will result in entire trajectories with outliers being removed."),
alLearnMore = paste0("<p><a href=http://www.sthda.com/english/wiki/print.php?id=241>Clustering</a> is an <b>unsupervised</b> machine learning method for partitioning ", alLearnMore = paste0("<p><a href=http://www.sthda.com/english/wiki/print.php?id=241 title=\"External link\">Clustering</a> ",
"is an <b>unsupervised</b> machine learning method for partitioning ",
"dataset into a set of groups or clusters. The procedure will return clusters ", "dataset into a set of groups or clusters. The procedure will return clusters ",
"even if the data <b>does not</b> contain any! ", "even if the data <b>does not</b> contain any! ",
"Therefore, it’s necessary to ", "Therefore, it’s necessary to ",
"assess clustering tendency before the analysis, and ", "assess clustering tendency before the analysis, and ",
"validate the quality of the result after clustering.<p>", "validate the quality of the result after clustering.<p>"
"<p><b>Relative validation</b>, evaluates the clustering structure ", ),
"by varying different parameter values for the same algorithm ", alLearnMoreRel = paste0("<p>Determine the optimal number of clusters by inspecting ",
"(e.g. varying the number of clusters <i>k</i>). Typically used for ", "the average silhouette width and the total within cluster sum of squares (WSS) ",
"determining the optimal number of clusters.</p>", "for a range of cluster numbers.</p>",
"<p><b>Internal validation</b>, uses the internal information of the clustering process ", "<p><b>Silhouette analysis</b> estimates the average distance between clusters. ",
"to evaluate the goodness of a clustering structure without reference to external information. ", "Larger silhouette widths indicate better.<p>",
"It can be also used for estimating the number of clusters and the appropriate clustering algorithm ", "<p><b>WSS</b> evaluates the compactness of clusters. ",
"without any external data.</p>", "Compact clusters achieve low WSS values. ",
"<p><b>External validation</b>, compares the results of a cluster analysis ", "Look for the <i>knee</i> in the plot of WSS as function of cluster numbers.</p>"),
"to an externally known result, such as externally provided class labels. ", alLearnMoreInt = paste0("<p>Evaluate the goodness of a clustering structure by inspecting <b>the dendrogram</b> ",
"Since we know the “true” cluster number in advance, ", "and <b>the silhouette</b> for a given number of clusters.</p>",
"this approach is mainly used for selecting the right clustering algorithm for a specific dataset.</p>", "<p>The height of dendrogram branches indicates how well clusters are separated.</p>",
"<p><b>Stability validation</b>, is a special version of internal validation. ", "<p>The silhouette plot displays how close each time series in one cluster ",
"It evaluates the consistency of a clustering result by comparing it with the clusters obtained ", "is to time series in the neighboring clusters. ",
"after each column is removed, one at a time.</p>"), "A large positive silhouette (Si) indicates time series that are well clustered.",
outPlotWss = "Within squared sum...", "A negative Si indicates time series that are closer to ",
outPlotSilhAvg = "Average...", "a neighboring cluster, and are placed in the wrong cluster.</p>")
outPlotTree = "Dendrogram...", )
outPlotSilhForCut = "Silhouette plot at dendrogram cut...")
# UI ---- # UI ----
...@@ -46,7 +46,12 @@ clustValidUI <- function(id, label = "Validation") { ...@@ -46,7 +46,12 @@ clustValidUI <- function(id, label = "Validation") {
ns <- NS(id) ns <- NS(id)
tagList( tagList(
h4('Cluster validation'), h4(
"Cluster validation using ",
a("factoextra",
href="https://cran.r-project.org/web/packages/factoextra/",
title="External link")
),
actionLink(ns("alLearnMore"), "Learn more"), actionLink(ns("alLearnMore"), "Learn more"),
br(), br(),
br(), br(),
...@@ -87,6 +92,8 @@ clustValidUI <- function(id, label = "Validation") { ...@@ -87,6 +92,8 @@ clustValidUI <- function(id, label = "Validation") {
tabsetPanel( tabsetPanel(
tabPanel("Relative", tabPanel("Relative",
br(), br(),
p("Determine and visualise the optimal number of clusters. ",
actionLink(ns("alLearnMoreRel"), "Learn more")),
fluidRow( fluidRow(
column(2, column(2,
actionButton(ns('butPlotRel'), 'Validate!') actionButton(ns('butPlotRel'), 'Validate!')
...@@ -94,7 +101,7 @@ clustValidUI <- function(id, label = "Validation") { ...@@ -94,7 +101,7 @@ clustValidUI <- function(id, label = "Validation") {
column(6, column(6,
sliderInput( sliderInput(
ns('slClValidMaxClust'), ns('slClValidMaxClust'),
'Maximum number of clusters to validate', 'Maximum number of clusters to consider',
min = 2, min = 2,
max = 20, max = 20,
value = 10, value = 10,
...@@ -106,16 +113,14 @@ clustValidUI <- function(id, label = "Validation") { ...@@ -106,16 +113,14 @@ clustValidUI <- function(id, label = "Validation") {
), ),
br(), br(),
withSpinner(plotOutput(ns('outPlotSilhAvg'))), withSpinner(plotOutput(ns('outPlotSilhAvg'))),
bsTooltip(ns('outPlotSilhAvg'), helpText.clValid[["outPlotSilhAvg"]],
placement = "top", trigger = "hover", options = NULL),
br(), br(),
withSpinner(plotOutput(ns('outPlotWss'))), withSpinner(plotOutput(ns('outPlotWss')))
bsTooltip(ns('outPlotWss'), helpText.clValid[["outPlotWss"]],
placement = "top", trigger = "hover", options = NULL)
), ),
tabPanel("Internal", tabPanel("Internal",
br(), br(),
p("Validate a given data partitioning. ",
actionLink(ns("alLearnMoreInt"), "Learn more")),
fluidRow( fluidRow(
column(2, column(2,
actionButton(ns('butPlotInt'), 'Validate!') actionButton(ns('butPlotInt'), 'Validate!')
...@@ -135,12 +140,10 @@ clustValidUI <- function(id, label = "Validation") { ...@@ -135,12 +140,10 @@ clustValidUI <- function(id, label = "Validation") {
), ),
br(), br(),
withSpinner(plotOutput(ns('outPlotTree'))), withSpinner(plotOutput(ns('outPlotTree'))),
bsTooltip(ns('outPlotTree'), helpText.clValid[["outPlotTree"]],
placement = "top", trigger = "hover", options = NULL),
br(), br(),
withSpinner(plotOutput(ns('outPlotSilhForCut'))), #withSpinner(plotOutput(ns('outPlotClPCA'))),
bsTooltip(ns('outPlotSilhForCut'), helpText.clValid[["outPlotSilhForCut"]], br(),
placement = "top", trigger = "hover", options = NULL) withSpinner(plotOutput(ns('outPlotSilhForCut')))
) )
) )
) )
...@@ -205,6 +208,7 @@ clustValid <- function(input, output, session, in.data4clust) { ...@@ -205,6 +208,7 @@ clustValid <- function(input, output, session, in.data4clust) {
hc_method = input$selectLinkage, hc_metric = input$selectDiss)) hc_method = input$selectLinkage, hc_metric = input$selectDiss))
}) })
# Plotting ----
# Function instead of reactive as per: # Function instead of reactive as per:
# http://stackoverflow.com/questions/26764481/downloading-png-from-shiny-r # http://stackoverflow.com/questions/26764481/downloading-png-from-shiny-r
# This function is used to plot and to downoad a pdf # This function is used to plot and to downoad a pdf
...@@ -224,7 +228,12 @@ clustValid <- function(input, output, session, in.data4clust) { ...@@ -224,7 +228,12 @@ clustValid <- function(input, output, session, in.data4clust) {
verbose = TRUE, verbose = TRUE,
k.max = input$slClValidMaxClust, k.max = input$slClValidMaxClust,
hc_metric = input$selectDiss, hc_metric = input$selectDiss,
hc_method = input$selectLinkage) hc_method = input$selectLinkage) +
LOCggplotTheme(in.font.base = PLOTFONTBASE,
in.font.axis.text = PLOTFONTAXISTEXT,
in.font.axis.title = PLOTFONTAXISTITLE,
in.font.strip = PLOTFONTFACETSTRIP,
in.font.legend = PLOTFONTLEGEND)
return(loc.p) return(loc.p)
} }
...@@ -243,22 +252,50 @@ clustValid <- function(input, output, session, in.data4clust) { ...@@ -243,22 +252,50 @@ clustValid <- function(input, output, session, in.data4clust) {
verbose = TRUE, verbose = TRUE,
k.max = input$slClValidMaxClust, k.max = input$slClValidMaxClust,
hc_metric = input$selectDiss, hc_metric = input$selectDiss,
hc_method = input$selectLinkage) hc_method = input$selectLinkage) +
LOCggplotTheme(in.font.base = PLOTFONTBASE,
in.font.axis.text = PLOTFONTAXISTEXT,
in.font.axis.title = PLOTFONTAXISTITLE,
in.font.strip = PLOTFONTFACETSTRIP,
in.font.legend = PLOTFONTLEGEND)
return(loc.p) return(loc.p)
} }
# plot dendrogram tree # plot dendrogram tree
plotTree <- function() { plotTree <- function() {
loc.dend = calcDendCut() loc.part = calcDendCut()
if (is.null(loc.part)) {
return(NULL)
}
loc.p = factoextra::fviz_dend(x = loc.part,
show_labels = F,
rect = T,
xlab = "Time series",
k = input$slClValidNclust) +
LOCggplotTheme(in.font.base = PLOTFONTBASE,
in.font.axis.text = PLOTFONTAXISTEXT,
in.font.axis.title = PLOTFONTAXISTITLE,
in.font.strip = PLOTFONTFACETSTRIP,
in.font.legend = PLOTFONTLEGEND)
return(loc.p)
}
# PCA visualization of partitioning methods
plotClPCA <- function() {
loc.part = calcDendCut()
if (is.null(loc.dend)) { if (is.null(loc.part)) {
return(NULL) return(NULL)
} }
loc.p = factoextra::fviz_dend(x = loc.dend, k = input$slClValidNclust) loc.p = factoextra::fviz_cluster(object = loc.part, ellipse.type = "convex")
return(loc.p) return(loc.p)
} }
...@@ -266,21 +303,26 @@ clustValid <- function(input, output, session, in.data4clust) { ...@@ -266,21 +303,26 @@ clustValid <- function(input, output, session, in.data4clust) {
# plot silhouetts for a particular dendrogram cut # plot silhouetts for a particular dendrogram cut
plotSilhForCut <- function() { plotSilhForCut <- function() {
loc.dmdist = userFitDistHier() loc.part = calcDendCut()
loc.dend = LOChcut(x = loc.dmdist,
k = input$slClValidNclust,
hc_func = "hclust",
hc_method = input$selectLinkage, hc_metric = input$selectDiss)
if (is.null(loc.dend)) { if (is.null(loc.part)) {
return(NULL) return(NULL)
} }
loc.p = factoextra::fviz_silhouette(sil.obj = loc.dend, print.summary = FALSE) loc.p = factoextra::fviz_silhouette(sil.obj = loc.part,
print.summary = FALSE) +
xlab("Time series") +
LOCggplotTheme(in.font.base = PLOTFONTBASE,
in.font.axis.text = PLOTFONTAXISTEXT,
in.font.axis.title = PLOTFONTAXISTITLE,
in.font.strip = PLOTFONTFACETSTRIP,
in.font.legend = PLOTFONTLEGEND) +
theme(axis.text.x = element_blank())
return(loc.p) return(loc.p)
} }
# Plot rendering ----
# Display silhouette # Display silhouette
output$outPlotSilhAvg <- renderPlot({ output$outPlotSilhAvg <- renderPlot({
locBut = input$butPlotRel locBut = input$butPlotRel
...@@ -340,6 +382,18 @@ clustValid <- function(input, output, session, in.data4clust) { ...@@ -340,6 +382,18 @@ clustValid <- function(input, output, session, in.data4clust) {
title = "Classes of cluster validation", title = "Classes of cluster validation",
content = helpText.clValid[["alLearnMore"]], content = helpText.clValid[["alLearnMore"]],
trigger = "click") trigger = "click")
addPopover(session,
ns("alLearnMoreRel"),
title = "Relative validation",
content = helpText.clValid[["alLearnMoreRel"]],
trigger = "click")
addPopover(session,
ns("alLearnMoreInt"),
title = "Internal validation",
content = helpText.clValid[["alLearnMoreInt"]],
trigger = "click")
} }
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment