Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
S
shiny-timecourse-inspector
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
2
Issues
2
List
Boards
Labels
Service Desk
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Operations
Operations
Incidents
Environments
Packages & Registries
Packages & Registries
Container Registry
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
pertz-lab
shiny-timecourse-inspector
Commits
f544f945
Commit
f544f945
authored
Oct 08, 2019
by
dmattek
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Changed checks for NULL data to validate-need syntax.Uses custom nbclust functions. DTW is back.
parent
d216d802
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
113 additions
and
105 deletions
+113
-105
modules/tabClValid.R
modules/tabClValid.R
+113
-105
No files found.
modules/tabClValid.R
View file @
f544f945
...
...
@@ -5,7 +5,8 @@
# This module is a tab for hierarchical clustering (base R hclust + dist)
helpText.clValid
=
c
(
alertClValidNAsPresent
=
paste0
(
"NAs present. The selected distance measure will work, "
,
"however caution is recommended. Consider interpolation of NAs and missing data in the left panel."
),
"however PCA will not be avaliable."
),
alertClValidNAsPresentDTW
=
paste0
(
"NAs present. DTW distance measure will NOT work."
),
alLearnMore
=
paste0
(
"<p><a href=http://www.sthda.com/english/wiki/print.php?id=241 title=\"External link\">Clustering</a> "
,
"is an <b>unsupervised</b> machine learning method for partitioning "
,
"dataset into a set of groups or clusters. The procedure will return clusters "
,
...
...
@@ -60,7 +61,8 @@ clustValidUI <- function(id, label = "Validation") {
choices
=
list
(
"Euclidean"
=
"euclidean"
,
"Manhattan"
=
"manhattan"
,
"Maximum"
=
"maximum"
,
"Canberra"
=
"canberra"
),
"Canberra"
=
"canberra"
,
"DTW"
=
"DTW"
),
selected
=
"euclidean"
),
bsAlert
(
"alertAnchorClValidNAsPresent"
)
...
...
@@ -134,27 +136,39 @@ clustValidUI <- function(id, label = "Validation") {
)
),
br
(),
withSpinner
(
plotOutput
(
ns
(
'outPlotClPCA'
))),
br
(),
withSpinner
(
plotOutput
(
ns
(
'outPlotTree'
))),
br
(),
withSpinner
(
plotOutput
(
ns
(
'outPlotSilhForCut'
)))
withSpinner
(
plotOutput
(
ns
(
'outPlotSilhForCut'
))),
br
(),
withSpinner
(
plotOutput
(
ns
(
'outPlotClPCA'
)))
)
)
)
}
# SERVER ----
clustValid
<-
function
(
input
,
output
,
session
,
in.data
4clust
)
{
clustValid
<-
function
(
input
,
output
,
session
,
in.data
Wide
)
{
ns
=
session
$
ns
# Return the number of clusters from the slider
# and delay by a constant in milliseconds defined in auxfunc.R
returnNclust
=
reactive
({
return
(
input
$
slClValidNclust
)
})
%>%
debounce
(
MILLIS
)
# Return max number of clusters from the slider
# and delay by a constant in milliseconds defined in auxfunc.R
returnMaxNclust
=
reactive
({
return
(
input
$
slClValidMaxClust
)
})
%>%
debounce
(
MILLIS
)
# calculate distance matrix for further clustering
# time series arranged in rows with columns corresponding to time points
userFitDistHier
<-
reactive
({
cat
(
file
=
stderr
(),
'clustValid:
userFitDistHier
\n'
)
calcDist
<-
reactive
({
cat
(
file
=
stderr
(),
'clustValid:
calcDist
\n'
)
loc.dm
=
in.data
4clust
()
loc.dm
=
in.data
Wide
()
if
(
is.null
(
loc.dm
))
{
return
(
NULL
)
...
...
@@ -163,70 +177,52 @@ clustValid <- function(input, output, session, in.data4clust) {
# Throw some warnings if NAs present in the dataset.
# DTW cannot compute distance when NA's are preset.
# Other distance measures can be calculated but caution is required with interpretation.
print
(
sum
(
is.na
(
loc.dm
)))
# NAs in the wide format can result from explicit NAs in the measurment column or
# from missing rows that cause NAs to appear when convertinf from long to wide (dcast)
if
(
sum
(
is.na
(
loc.dm
))
>
0
)
{
if
(
input
$
selectDiss
==
"DTW"
)
{
createAlert
(
session
,
"alertAnchorClValidNAsPresent"
,
"alertClValidNAsPresentDTW"
,
title
=
"Error"
,
content
=
helpText.clValid
[[
"alertClValidNAsPresentDTW"
]],
append
=
FALSE
,
style
=
"danger"
)
closeAlert
(
session
,
'alertClValidNAsPresent'
)
return
(
NULL
)
}
else
{
createAlert
(
session
,
"alertAnchorClValidNAsPresent"
,
"alertClValidNAsPresent"
,
title
=
"Warning"
,
content
=
helpText.clValid
[[
"alertClValidNAsPresent"
]],
append
=
FALSE
,
style
=
"warning"
)
closeAlert
(
session
,
'alertClValidNAsPresentDTW'
)
}
}
else
{
closeAlert
(
session
,
'alertClValidNAsPresentDTW'
)
closeAlert
(
session
,
'alertClValidNAsPresent'
)
}
}
# calculate distance matrix
return
(
dist
(
loc.dm
,
method
=
input
$
selectPlotHierDiss
))
# calculate distance matrix
return
(
proxy
::
dist
(
loc.dm
,
method
=
input
$
selectDiss
))
})
# calculate dendrogram for a chosen number of clusters and the linkage method
calcDendCut
=
reactive
({
cat
(
file
=
stderr
(),
'clustValid:calcDendCut \n'
)
loc.d
m
=
returnDMwithChecks
()
loc.d
ist
=
calcDist
()
if
(
is.null
(
loc.d
m
))
{
if
(
is.null
(
loc.d
ist
))
{
return
(
NULL
)
}
return
(
factoextra
::
eclust
(
x
=
loc.dm
,
return
(
factoextra
::
hcut
(
x
=
loc.dist
,
k
=
returnNclust
(),
FUNcluster
=
"hclust"
,
k
=
input
$
slClValidNclust
,
hc_method
=
input
$
selectLinkage
,
hc_metric
=
input
$
selectDiss
,
graph
=
FALSE
))
})
# Return a matrix with time series in wide format
# If data contains NAs (from explicit NAs or due to missing time points,
# or due to missing time points after outlier removal),
# some warnings are thrown. E.g. DTW cannot caluclate distance if NAs are present.
returnDMwithChecks
=
reactive
({
cat
(
file
=
stderr
(),
'clustValid:returnDMwithChecks \n'
)
loc.dm
=
in.data4clust
()
if
(
is.null
(
loc.dm
))
{
return
(
NULL
)
}
# Throw some warnings if NAs present in the dataset.
# DTW cannot compute distance when NA's are preset.
# Other distance measures can be calculated but caution is required with interpretation.
print
(
sum
(
is.na
(
loc.dm
)))
if
(
sum
(
is.na
(
loc.dm
))
>
0
)
{
createAlert
(
session
,
"alertAnchorClValidNAsPresent"
,
"alertClValidNAsPresent"
,
title
=
"Warning"
,
content
=
helpText.clValid
[[
"alertClValidNAsPresent"
]],
append
=
FALSE
,
style
=
"warning"
)
}
else
{
closeAlert
(
session
,
'alertClValidNAsPresent'
)
}
return
(
loc.dm
)
})
# Plotting ----
# Function instead of reactive as per:
# http://stackoverflow.com/questions/26764481/downloading-png-from-shiny-r
...
...
@@ -234,23 +230,22 @@ clustValid <- function(input, output, session, in.data4clust) {
# plot average silhouette
plotSilhAvg
<-
function
()
{
locBut
=
input
$
butPlotRel
if
(
locBut
==
0
)
{
cat
(
file
=
stderr
(),
'plotSilhAvg: Go button not pressed\n'
)
return
(
NULL
)
}
cat
(
file
=
stderr
(),
'plotSilhAvg: in\n'
)
loc.dm
=
returnDMwithChecks
()
if
(
is.null
(
loc.dm
))
{
return
(
NULL
)
}
loc.p
=
factoextra
::
fviz_nbclust
(
loc.dm
,
hcut
,
# make the f-n dependent on the button click
locBut
=
input
$
butPlotRel
# Check if required data exists
# Thanks to isolate all mods in the left panel are delayed
# until clicking the Plot button
loc.dist
=
isolate
(
calcDist
())
validate
(
need
(
!
is.null
(
loc.dist
),
"Nothing to plot. Load data first!"
)
)
loc.p
=
LOCnbclust
(
loc.dist
,
method
=
"silhouette"
,
k.max
=
input
$
slClValidMaxClust
,
k.max
=
returnMaxNclust
()
,
hc_metric
=
input
$
selectDiss
,
hc_method
=
input
$
selectLinkage
)
+
xlab
(
"Number of clusters"
)
+
...
...
@@ -266,23 +261,22 @@ clustValid <- function(input, output, session, in.data4clust) {
# plot Ws
plotWss
<-
function
()
{
cat
(
file
=
stderr
(),
'plotWss: in\n'
)
# make the f-n dependent on the button click
locBut
=
input
$
butPlotRel
if
(
locBut
==
0
)
{
cat
(
file
=
stderr
(),
'plotWss: Go button not pressed\n'
)
return
(
NULL
)
}
loc.dm
=
returnDMwithChecks
()
if
(
is.null
(
loc.dm
))
{
return
(
NULL
)
}
# Check if required data exists
# Thanks to isolate all mods in the left panel are delayed
# until clicking the Plot button
loc.dist
=
isolate
(
calcDist
())
validate
(
need
(
!
is.null
(
loc.dist
),
"Nothing to plot. Load data first!"
)
)
loc.p
=
factoextra
::
fviz_nbclust
(
loc.dm
,
hcut
,
loc.p
=
LOCnbclust
(
loc.dist
,
method
=
"wss"
,
k.max
=
input
$
slClValidMaxClust
,
k.max
=
returnMaxNclust
()
,
hc_metric
=
input
$
selectDiss
,
hc_method
=
input
$
selectLinkage
)
+
xlab
(
"Number of clusters"
)
+
...
...
@@ -299,18 +293,18 @@ clustValid <- function(input, output, session, in.data4clust) {
# plot dendrogram tree
plotTree
<-
function
()
{
cat
(
file
=
stderr
(),
'plotTree: in\n'
)
# make the f-n dependent on the button click
locBut
=
input
$
butPlotInt
if
(
locBut
==
0
)
{
cat
(
file
=
stderr
(),
'plotTree: Go button not pressed\n'
)
return
(
NULL
)
}
loc.part
=
calcDendCut
()
if
(
is.null
(
loc.part
))
{
return
(
NULL
)
}
# Check if required data exists
# Thanks to isolate all mods in the left panel are delayed
# until clicking the Plot button
loc.part
=
isolate
(
calcDendCut
())
validate
(
need
(
!
is.null
(
loc.part
),
"Nothing to plot. Load data first!"
)
)
loc.p
=
factoextra
::
fviz_dend
(
loc.part
,
show_labels
=
F
,
...
...
@@ -329,42 +323,56 @@ clustValid <- function(input, output, session, in.data4clust) {
# PCA visualization of partitioning methods
plotClPCA
<-
function
()
{
cat
(
file
=
stderr
(),
'plotTree: in\n'
)
# make the f-n dependent on the button click
locBut
=
input
$
butPlotInt
if
(
locBut
==
0
)
{
cat
(
file
=
stderr
(),
'plotClPCA: Go button not pressed\n'
)
return
(
NULL
)
}
loc.part
=
calcDendCut
()
if
(
is.null
(
loc.part
))
{
# Check if required data exists
# Thanks to isolate all mods in the left panel are delayed
# until clicking the Plot button
loc.part
=
isolate
(
calcDendCut
())
loc.dm
=
in.dataWide
()
print
(
sum
(
is.na
(
loc.dm
)))
validate
(
need
(
!
is.null
(
loc.part
),
"Nothing to plot. Load data first!"
),
need
(
!
is.null
(
loc.dm
),
"Nothing to plot. Load data first!"
),
need
(
sum
(
is.na
(
loc.dm
)),
"Cannot calculate PCA in the presence of missing data and/or NAs."
)
)
if
(
sum
(
is.na
(
loc.dm
))
>
0
)
return
(
NULL
)
}
loc.p
=
factoextra
::
fviz_cluster
(
loc.part
,
data
=
loc.dm
,
geom
=
"point"
,
elipse.type
=
"
norm
"
,
elipse.type
=
"
convex
"
,
main
=
"Principle components"
)
)
+
LOCggplotTheme
(
in.font.base
=
PLOTFONTBASE
,
in.font.axis.text
=
PLOTFONTAXISTEXT
,
in.font.axis.title
=
PLOTFONTAXISTITLE
,
in.font.strip
=
PLOTFONTFACETSTRIP
,
in.font.legend
=
PLOTFONTLEGEND
)
return
(
loc.p
)
}
# plot silhouetts for a particular dendrogram cut
plotSilhForCut
<-
function
()
{
cat
(
file
=
stderr
(),
'plotSilhForCut: in\n'
)
# make the f-n dependent on the button click
locBut
=
input
$
butPlotInt
if
(
locBut
==
0
)
{
cat
(
file
=
stderr
(),
'plotSilhForCut: Go button not pressed\n'
)
return
(
NULL
)
}
loc.part
=
calcDendCut
()
if
(
is.null
(
loc.part
))
{
return
(
NULL
)
}
# Check if required data exists
# Thanks to isolate all mods in the left panel are delayed
# until clicking the Plot button
loc.part
=
isolate
(
calcDendCut
())
validate
(
need
(
!
is.null
(
loc.part
),
"Nothing to plot. Load data first!"
)
)
loc.p
=
factoextra
::
fviz_silhouette
(
loc.part
,
print.summary
=
FALSE
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment