RDocumentation: silhouette
  • AI Chat
  • Code
  • Report
  • Beta
    Spinner

    Note that this notebook was automatically generated from an RDocumentation page. Whether the code runs without errors depends on the package and on the example code itself; you may need to edit the code to make things work.

    # Make sure the 'cluster' package is installed, then attach it.
    # requireNamespace() only checks availability (it does not attach and stays
    # quiet when the package is missing), so the single library() call below is
    # the one place where the package is attached -- and it stops with an error
    # if the installation did not succeed.
    if (!requireNamespace("cluster", quietly = TRUE)) {
      install.packages("cluster")
    }
    library(cluster)
    ## PAM clustering of the Ruspini data into 4 clusters, and its silhouette.
    data(ruspini)
    pr4 <- pam(ruspini, 4)
    si <- silhouette(pr4)
    str(si)
    ssi <- summary(si)
    print(ssi)
    plot(si) # silhouette plot
    # with cluster-wise coloring: one colour for each of the 4 clusters
    plot(si, col = c("red", "green", "blue", "purple"))

    ## Same cluster memberships, but silhouette widths computed from
    ## Canberra dissimilarities instead of the ones pam() used.
    si2 <- silhouette(pr4$clustering, dist(ruspini, method = "canberra"))
    summary(si2) # has small values: "canberra"'s fault
    # The argument is spelled out as 'nmax.lab'; the abbreviated 'nmax' only
    # reached it through partial argument matching.
    plot(si2, nmax.lab = 80, cex.names = 0.6)
    
    ## 3 x 2 panel layout with an outer top margin for a common title; the
    ## previous graphics settings are saved in 'op' and restored at the end.
    op <- par(
      mfrow = c(3, 2),
      oma = c(0, 0, 3, 0),
      mgp = c(1.6, .8, 0),
      mar = .1 + c(4, 2, 2, 2)
    )
    for (k in 2:6) {
      sil_k <- silhouette(pam(ruspini, k = k))
      plot(sil_k, main = paste("k = ", k), do.n.k = FALSE)
    }
    mtext(
      "PAM(Ruspini) as in Kaufman & Rousseeuw, p.101",
      outer = TRUE, font = par("font.main"), cex = par("cex.main")
    )
    # five plots were drawn into six panels: advance past the remaining one
    frame()

    ## the same with cluster-wise colours:
    c6 <- c(
      "tomato", "forest green", "dark blue",
      "purple2", "goldenrod4", "gray20"
    )
    for (k in 2:6) {
      sil_k <- silhouette(pam(ruspini, k = k))
      plot(sil_k, main = paste("k = ", k), do.n.k = FALSE, col = c6[seq_len(k)])
    }
    par(op)
    
    ## clara(): standard silhouette is just for the best random subset
    data(xclara)
    set.seed(7)
    # random sample of 1000 rows; the row names keep the original row indices
    xc1k <- xclara[sample(nrow(xclara), size = 1000), ]
    str(xc1k)
    cl3 <- clara(xc1k, 3)
    plot(silhouette(cl3)) # only of the "best" subset of 46
    ## The full silhouette internally needs the complete dissimilarity object:
    ## n * (n - 1) / 2 doubles, i.e. about 4 MB for the n = 1000 rows used here.
    sf <- silhouette(cl3, full = TRUE) ## this is the same as
    s.full <- silhouette(cl3$clustering, daisy(xc1k))
    stopifnot(all.equal(sf, s.full, check.attributes = FALSE, tolerance = 0))
    ## Colour by the original "3 groups of each 1000" rows. Subtracting 1 before
    ## the integer division keeps rows 1000, 2000 and 3000 inside their own
    ## group, so only the three colours 2, 3 and 4 can occur.
    ## NOTE(review): the original carried a FIXME here. This script does not
    ## show whether plot() re-orders a per-observation 'col' vector to follow
    ## the sorted silhouette -- confirm that the colours line up with the bars.
    row_idx <- as.integer(names(cl3$clustering))
    col_grp <- 2L + (row_idx - 1L) %/% 1000L
    plot(sf, col = col_grp,
         main = "plot(silhouette(clara(.), full = TRUE))")
    
    ## Silhouette for a hierarchical clustering:
    ar <- agnes(ruspini)
    # dissimilarities are computed once and shared by both cuts below
    d_rusp <- daisy(ruspini)
    ## cut the tree into 5 groups (k = 4 gave the same as pam() above)
    si3 <- silhouette(cutree(ar, k = 5), d_rusp)
    # a silhouette object can be converted to a plain data frame
    di3 <- as.data.frame(si3)
    stopifnot(is.data.frame(di3))
    # 'nmax.lab' is written out in full; the abbreviated 'nmax' only reached it
    # through partial argument matching
    plot(si3, nmax.lab = 80, cex.names = 0.5)
    ## 2 groups: Agnes() wasn't too good:
    si4 <- silhouette(cutree(ar, k = 2), d_rusp)
    plot(si4, nmax.lab = 80, cex.names = 0.5)