Demo on Clustering

Cluster prediction strength

First I will implement a simplified version of cluster prediction strength just to illustrate the logic behind it.
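A minimal sketch of what such an illustration could look like, assuming the iris data; the split, the number of clusters, and the plotting choices below are my own, not necessarily the demo's.

```r
## Sketch: split the data in two, cluster each half separately, then overlay
## the two clusterings in one plot (assumed choices, see lead-in above).
set.seed(1)
X   <- scale(iris[, 1:4])
idx <- sample(nrow(X), nrow(X) / 2)
A   <- X[idx, ]
B   <- X[-idx, ]

k   <- 2
kmA <- kmeans(A, centers = k, nstart = 10)   # cluster each half separately
kmB <- kmeans(B, centers = k, nstart = 10)

## Data set A in color (its own clusters), data set B overlaid as grey symbols
## (its own clusters). If the two clusterings agree, each color region should
## contain only one symbol type.
plot(A[, 1], A[, 2], col = kmA$cluster, pch = 16,
     xlab = "Sepal.Length (scaled)", ylab = "Sepal.Width (scaled)")
points(B[, 1], B[, 2], col = "grey40", pch = kmB$cluster)
```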

Compare the cluster overlap between data set A (color) and data set B (grey symbols). For which number of clusters do colors and symbols appear "pure" - i.e., not a mix of colors within a symbol or a mix of symbols within a color? For this data set the cluster overlap is maximized with 2 clusters.

Next I manipulate the iris data to make the Virginica data more separated from the rest. This is to illustrate that with stronger separation you might find 3 clusters.

Next I split the first cluster into two with strong separation to see if I can detect 4 clusters.

Instead of doing this from scratch, let's use existing packages.
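One option (an assumption on my part, not necessarily the demo's exact choice) is prediction.strength() from the fpc package:

```r
## Hedged sketch: prediction strength for 2 to 6 clusters on the iris data.
library(fpc)
set.seed(1)
ps <- prediction.strength(scale(iris[, 1:4]), Gmin = 2, Gmax = 6,
                          M = 50, clustermethod = kmeansCBI)
ps$mean.pred   # average prediction strength for each number of clusters
```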

Clearly, the overlap is best for 2 clusters.

Now, try this for the data set with more separation between 3 clusters and an added 4th from splitting Virginica.

Your results here may vary a bit from sample to sample - run it a couple of times to see.

Next, try this with a different clustering and classification method (here hierarchical clustering and knn).
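A manual sketch of the same idea with hierarchical clustering and knn, shown on the unmodified iris data for brevity; swap in the manipulated data to look for 4 clusters. The split and tuning choices are my own.

```r
## Sketch: cluster each half with hierarchical clustering, then carry the
## labels from one half to the other with knn and compare.
library(class)
set.seed(2)
X   <- scale(iris[, 1:4])
idx <- sample(nrow(X), nrow(X) / 2)
A   <- X[idx, ]
B   <- X[-idx, ]

hcA <- cutree(hclust(dist(A), method = "complete"), k = 3)
hcB <- cutree(hclust(dist(B), method = "complete"), k = 3)

## Carry the A labels over to B with knn and compare to B's own clustering
labB_fromA <- knn(train = A, test = B, cl = factor(hcA), k = 5)
table(carried_over = labB_fromA, own_clustering = hcB)
```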

With this set of methods, it seems clearer that 4 clusters are detectable.

Comparing clusters

A key component to cluster selection and validation is having metrics to compare clusters. One popular metric to compare 2 cluster results is the \emph{Rand Index} defined as

$$RI = \frac{a+b}{a+b+c+d}$$

where $a$ is the number of pairs that are together in both cluster results and $b$ is the number of pairs that are apart in both cluster results. $c$ and $d$ are the numbers of pairs that are together in the first cluster result but not in the other, and vice versa. A high Rand Index means that the clusterings overlap a lot. The Adjusted RI (or corrected RI) adjusts for the amount of overlap that could be expected just by chance.

A second commonly used metric is the \emph{Jaccard Index}, defined as the ratio between the intersection and the union of two sets. Here, one can use it at the cluster level to measure how well a cluster allocation agrees between two cluster results.

Another metric, used below, is the \emph{Variation of Information}. Two clustering methods result in two different partitions of the data, $A_1,\cdots,A_K$ and $B_1,\cdots, B_J$. For partition $A$ you can compute the cluster proportions $a_k, k=1,\cdots,K$, and similarly for $B$ we get $b_j, j=1,\cdots,J$. Let $o_{kj}$ be the proportion of observations that fall in both $A_k$ and $B_j$ (i.e., $|A_k \cap B_j|/n$) for all pairs of clusters between the two partitions. The Variation of Information is then defined as

$$ VI = - \sum_{k,j} o_{kj} \left(\log\frac{o_{kj}}{a_{k}} + \log\frac{o_{kj}}{b_{j}}\right) $$

which is a measure of distance between the clusterings. A small value indicates strong agreement between the clusterings.
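A hedged sketch of computing these indices for two clusterings of the iris data; cluster.stats() from the fpc package reports the corrected (adjusted) Rand index and the variation of information when you hand it an alternative clustering.

```r
## Sketch: compare a kmeans and a hierarchical clustering with the indices above.
library(fpc)
X  <- scale(iris[, 1:4])
km <- kmeans(X, centers = 3, nstart = 10)$cluster
hc <- cutree(hclust(dist(X), method = "complete"), k = 3)

cs <- cluster.stats(d = dist(X), clustering = km, alt.clustering = hc)
c(adjusted_rand = cs$corrected.rand, variation_of_information = cs$vi)
```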

The kmeans and hierarchical cluster results with 3 clusters do not overlap as you can see from the indices and the scatter plots. Try with more methods, different distances and see what happens.

What about 2 clusters?

Now we have perfect agreement between kmeans and hierarchical clustering.

Cluster stability

Let's try resampling to assess how much clusters may vary for a random subset of observations. Are clusters stable?
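A hedged sketch using clusterboot() from the fpc package (whether this is the demo's exact tool is my assumption): it resamples the observations, re-clusters, and records the clusterwise Jaccard agreement with the clustering of the full data.

```r
## Sketch: bootstrap stability of a 3-cluster kmeans solution on iris.
library(fpc)
cb <- clusterboot(scale(iris[, 1:4]), B = 50, bootmethod = "boot",
                  clustermethod = kmeansCBI, krange = 3, seed = 3)
cb$bootmean   # mean Jaccard similarity per cluster; values near 1 = stable
```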

For the iris data, bootstrap results are not stable beyond 2 clusters. Try this on the iris data where you created greater separation between the clusters.

Let's run this on more bootstrap samples.

Looks pretty stable for 2 to 3 clusters with kmeans, but not with 4.

What about hierarchical clustering?

The clusters are fairly stable up to 4 clusters with hierarchical clustering with complete linkage. Remember, though, how those clusters looked: stable, but with limited overlap with the kmeans clusters. Choice of method matters.

Consensus Clustering

Let's explore the cancer data I posted on canvas. There are 6 different cancer types, 2887 samples in total, each sample measured on 20000+ genes.
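A minimal sketch of a PCA view of the data; `cancerX` (samples in rows, genes in columns) and the class-label factor `cancerClass` are placeholder names, not the demo's actual objects.

```r
## Sketch: leading principal components, colored by the known cancer classes
## (placeholder object names, see lead-in above).
pc <- prcomp(cancerX, center = TRUE, scale. = FALSE)
plot(pc$x[, 1], pc$x[, 2], col = as.numeric(cancerClass), pch = 16,
     xlab = "PC1", ylab = "PC2")
legend("topright", legend = levels(cancerClass),
       col = seq_along(levels(cancerClass)), pch = 16, cex = 0.7)
```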

You can see the cancer classes fairly well separated in the leading eigencomponents. However, there is quite some imbalance between the classes here, with many more breast cancer samples and very few uterine cases. Also, the uterine samples seem to be positioned between the other classes.
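A sketch of the kmeans step, reusing `pc` and `cancerClass` from the sketch above:

```r
## Sketch: kmeans on the 10 leading principal components, compared to the classes.
set.seed(4)
km6 <- kmeans(pc$x[, 1:10], centers = 6, nstart = 25)
table(cluster = km6$cluster, class = cancerClass)
```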

Kmeans on the 10 leading components splits the breast cancer data into 2 clusters while combining glioblastoma and kidney cancer. As expected, we don't retrieve all 6 classes when we cluster.

Does it work if we use partitioning around medoids (PAM)?
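A hedged sketch with pam() from the cluster package on the same 10 leading components (placeholder object names as before):

```r
## Sketch: PAM with 6 clusters, silhouette plot, and confusion with the classes.
library(cluster)
pam6 <- pam(pc$x[, 1:10], k = 6)
plot(silhouette(pam6))                           # silhouette widths per cluster
table(cluster = pam6$clustering, class = cancerClass)
```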

While the clusters look good in the silhouette width plot, the confusion matrix shows that they don't overlap well with the cancer classes - breast cancer is split into two groups. PAM was, however, better at separating glioblastoma and kidney cancer.

Let's try a hacky version of consensus clustering from scratch.
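A sketch of what such a from-scratch scheme could look like (my own reading of the steps described next; the consensus matrix is named DD to mirror the text, and all tuning choices are assumptions):

```r
## Sketch: resample observations, re-cluster, count co-clustering of pairs,
## and feed 1 - co-clustering frequency into hierarchical clustering.
set.seed(5)
n     <- nrow(pc$x)
B     <- 50
DD    <- matrix(0, n, n)    # counts of how often a pair co-clusters
count <- matrix(0, n, n)    # counts of how often a pair is sampled together

for (b in 1:B) {
  sub  <- sample(n, floor(0.8 * n))                      # resample observations
  cl   <- kmeans(pc$x[sub, 1:10], centers = 6, nstart = 5)$cluster
  same <- outer(cl, cl, "==")                            # TRUE if a pair co-clusters
  DD[sub, sub]    <- DD[sub, sub] + same
  count[sub, sub] <- count[sub, sub] + 1
}

CC   <- DD / pmax(count, 1)                              # co-clustering frequency in [0, 1]
hcCC <- hclust(as.dist(1 - CC), method = "average")      # 1 - frequency as distance
plot(hcCC, labels = FALSE)
```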

I resample 50 times. You can try fewer or more resamples and see what happens. I now have a consensus matrix, DD, that tabulates how often pairs of observations co-cluster across the different runs. I use this as a new distance metric for hierarchical clustering.

From the dendrogram you can see that some samples form tighter clusters than others (they stay together more frequently), while others are scattered between the clusters. You see clear, stable subclusters in the breast cancer (black) class. Try this with different clustering methods, different numbers of random features or observation subsets and see what happens.

We see, as before, that it's hard to retrieve all the classes, and the clustering method favors splitting breast cancer into multiple clusters. Increasing to 7 clusters finds all the classes except the small uterine cancer, while splitting breast cancer into two subgroups. The consensus matrix depicts how stable the clusters are. You see that there are 4 fairly stable clusters in the bottom left corner of the consensus matrix that are well separated. Of the 3 at the top right corner, one looks stable, but the larger ones are clearly not as stable. These are the breast cancer class mixed with a few other samples, and the uterine class.

Feature filtering

Let's sort the features by their variance and pick some of the top and bottom ones and see what their distribution looks like.
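A sketch of the variance sort (placeholder object names as before):

```r
## Sketch: order the genes by variance and compare the distribution of a
## high- and a low-variance feature.
v  <- apply(cancerX, 2, var)
Xs <- cancerX[, order(v, decreasing = TRUE)]    # columns sorted by decreasing variance

par(mfrow = c(1, 2))
hist(Xs[, 530],   breaks = 50, main = "Feature 530 (high variance)",  xlab = "expression")
hist(Xs[, 18500], breaks = 50, main = "Feature 18500 (low variance)", xlab = "expression")
par(mfrow = c(1, 1))
```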

What you see is that for most observations a feature may have 0 or near-0 values. This means that a gene may not be expressed for some of the cancer classes but highly expressed for others. If a feature looks multimodal, this might indicate that it contains cluster information. Feature 530 looks nicely bimodal whereas feature 18500 looks unimodal. Remember, the features are sorted by decreasing variance.

Let's use the leading 250 features and perform clustering. Here, I use a bi-clustering approach, so I also cluster the features. The distance metric is Euclidean.
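A sketch of the bi-clustering heatmap (placeholder names as before; the color scale and linkage choices are my assumptions):

```r
## Sketch: rows (samples) and columns (genes) both clustered hierarchically
## with Euclidean distance; row-side colors mark the true classes.
top250  <- Xs[, 1:250]
rowcols <- palette()[as.numeric(cancerClass)]
heatmap(as.matrix(top250),
        distfun   = dist,                                   # Euclidean distance
        hclustfun = function(d) hclust(d, method = "complete"),
        RowSideColors = rowcols,
        col = colorRampPalette(c("red", "white", "blue"))(50))
```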

You see a nice patchwork here. This is the sorted data matrix: rows (observations) ordered by hierarchical clustering with Euclidean distance, and columns ordered by clustering the features. The color labels on the left are the true classes. You see a clear overlap between some clusters and classes. But the black class, breast cancer, is clearly split into two groups. You see that there are clusters of genes as well, groups that are either high (blue) or low (red) for a subset of cancers in different patterns. This is a very dense and informative summary of the data. However, always remember that the results are driven by your choices of clustering metric and method.

What if we try a different distance metric? Below, I use correlation as a similarity metric instead.
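A sketch of the same heatmap with a correlation-based dissimilarity, reusing the objects from the sketch above:

```r
## Sketch: 1 - correlation as the dissimilarity for both rows and columns.
cordist <- function(x) as.dist(1 - cor(t(x)))
heatmap(as.matrix(top250),
        distfun   = cordist,
        hclustfun = function(d) hclust(d, method = "complete"),
        RowSideColors = rowcols,
        col = colorRampPalette(c("red", "white", "blue"))(50))
```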

Notice how the results changed. Now, breast cancer is actually forming a slightly tighter group.

Let's investigate how feature selection may impact clustering (also a topic for project 2).

First I use the top 2000 varying genes and then use the leading eigenvectors from a PCA on those 2000 features.
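A sketch of this feature selection plus PCA step (placeholder names as before):

```r
## Sketch: PCA on the 2000 top-varying genes, then the two leading components
## colored by class.
top2000 <- Xs[, 1:2000]
pc2000  <- prcomp(top2000, center = TRUE)
plot(pc2000$x[, 1], pc2000$x[, 2], col = as.numeric(cancerClass), pch = 16,
     xlab = "PC1", ylab = "PC2")
```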

This gives us a good separation for some of the larger classes but clearly also some overlap. If you plot this without the colors you see what I mean.

Now it's not so easy to see how many clusters there are or where they are separated.

Let's now only use the top-varying features and see what kind of cluster separation we get then.

Clearly, the top-varying genes mostly separate breast and lung cancer from the rest. It appears that lung cancer is well separated in the subspace spanned by the 1st and 3rd most varying genes, whereas breast cancer is separated along the 2nd most varying gene.

If you look at the next few features, it is clear that some cancers dominate if you use variance as the feature selection criterion.

Let's use the ConsensusClusterPlus package and investigate cluster stability and selection.
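A hedged sketch of a ConsensusClusterPlus call; the specific settings below are my assumptions, not necessarily those used in the demo.

```r
## Sketch: the package expects features in rows and samples in columns, so the
## (placeholder) matrix is transposed.
library(ConsensusClusterPlus)
d  <- t(as.matrix(top2000))
cc <- ConsensusClusterPlus(d, maxK = 8, reps = 50, pItem = 0.8, pFeature = 1,
                           clusterAlg = "km", distance = "euclidean",
                           seed = 6, plot = "png", title = "cancer_consensus")
## cc[[k]]$consensusMatrix and cc[[k]]$consensusClass hold the results for k clusters,
## e.g. table(cc[[6]]$consensusClass, cancerClass) compares them to the classes.
```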

ConsensusClusterPlus outputs the consensus matrices for your selected range of numbers of clusters. It also provides the empirical CDF of the consensus matrices, which helps you assess what a reasonable number of clusters may be. See more on this under Project 2.

Let's have a look at one of the outputs. Do the consensus clusters overlap with the classes?

You see that by increasing the number of clusters we don't retrieve stable clusters for all the classes.

Let's try a different clustering method: correlation-based similarity and hierarchical clustering.

With 5 clusters you almost retrieve the classes. Try this with different methods, different sampling of observations and different features.

Graphical Lasso - Networks and clusters

We have talked about the inverse covariance matrix in class, mainly in terms of how it's an essential component of DA classification since the inverse appears in our decision rules. The problem with this was that for large numbers of features, strong correlations between features, etc., the inverse may become numerically unstable. We "fixed" this by regularizing, for example by adding something to the diagonal of the covariance before taking the inverse. This constitutes a form of shrinkage, mainly along the directions of small variation.

In class I briefly discussed another variant of regularized variance estimation called graphical lasso. This results in a sparse estimate of the inverse covariance where only features that are strongly, directly correlated have a non-zero entry in the matrix (see lecture notes).

The output from graphical lasso is a so-called adjacency matrix with non-zero entries between objects/features that are directly dependent and zero otherwise. The more heavily we regularize the inverse, the fewer direct connections are estimated as non-zero. This controls the sparsity, i.e., the number of direct links between observations/features.

Let's try this on the digit data set.
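A hedged sketch; `digitsX` (one digit image per row) and `digitsY` (its labels) are placeholder names for the digit data, and the subset size and penalty grid are my own choices.

```r
## Sketch: to get a network between the observations, run glasso on their
## correlation matrix over a grid of penalty values rho (larger rho = sparser).
library(glasso)
set.seed(8)
sub  <- sample(nrow(digitsX), 60)          # a small subset keeps the graph readable
S    <- cor(t(digitsX[sub, ]))             # observation-by-observation correlations
rhos <- c(0.1, 0.25, 0.5, 0.75)

gl  <- lapply(rhos, function(r) glasso(S, rho = r))
## Adjacency matrices: non-zero off-diagonal entries of the estimated inverse
adj <- lapply(gl, function(g) (abs(g$wi) > 1e-8) * 1 - diag(1, nrow(S)))
sapply(adj, sum) / 2                       # number of edges at each sparsity level
```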

I prefer to use other packages to actually depict the resulting networks or graphs.

With the network package I can add labels, arrows, etc. to my graph for richer visualizations.
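A sketch with the network package, reusing the adjacency list and labels from the glasso sketch above:

```r
## Sketch: plot one of the estimated graphs, colored and labeled by digit class.
library(network)
net <- network(adj[[2]], directed = FALSE)
plot(net, vertex.col = as.numeric(factor(digitsY[sub])),
     displaylabels = TRUE, label = as.character(digitsY[sub]), label.cex = 0.6)
```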

Change the number in the double square brackets to look at networks of a different sparsity.

Notice how this method does a good job at separating the digits from one another while also illustrating that some individual numbers may be close to a digit from a different class.

We can apply this to the cancer data as well.

Again, you can see a clear separation of the cancer classes but also an indication that breast cancer may be split into two groups. Try looking at different levels of sparsity, with different numbers of PC components as input, etc.

You can apply this method to the features (genes) instead and construct a network of features. For the gene data application this is actually a very common technique. We are looking for gene sub-networks, or pathways, that form a functional component in the cell.
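A sketch of a small gene network, reusing the placeholder objects from before:

```r
## Sketch: glasso on the correlations of a small set of top-varying genes,
## then a network plot of the resulting adjacency matrix.
Sg   <- cor(as.matrix(top2000[, 1:100]))   # gene-by-gene correlations
glg  <- glasso(Sg, rho = 0.4)
adjg <- (abs(glg$wi) > 1e-8) * 1 - diag(1, nrow(Sg))
netg <- network(adjg, directed = FALSE)
plot(netg, vertex.cex = 0.7)
```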

You see some subnetworks of different sizes and complexity. To get clusters (or modules) from a graph you can use various graph cutting algorithms. We will talk more about this and sparse modeling after the Easter break.