WGCNA: an R
package for weighted correlation network analysis
Reference:
Webpage: http://www.genetics.ucla.edu/labs/horvath/CoexpressionNetwork/Rpackages/WGCNA/
Tutorials: http://www.genetics.ucla.edu/labs/horvath/CoexpressionNetwork/Rpackages/WGCNA/Tutorials/index.html
install.packages(c("fields", "impute", "dynamicTreeCut", "qvalue", "flashClust", "Hmisc") )
¡P For Windows, R version 2.10.0 and higher: WGCNA_0.86.zip
¡P For Windows, R version 2.9.2 and lower: WGCNA_0.86.zip
¡P Source (Linux, Mac etc): WGCNA_0.86.tar.gz
¡P Reference manual in pdf format
¡P Quick reference: overview table of most important functions
1. Download the appropriate package file and save it in a directory of your choice.
2.
At the prompt, type (replace path/to/file
with the directory and filename of the downloaded package)
install.packages("path/to/file", repos = NULL)
For Mac users: add the argument
type = "source"
If you do not have sufficient privileges, you can either (1) ask your system administrator to install the packages for you, or (2) create your own personal R library.
install.packages("path/to/file", repos = NULL, lib = "path/to/library")
Note that if you
chose to go this route, you will need to specify your library location every
time you load the package, by using
library(package, lib.loc = "path/to/library")
Install GO database for the species
The organism-speci_c packages have names of the form org.Xx.eg.db, where Xx stands for organism code, for example, Mm for mouse, Hs for human, etc
source("http://bioconductor.org/biocLite.R")
biocLite("org.Mm.eg.db")
The data are gene expression measurements from livers of female mouse of a specific F2 intercross. For a detailed description of the data and the biological implications we refer the reader to Ghazalpour et al (2006), Integrating Genetics and Network Analysis to Characterize Genes Related to Mouse Weight (link to paper; link to additional information). Note that the data set contains 3600 measured expression profiles. In addition to the expression data, several physiological quantitative traits were measured for the mice. Please download the following
and unzip them in a folder of your choice.
Load expression
data
# Display the current working directory
getwd();
# If necessary, change the path below to the directory where
the data files are stored.
# "." means current directory. On Windows use a
forward slash / instead of the usual \.
workingDir = ".";
setwd(workingDir);
# Load the package
library(WGCNA);
# The following setting is important, do not omit.
options(stringsAsFactors = FALSE);
#Read in the female liver data set
femData = read.csv("LiverFemale3600.csv");
# Take a quick look at what is in the data set:
dim(femData);
names(femData);
Remove the auxiliary data and transpose the expression data for
further analysis
datExpr0 = as.data.frame(t(femData[, -c(1:8)]));
names(datExpr0) = femData$substanceBXH;
rownames(datExpr0) = names(femData)[-c(1:8)];
Check data for excessive missing values and identi_cation of outlier
microarray
Samples
gsg = goodSamplesGenes(datExpr0, verbose = 3);
gsg$allOK
if (!gsg$allOK)
{
# Remove the
offending genes and samples from the data:
datExpr0 =
datExpr0[gsg$goodSamples, gsg$goodGenes];
}
Check outliers
sampleTree = flashClust(dist(datExpr0), method = "average");
# Plot the sample tree: Open a graphic output window of size
12 by 9 inches
# The user should change the dimensions if the window is too
large or too small.
sizeGrWindow(12,9)
par(cex = 0.6);
par(mar = c(0,4,2,0))
plot(sampleTree, main = "Sample
clustering to detect outliers", sub="",
xlab="", cex.lab = 1.5,
cex.axis = 1.5, cex.main = 2)
Remove outliers
# Plot a line to show the cut
abline(h = 15, col = "red");
# Determine cluster under the line
clust = cutreeStatic(sampleTree, cutHeight = 15, minSize =
10)
table(clust)
# clust 1 contains the samples we want to keep.
keepSamples = (clust==1)
datExpr = datExpr0[keepSamples, ]
nGenes = ncol(datExpr)
nSamples = nrow(datExpr)
colnames(datExpr)
rownames(datExpr)
Load trait data
traitData = read.csv("ClinicalTraits.csv");
dim(traitData)
names(traitData)
# remove columns that hold information we do not need.
allTraits = traitData[, -c(31, 16)]; #remove
note and comment
allTraits = allTraits[, c(2, 11:36) ];
dim(allTraits)
names(allTraits)
# Form a data frame analogous to expression data that will
hold the clinical traits.
femaleSamples = rownames(datExpr);
traitRows = match(femaleSamples, allTraits$Mice);
datTraits = allTraits[traitRows, -1];
rownames(datTraits) = allTraits[traitRows, 1];
collectGarbage();
Visualize how the clinical traits relate to the relate
to the sample dendrogram
# Re-cluster samples
sampleTree2 = flashClust(dist(datExpr), method = "average")
# Convert traits to color representation ( white: low, red:
high, grey: missing entry)
traitColors = numbers2colors(datTraits, signed = FALSE);
# Plot the sample dendrogram and the colors underneath.
plotDendroAndColors(sampleTree2, traitColors, groupLabels =
names(datTraits),
main = "Sample dendrogram and trait heatmap")
Automatic,
one-step network construction and module detection
net = blockwiseModules(datExpr, power = 6, minModuleSize =
30,
reassignThreshold
= 0, mergeCutHeight = 0.25,
numericLabels
= TRUE, pamRespectsDendro = FALSE,
saveTOMs =
TRUE, saveTOMFileBase = "femaleMouseTOM", verbose
= 3)
The structure of network
str(net)
To see how many modules were identifed and what the module sizes are,
table(net$color)
The dendrogram
can be displayed together with the color assignment using the following code
# open a graphics window
sizeGrWindow(12, 9)
# Convert labels to colors for plotting
mergedColors = labels2colors(net$colors)
# Plot the dendrogram and the module colors underneath
plotDendroAndColors(net$dendrograms[[1]],
mergedColors[net$blockGenes[[1]]],
"Module colors", dendroLabels
= FALSE, hang = 0.03, addGuide = TRUE, guideHang = 0.05)
Save the module
information necessary for subsequent analysis.
moduleLabels
= net$colors
moduleColors
= labels2colors(net$colors)
MEs
= net$MEs;
geneTree
= net$dendrograms[[1]];
save(MEs,
moduleLabels, moduleColors, geneTree,
file
= "FemaleLiver-02-networkConstruction-auto.RData")
Compute 1st
principal componet of each module as its eigengenes. Correlate eigengene
external traits and look for the most significant associations.
#
Define numbers of genes and samples
nGenes
= ncol(datExpr);
nSamples
= nrow(datExpr);
#
calculate eigengenes (1st principal component) of modules
MEs0
= moduleEigengenes(datExpr, moduleColors)$eigengenes
MEs
= orderMEs(MEs0)
moduleTraitCor
= cor(MEs, datTraits, use = "p");
moduleTraitPvalue
= corPvalueStudent(moduleTraitCor, nSamples);
A suitable graphical
representation
# Open
a graphic window with specified dimensions
sizeGrWindow(10,6)
#
Will display correlations and their p-values
textMatrix
= paste(signif(moduleTraitCor, 2), "\n(", signif(moduleTraitPvalue,
1), ")", sep = "");
dim(textMatrix)
= dim(moduleTraitCor)
par(mar
= c(6, 8.5, 3, 3));
#
Display the correlation values within a heatmap plot
labeledHeatmap(Matrix
= moduleTraitCor, xLabels = names(datTraits), yLabels = names(MEs), ySymbols =
names(MEs), colorLabels = FALSE, colors = greenWhiteRed(50), textMatrix =
textMatrix, setStdMargins = FALSE, cex.text = 0.5, zlim = c(-1,1), main =
paste("Module-trait relationships"))
Get LocusLinkID(entrez ID)
#
Read in the probe annotation
annot
= read.csv(file = "GeneAnnotation.csv");
#
Match probes in the data set to the probe IDs in the annotation file
probes
= names(datExpr)
probes2annot
= match(probes, annot$substanceBXH)
#
Get the corresponding Locuis Link IDs
# As
background in the enrichment analysis, we will use all probes in the analysis
allLLIDs
= annot$LocusLinkID[probes2annot];
Install GO
database for the species
The organism-speci_c packages have names of the form org.Xx.eg.db, where Xx stands for organism code, for example, Mm for mouse, Hs for human, etc
source("http://bioconductor.org/biocLite.R")
biocLite("org.Mm.eg.db")
Load GO database
and do GO enrichments
library (org.Mm.eg.db)
GOenr = GOenrichmentAnalysis(moduleColors, allLLIDs,
organism = "mouse", nBestP = 10);
The function
runs for a while and returns a long list, where the most interesting component
of which is an enrichment table containing the 10 best terms for each module
present in moduleColors.
tab =
GOenr$bestPTerms[[4]]$enrichment
names(tab)
Write the
results into a file
write.table(tab,
file = "GOEnrichmentTable.csv", sep = ",",
quote = TRUE, row.names = FALSE)