Monday, September 20, 2010

R: Canonical Correspondence Analysis

# Classical Canonical Correspondence Analysis - alpha tested ok on 23 Aug 2010 -

# In general, I didn't code this myself.
# Compiled from various sources.

# lib(s): CCA, mixOmics(?)

# Before using this script:
library(CCA)

# Data input: generic

# Red indicates stuff you gotta change.

# - CANONICAL CORRESPONDANCE ANALYSIS 1 -

X <- as.matrix(infile$dat.frm1) # declare dataframe with first set of variables as a matrix
Y <- as.matrix(infile$dat.frm2) # declare dataframe with second set of variables as a matrix

# Tested on nutrimouse data (from CCA library).

# Canonical correspondence analysis assumes that for both datasets, the number if cases is more than the number of variables. If that is not the case, consider sampling a specific number of variables.
correl <- matcor(X, Y)
img.matcor(correl,type=2)
res.cc<-cc(X, Y)
barplot(res.cc$cor,xlab = "Dimension",ylab = "Canonical correlations", names.arg=1:10,ylim=c(0,1))
plt.cc(res.cc)

# this assumes that the number of cases exceeds the number of variables for both datasets in question.
X <- as.matrix(infile$var1,infile$var2,infile$var3,...,infile$var)
Y <- as.matrix(infile$var1,infile$var2,infile$var3,...,infile$var)

# Regularized CCA - (regularized? wtf does that mean???) - alpha tested ok on 23 Aug 2010 -

lib(s): mixOmics,CCA

# think you might need mixOmics package.
# mixOmics requires the gtk runtime environment.
# Windows users: search and get from download.com or get from here: http://download.cnet.com/3001-2192_4-10280276.html?spi=cd9c2c926c6f63eceaf7d4e103ceb2a9
# ubuntu (and probably other debian variants): open the terminal and type: sudo apt-get install libX11-dev freeglut3 freeglut3-dev then install as usual.

res.regul <- estim.regul(X, Y, plt = TRUE, grid1 = seq(0.0001, 0.2, l=51), grid2 = seq(0, 0.2, l=51)) # !!! this command will take the better part of 90 mins to run !!!
contour(res.regul$grid1, res.regul$grid2, res.regul$mat, add = TRUE, levels = c(0,0.5,0.7), col = "blue")
contour(res.regul$grid1, res.regul$grid2, res.regul$mat, add = TRUE, levels = c(0.8,0.85,0.88), col = "darkgreen")

res.rcc <- rcc(X, Y, 0.008096, 0.064) # rcc(X,Y,lambda1,lambda2)
barplot(res.rcc$cor, xlab = "Dimension", ylab = "Canonical correlations", names.arg = 1:21, ylim = c(0,1))

# some kind of extra plot
plt.cc(res.rcc, var.label = TRUE,ind.names = paste(nutrimouse$genotype, nutrimouse$diet, sep = " "))

# - END OF CANONICAL CORRESPONDENCE ANALYSIS 1 -


# - CANONICAL CORRESPONDANCE ANALYSIS 2 - using the function from package ade4

VarSet1 <- data.frame(infile$Var1,infile$Var2,infile$Var3,...infile$Var)
VarSet2 <- data.frame(infile$Var1,infile$Var2,infile$Var3,...infile$Var)

cca(VarSet1,VarSet2) # no negative values allowed in VarSet1 and VarSet2

# - END OF CANONICAL CORRESPONDENCE ANALYSIS 2 -

# /* END OF CANONICAL CORRESPONDENCE ANALYSIS */

No comments:

Post a Comment