Friday, August 19, 2011

3x3 box and whiskers

# Global settings shared by every panel of the 3x3 box-and-whisker cluster.
# y1, y2, x1 and x2 below are placeholders to be replaced with numbers.
reset

# Same y range for all panels.
set yrange [y1:y2] # where y1 < y2

# Same x range for all panels.
set xrange [x1:x2] # where x1 < x2

# Explicit (small) margins on all four sides instead of gnuplot's automatic ones.
set lmargin 0.5
set bmargin 0.5
set tmargin 0.5
set rmargin 0.5

# Fixed box width of 0.5 for the candlestick boxes.
set boxwidth 0.5 absolute

# NOTE(review): 'unset bars' removes the default error-bar end caps; the plot
# commands further down request 'whiskerbars' explicitly - confirm the combination
# renders as intended.
unset bars

# Boxes are filled solid at 0.25 density; line style 1 is defined with width 3.
set style fill solid 0.25
set style line 1 lw 3
# Grid lines at the y tics only.
set grid ytics

# Output goes to colour EPS, Helvetica 16, size 5,5.
set terminal postscript eps enhanced color 'Helvetica' 16 size 5,5

# ==================================
# !!! - Graph for Group_I - DUMMY - !!!
# ==================================

# A dummy cluster has to be drawn first: in the first output file produced by a script,
# extra tics show up in the 2nd row of graphs. The problem disappears from every
# subsequent cluster generated by the same script, so this throw-away output absorbs it.
# This matters most when you generate more than one cluster of graphs per script.

set output "path/to/Outfile.eps"

# 3 rows x 3 columns of panels; each 'plot' command below fills the next panel.
set multiplot layout 3,3 scale 1,1 offset 0.5,0.5

# --- Row 1, panel 1: section labels on the top (x2) axis, y tick labels and y label shown ---
# NOTE(review): the "section 3" entry has no position after its label, unlike the
# other entries in this list - fill one in.
set x2tics ("|" x1, "section 1" x1.1, "|" x1.25, "section 2" x1.5, "|" x1.75, "section 3" , "|" x2) font 'Helvetica,11'
# Empty format string hides the bottom x tick labels; 'set format y' with no
# argument restores the default y tick label format.
set format x ""
set format y
set ylabel "MinorY-1"
set ytics (ytic1,ytic2,ytic3,...,ytic) # where ytic(x) are all some numerical value
# Candlesticks take x:box_min:whisker_min:whisker_high:box_high, so the box spans
# y2..y4 and the whiskers y1..y5; the second plot element draws y3 as line + points.
plot '/path/to/file/GraphData1.txt' using x:y2:y1:y5:y4 with candlesticks whiskerbars lt -1 title "", '' using x:y3 with linespoints title "" pt 5 lt -1 # where x is the column with the x-coordinates and y1<y2<y3<y4<y5
#unset ytics

# --- Row 1, panel 2: y tick labels hidden, cluster title placed above this (middle) panel ---
set format y ""
set ylabel " "
set title "Cluster Title"
plot '/path/to/file/GraphData2.txt' using x:y2:y1:y5:y4 with candlesticks whiskerbars lt -1 title "", '' using x:y3 with linespoints title "" pt 5 lt -1

# --- Row 1, panel 3: title replaced by a single space so it is no longer shown ---
set format y ""
set ylabel ""
set title " "
plot '/path/to/file/GraphData3.txt' using x:y2:y1:y5:y4 with candlesticks whiskerbars lt -1 title "", '' using x:y3 with linespoints title "" pt 5 lt -1

# --- Row 2, panel 1: top-axis section labels removed; y tick labels back on ---
unset x2tics
set format y
# The label text holds the major y title, two line breaks, then this row's minor label.
set ylabel "MajorY (Major y unit) \n \n MinorY-2"
set ytics (ytic1,ytic2,ytic3,...,ytic) # where ytic(x) are all some numerical value
plot '/path/to/file/GraphData4.txt' using x:y2:y1:y5:y4 with candlesticks whiskerbars lt -1 title "", '' using x:y3 with linespoints title "" pt 5 lt -1
#unset ytics

# --- Row 2, panel 2: y tick labels and y label hidden ---
set format y ""
set ylabel ""
plot '/path/to/file/GraphData5.txt' using x:y2:y1:y5:y4 with candlesticks whiskerbars lt -1 title "", '' using x:y3 with linespoints title "" pt 5 lt -1

# --- Row 2, panel 3: no settings change, so it reuses the state of the previous panel ---
plot '/path/to/file/GraphData6.txt' using x:y2:y1:y5:y4 with candlesticks whiskerbars lt -1 title "", '' using x:y3 with linespoints title "" pt 5 lt -1

# --- Row 3: bottom x tics get rotated text labels and the default x format is restored ---
#set xtics border in scale 1,0.5 nomirror rotate by +45 offset character 0, 0, 0
set xtics border in scale 1,1 nomirror rotate by +90 offset character 0, -4, 0
set xtics ("xtic1" 1, "xtic2" 2, ..., "xtic" x2)
set format x

# --- Row 3, panel 1: y tick labels, y label and x label shown ---
set ylabel "MinorY-3"
set xlabel "MinorX-1"
set format y
set ytics (ytic1,ytic2,ytic3,...,ytic) # where ytic(x) are all some numerical value
plot '/path/to/file/GraphData7.txt' using x:y2:y1:y5:y4 with candlesticks whiskerbars lt -1 title "", '' using x:y3 with linespoints title "" pt 5 lt -1
#unset ytics

# --- Row 3, panel 2: y tick labels and y label hidden, own x label ---
set format y ""
set ylabel ""
set xlabel "MinorX-2"
plot '/path/to/file/GraphData8.txt' using x:y2:y1:y5:y4 with candlesticks whiskerbars lt -1 title "", '' using x:y3 with linespoints title "" pt 5 lt -1

# --- Row 3, panel 3: only the x label changes ---
set xlabel "MinorX-3"
plot '/path/to/file/GraphData9.txt' using x:y2:y1:y5:y4 with candlesticks whiskerbars lt -1 title "", '' using x:y3 with linespoints title "" pt 5 lt -1

# Clear the per-row x label and x tics so they do not carry over into the next cluster.
set xlabel ""

unset xtics

unset multiplot

# 'set output' with no argument closes the current output file.
set output

# ==================================
# !!! - Graph for Group_I - ACTUAL - !!!
# ==================================

# This section can be repeated as many times as necessary in a single script
# (one copy per cluster of graphs). The dummy section needs to be run only _once_.

set output "path/to/OutfileACTUAL.eps"

# 3 rows x 3 columns of panels; each 'plot' command below fills the next panel.
set multiplot layout 3,3 scale 1,1 offset 0.5,0.5

# --- Row 1, panel 1: section labels on the top (x2) axis, y tick labels and y label shown ---
# NOTE(review): the "section 3" entry has no position after its label, unlike the
# other entries in this list - fill one in.
set x2tics ("|" x1, "section 1" x1.1, "|" x1.25, "section 2" x1.5, "|" x1.75, "section 3" , "|" x2) font 'Helvetica,11'
# Empty format string hides the bottom x tick labels; 'set format y' with no
# argument restores the default y tick label format.
set format x ""
set format y
set ylabel "MinorY-1"
set ytics (ytic1,ytic2,ytic3,...,ytic) # where ytic(x) are all some numerical value
# Candlesticks take x:box_min:whisker_min:whisker_high:box_high, so the box spans
# y2..y4 and the whiskers y1..y5; the second plot element draws y3 as line + points.
plot '/path/to/file/GraphData1.txt' using x:y2:y1:y5:y4 with candlesticks whiskerbars lt -1 title "", '' using x:y3 with linespoints title "" pt 5 lt -1
#unset ytics

# --- Row 1, panel 2: y tick labels hidden, cluster title placed above this (middle) panel ---
set format y ""
set ylabel " "
set title "Cluster Title"
plot '/path/to/file/GraphData2.txt' using x:y2:y1:y5:y4 with candlesticks whiskerbars lt -1 title "", '' using x:y3 with linespoints title "" pt 5 lt -1

# --- Row 1, panel 3: title replaced by a single space so it is no longer shown ---
set format y ""
set ylabel ""
set title " "
plot '/path/to/file/GraphData3.txt' using x:y2:y1:y5:y4 with candlesticks whiskerbars lt -1 title "", '' using x:y3 with linespoints title "" pt 5 lt -1

# --- Row 2, panel 1: top-axis section labels removed; y tick labels back on ---
unset x2tics
set format y
# The label text holds the major y title, two line breaks, then this row's minor label.
set ylabel "MajorY (Major y unit) \n \n MinorY-2"
set ytics (ytic1,ytic2,ytic3,...,ytic) # where ytic(x) are all some numerical value
plot '/path/to/file/GraphData4.txt' using x:y2:y1:y5:y4 with candlesticks whiskerbars lt -1 title "", '' using x:y3 with linespoints title "" pt 5 lt -1
#unset ytics

# --- Row 2, panel 2: y tick labels and y label hidden ---
set format y ""
set ylabel ""
plot '/path/to/file/GraphData5.txt' using x:y2:y1:y5:y4 with candlesticks whiskerbars lt -1 title "", '' using x:y3 with linespoints title "" pt 5 lt -1

# --- Row 2, panel 3: no settings change, so it reuses the state of the previous panel ---
plot '/path/to/file/GraphData6.txt' using x:y2:y1:y5:y4 with candlesticks whiskerbars lt -1 title "", '' using x:y3 with linespoints title "" pt 5 lt -1

# --- Row 3: bottom x tics get rotated text labels and the default x format is restored ---
#set xtics border in scale 1,0.5 nomirror rotate by +45 offset character 0, 0, 0
set xtics border in scale 1,1 nomirror rotate by +90 offset character 0, -4, 0
set xtics ("xtic1" 1, "xtic2" 2, ..., "xtic" x2)
set format x

# --- Row 3, panel 1: y tick labels, y label and x label shown ---
set ylabel "MinorY-3"
set xlabel "MinorX-1"
set format y
set ytics (ytic1,ytic2,ytic3,...,ytic) # where ytic(x) are all some numerical value
plot '/path/to/file/GraphData7.txt' using x:y2:y1:y5:y4 with candlesticks whiskerbars lt -1 title "", '' using x:y3 with linespoints title "" pt 5 lt -1
#unset ytics

# --- Row 3, panel 2: y tick labels and y label hidden, own x label ---
set format y ""
set ylabel ""
set xlabel "MinorX-2"
plot '/path/to/file/GraphData8.txt' using x:y2:y1:y5:y4 with candlesticks whiskerbars lt -1 title "", '' using x:y3 with linespoints title "" pt 5 lt -1

# --- Row 3, panel 3: only the x label changes ---
set xlabel "MinorX-3"
plot '/path/to/file/GraphData9.txt' using x:y2:y1:y5:y4 with candlesticks whiskerbars lt -1 title "", '' using x:y3 with linespoints title "" pt 5 lt -1

# Clear the per-row x label and x tics so they do not carry over into the next cluster.
set xlabel ""

unset xtics

unset multiplot

# 'set output' with no argument closes the current output file.
set output

# Switch back to the interactive wxt terminal once all files are written.
set terminal wxt

Monday, August 8, 2011

Percentiles for plotting box and whiskers

# This calculates percentile values and the median, then writes a text file with the data. It is set to the 2.5th, 25th, 75th and 97.5th percentiles, plus the median (i.e. the 50th percentile).
# Because the median is simply the 50th percentile, you can skip the whole "create 2 dataframes and merge before filling in the blank rows" step by including 0.5 in the probs of the by() line below. After that you only need to fill in whatever dummy rows are necessary.
# The objective here is to produce a dataframe with a _specific_ number of rows (i.e. cases). This can be repeated as many times as necessary.
# That way, when you plot it later, all your graphs will have identical x-axes when you draw a big cluster of graphs (see the associated gnuplot script).

# Agt.Smith is the name of the dataframe that you wanna use for this. If necessary:
# Agt.Smith <- infile # uncomment if you want to use this, but do i really even have to note this???


# Build a per-category table of percentiles and the median of `Var`, then write
# it as a tab-separated text file for plotting.
#
# Input : `Agt.Smith`, a data frame with a numeric column `Var` and a grouping
#         column `CatVar`.
# Output: one row per level of `CatVar` (plus any optional blank rows), with the
#         columns: row name, the 4 percentiles, the median and a running index
#         (`dummy.var`), _in that order_. The header starts with "#".

Var.Agt.Smith <- data.frame(Agt.Smith$Var, Agt.Smith$CatVar)
colnames(Var.Agt.Smith) <- c("Var", "CatVar")
Var.Agt.Smith$CatVar <- factor(Var.Agt.Smith$CatVar)

# Change the percentiles as you will - but keep the generic column names given
# to df1 below at the same length as this vector.
Var.probs <- c(0.025, 0.25, 0.75, 0.975)
Var.percentile <- by(
  Var.Agt.Smith$Var, Var.Agt.Smith$CatVar,
  quantile, probs = Var.probs, na.rm = TRUE
)

# One row per level of CatVar, one column per percentile. The matrix shape is
# derived from the number of percentiles, so no hand-set level count is needed.
df1 <- data.frame(
  matrix(unlist(Var.percentile), ncol = length(Var.probs), byrow = TRUE),
  row.names = names(Var.percentile)
)
# These are just generic names for the percentiles calculated above. Feel free
# to change to something more sensible.
colnames(df1) <- c("alpha", "bravo", "charlie", "delta")

Var.median <- tapply(Var.Agt.Smith$Var, Var.Agt.Smith$CatVar, median, na.rm = TRUE)
df2 <- data.frame(Var.median)
Var.Agt.Smith <- merge(df1, df2, by = "row.names")

# - OPTIONAL - if you need to fill in empty rows, define blnk.rw before this point, e.g.:
#blnk.rw <- data.frame(Row.names=c("missing_rowname1","missing_rowname2"),"alpha"=c(NA,NA),bravo=c(NA,NA),charlie=c(NA,NA),delta=c(NA,NA),Var.median=c(NA,NA))
# The blank rows are appended only when blnk.rw actually exists, so the script
# no longer stops with "object 'blnk.rw' not found" when the line above stays
# commented out.
if (exists("blnk.rw")) {
  Var.Agt.Smith <- rbind(Var.Agt.Smith, blnk.rw)
}

# Sorting by row name puts any inserted dummy cases into the correct position.
Var.Agt.Smith2 <- Var.Agt.Smith[order(Var.Agt.Smith$Row.names), ]
Var.Agt.Smith2

# Running case index 1..(number of rows), taken from the table itself rather
# than from a hand-set n.
Var.Agt.Smith2$dummy.var <- seq_len(nrow(Var.Agt.Smith2))

# Seven names for seven columns: previously only six names were supplied, which
# left the dummy.var column without a name in the output header.
colnames(Var.Agt.Smith2) <- c(
  "#Row.names", "alpha", "bravo", "charlie", "delta", "Var.median", "dummy.var"
)
write.table(Var.Agt.Smith2,"path/to/file/Var/Varoutfile.txt",sep="\t",quote=FALSE, row.names=FALSE)

Wednesday, August 3, 2011

R: Reshaping Data 1 (no calculations)

# this assumes you just want to reshape your data without calculating anything - i couldn't get it to work any other way.
# cast() failed me here, for some reason
# adapted from digithead's lab notebook

# Data input: generic

# Reshape long data (CatVar1, CatVar2, Var) into a wide table: one row per value
# of CatVar1 and one column per value of CatVar2, holding the matching Var.
# No aggregation is done, so each (CatVar1, CatVar2) pair may occur at most once.
#
# Input : `infile`, a data frame with columns `cat.var1`, `cat.var2` and `Var`.
# Output: `result`, also written as a tab-separated text file.

Var.df <- data.frame(infile$cat.var1, infile$cat.var2, infile$Var)
colnames(Var.df) <- c("CatVar1", "CatVar2", "Var")

# One data frame per value of CatVar2.
Var <- split(Var.df, Var.df$CatVar2)

# as.factor() keeps existing factor levels and also copes with a character
# column, for which levels() alone would return NULL and give an empty result.
result <- data.frame(CatVar1 = levels(as.factor(Var.df$CatVar1)))

for (i in seq_along(Var)) {
  piece <- Var[[i]]
  if (anyDuplicated(piece$CatVar1) > 0) {
    stop(
      "CatVar1 is repeated within CatVar2 = '", names(Var)[i],
      "'; this reshape does no aggregation",
      call. = FALSE
    )
  }
  # Align on the real key column (the old merge key 'yymm' does not exist in
  # these data). match() returns NA where a CatVar1 value is absent from this
  # piece, which leaves NA in the corresponding cell.
  result[[names(Var)[i]]] <- piece$Var[match(result$CatVar1, piece$CatVar1)]
}
result

write.table(result,"path/to/output/folder/Var.txt",sep="\t",quote=FALSE,row.names=FALSE)