Supplemental Methods R-Scripts Used for Microarray Image Analysis And

Supplemental Methods R-Scripts Used for Microarray Image Analysis And

# SUPPORTING INFORMATION
# Supplemental Methods
#
# R-Scripts used for microarray image analysis and analysis of gene
# transcription profiles of salmon juveniles from enriched and unenriched
# hatchery rearing environments.
#
# Microarray data acquisition (scripts adapted from Booman et al. 2011)
#
# 1. Images exported from the ScanArray Gx Plus scanner and ScanExpress v4.0
#    software were visualized using Imagene. In Imagene, manual flagging was
#    used to exclude spots of poor quality.
# 2. Further processing of the data was done in R:

# Script based on 'script 110327.R'
# Script written and executed over multiple days January 24-25 2013

# Load marray package
library(marray)

# Read in array info to make object of class marrayInfo
targets_fam_trmt_melissa <- read.marrayInfo(
  fname = "targets_fam_trmt.txt", info.id = NULL, labels = NULL,
  notes = "targets_fam_trmt_melissa", sep = "\t", skip = 0, quote = "\""
)

# Read in gal file info indicating Name and ID columns and columns containing
# layout info to make object of class marrayInfo
galinfoAgilent <- read.Galfile(
  "025055_D_20090817_AID.gal", path = ".",
  info.id = c("Name", "Annotation ID", "ID", "ControlType"),
  layout.id = c(Block = "Block", Row = "Row", Column = "Column"),
  labels = "Name", notes = "", sep = "\t", skip = NULL, ncolumns = 1
)

# Create vector of array names
arrayNames <- targets_fam_trmt_melissa@maLabels

# Number of arrays in the experiment (32 in the published dataset). Every
# per-array loop and table below is sized from this value instead of a
# hard-coded literal, so the loop bounds can no longer disagree.
nArrays <- length(arrayNames)

# Read in, summarize and display intensity data for all arrays
mrawMedian <- read.marrayRaw(
  fnames = arrayNames, path = ".",
  name.Gf = "Signal Median 2", name.Gb = "Background Median 2",
  name.Rf = "Signal Median 1", name.Rb = "Background Median 1",
  name.W = "Flag",
  layout = galinfoAgilent$layout, gnames = galinfoAgilent$gnames,
  targets = targets_fam_trmt_melissa,
  notes = NULL, skip = NULL, sep = "\t", quote = "\"", DEBUG = FALSE
)
# One intensity column is expected per array file; stop early otherwise.
stopifnot(ncol(mrawMedian@maGf) == nArrays)
summary(mrawMedian)

# Check if order of intensities and target are the same
checkTargetInfo(mrawMedian)

# Remove control spots
mrawMedianNoControl <- subset(
  mrawMedian,
  !(mrawMedian@maGnames@maInfo$ControlType %in% c("ignore", "pos", "neg"))
)

# Calculate summary stats and cutoff levels
# Create separate object for this, and remove manual flagged spots (flag 1)
# Manually flagged spots are removed to prevent blowing up the background SD
# because of dust etc.
mrawMedianStats <- mrawMedianNoControl
manualFlag <- mrawMedianStats@maW == 1
mrawMedianStats@maGf[manualFlag] <- NA
mrawMedianStats@maGb[manualFlag] <- NA
mrawMedianStats@maRf[manualFlag] <- NA
mrawMedianStats@maRb[manualFlag] <- NA

# Calculate average (avg), standard deviation (sd), signal/background ratio
# (sbr) and cutoffs
avgGf <- colMeans(mrawMedianStats@maGf, na.rm = TRUE)
avgGb <- colMeans(mrawMedianStats@maGb, na.rm = TRUE)
avgRf <- colMeans(mrawMedianStats@maRf, na.rm = TRUE)
avgRb <- colMeans(mrawMedianStats@maRb, na.rm = TRUE)
sdGf <- apply(mrawMedianStats@maGf, 2, sd, na.rm = TRUE)
sdGb <- apply(mrawMedianStats@maGb, 2, sd, na.rm = TRUE)
sdRf <- apply(mrawMedianStats@maRf, 2, sd, na.rm = TRUE)
sdRb <- apply(mrawMedianStats@maRb, 2, sd, na.rm = TRUE)
sbrG <- avgGf / avgGb
sbrR <- avgRf / avgRb

# Per-array cutoff = mean background + 2 SD of background.
# Column 1 is the green channel, column 2 the red channel. Dimnames are
# dropped so the exported table matches a plain nArrays x 2 matrix.
cutoffs <- unname(cbind(avgGb + (2 * sdGb), avgRb + (2 * sdRb)))

# To export cutoffs into text file, add column and row names
cutoffsExport <- cbind(arrayNames, cutoffs)
cutoffsExport <- rbind(c('ArrayID', 'Green cutoff', 'Red cutoff'), cutoffsExport)
write.table(cutoffsExport, "cutoff levels.txt", quote = FALSE, sep = "\t")

# To export summary statistics to text file, add column and row names
statsExport <- cbind(avgGf, sdGf, avgRf, sdRf, avgGb, sdGb, avgRb, sdRb, sbrG, sbrR)
write.table(statsExport, "summary statistics.txt", quote = FALSE, sep = "\t")

# Continue with normalizing dataset, using the mrawMedianNoControl object
# (which has only control spots removed)
# Normalize using printtip Loess
mnormMedianNoControl <- maNormMain(mrawMedianNoControl, echo = TRUE)
summary(mnormMedianNoControl)

# Plot boxplots to check normalization (one raw and one normalized PDF per
# array; the file name suffix is taken from column 2 of the targets info)
for (i in seq_len(nArrays)) {
  pdf(paste0("RawMedianBoxPlots", targets_fam_trmt_melissa@maInfo[i, 2], ".pdf"),
      paper = "letter")
  boxplot(mrawMedianNoControl[, i], xvar = "maPrintTip", yvar = "maM")
  dev.off()

  pdf(paste0("NormMedianBoxPlots", targets_fam_trmt_melissa@maInfo[i, 2], ".pdf"),
      paper = "letter")
  boxplot(mnormMedianNoControl[, i], xvar = "maPrintTip", yvar = "maM")
  dev.off()
}

pdf("RawMedianBoxPlotsAllArrays.pdf", paper = "letter")
boxplot(mrawMedianNoControl, yvar = "maM")
dev.off()

pdf("NormMedianBoxPlotsAllArrays.pdf", paper = "letter")
boxplot(mnormMedianNoControl, yvar = "maM")
dev.off()

# Plot scatterplots; example MA plots with LowessLines
for (i in seq_len(nArrays)) {
  pdf(paste0("RawMedianScatterPlot", targets_fam_trmt_melissa@maInfo[i, 2], ".pdf"),
      paper = "letter")
  defs <- maDefaultPar(mrawMedianNoControl[, i],
                       x = "maA", y = "maM", z = "maPrintTip")
  legend.func <- do.call("maLegendLines", defs$def.legend)
  lines.func <- do.call("maLowessLines", c(list(TRUE, f = 0.3), defs$def.lines))
  plot(mrawMedianNoControl[, i], xvar = "maA", yvar = "maM", zvar = "maPrintTip",
       lines.func, text.func = maText(), legend.func)
  dev.off()

  pdf(paste0("NormMedianScatterPlot", targets_fam_trmt_melissa@maInfo[i, 2], ".pdf"),
      paper = "letter")
  defs <- maDefaultPar(mnormMedianNoControl[, i],
                       x = "maA", y = "maM", z = "maPrintTip")
  legend.func <- do.call("maLegendLines", defs$def.legend)
  lines.func <- do.call("maLowessLines", c(list(TRUE, f = 0.3), defs$def.lines))
  plot(mnormMedianNoControl[, i], xvar = "maA", yvar = "maM", zvar = "maPrintTip",
       lines.func, text.func = maText(), legend.func)
  dev.off()
}

# Create array data where values below specific cutoff are replaced by NA
# after normalization
# Create different datasets
# And one with only cutoff for Green used (i.e. to prevent loss of genes that
# are switched off in a subgroup of samples)

# Dataset 1: M is set to NA where either raw foreground channel is below its
# per-array cutoff. The loop must visit exactly the arrays that exist:
# 'cutoffs' has one row per array and the intensity matrices one column per
# array, so any larger bound fails with "subscript out of bounds".
mnormCutoffBoth <- mnormMedianNoControl
for (i in seq_len(nArrays)) {
  removeG <- mrawMedianNoControl@maGf[, i] < cutoffs[i, 1]
  removeR <- mrawMedianNoControl@maRf[, i] < cutoffs[i, 2]
  mnormCutoffBoth@maM[, i][removeG] <- NA
  mnormCutoffBoth@maM[, i][removeR] <- NA
}
# Keep A consistent with M: wherever M was blanked, blank A as well.
mnormCutoffBoth@maA[is.na(mnormCutoffBoth@maM)] <- NA

# Dataset 2: only the green-channel cutoff is applied.
mnormCutoffGreen <- mnormMedianNoControl
for (i in seq_len(nArrays)) {
  removeG <- mrawMedianNoControl@maGf[, i] < cutoffs[i, 1]
  mnormCutoffGreen@maM[, i][removeG] <- NA
}
mnormCutoffGreen@maA[is.na(mnormCutoffGreen@maM)] <- NA

# Make a count of the Imagene quality flags remaining after removal of controls
# (since spots below cutoff are not removed but rather replaced by NA, a count
# after cutoff levels is useless)
flags <- mrawMedianNoControl@maW
# One row per array; columns hold the number of spots with flag 1, 2 and 3.
flagcounts <- unname(cbind(
  colSums(flags == 1),
  colSums(flags == 2),
  colSums(flags == 3)
))
write.table(flagcounts, "flagcounts after control removal.txt",
            quote = FALSE, sep = "\t")

# Remove all flagged spots
# Spots carrying flag 1 or flag 2 are blanked on every array.
# NOTE(review): in the source text this loop ran over the first array only and
# the flag-2 assignment had lost its left-hand side; both are restored here to
# mirror the handling of the Green dataset below - confirm against the
# original script.
mnormCutoffBothrmFlagAll <- mnormCutoffBoth
flaggedBoth <- mnormCutoffBothrmFlagAll@maW == 1 |
  mnormCutoffBothrmFlagAll@maW == 2
mnormCutoffBothrmFlagAll@maM[flaggedBoth] <- NA
mnormCutoffBothrmFlagAll@maA[is.na(mnormCutoffBothrmFlagAll@maM)] <- NA

mnormCutoffBothrmFlagAll.table <- cbind(
  mnormCutoffBothrmFlagAll@maGnames@maInfo$ID,
  mnormCutoffBothrmFlagAll@maM
)
write.table(mnormCutoffBothrmFlagAll.table,
            "mnormCutoffBothrmFlagAll_ratios.txt", quote = FALSE, sep = "\t")

mnormCutoffGreenrmFlagAll <- mnormCutoffGreen
flaggedGreen <- mnormCutoffGreenrmFlagAll@maW == 1 |
  mnormCutoffGreenrmFlagAll@maW == 2
mnormCutoffGreenrmFlagAll@maM[flaggedGreen] <- NA
mnormCutoffGreenrmFlagAll@maA[is.na(mnormCutoffGreenrmFlagAll@maM)] <- NA

mnormCutoffGreenrmFlagAll.table <- cbind(
  mnormCutoffGreenrmFlagAll@maGnames@maInfo$ID,
  mnormCutoffGreenrmFlagAll@maM
)
write.table(mnormCutoffGreenrmFlagAll.table,
            "mnormCutoffGreenrmFlagAll_ratios.txt", quote = FALSE, sep = "\t")

# Make new tables removing spots with >25% NA
# (32 arrays, 25% = 8 NA, so 8 and smaller allowed)
maxNAPerProbe <- floor(0.25 * nArrays)
mnormCutoffGreenrmFlagAllNACount <-
  rowSums(is.na(mnormCutoffGreenrmFlagAll.table))
mnormCutoffGreen.NA <- cbind(mnormCutoffGreenrmFlagAll.table,
                             mnormCutoffGreenrmFlagAllNACount)
write.table(mnormCutoffGreen.NA, file = "mnormCutoffGreenNACount.txt",
            quote = FALSE, sep = "\t")
mnormCutoffGreen.full <- read.table("mnormCutoffGreenNACount.txt",
                                    header = TRUE, sep = "\t",
                                    row.names = 1, fill = TRUE)
# After re-reading, the columns are: probe ID, one column per array, and the
# NA count as the last column.
naCountCol <- ncol(mnormCutoffGreen.full)
mnormCutoffGreen.NA25 <- subset(
  mnormCutoffGreen.full,
  mnormCutoffGreen.full[, naCountCol] <= maxNAPerProbe
)
# Drop the NA-count helper column before export.
mnormCutoffGreen.NA25 <- mnormCutoffGreen.NA25[, seq_len(naCountCol - 1)]
write.table(mnormCutoffGreen.NA25, "Green_avg_NACutoff_25percent.txt",
            quote = FALSE, sep = "\t")

# Impute missing data using EMarray from LSimpute applet
# Adapt LSimpute commands to new files and folders
# Do not forget to change text files first to insert a tab at the beginning
# and remove './' and '.txt'

# Siggenes analysis (scripts adapted from Booman et al. 2011)
#
# 1. As part of the previous script, all probes for which the log2 ratio was
#    missing (NA) in more than 25% of arrays were removed from the normalized
#    and thresholded log transcription data. This resulted in a final dataset
#    comprised of 21,117 probes.
# 2. Missing data in the new dataset were imputed using the EM_array algorithm
#    from the LSimpute package as described by Bø et al., Nucleic Acids
#    Research 2004, 32:e34.
# 3.
Imputed data were read into R and two-class comparison analysis was performed with the package ‘siggenes’: #Read in imputed data table.melissa.avg.25.imputed.full <- read.table("Green_avg_NACutoff_25percent_imputed_EMarray_repl_rem.txt",header=TRUE,sep="\t",r ow.names=1,fill=TRUE) table.25imputed_all_families_NG_vs_G <- cbind(table.melissa.avg.25.imputed.full[,1:5],table.melissa.avg.25.imputed.full[,11:15],table.melissa.avg

View Full Text

Details

  • File Type
    pdf
  • Upload Time
    -
  • Content Languages
    English
  • Upload User
    Anonymous/Not logged-in
  • File Pages
    135 Page
  • File Size
    -

Download

Channel Download Status
Express Download Enable

Copyright

We respect the copyrights and intellectual property rights of all users. All uploaded documents are either original works of the uploader or authorized works of the rightful owners.

  • Not to be reproduced or distributed without explicit permission.
  • Not used for commercial purposes outside of approved use cases.
  • Not used to infringe on the rights of the original creators.
  • If you believe any content infringes your copyright, please contact us immediately.

Support

For help with questions, suggestions, or problems, please contact us