#######################################################
################# ######################
################# UI Function ######################
################# ######################
#######################################################
ui<-navbarPage("Metabolomics Statisitcal Analysis R Shiny App 0.1",
tabPanel("Load all Packages",
helpText("All packages must be initialised before uploading the data."),
actionButton("loadPack","Load All Packages"),
helpText("After clicking on the Load All Packages button. Please wait unitl the application
displays below \"TRUE\" for all the packages"),
tableOutput("pacL")
),
tabPanel("Upload Data",
sidebarLayout(
sidebarPanel(
radioButtons("radioButtons1",
label = h3("Data Type"),
choices = list("Concentration" = 1,
"Spectral Bins" = 2, "Intensity Table" = 3),
selected = 1),
selectInput("select",
label = h3("Format"),
choices = list("Samples in rows (unpaired)" = 1, "Samples in columns (unpaired)" = 2,
"Samples in rows (paired)" = 3, "Samples in columns (paired)"=4), selected = 1),
fileInput('file1', 'Choose CSV File',
accept=c('text/csv', 'text/comma-separated-values,text/plain', '.csv'))
),
mainPanel(textOutput('contents'))
)
),
navbarMenu("Data Processing",
tabPanel("Missing value estimation",
helpText(" Too many missing values will cause difficulties for downstream analysis.
There are several different methods for this purpose.
The default method replaces all the missing values with a small values (the half of the minimum positive values in the original data) assuming to be the detection limit.
Move onto Normalization if you want to use the default method.
The assumption of this approach is that most missing values are caused by low abundance metabolites (i.e.below the detection limit)."
),
helpText("The functions in MetaboAnalyst also offers other methods, such as replace by mean/median, k-nearest neighbour (KNN), probabilistic PCA (PPCA), Bayesian PCA (BPCA) method, Singular Value Decomposition (SVD) method to impute the missing values.
Please choose the one that is the most appropriate for your data."
),
sidebarPanel(h4("Step 1. Remove features with too many missing values"),
checkboxInput("missValue1",
label = "Remove features with > x % missing values",
value = TRUE),
numericInput("missValue2",
label=NULL,
value=50)
),
sidebarPanel(h4("Step 2. Estimate the remaining missing values (Select only 1 of the following 4 options"),
checkboxInput("missValue3",
label = "Replace by a small value (half of the minimum positive value in the original data)",
value = TRUE),
checkboxInput("missValue4",
label = "Exclude variables with missing values",
value = FALSE),
selectInput("missValue5",
label=h5("Replace by column (feature)"),
choices = list("None"=1, "Mean"=2, "Median"=3,"Min"=4),
selected =1),
selectInput("missValue6",
label=h5("Estimate missing values using"),
choices = list("None"=1, "KNN"=2, "PPCA"=3,"BPCA"=4,"SVD Impute"=5),
selected =1)
),
actionButton("calc1","Process"),
h3(textOutput("MVtext1"))
)
),
tabPanel("Normailsation",
sidebarLayout(
sidebarPanel(h3("Sample Normalization"),
radioButtons("radioButtons2",
label = h4("Sample normalization"),
choices = list("None" = 1,
"Normalization by sum" = 2,
"Normalization by median" = 3,
"Normalization by a specific reference sample"=4,
"Normalization by a pooled sample from group"=5,
"Normalization by reference feature"= 6
),
selected = 1),
uiOutput("refsample"),
uiOutput("poolsample"),
uiOutput("refFeat"),
radioButtons("radioButtons3",
label = h4("Data transform"),
choices = list("None" = 1,
"log transform" = 2, "cube root transform" = 3),
selected = 1),
radioButtons("radioButtons4",
label = h4("Data scaling"),
choices = list("None" = 1, "Mean Centering"=2,
"Auto scaling" = 3, "Pareto scaling" = 4,
"Range scaling" = 5, "Vast scaling"=6),
selected = 1)
),
mainPanel(actionButton("go","Update"),
plotOutput("normPlot",width = "600", height = "800")
)
)
),
navbarMenu("Univariate Analysis",
tabPanel("Fold Change Analysis",
h3("Fold Change Analysis"),
helpText("Fold change (FC) analysis is to compare the absolute value change between two group means.
Since column-wise normalization (i.e. log transformation, mean-centering) will significantly change the absolute values,
FC is calculated as the ratio between two group means using data before column-wise normilzation was applied."
),
helpText("For paired analysis, the program first counts the number of pairs with consistent change above the given FC threshold.
If this number exceeds a given count threshold, the variable will be reported as significant."
),
uiOutput("FCAnalT"),
numericInput("FCThresh",
label=h5("Fold Change Threshhold"),
value=2
),
uiOutput("SigCountT"),
uiOutput("ComparType"),
actionButton("go4", "Update"),
plotOutput("FCPlot"),
dataTableOutput('FCtable1')
),
tabPanel("T-Test",
h3("T-Test"),
helpText("Note, for large data set (> 1000 variables),
both the paired information and the group variance
will be ignored, and the default parameters will be used for t-tests
to save computational time. If you choose non-parametric tests (Wilcoxon rank-sum test),
the group variance will be ignored."
),
uiOutput("TTAnalType"),
numericInput("TTestP",
label=h5("P-Value"),
value=0.05
),
selectInput("grpVar",
label = h5("Group variance"),
choices = list("Equal" = 1, "Unequal" = 2),
selected = 1),
checkboxInput("NPT1",
label = h5("Non-parametric tests:"), value = FALSE),
# sidebarPanel(h5("Color input for the T-test graph"),
textInput("Tcolo1",
label=h5("Color 1:"),
value="red"
),
textInput("Tcolo2",
label=h5("Color 2:"),
value="green"
),
helpText("This applications supports the following colors: "),
helpText(" \"black\",\"blue\",\"brown\",\"cyan\",\"darkblue\",\"darkred\",\"green\",\"grey\",\"gray\",
\"lightblue\", \"limegreen\",\"magenta\", \"orange\",\"pink\",
\"purple\", \"violet\", \"yellow\""),
#),
actionButton("go2","Update/Plot"),
plotOutput("PlotTT", click ="plot_click1"),
helpText("Click the points on the graph to display an interval plot and boxplot plot for that compound"),
selectInput("tIntCalc",
label = h5("Statistic Shown for the Interval Plot"),
choices = list("se" = 1, "sd" = 2),
selected = 1),
uiOutput("color1"),
splitLayout(cellWidths = c("50%", "50%"),
plotOutput("PlotTT2", width = "500", height = "500"),
plotOutput("PlotBox1", width="400", height="500")
),
verbatimTextOutput("info1"),
dataTableOutput('TTtable1')
),
tabPanel("One-way Analysis of Variance (ANOVA)",
h3("One-way ANOVA & post-hoc Tests "),
helpText("You can choose to perform one-way ANOVA or its non-parametric version (Kruskal Wallis Test).
Note, the post-hoc tests have only been implemented for parametric version."),
checkboxInput("NPT2",
label = h5("Non-parametric tests:"), value = FALSE),
numericInput("ANOVAP",
label=h5("Significant Level (alpha): raw p-vlaue < "),
value=0.05
),
selectInput("PHA",
label = h5("Post-hoc analysis:"),
choices = list("Fisher's LSD" = 1, "Tuckey's HSD" = 2),
selected = 1),
textInput("Acolo1",
label=h5("Color 1:"),
value="red"
),
textInput("Acolo2",
label=h5("Color 2:"),
value="green"
),
helpText("This applications supports the following colors: "),
helpText(" \"black\",\"blue\",\"brown\",\"cyan\",\"darkblue\",\"darkred\",\"green\",\"grey\",\"gray\",
\"lightblue\", \"limegreen\",\"magenta\", \"orange\",\"pink\",
\"purple\", \"violet\", \"yellow\""),
actionButton("go3","Update/Plot"),
plotOutput("PlotAOV", click ="plot_click2"),
helpText("Click the points on the graph to display an interval plot and boxplot plot for that compound"),
selectInput("aIntCalc",
label = h5("Statistic Shown for the Interval Plot"),
choices = list("se" = 1, "sd" = 2),
selected = 1),
uiOutput("color2"),
splitLayout(cellWidths = c("50%", "50%"),
plotOutput("PlotAOV2", width = "500", height = "500"),
plotOutput("PlotBox2", width="400", height="500")
),
dataTableOutput('AOVtable1')
),
tabPanel("Volcano plot",
h3("Volcano plot"),
helpText("The volcano plot is a combination of fold change and t-tests.
Note, for unpaired samples, the x-axis is log (FC).
For paired analysis, the x-axis is number of significant counts.
Y-axis is -log10(p.value) for both cases."),
uiOutput("VolAnalT"),
sidebarPanel(h3("X-axis:"),
numericInput("VolThresh",
label=h5("Fold Change Threshhold"),
value=2),
uiOutput("VolComparType"),
uiOutput("VolSigCountT")
),
sidebarPanel(h3("Y-axis:"),
checkboxInput("VolNPT",
label = h5("Non-parametric tests:"), value = FALSE),
numericInput("VolP",
label=h5("P value threshold:"),
value=0.1),
selectInput("VolgrpVar",
label = h5("Group variance"),
choices = list("Equal" = 1, "Unequal" = 2),
selected = 1)
),
actionButton("go5","Update/Plot"),
selectInput("vIntCalc",
label = h5("Statistic Shown for the Interval Plot"),
choices = list("se" = 1, "sd" = 2),
selected = 1),
uiOutput("color3"),
helpText("This applications supports the following colors: "),
helpText(" \"black\",\"blue\",\"brown\",\"cyan\",\"darkblue\",\"darkred\",\"green\", \"grey\", \"gray\",
\"lightblue\", \"limegreen\", \"magenta\", \"orange\", \"pink\", \"purple\", \"violet\", \"yellow\""),
plotOutput("PlotVol",click ="plot_click3"),
splitLayout(cellWidths = c("50%", "50%"),
plotOutput("PlotVol2", width = "500", height = "500"),
plotOutput("PlotVol3", width="400", height="500")
),
verbatimTextOutput("info2"),
dataTableOutput('VOLtable1')
),
tabPanel("Correlation Analysis",
h3("Correlation Analysis"),
helpText("Note, the heatmap will only show correlations for a maximum of 1000 features.
For larger datasets, only top 1000 features will be selected based on their
interquantile range (IQR). When color distribution is fixed,
you can potentially compare the correlation patterns among different data sets.
In this case, you can choose \"do not perform clustering\" for all data set, or only to perform clustering on a single reference data set,
then manually re-arranged other data sets according to the clustering pattern of the reference data set. "),
selectInput("distM",
label = h5("Distance Measure"),
choices = list("Pearson r" = 1, "Spearman rank correlation" = 2,
"Kendall rank correlation"=3),
selected = 1),
radioButtons("viewM",
label = h5("View Mode"),
choices = list("Overview" = 1, "Detailed View" = 2),
selected = 1),
checkboxInput("fixColD",
label = h5("Fix color distribution[-1,1]:"), value = FALSE),
selectInput("colorCon",
label = h5("Color Contrast"),
choices = list("Default"=1, "Red/Green"=2, "Heat Color"=3,
"Topo Color"=4, "Gray Scale"=5,"Red/White/Blue"=6,
"Red/White/Green"=7, "White/Navy/Blue"=8)
),
checkboxInput("performClus",
label = h5("Do no perform clustering:"), value = FALSE),
sidebarPanel(
sliderInput("CorrHeatWidth", "Plot Width (%)", min = 0, max = 100, value = 100),
sliderInput("CorrHeatHeight", "Plot Height (px)", min = 0, max = 1500, value = 500)
),
actionButton("go6","Update"),
uiOutput("corrHeat")
),
tabPanel("Pattern Searching",
h3("Pattern Searching"),
helpText("Correlation analysis can be performed either against a given feature or against a given pattern. The pattern is specified as a series of numbers separated by \"-\".
Each number corresponds to the expected expression pattern in the corresponding group. For example,
a 1-2-3-4 pattern is used to search for features that increase linearly with time in a time-series data with four time points (or four groups).
The order of the groups is given as the first item in the predefined patterns. "),
radioButtons("pattern",
label = h5("Define a pattern using:"),
choices = list("a feature of interest:" = 1, "a predefined profile:"=2,
"a custom profile:" = 3),
selected = 1),
uiOutput("interestFt"),
uiOutput("profile"),
textInput("customPro",
h5("Custom Profile input")
),
selectInput("distM2",
label = h5("Distance Measure"),
choices = list("Pearson r" = 1, "Spearman rank correlation" = 2,
"Kendall rank correlation"= 3)
),
actionButton("go7","Update"),
plotOutput("patternGraph"),
dataTableOutput("CORRtable1")
)
),
navbarMenu("PCA",
tabPanel("Overview",
uiOutput("pcNum"),
actionButton("update1", "Update/Plot"),
sidebarPanel(
sliderInput("OverPWidth", "Plot Width (%)", min = 0, max = 100, value = 100),
sliderInput("OverPHeight", "Plot Height (px)", min = 0, max = 800, value = 500)
),
uiOutput("overPlot")
),
tabPanel("Scree Plot",
uiOutput("pcNum2"),
helpText("The green line on top shows the accumulated variance explained; the blue line underneath shows the variance explained by individual PC"),
actionButton("update2", "Update/Plot"),
sidebarPanel(
sliderInput("ScreePWidth", "Plot Width (%)", min = 0, max = 100, value = 100),
sliderInput("ScreePHeight", "Plot Height (px)", min = 0, max = 800, value = 500)
),
uiOutput("screePlot")
),
tabPanel("2D Score Plot",
numericInput("pcX",
label=h5("Specify PC on x-axis:"),
value=1),
numericInput("pcY",
label=h5("Specify PC on y-axis:"),
value=2),
checkboxInput("pcaConf",
label=h5("Display 95% confidence regions:"),
value=TRUE),
checkboxInput("disSmplName",
label=h5("Display sample names:"),
value=TRUE),
checkboxInput("gscale",
label=h5("Use grey-scale colors:"),
value=FALSE),
actionButton("update3", "Update/Plot"),
sidebarPanel(
sliderInput("PCA2DWidth", "Plot Width (%)", min = 0, max = 100, value = 100),
sliderInput("PCA2DHeight", "Plot Height (px)", min = 0, max = 800, value = 500)
),
uiOutput("pca2dP")
),
tabPanel("PCA trajectory plot",
numericInput("pcaTraX",
label=h5("Specify PC on x-axis:"),
value=1),
numericInput("pcaTraY",
label=h5("Specify PC on y-axis:"),
value=2),
textInput("PCATraTitle", label=h5("Title"),
value="PCA Trajectory Plot"),
uiOutput("PCATraCol"),
numericInput("errBarWidth",
label=h5("Error Bar Width"),
value=0.03),
numericInput("traPtS",
label=h5("Point Size"),
value=2),
numericInput("traLimit",
label=h5("Increase The range of the scale by (%)"),
value=20),
actionButton("update16", "Update/Plot Graph"),
sidebarPanel(
sliderInput("PCATrAWidth", "Plot Width (%)", min = 0, max = 100, value = 100),
sliderInput("PCATRAHeight", "Plot Height (px)", min = 0, max = 800, value = 500)
),
uiOutput("pcaTrajPlot")
),
tabPanel("3D Score Plot",
sidebarPanel(h3("3D PCA plot using raw rgl"),
numericInput("pSize",
label=h5("Point Size"),
value=0.7),
numericInput("transparency",
label=h5("Transparency for ecllipses"),
value=0.1),
checkboxInput("ell",
label=h5("Add ellipses"),
value=TRUE),
checkboxInput("grid",
label=h5("add grid to plot"),
value=FALSE),
textInput("PcaTiltle", label=h5("Title"),
value = ""),
uiOutput("pcaCol"),
actionButton("plot3dpca1", "plot pca 1"),
# plotOutput("PCA3D"),
actionButton("snapShot1","Snap Shot"),
helpText("Note: the SnapShot will only capture latest plotted graph.")
),
sidebarPanel(h3("3D pca plot using PCA3d package"),
checkboxInput("dC",
label=h5("Data Scaling"),
value=TRUE),
checkboxInput("dS",
label=h5("data Centering"),
value=FALSE),
checkboxInput("showScale",
label=h5("show scale"),
value=FALSE),
checkboxInput("showlabels",
label=h5("show labels"),
value=FALSE),
checkboxInput("showP",
label=h5("Show Plane"),
value=FALSE),
checkboxInput("shadow",
label=h5("show Shadow"),
value=FALSE),
checkboxInput("ell2",
label=h5("Add ellipses"),
value=TRUE),
checkboxInput("showGrpLab",
label=h5("show group labels"),
value= FALSE),
actionButton("plot3dpca2", "plot pca 2"),
actionButton("snapShot2","Snap Shot"),
#plotOutput("PCA3D2")
helpText("Note: the SnapShot will only capture latest plotted graph.")
)
),
tabPanel("Loading Plot",
numericInput("loadingX",
label=h5("Specify PC on x-axis:"),
value=1),
numericInput("loadingY",
label=h5("Specify PC on y-axis:"),
value=2),
checkboxInput("loadingFeat",
label=h5("Display feature names:"),
value= FALSE),
actionButton("update4", "Update/Plot"),
splitLayout(cellWidths = c("50%", "50%"),
plotOutput("loadingPCA1",click ="plot_click4"),
plotOutput("loadingPCA2",width="1000")
),
selectInput("lIntCalc",
label = h5("Statistic Shown for the Interval Plot"),
choices = list("se" = 1, "sd" = 2),
selected = 1),
uiOutput("color4"),
helpText("This applications supports the following colors: "),
helpText(" \"black\",\"blue\",\"brown\",\"cyan\",\"darkblue\",\"darkred\",\"green\", \"grey\", \"gray\",
\"lightblue\", \"limegreen\", \"magenta\", \"orange\", \"pink\", \"purple\", \"violet\", \"yellow\""),
splitLayout(cellWidths = c("50%", "50%"),
plotOutput("loadingCmp1", width = "500", height = "500"),
plotOutput("loadingCmp2", width="400", height="500")
),
verbatimTextOutput("info3"),
dataTableOutput('Loadingtable1')
),
tabPanel("Biplot",
numericInput("biplotPCX",
label=h5("Enter PC for X-axis:"),
value=1),
numericInput("biplotPCY",
label=h5("Enter PC for Y-axis:"),
value=2),
actionButton("update5", "Update/Plot"),
plotOutput("biplot")
)
),
navbarMenu("PLSDA",
tabPanel("OverviewPLS",
numericInput("plsNum",
label= h5("Display pairwise score plot for top PCs:"),
value=5),
actionButton("update6", "Update/Plot"),
sidebarPanel(
sliderInput("PLSOWidth", "Plot Width (%)", min = 0, max = 100, value = 100),
sliderInput("PLSOHeight", "Plot Height (px)", min = 0, max = 800, value = 500)
),
uiOutput("overPlotPls")
),
tabPanel("2D Score Plot",
numericInput("plsX",
label=h5("Specify PC on x-axis:"),
value=1),
numericInput("plsY",
label=h5("Specify PC on y-axis:"),
value=2),
checkboxInput("plsConf",
label=h5("Display 95% confidence regions:"),
value=TRUE),
checkboxInput("plsName",
label=h5("Display sample names:"),
value=TRUE),
checkboxInput("plsgscale",
label=h5("Use grey-scale colors:"),
value=FALSE),
actionButton("update7", "Update/Plot"),
sidebarPanel(
sliderInput("dplsWidth", "Plot Width (%)", min = 0, max = 100, value = 100),
sliderInput("dplsHeight", "Plot Height (px)", min = 0, max = 800, value = 500)
),
uiOutput("pls2dP")
),
tabPanel("PLSDA trajectory plot",
numericInput("plsdaTraX",
label=h5("Specify PC on x-axis:"),
value=1),
numericInput("plsdaTraY",
label=h5("Specify PC on y-axis:"),
value=2),
textInput("PlsdaTraTitle", label=h5("Title"),
value="PLS-DA Trajectory Plot"),
uiOutput("PLSDATraCol"),
numericInput("plserrBarWidth",
label=h5("Error Bar Width"),
value=0.03),
numericInput("plsdatraPtS",
label=h5("Point Size"),
value=2),
numericInput("plsdatraLimit",
label=h5("Increase The range of the scale by (%)"),
value=20),
actionButton("update17", "Update/Plot Graph"),
sidebarPanel(
sliderInput("plsTRAWidth", "Plot Width (%)", min = 0, max = 100, value = 100),
sliderInput("plsTRAHeight", "Plot Height (px)", min = 0, max = 800, value = 500)
),
uiOutput("plsdaTrajPlot")
),
tabPanel("3D Score Plot",
sidebarPanel(h3("3D PLSA plot using raw rgl"),
numericInput("plsPSize",
label=h5("Point Size"),
value=0.7),
numericInput("plsTransparency",
label=h5("Transparency for ecllipses"),
value=0.1),
checkboxInput("plsEll",
label=h5("Add ellipses"),
value=TRUE),
checkboxInput("plsGrid",
label=h5("add grid to plot"),
value=FALSE),
textInput("PlsTiltle", label=h5("Title"),
value = ""),
uiOutput("plsCol"),
actionButton("plot3dpls1", "Plot 3D PLSDA 1"),
actionButton("snapShot3","Snap Shot"),
#plotOutput("PCA3D2")
helpText("Note: the SnapShot will only capture latest plotted graph.")
#plotOutput("PLS3D")
),
sidebarPanel(h3("3D PLSDA plot using PCA3d package"),
checkboxInput("showScalePls",
label=h5("show scale"),
value=FALSE),
checkboxInput("showlabelsPls",
label=h5("show labels"),
value=FALSE),
checkboxInput("showPPls",
label=h5("Show Plane"),
value=FALSE),
checkboxInput("shadowPls",
label=h5("show Shadow"),
value=FALSE),
checkboxInput("ell2Pls",
label=h5("Add ellipses"),
value=TRUE),
checkboxInput("showGrpLabPls",
label=h5("show group labels"),
value= FALSE),
actionButton("plot3dpls2", "Plot 3D PLSDA 2"),
actionButton("snapShot4","Snap Shot"),
#plotOutput("PCA3D2")
helpText("Note: the SnapShot will only capture latest plotted graph.")
#plotOutput("PLS3D2")
)
),
tabPanel("Loading Plot",
numericInput("plsloadingX",
label=h5("Specify PC on x-axis:"),
value=1),
numericInput("plsloadingY",
label=h5("Specify PC on y-axis:"),
value=2),
checkboxInput("plsloadingFeat",
label=h5("Display feature names:"),
value= FALSE),
actionButton("update8", "Update/Plot"),
splitLayout(cellWidths = c("50%", "50%"),
plotOutput("loadingPLS1",click ="plot_click5"),
plotOutput("loadingPLS2",width="1000")
),
selectInput("l2IntCalc",
label = h5("Statistic Shown for the Interval Plot"),
choices = list("se" = 1, "sd" = 2),
selected = 1),
uiOutput("color5"),
helpText("This applications supports the following colors: "),
helpText(" \"black\",\"blue\",\"brown\",\"cyan\",\"darkblue\",\"darkred\",\"green\", \"grey\", \"gray\",
\"lightblue\", \"limegreen\", \"magenta\", \"orange\", \"pink\", \"purple\", \"violet\", \"yellow\""),
splitLayout(cellWidths = c("50%", "50%"),
plotOutput("plsloadingCmp1", width = "500", height = "500"),
plotOutput("plsloadingCmp2", width="400", height="500")
),
verbatimTextOutput("info5"),
dataTableOutput('Loadingtable2')
),
tabPanel("Cross Validation",
h3("Select optimal number of components for classification"),
uiOutput("CVcompNo"),
selectInput("CVMethod",
label=h5("Cross validation (CV) method:"),
choices=list("10-fold CV"=1,
"LOOCV"=2),
selected = 2),
selectInput("performMea",
label=h5("Performance measure:"),
choices=list("Q2"= 1, "Accuracy" = 2, "R2" = 3),
selected = 1),
actionButton("update9", "update"),
sidebarPanel(
sliderInput("plsCVWidth", "Plot Width (%)", min = 0, max = 100, value = 100),
sliderInput("plsCVHeight", "Plot Height (px)", min = 0, max = 800, value = 500)
),
tableOutput("CVTab"),
uiOutput("PLSDACVPlot")
),
tabPanel("Imp.Features",
helpText("There are two important measures in PLS-DA: one is variable importance in projection (VIP) and the other is weighted sum of absolute
regression coefficients (coef.). The colored boxes on the right indicate the relateive concentrations of the corresponding metabolite in each group
under study."),
sidebarPanel(h3("Importance measure:"),
checkboxInput("impMeasure1",
label=h5("VIP score"),
value=TRUE
),
uiOutput("vip"),
checkboxInput("impMeasure2",
label=h5("Coefficient score"),
value=FALSE
),
uiOutput("coef")
),
numericInput("topFeatNo",
label=h5("Show top feature number:"),
value=15),
checkboxInput("BW",
label=h5("Use grey scale color:"),
value=FALSE
),
actionButton("update10", "Update/Plot"),
plotOutput("PLSDAImp"),
dataTableOutput('ImpFeatTab')
),
tabPanel("Permutation",
selectInput("permTest",
label=h5("Select test statistic:"),
choices=list("Prediction accuracy during training"=1,
"Separation distance (B/W)"=2),
selected=1),
selectInput("permNumber",
label=h5("Set permutation numbers:"),
choices=list("100"=1, "1000"=2, "2000"=3),
selected=1),
actionButton("update11", "Update/Plot"),
plotOutput("permPlot")
)
),
navbarMenu("OPLSDA",
tabPanel("Score Plot",
checkboxInput("opls95",
label=h5("Display 95% confidence region:"),
value=TRUE),
checkboxInput("oplsSmpNam",
label=h5("Display sample names:"),
value=TRUE),
checkboxInput("oplsgScale",
label=h5("Use grey-scale colors:"),
value=FALSE),
actionButton("update12","Update/Plot"),
sidebarPanel(
sliderInput("OPlsWidth", "Plot Width (%)", min = 0, max = 100, value = 100),
sliderInput("OPlsHeight", "Plot Height (px)", min = 0, max = 800, value = 500)
),
uiOutput("oplsScore")
),
tabPanel("S-Plot",
actionButton("update13","Update/Plot"),
plotOutput("oplsSPlot", click="plot_click6"),
uiOutput("color6"),
splitLayout(cellWidths = c("50%", "50%"),
plotOutput("oplsCmp1", width = "500", height = "500"),
plotOutput("oplsCmp2", width="400", height="500")
),
verbatimTextOutput("info6"),
dataTableOutput('OPLSDATab')
),
tabPanel("Model Overview",
actionButton("update14","Update/Plot"),
plotOutput("OPLSOver")
),
tabPanel("Permutation",
selectInput("oplsdaPer",
label="Set permutation numbers:",
choices=list("100"=1,"1000"=2,"2000"=3),
selected=1),
actionButton("update15","Update/Plot"),
plotOutput("OPLSDAPerm")
)
),
navbarMenu("Feature Identification",
tabPanel("Significance Analysis if Microarry (and Metabolites) (SAM)",
uiOutput("SAMPara"),
uiOutput("SAMAnlTyp"),
uiOutput("SAMVar"),
uiOutput("SAMtxt"),
plotOutput("emptyPlot", width="1",height="1"),
actionButton("compute1","Calculate"),
uiOutput("SAMFDR"),
#actionButton("updateFDR","Update Plot"),
plotOutput("SAMFDRPLOT"),
plotOutput("SAMResultPlot"),
dataTableOutput("SAMTab")
)
),
navbarMenu("Cluster Analysis: Hiearchical Clustering",
tabPanel("Dendrogram",
selectInput("DendroDist",
label = "Distance Measure:",
choices = list("Euclidean" = 1, "Spearman" = 2,
"Pearson" = 3),
selected = 1),
selectInput("DendroAlgor",
label = "Clustering Algorithm:",
choices = list("Ward"=1, "Average"=2, "Complete"=3,
"Single"=4),
selected=1),
actionButton("update18","Update/Plot"),
plotOutput("DendroPlot")
),
tabPanel("HeatMap",
selectInput("CAHeatDist",
label = "Distance Measure:",
choices = list("Euclidean" = 1, "Pearson" = 2,
"Minkowski" = 3),
selected = 1),
selectInput("CAHeatAlgor",
label = "Clustering Algorithm:",
choices = list("Ward"=1, "Average"=2, "Complete"=3,
"Single"=4),
selected=1),
selectInput("CAHeatCC",
label = "Color Contrast:",
choices = list("Default"=1, "Red/Green"=2, "Heat Color"=3,
"Topo Color"=4, "Gray Scale"=5,"Red/White/Blue"=6,
"Red/White/Green"=7, "White/Navy/Blue"=8),
selected=1),
sidebarPanel(h4("View Mode:"),
radioButtons("CAHeatView",
label = NULL,
choices = list("Overview" = 1,
"Detail View (< 2000 features)" = 2),
selected=1)
),
sidebarPanel(h4("View Options"),
checkboxInput("CAHeatView01",
label = "Do not reorganize:", value = FALSE),
selectInput("CAHeatView02",
label=NULL,
choices = list("Samples"=1, "Features"=2, "Both"=3),
selected=1),
checkboxInput("CAHeatView03",
label ="use top:", value = FALSE),
numericInput("CAHeatView04",
label=NULL,
value=25),
selectInput("CAHeatView05",
label=NULL,
choices = list("T-test/ANOVA"=1, "PLS-DA VIP"=2, "Random Forest"=3),
selected =1),
checkboxInput("CAHeatView06",
label= "Show cell borders",
value=TRUE)
),
sidebarPanel(h4("Data Options"),
selectInput("CAHeatData01",
label=h5("Data Source:"),
choices = list("Normalized data"=1, "Original data"=2),
selected=1),
selectInput("CAHeatData02",
label=h5("Standardization:"),
choices = list("Autoscale features"=1, "Autoscale samples"=2, "None"=3),
selected=1)
),
sidebarPanel(
sliderInput("HeatWidth", "Plot Width (%)", min = 0, max = 100, value = 100),
sliderInput("HeatHeight", "Plot Height (px)", min = 0, max = 1500, value = 500)
),
actionButton("update19","Update/Plot"),
uiOutput("plot.ui")
)
),
navbarMenu("Cluster Analysis: Partitional Clustering",
tabPanel("K-means",
helpText("Pleas note: due to space limit, only the cluster memenber will be calculated
if the specified cluster number > 20. The blue lines represent the median intensities of each cluster"),
numericInput("KMclstNm",
label="Specify the cluster number:",
value=3),
actionButton("update20","Plot/update"),
plotOutput("KMPlot"),
tableOutput('mytable1')
),
tabPanel("Self Organising Map (SOM)",
helpText("Please note:only cluster memebers will be calculated if the total cluster number (xdim*ydim) > 20.
The blue lines represent the median intensities of each cluster. "),
numericInput("SOMXD",
label="X dimension:",
value=1),
numericInput("SOMYD",
label="Y dimension:",
value=3),
selectInput("SOMInit",
label = "Initialization:",
choices = list("Linear" = 1, "Random" = 2,"Sample"=3),
selected = 1),
selectInput("SOMNeigh",
label = "Neighbourhood:",
choices = list("Gaunssian" = 1, "Bubble" = 2),
selected = 1),
actionButton("update21","Plot/update"),
plotOutput("SOMPlot"),
tableOutput('SOMTab')
)
),
navbarMenu("Classification & Feature Selection - Random Forest",
tabPanel("Classification",
selectInput("RFTreesNu",
label = "Number of trees to grow:",
choices = list("500" = 1, "1000" = 2,"2000"=3,
"5000"=4),
selected = 1),
numericInput("RFPredNu",
label="Number of predictors to try for each node:",
value=7),
actionButton("update22","Plot/update"),
textOutput("RFOOB"),
tableOutput('RFTab'),
plotOutput("RFPlot")
),
tabPanel("Var.Importance",
actionButton("update23","Plot/update"),
textOutput("RFVipHelp"),
plotOutput("RFVipPlot"),
dataTableOutput("RFDaTTab")
),
tabPanel("Outlier Detection",
actionButton("update24","Plot/update"),
textOutput("RFOutTxt"),
plotOutput("RFOutPlot")
)
),
navbarMenu("Classification & Feature Selection - SVM",
tabPanel("Classification",
helpText("R-SVM uses SVM (with linear kernel) to perform classifcation recursively using different feature subsets.
Features are selected based on their relative contribution in the classification using cross validation error rates.
The least important features are eliminated in the subsequent steps. This process creates a series of SVM models (levels).
The features used by the best model are plotted. LOOCV: leave one out cross-validation. "),
selectInput("SVMVMet",
label = "Validation method:",
choices = list("10-fold CV" = 1, "LOOCV" = 2,"BooStrap"=3),
selected = 1),
actionButton("update25","Plot/update"),
plotOutput("SVMPlot")
),
tabPanel("Var.Importance",
helpText("Please note : features are ranked by their
frequencies being selected in the best classifiers
(only top 15 will be shown) "),
actionButton("update26","Plot/update"),
plotOutput("SVMImportPlot")
)
),
navbarMenu("PLSR",
tabPanel("Upload Data",
sidebarLayout(
sidebarPanel(
radioButtons("plsrRadioButton1",
label = h3("Data Type"),
choices = list("Concentration" = 1,
"Spectral Bins" = 2, "Intensity Table" = 3),
selected = 1),
selectInput("plsrSelect1",
label = h3("Format"),
choices = list("Samples in rows (unpaired)" = 1, "Samples in columns (unpaired)" = 2,
"Samples in rows (paired)" = 3, "Samples in columns (paired)"=4), selected = 1),
fileInput('file2', 'Choose CSV File',
accept=c('text/csv', 'text/comma-separated-values,text/plain', '.csv'))
),
mainPanel(textOutput('Pcontents'))
)
),
tabPanel("Data Processing",
helpText(" Too many missing values will cause difficulties for downstream analysis.
There are several different methods for this purpose.
The default method replaces all the missing values with a small values (the half of the minimum positive values in the original data) assuming to be the detection limit.
Move onto Normalization if you want to use the default method.
The assumption of this approach is that most missing values are caused by low abundance metabolites (i.e.below the detection limit)."
),
helpText("The functions in MetaboAnalyst also offers other methods, such as replace by mean/median, k-nearest neighbour (KNN), probabilistic PCA (PPCA), Bayesian PCA (BPCA) method, Singular Value Decomposition (SVD) method to impute the missing values.
Please choose the one that is the most appropriate for your data."
),
sidebarPanel(h4("Step 1. Remove features with too many missing values"),
checkboxInput("PmissValue1",
label = "Remove features with > x % missing values",
value = TRUE),
numericInput("PmissValue2",
label=NULL,
value=50)
),
sidebarPanel(h4("Step 2. Estimate the remaining missing values (Select only 1 of the following 4 options"),
checkboxInput("PmissValue3",
label = "Replace by a small value (half of the minimum positive value in the original data)",
value = TRUE),
checkboxInput("PmissValue4",
label = "Exclude variables with missing values",
value = FALSE),
selectInput("PmissValue5",
label=h5("Replace by column (feature)"),
choices = list("None"=1, "Mean"=2, "Median"=3,"Min"=4),
selected =1),
selectInput("PmissValue6",
label=h5("Estimate missing values using"),
choices = list("None"=1, "KNN"=2, "PPCA"=3,"BPCA"=4,"SVD Impute"=5),
selected =1)
),
actionButton("Pcalc1","Process"),
h3(textOutput("PMVtext1"))
),
tabPanel("Normailsation",
sidebarLayout(
sidebarPanel(h3("Sample Normalization"),
radioButtons("PradioButtons2",
label = h4("Sample normalization"),
choices = list("None" = 1,
"Normalization by sum" = 2,
"Normalization by median" = 3,
"Normalization by a specific reference sample"=4,
"Normalization by a pooled sample from group"=5,
"Normalization by reference feature"= 6
),
selected = 1),
uiOutput("Prefsample"),
uiOutput("Ppoolsample"),
uiOutput("PrefFeat"),
radioButtons("PradioButtons3",
label = h4("Data transform"),
choices = list("None" = 1,
"log transform" = 2, "cube root transform" = 3),
selected = 1),
radioButtons("PradioButtons4",
label = h4("Data scaling"),
choices = list("None" = 1, "Mean Centering"=2,
"Auto scaling" = 3, "Pareto scaling" = 4,
"Range scaling" = 5, "Vast scaling"=6),
selected = 1)
),
mainPanel(actionButton("Pgo","Update"),
plotOutput("PnormPlot",width = "600", height = "800")
)
)
),
tabPanel("PLSR",
uiOutput("plsrNum"),
actionButton("PLSRButton","Calculate/Plot"),
uiOutput("PLSRcolor1"),
uiOutput("PLSRcolor2"),
uiOutput("PLSRptSize"),
splitLayout(cellWidths = c("50%", "50%"),
plotOutput("plsrModel", width = "500", height = "500"),
plotOutput("plsrCV", width="500", height="500")
),
plotOutput("plsrOverLay"),
tableOutput('PLSRTab')
)
),
navbarMenu("More",
tabPanel("Sub-Component A"),
tabPanel("Sub-Component B")
)
)
#######################################################
################# ######################
################# ServerFunction ######################
################# ######################
#######################################################
server <- function(input, output,session){
# Render the package-status table only after the user clicks "Load All Packages".
# NOTE(review): assigning output$pacL inside observeEvent registers the render
# on the first click only; it is reactive thereafter. Kept as-is because the
# startup ordering depends on it.
observeEvent(input$loadPack, output$pacL <- renderTable({
pakCheckTable()
}))
# Build a two-column display table (package name, load status) from the result
# of LoadAllPackages(), which is defined elsewhere in the project (not visible
# in this chunk).
pakCheckTable<-function(){
lol=LoadAllPackages()
m=data.frame(lol)
k=cbind(row.names(m),m)
colnames(k)[2] <- c(" ")
return(k)
}
# Use the Cairo device for sharper Shiny plot rendering.
# FIX: spell out TRUE instead of T (T is an ordinary variable and can be
# reassigned, which would silently flip this option).
options(shiny.usecairo = TRUE)
# Palette sampled whenever a plot needs one distinct color per class level.
colorList <- c("black", "blue", "brown", "cyan", "darkblue", "darkred",
               "green", "grey", "gray", "lightblue", "limegreen", "magenta",
               "orange", "pink", "purple", "violet", "yellow")
############################################################################################
####################################### Utility function ###################################
############################################################################################
# Number of class levels in the processed data; re-evaluates on file upload.
dataLvls <- reactive({
  input$file1
  nlevels(dataSet$proc.cls)
})
# TRUE when the data set has more than two class groups.
numOfcls <- reactive({
  input$file1
  dataSet$cls.num > 2
})
# Whether the uploaded data set is paired.
ispaired <- reactive({
  input$file1
  dataSet$paired
})
###################################################################
############### reading File ######################################
###################################################################
# Read the uploaded CSV into the MetaboAnalyst data objects, run the sanity
# check, and report its message. Blank until a file has been chosen.
output$contents <- renderText({
  inFile <- input$file1
  if (is.null(inFile)) {
    return(NULL)
  }
  # Map the "Data Type" radio button to the MetaboAnalyst data-type code.
  datatype <- switch(as.character(input$radioButtons1),
                     "1" = "conc",
                     "2" = "specbin",
                     "pktable")
  # Map the "Format" selector to the orientation/pairing code.
  format1 <- switch(as.character(input$select),
                    "1" = "rowu",
                    "2" = "colu",
                    "3" = "rowp",
                    "colp")
  ispaired <- format1 %in% c("rowp", "colp")
  format2 <- "disc"  # class labels are discrete groups
  InitDataObjects(datatype, "stat", paired = ispaired)
  Read.TextData(inFile$datapath, format = format1, lbl.type = format2)
  SanityCheckData()
  ReplaceMin()
  dataSet$check.msg
})
#############################################################################################
########################## missing values calculations ######################################
#############################################################################################
# Each MissVPara* reactive snapshots one missing-value option when the
# "Process" button (calc1) is pressed.
MissVPara1 <- eventReactive(input$calc1, {
  input$missValue1  # remove features with too many missing values?
})
MissVPara2 <- eventReactive(input$calc1, {
  input$missValue2 / 100  # percent threshold expressed as a fraction
})
MissVPara3 <- eventReactive(input$calc1, {
  input$missValue3  # replace by half of the minimum positive value?
})
MissVPara4 <- eventReactive(input$calc1, {
  input$missValue4  # exclude variables that contain missing values?
})
# Column-wise replacement method from the "Replace by column (feature)"
# dropdown; "none" when no replacement was requested.
# BUG FIX: `input$missValue5==2|3|4` was always TRUE because `|` binds the
# truthy literals 3 and 4, and "None" (option 1) silently fell through to the
# "min" default. Option values now map explicitly, with "none" for option 1.
MissVPara5 <- eventReactive(input$calc1, {
  switch(as.character(input$missValue5),
         "2" = "mean",
         "3" = "median",
         "4" = "min",
         "none")
})
# Model-based estimation method from the "Estimate missing values using"
# dropdown; "none" when no estimation was requested.
# BUG FIX: same always-TRUE `==2|3|4|5` condition as above.
MissVPara6 <- eventReactive(input$calc1, {
  switch(as.character(input$missValue6),
         "2" = "knn",
         "3" = "ppca",
         "4" = "bpca",
         "5" = "svdImpute",
         "none")
})
# Apply the selected missing-value treatment(s) and report completion.
# BUG FIX: the original dispatch was swapped — it compared MissVPara5()
# (mean/median/min) against "knn"/"ppca"/"bpca" and MissVPara6()
# (knn/ppca/bpca/svdImpute) against "mean"/"median"/"min", so neither the
# column-replacement nor the model-based options ever ran.
output$MVtext1<-renderText({
  if (MissVPara1()) {
    RemoveMissingPercent(int.mat = dataSet$preproc, percent = MissVPara2())
  }
  if (MissVPara3()) {
    ImputeVar(int.mat = dataSet$preproc, method = "colmin")
  } else if (MissVPara4()) {
    ImputeVar(int.mat = dataSet$preproc, method = "exclude")
  } else if (MissVPara5() %in% c("mean", "median", "min")) {
    # column-wise replacement; method strings match what the original
    # intended to pass through to ImputeVar
    ImputeVar(int.mat = dataSet$preproc, method = MissVPara5())
  } else if (MissVPara6() %in% c("knn", "ppca", "bpca", "svdImpute")) {
    # model-based estimation
    ImputeVar(int.mat = dataSet$preproc, method = MissVPara6())
  }
  ReplaceMin()
  paste("Missing value calculations complete")
})
##########################################################################################
################################ Normlization ############################################
##########################################################################################
# Sample (row) names of the processed data, refreshed on file upload.
sampleGroups <- reactive({
  input$file1
  rownames(dataSet$proc)
})
# Feature (column) names of the processed data.
sampleFeatures <- reactive({
  input$file1
  colnames(dataSet$proc)
})
# Class-level names, offered for "pooled sample from group" normalization.
pooledSamples <- reactive({
  input$file1
  levels(dataSet$proc.cls)
})
# Dropdowns populated from the reactives above.
output$refsample <- renderUI({
  selectInput("rsmpl",
              label = h4("Specific reference samples"),
              choices = sampleGroups())
})
output$poolsample <- renderUI({
  selectInput("psmpl",
              label = h4("Pooled samples from group"),
              choices = pooledSamples())
})
output$refFeat <- renderUI({
  selectInput("rf",
              label = h4("Reference features"),
              choices = sampleFeatures())
})
# Row (sample) normalization code, snapshotted when "Update" (go) is clicked.
# BUG FIX: the final branch assigned to a misspelled variable `rownNorm`; it
# still returned "CompNorm" (the value of the assignment expression) but left
# a stray variable behind and obscured the intent.
normMethod1 <- eventReactive(input$go, {
  rowNorm <- 'Nothing'
  if (input$radioButtons2 == 1) {
    rowNorm <- 'Nothing'
  } else if (input$radioButtons2 == 2) {
    rowNorm <- 'SumNorm'
  } else if (input$radioButtons2 == 3) {
    rowNorm <- 'MedianNorm'
  } else if (input$radioButtons2 == 4) {
    rowNorm <- "ProbNorm"
  } else if (input$radioButtons2 == 5) {
    rowNorm <- "ProbNorm2"
  } else {
    rowNorm <- "CompNorm"
  }
  rowNorm
})
# Data-transformation code: none, log, or cube-root.
normMethod2 <- eventReactive(input$go, {
  switch(as.character(input$radioButtons3),
         "1" = 'Nothing',
         "2" = 'LogNorm',
         'CrNorm')
})
# Data-scaling code for the selected scaling option.
normMethod3 <- eventReactive(input$go, {
  switch(as.character(input$radioButtons4),
         "1" = 'Nothing',
         "2" = 'MeanCenter',
         "3" = 'AutoNorm',
         "4" = 'ParetoNorm',
         "5" = 'RangeNorm',
         'VastNorm')
})
# Reference sample, pooled group, and reference feature chosen in the UI,
# coerced to plain strings for the Normalization() call.
normMethod4 <- eventReactive(input$go, {
  toString(input$rsmpl)
})
normMethod5 <- eventReactive(input$go, {
  toString(input$psmpl)
})
normMethod6 <- eventReactive(input$go, {
  toString(input$rf)
})
# Run MetaboAnalyst normalization with the snapshotted options, then plot the
# before/after summary. The three reference-based methods need both a renamed
# rowNorm code and the matching reference value; everything else passes the
# selected code straight through with no reference.
output$normPlot <- renderPlot({
  method <- normMethod1()
  if (method == "ProbNorm") {
    rn <- "ProbNormF"
    ref <- normMethod4()
  } else if (method == "ProbNorm2") {
    rn <- "ProbNormT"
    ref <- normMethod5()
  } else if (method == "CompNorm") {
    rn <- "CompNorm"
    ref <- normMethod6()
  } else {
    rn <- method
    ref <- NULL
  }
  Normalization(rowNorm = rn,
                transNorm = normMethod2(),
                scaleNorm = normMethod3(),
                ref = ref,
                ratio = FALSE,
                ratioNum = 20)
  PlotNormSum()
})
##############################################################################
############################## Fold Change Analysis ##########################
##############################################################################
#### TODO: add a message telling the user whether their data is paired
# Paired flag for the fold-change section (duplicate of ispaired above).
ispaired2 <- reactive({
  input$file1
  dataSet$paired
})
# Offer a "Paired" analysis choice only when the data are paired.
output$FCAnalT <- renderUI({
  if (ispaired2()) {
    choices <- list("Unpaired" = 1, "Paired" = 2)
  } else {
    choices <- list("Unpaired" = 1)
  }
  selectInput("FCAnalType",
              label = h5("Analysis Type"),
              choices = choices,
              selected = 1)
})
# Significant-count threshold input; only shown for paired data.
# NOTE(review): this uses ispaired(), not ispaired2() — same value, but
# inconsistent with the rest of this section.
output$SigCountT <- renderUI({
  if (!ispaired()) {
    return()
  }
  numericInput("SigCountThresh",
               label = h5("Significant count threshold % (paired only):"),
               value = 75)
})
# TRUE when the user selected "Unpaired" (option 1) for fold-change analysis.
FCpara4 <- eventReactive(input$go4, {
  input$FCAnalType == 1
})
# The two possible group-ratio labels ("A / B" and "B / A") for the
# comparison-type dropdown.
name <- reactive({
  input$file1
  grp <- levels(dataSet$proc.cls)
  list(paste(grp[1], "/", grp[2]),
       paste(grp[2], "/", grp[1]))
})
output$ComparType <- renderUI({
  selectInput("ComparTyp",
              label = h5("Comparison Type"),
              choices = name())
})
# Fold-change threshold.
FCpara1 <- eventReactive(input$go4, {
  input$FCThresh
})
# Comparison direction: 0 for group1/group2, 1 for the reverse.
FCpara2 <- eventReactive(input$go4, {
  grp <- levels(dataSet$proc.cls)
  group12 <- paste(grp[1], "/", grp[2])
  if (input$ComparTyp == group12) 0 else 1
})
# Significant-count threshold as a fraction (paired analysis only).
FCpara3 <- eventReactive(input$go4, {
  input$SigCountThresh / 100
})
# Run the fold-change analysis (unpaired or paired) and draw the FC plot.
output$FCPlot <- renderPlot({
  if (FCpara4()) {
    FC.Anal.unpaired(fc.thresh = FCpara1(), cmp.type = FCpara2())
  } else {
    FC.Anal.paired(fc.thresh = FCpara1(), percent.thresh = FCpara3(),
                   cmp.type = FCpara2())
  }
  MyPlotFC()
})
# Same analysis, rendered as a results table.
output$FCtable1 <- renderDataTable({
  if (FCpara4()) {
    FC.Anal.unpaired(fc.thresh = FCpara1(), cmp.type = FCpara2())
  } else {
    FC.Anal.paired(fc.thresh = FCpara1(), percent.thresh = FCpara3(),
                   cmp.type = FCpara2())
  }
  MyFCTable()
})
###############################################################################
############################### T test ########################################
###############################################################################
# Paired flag for the t-test section.
# NOTE(review): this re-defines the ispaired reactive declared earlier in the
# server; both definitions are identical, so the shadowing is harmless.
ispaired <- reactive({
  input$file1
  dataSet$paired
})
# Offer a "Paired" t-test only when the data are paired.
output$TTAnalType <- renderUI({
  if (ispaired()) {
    choices <- list("Unpaired" = 1, "Paired" = 2)
  } else {
    choices <- list("Unpaired" = 1)
  }
  selectInput("TTAnalT",
              label = h5("Analysis Type:"),
              choices = choices,
              selected = 1)
})
# paired= argument for Ttests.Anal: FALSE for "Unpaired", TRUE for "Paired".
ttestpara4 <- eventReactive(input$go2, {
  input$TTAnalT != 1
})
# p-value threshold for the t-test.
ttestpara1 <- eventReactive(input$go2, {
  input$TTestP
})
# equal.var argument: TRUE when option 1 (equal group variance) is selected.
ttestpara2 <- eventReactive(input$go2, {
  input$grpVar == 1
})
# Non-parametric test flag.
ttestpara3 <- eventReactive(input$go2, {
  input$NPT1
})
# Plot colors. NOTE(review): the names "ttespara5/6" (sic) are kept because
# the plot handlers below reference them under these spellings.
ttespara5 <- eventReactive(input$go2, {
  toString(input$Tcolo1)
})
ttespara6 <- eventReactive(input$go2, {
  toString(input$Tcolo2)
})
# Run the t-test with the snapshotted parameters and draw the result plot.
output$PlotTT <- renderPlot({
  Ttests.Anal(nonpar = ttestpara3(),
              threshp = ttestpara1(),
              paired = ttestpara4(),
              equal.var = ttestpara2())
  MyPlotTT(c1 = ttespara5(), c2 = ttespara6())
})
# Same analysis, rendered as a paginated table.
output$TTtable1 <- renderDataTable({
  Ttests.Anal(nonpar = ttestpara3(),
              threshp = ttestpara1(),
              paired = ttestpara4(),
              equal.var = ttestpara2())
  MyTTTable()
}, options = list(lengthMenu = c(10, 20, 50, 100), pageLength = 20)
)
# Index of the t-test point nearest the user's click on the -log10(p) plot.
# IMPROVEMENT: the original grew n and b element-by-element in two loops;
# seq_along()/as.numeric() build the same vectors in one step each.
click1 <- reactive({
  validate(
    need(input$plot_click1$x != "", "Click a point for a interval/box plot")
  )
  n <- seq_along(analSet$tt$p.log)
  b <- as.numeric(analSet$tt$p.log)
  as.numeric(nearPoints(data.frame(n, b), input$plot_click1,
                        xvar = "n", yvar = "b", maxpoints = 1)[[1]])
})
# Random default colors for the interval/box plots, one per class level.
output$color1 <- renderUI({
  colorV <- sample(colorList, dataLvls())
  textInput("colorVector",
            label = h5("Colors for the interval and box plot"),
            value = toString(colorV))
})
# Interval plot for the clicked compound (se or sd error bars).
observeEvent(input$plot_click1, output$PlotTT2 <- renderPlot({
  col <- strsplit(input$colorVector, ",")[[1]]
  stat <- " "
  if (input$tIntCalc == 1) {
    stat <- "se"
  } else if (input$tIntCalc == 2) {
    stat <- "sd"
  }
  IntervalPlot(cmpdNm = click1(), dpi = 200, colors = col, calc = stat)
}))
# Box plot for the clicked compound.
observeEvent(input$plot_click1, output$PlotBox1 <- renderPlot({
  colr <- strsplit(input$colorVector, ",")[[1]]
  PlotCmpdBoxView(cmpdNm = click1(), dpi = 200, col = colr)
}))
# Show raw click coordinates (debug aid).
output$info1 <- renderText({
  paste0("x=", input$plot_click1$x, "\ny=", input$plot_click1$y)
})
###########################################################################################
########################################## ANOVA ##########################################
###########################################################################################
# ANOVA p-value threshold.
anovapara1 <- eventReactive(input$go3, {
  input$ANOVAP
})
# Post-hoc test: Fisher's LSD (option 1) or Tukey's HSD.
anovapara2 <- eventReactive(input$go3, {
  if (input$PHA == 1) "fisher" else "tukey"
})
# Non-parametric flag.
anovapara3 <- eventReactive(input$go3, {
  input$NPT2
})
# Plot colors.
anovapara4 <- eventReactive(input$go3, {
  toString(input$Acolo1)
})
anovapara5 <- eventReactive(input$go3, {
  toString(input$Acolo2)
})
# Run the ANOVA with the snapshotted parameters and draw the result plot.
output$PlotAOV <- renderPlot({
  ANOVA.Anal(nonpar = anovapara3(), thresh = anovapara1(),
             post.hoc = anovapara2())
  PlotLiveANOVA(c1 = anovapara4(), c2 = anovapara5())
})
# Same analysis, rendered as a paginated table.
output$AOVtable1 <- renderDataTable({
  ANOVA.Anal(nonpar = anovapara3(),
             thresh = anovapara1(),
             post.hoc = anovapara2())
  MyANOVATable()
}, options = list(lengthMenu = c(10, 20, 50, 100), pageLength = 20)
)
# Index of the ANOVA point nearest the click, vectorized like click1.
# NOTE(review): unlike click1, the original did not wrap the result in
# as.numeric(); that behavior is preserved here.
click2 <- reactive({
  validate(
    need(input$plot_click2$x != "", "Click a point for a interval/box plot")
  )
  n <- seq_along(analSet$aov$p.log)
  b <- as.numeric(analSet$aov$p.log)
  nearPoints(data.frame(n, b), input$plot_click2,
             xvar = "n", yvar = "b", maxpoints = 1)[[1]]
})
# Random default colors for the ANOVA interval/box plots.
output$color2 <- renderUI({
  colorV <- sample(colorList, dataLvls())
  textInput("colorVector2",
            h5("Colors for the interval and box plot graph"),
            value = toString(colorV))
})
# Interval plot for the clicked ANOVA compound (se or sd error bars).
observeEvent(input$plot_click2, output$PlotAOV2 <- renderPlot({
  col <- strsplit(input$colorVector2, ",")[[1]]
  stat <- " "
  if (input$aIntCalc == 1) {
    stat <- "se"
  } else if (input$aIntCalc == 2) {
    stat <- "sd"
  }
  IntervalPlot(cmpdNm = click2(), dpi = 200, colors = col, calc = stat)
}))
# Box plot for the clicked ANOVA compound.
observeEvent(input$plot_click2, output$PlotBox2 <- renderPlot({
  colr <- strsplit(input$colorVector2, ",")[[1]]
  PlotCmpdBoxView(cmpdNm = click2(), dpi = 200, col = colr)
}))
#################################################################################
################################ Volcano Plot ###################################
#################################################################################
# Paired flags for the volcano section (duplicates of ispaired above).
ispaired3 <- reactive({
  input$file1
  dataSet$paired
})
ispaired4 <- reactive({
  input$file1
  dataSet$paired
})
# Offer a "Paired" volcano analysis only when the data are paired.
output$VolAnalT <- renderUI({
  if (ispaired3()) {
    choices <- list("Unpaired" = 1, "Paired" = 2)
  } else {
    choices <- list("Unpaired" = 1)
  }
  selectInput("VAnalT",
              label = h5("Analysis Type"),
              choices = choices,
              selected = 1)
})
# Significant-count threshold input; only shown for paired data.
output$VolSigCountT <- renderUI({
  if (!ispaired4()) {
    return()
  }
  numericInput("VolSigCountThresh",
               label = h5("Significant count threshold % (paired only):"),
               value = 75)
})
# The two group-ratio labels for the volcano comparison-type dropdown.
name2 <- reactive({
  input$file1
  grp <- levels(dataSet$proc.cls)
  list(paste(grp[1], "/", grp[2]),
       paste(grp[2], "/", grp[1]))
})
output$VolComparType <- renderUI({
  selectInput("VolComparT",
              label = h5("Comparison Type"),
              choices = name2())
})
# paired= flag for Volcano.Anal: TRUE when "Paired" (option 2) is selected.
volpara1 <- eventReactive(input$go5, {
  input$VAnalT != 1
})
# Fold-change threshold.
volpara2 <- eventReactive(input$go5, {
  input$VolThresh
})
# Comparison direction: 0 for group1/group2, 1 for the reverse.
volpara3 <- eventReactive(input$go5, {
  grp <- levels(dataSet$proc.cls)
  group12 <- paste(grp[1], "/", grp[2])
  if (input$VolComparT == group12) 0 else 1
})
# Significant-count threshold as a fraction (paired analysis only).
volpara4 <- eventReactive(input$go5, {
  input$VolSigCountThresh / 100
})
# Non-parametric flag.
volpara5 <- eventReactive(input$go5, {
  input$VolNPT
})
# p-value threshold.
volpara6 <- eventReactive(input$go5, {
  input$VolP
})
# equal.var flag: TRUE when option 1 (equal group variance) is selected.
volpara7 <- eventReactive(input$go5, {
  input$VolgrpVar == 1
})
# Run the volcano analysis with the snapshotted parameters and draw the plot.
output$PlotVol <- renderPlot({
  Volcano.Anal(paired = volpara1(), fcthresh = volpara2(),
               cmpType = volpara3(), percent.thresh = volpara4(),
               nonpar = volpara5(), threshp = volpara6(),
               equal.var = volpara7())
  MyPlotVolcano2()
})
# Same analysis, rendered as a paginated table.
output$VOLtable1 <- renderDataTable({
  Volcano.Anal(paired = volpara1(), fcthresh = volpara2(),
               cmpType = volpara3(), percent.thresh = volpara4(),
               nonpar = volpara5(), threshp = volpara6(),
               equal.var = volpara7())
  MyVOLTable()
}, options = list(lengthMenu = c(10, 20, 50, 100), pageLength = 20)
)
# Identifier of the volcano point nearest the user's click.
# BUG FIX: the original extracted the first column with single-bracket `[1]`,
# which returns a one-column data.frame, and as.numeric() on a data.frame
# raises "cannot coerce type 'list'". Use `[[1]]` as the other click handlers
# (click1/click2) do. The element-by-element loops are also vectorized.
click3 <- reactive({
  validate(
    need(input$plot_click3$x != "", "Click a point for a interval/box plot")
  )
  vcn <- analSet$volcano
  n <- seq_along(vcn$fc.log)
  b <- as.numeric(vcn$fc.log)
  c <- as.numeric(vcn$p.log)
  as.numeric(nearPoints(data.frame(n, b, c), input$plot_click3,
                        xvar = "b", yvar = "c", maxpoints = 1)[[1]])
})
# Random default colors for the volcano interval/box plots.
output$color3 <- renderUI({
  colorV <- sample(colorList, dataLvls())
  textInput("colorVector3",
            h5("Colors for the interval plot and boxplot"),
            value = toString(colorV))
})
# Interval plot for the clicked volcano compound.
# BUG FIX: `stat` was never initialized, so when vIntCalc was neither 1 nor 2
# the renderPlot failed with "object 'stat' not found". Default to " " as the
# t-test and ANOVA handlers do.
observeEvent(input$plot_click3, output$PlotVol2 <- renderPlot({
  col <- strsplit(input$colorVector3, ",")[[1]]
  stat <- " "
  if (input$vIntCalc == 1) {
    stat <- "se"
  } else if (input$vIntCalc == 2) {
    stat <- "sd"
  }
  IntervalPlot(cmpdNm = click3(), dpi = 200, colors = col, calc = stat)
}))
# Box plot for the clicked volcano compound.
observeEvent(input$plot_click3, output$PlotVol3 <- renderPlot({
  colr <- strsplit(input$colorVector3, ",")[[1]]
  PlotCmpdBoxView(cmpdNm = click3(), dpi = 200, col = colr)
}))
# Show raw click coordinates (debug aid).
output$info2 <- renderText({
  paste0("x=", input$plot_click3$x, "\ny=", input$plot_click3$y)
})
#################################################################################
################################ Correlation Analysis ###########################
#################################################################################
# Correlation method for the heatmap.
corrPara1 <- eventReactive(input$go6, {
  switch(as.character(input$distM),
         "1" = "pearson",
         "2" = "spearman",
         "kendall")
})
# Heatmap view option.
# BUG FIX: the original first branch read `viewMethod=="overview"` — a
# comparison, not an assignment — and only worked because the default was
# already "overview".
corrPara2 <- eventReactive(input$go6, {
  if (input$viewM == 1) "overview" else "detailed"
})
# Fix the color distribution across the heatmap?
corrPara3 <- eventReactive(input$go6, {
  isTRUE(input$fixColD)
})
# Color-contrast scheme for the heatmap; option 1 and any unexpected value
# both map to 'default', matching the original fall-through.
corrPara4 <- eventReactive(input$go6, {
  switch(as.character(input$colorCon),
         "2" = 'gbr',
         "3" = 'heat',
         "4" = 'topo',
         "5" = 'gray',
         "6" = 'rwb',
         "7" = 'rwg',
         "8" = 'wnvyb',
         'default')
})
# Suppress clustering?
corrPara5 <- eventReactive(input$go6, {
  isTRUE(input$performClus)
})
# Size the heatmap container from the user-supplied width (%) and height (px).
output$corrHeat <- renderUI({
plotOutput("CorrHeatMap", width = paste0(input$CorrHeatWidth, "%"), height = input$CorrHeatHeight)
})
# Draw the correlation heatmap with the snapshotted options.
# NOTE(review): `topNum` is passed positionally but is not defined anywhere in
# this chunk; unless it is a global defined elsewhere in the file, this will
# error whenever MyPlotCorrHeatMap evaluates that argument (R's lazy
# evaluation may hide it while top=FALSE) — confirm and pass an explicit value.
output$CorrHeatMap <- renderPlot({
MyPlotCorrHeatMap("correlation HeatMap", format="png", dpi=200, width=NA,
cor.method=corrPara1(),
colors=corrPara4(),
viewOpt=corrPara2(),
fix.col=corrPara3(),
no.clst=corrPara5(),
top=FALSE,
topNum)
})
#################################################################################
################################ pattern searching ###########################
#################################################################################
# Feature names available for "feature of interest" correlation.
interestingFeatures <- reactive({
  input$file1
  colnames(dataSet$proc)
})
# Predefined pattern templates from MetaboAnalyst, split into a vector.
profiles <- reactive({
  input$file1
  tmpl <- toString(GenerateTemplates())
  as.vector(strsplit(tmpl, ",")[[1]])
})
output$interestFt <- renderUI({
  selectInput("iFT",
              label = h5("features of interest"),
              choices = interestingFeatures())
})
output$profile <- renderUI({
  selectInput("templateProfiles",
              label = h5("predefined profile"),
              choices = profiles())
})
# Pattern mode: 1 = feature of interest, 2 = predefined profile, 3 = custom.
patternPara1 <- eventReactive(input$go7, {
  if (input$pattern == 1) {
    1
  } else if (input$pattern == 2) {
    2
  } else {
    3
  }
})
# Distance/correlation measure for pattern matching.
patternPara2 <- eventReactive(input$go7, {
  switch(as.character(input$distM2),
         "1" = "pearson",
         "2" = "spearman",
         "kendall")
})
# Selected feature of interest.
patternPara3 <- eventReactive(input$go7, {
  toString(input$iFT)
})
# Selected predefined profile.
patternPara4 <- eventReactive(input$go7, {
  toString(input$templateProfiles)
})
# User-entered custom profile.
patternPara5 <- eventReactive(input$go7, {
  input$customPro
})
# Run the selected pattern search and draw the correlation plot.
output$patternGraph <- renderPlot({
  mode <- patternPara1()
  if (mode == 1) {
    FeatureCorrelation(patternPara2(), patternPara3())
  } else if (mode == 2) {
    Match.Pattern(dist.name = patternPara2(), pattern = patternPara4())
  } else {
    Match.Pattern(dist.name = patternPara2(), pattern = patternPara5())
  }
  MyPlotCorr()
})
# Same analysis, rendered as a paginated table.
output$CORRtable1 <- renderDataTable({
  mode <- patternPara1()
  if (mode == 1) {
    FeatureCorrelation(patternPara2(), patternPara3())
  } else if (mode == 2) {
    Match.Pattern(dist.name = patternPara2(), pattern = patternPara4())
  } else {
    Match.Pattern(dist.name = patternPara2(), pattern = patternPara5())
  }
  MyCORRTable()
}, options = list(lengthMenu = c(10, 20, 50, 100), pageLength = 20)
)
#######################################################################################
################################## Summary Plot #######################################
#######################################################################################
# Choices 2..max for the "top PCs" selector.
# BUG FIX (latent): `1:GetMaxPCAComp()-1` parses as `(1:GetMaxPCAComp())-1`,
# i.e. 0:(max-1); the i=0 iteration was a silent no-op assignment to n[0].
# seq_len() yields the intended 1..(max-1) range, producing the same 2..max
# choices without the dead iteration.
pcNumbers <- reactive({
  input$file1
  PCA.Anal()
  seq_len(GetMaxPCAComp() - 1) + 1
})
output$pcNum <- renderUI({
  selectInput("pcNo",
              label = h5("Display pairwise score plot for top PCs:"),
              choices = pcNumbers())
})
# Number of PCs, snapshotted when "Update" (update1) is clicked.
pcSummPara1 <- eventReactive(input$update1, {
  input$pcNo
})
# User-sized container for the pairwise summary plot.
output$overPlot <- renderUI({
  plotOutput("overplt", width = paste0(input$OverPWidth, "%"),
             height = input$OverPHeight)
})
output$overplt <- renderPlot({
  MyPlotPCAPairSummary(pc.num = pcSummPara1())
})
#######################################################################################
################################## Scree plot #########################################
#######################################################################################
# Choices 2..max for the scree-plot PC selector.
# BUG FIX (latent): same `1:GetMaxPCAComp()-1` precedence problem as the
# summary plot above; seq_len() gives the intended range.
pcNumbers2 <- reactive({
  input$file1
  PCA.Anal()
  seq_len(GetMaxPCAComp() - 1) + 1
})
output$pcNum2 <- renderUI({
  selectInput("pcNo2",
              label = h5("Display pairwise score plot for top PCs:"),
              choices = pcNumbers2())
})
# Number of PCs, snapshotted when "Update" (update2) is clicked.
screePlotPara1 <- eventReactive(input$update2, {
  input$pcNo2
})
# User-sized container for the scree plot.
output$screePlot <- renderUI({
  plotOutput("ScreePlt", width = paste0(input$ScreePWidth, "%"),
             height = input$ScreePHeight)
})
output$ScreePlt <- renderPlot({
  MyPlotPCAScree(scree.num = screePlotPara1())
})
#######################################################################################
################################## 2d pca plot ########################################
#######################################################################################
# PCs on the x and y axes.
pca2dPara1 <- eventReactive(input$update3, {
  input$pcX
})
pca2dPara2 <- eventReactive(input$update3, {
  input$pcY
})
# Confidence-ellipse level: 0.95 when requested, 0 to disable.
pca2dPara3 <- eventReactive(input$update3, {
  if (isTRUE(input$pcaConf)) 0.95 else 0
})
# Show sample names? (1/0 flag passed to MyPlotPCA2DScore.)
pca2dPara4 <- eventReactive(input$update3, {
  if (isTRUE(input$disSmplName)) 1 else 0
})
# Grey-scale rendering? (1/0 flag.)
pca2dPara5 <- eventReactive(input$update3, {
  if (isTRUE(input$gscale)) 1 else 0
})
# User-sized container for the 2-D score plot.
output$pca2dP <- renderUI({
  plotOutput("pca2dPlt", width = paste0(input$PCA2DWidth, "%"),
             height = input$PCA2DHeight)
})
output$pca2dPlt <- renderPlot({
  PCA.Anal()
  MyPlotPCA2DScore(pcx = pca2dPara1(),
                   pcy = pca2dPara2(),
                   reg = pca2dPara3(),
                   show = pca2dPara4(),
                   grey.scale = pca2dPara5())
})
########################################################################################
################################### PCA Trajectory Plot ################################
########################################################################################
# x/y principal components for the trajectory plot.
pcaTraPara1 <- eventReactive(input$update16, {
  input$pcaTraX
})
pcaTraPara2 <- eventReactive(input$update16, {
  input$pcaTraY
})
# Plot title.
pcaTraPara3 <- eventReactive(input$update16, {
  input$PCATraTitle
})
# Point size.
pcaTraPara4 <- eventReactive(input$update16, {
  input$traPtS
})
# Axis-extension percentage expressed as a fraction.
pcaTraPara5 <- eventReactive(input$update16, {
  input$traLimit / 100
})
# Error-bar width.
pcaTraPara7 <- eventReactive(input$update16, {
  input$errBarWidth
})
# Random default colors, one per class level.
output$PCATraCol <- renderUI({
  colorV <- sample(colorList, dataLvls())
  textInput("traColor", h5("Colors for the graph"), value = toString(colorV))
})
# Parse the comma-separated color string into a vector.
pcaTraPara6 <- eventReactive(input$update16, {
  strsplit(input$traColor, ",")[[1]]
})
# User-sized container for the trajectory plot.
output$pcaTrajPlot <- renderUI({
  plotOutput("pcaTrajP", width = paste0(input$PCATrAWidth, "%"),
             height = input$PCATRAHeight)
})
output$pcaTrajP <- renderPlot({
  PCA.Anal()
  PlotTraPCA(pc1 = pcaTraPara1(),
             pc2 = pcaTraPara2(),
             title = pcaTraPara3(),
             ptsSize = pcaTraPara4(),
             extPer = pcaTraPara5(),
             colors = pcaTraPara6(),
             errW = pcaTraPara7())
})
#######################################################################################
################################## 3d pca plot ########################################
#######################################################################################
# Display options for the rgl 3-D PCA plot, snapshotted on "plot3dpca1".
PCA3DPara1 <- eventReactive(input$plot3dpca1, {
  input$pSize  # point size
})
PCA3DPara2 <- eventReactive(input$plot3dpca1, {
  input$transparency
})
PCA3DPara3 <- eventReactive(input$plot3dpca1, {
  input$ell  # draw ellipsoids?
})
PCA3DPara4 <- eventReactive(input$plot3dpca1, {
  input$grid  # draw grid?
})
PCA3DPara5 <- eventReactive(input$plot3dpca1, {
  input$PcaTiltle  # NOTE(review): input id is spelled "PcaTiltle" in the UI
})
# Random default colors, one per class level.
output$pcaCol <- renderUI({
  colorV <- sample(colorList, length(levels(dataSet$proc.cls)))
  textInput("pcaColor", h5("Colors for PCA"), value = toString(colorV))
})
# Parse the comma-separated color string into a vector.
PCA3DPara6 <- eventReactive(input$plot3dpca1, {
  strsplit(input$pcaColor, ",")[[1]]
})
# Open an rgl window and draw the 3-D PCA plot when the button is clicked.
observeEvent(input$plot3dpca1,{
PCA.Anal()
open3d()
par3d(windowRect = c(216,30, 958, 695))  # fixed window position/size
Graphs3DPCA(pointSize=PCA3DPara1(),
transparency=PCA3DPara2(),
ell=PCA3DPara3(),
grd=PCA3DPara4(),
Title=PCA3DPara5(),
group.col=PCA3DPara6())
#dev.off()
})
# Earlier renderPlot-based attempt, kept commented out for reference.
# output$PCA3D <- renderPlot({
#PCA.Anal()
# open3d()
# par3d(windowRect = c(216,30, 958, 695))
# Graphs3DPCA(pointSize=PCA3DPara1(),
# transparency=PCA3DPara2(),
# ell=PCA3DPara3(),
# grd=PCA3DPara4(),
# Title=PCA3DPara5(),
# group.col=PCA3DPara6())
# dev.off()
# })
# Save a PNG snapshot of the current rgl scene.
observeEvent(input$snapShot1, {
snapshot3d( filename="3D Plot.png", fmt = "png", top = TRUE)
})
# actionButton("snapShot1","Snap Shot")
PCA3D2para1 <- eventReactive(input$plot3dpca2,{
return(input$dC)
})
PCA3D2para2 <- eventReactive(input$plot3dpca2,{
return(input$dS)
})
PCA3D2para3 <- eventReactive(input$plot3dpca2,{
return(input$showScale)
})
PCA3D2para4 <- eventReactive(input$plot3dpca2,{
if(input$showlabels==TRUE){
return(input$showlabels)
} else {
return(" ")
}
})
PCA3D2para5 <- eventReactive(input$plot3dpca2,{
return(input$showP)
})
PCA3D2para6 <- eventReactive(input$plot3dpca2,{
return(input$shadow)
})
PCA3D2para7 <- eventReactive(input$plot3dpca2,{
return(input$ell2)
})
PCA3D2para8 <- eventReactive(input$plot3dpca2,{
return(input$showGrpLab)
})
observeEvent(input$plot3dpca2,{
pca3d(pca=prcomp(dataSet$norm, center=PCA3D2para1(), scale=PCA3D2para2()),
group=dataSet$cls,
show.scale=PCA3D2para3(),
show.labels=PCA3D2para4(),
show.plane=PCA3D2para5(),
show.shadows= PCA3D2para6(),
show.ellipses=PCA3D2para7(),
show.group.labels=PCA3D2para8(),
new=TRUE)
})
observeEvent(input$snapShot2, {
snapshotPCA3d("3D Plot 2.png")
})
###########################################################################################
##################################### Loading Plot ########################################
###########################################################################################
# ---- PCA loading plot: axes, labels, scatter/bar renderings, table ----
# PC index for the x-axis.
loadingPara1 <- eventReactive(input$update4,{
inx1=input$loadingX
})
# PC index for the y-axis.
loadingPara2 <- eventReactive(input$update4,{
inx2=input$loadingY
})
# Label features on the plot?
loadingPara3 <- eventReactive(input$update4,{
return(input$loadingFeat)
})
# Seed the color input with one random color per class level.
output$color4 <- renderUI({
colorV= sample(colorList, dataLvls())
textInput("colorVector4", h5("Colors for the interval/box plot graph"), value = toString(colorV))
})
# Scatter-style loading plot.
# NOTE(review): the second index is passed as `inx=` while the first is
# `inx1=` -- confirm MyPlotPCALoading's parameter really is named `inx`.
output$loadingPCA1 <- renderPlot({
PCA.Anal()
MyPlotPCALoading(imgName="loading", format="png", dpi=72, width=NA,
inx1=loadingPara1(),
inx=loadingPara2(),
plotType="scatter",
lbl.feat=loadingPara3())
})
# Bar-style loading plot with the same settings.
output$loadingPCA2 <- renderPlot({
PCA.Anal()
MyPlotPCALoading(imgName="loading", format="png", dpi=72, width=NA,
inx1=loadingPara1(),
inx=loadingPara2(),
plotType="bar",
lbl.feat=loadingPara3())
})
# Loadings table for the selected component pair.
output$Loadingtable1 <- renderDataTable({
PCA.Anal()
MyLOADTable(x=loadingPara1(),
y=loadingPara2())
},options = list(lengthMenu = c(10, 20,50,100), pageLength = 20)
)
# Resolve a click on the PCA loading scatter to a feature index.
# Returns the row number (1-based position in the loading matrix) of the
# loading point nearest the click, or NA when nothing is close enough.
#
# Rewritten from three element-wise `for (i in 1:length(...))` loops that
# grew `n`, `b`, `c` one element at a time: `signif()` and `as.numeric()`
# are vectorized, so the whole frame can be built directly.
click4 <- reactive({
  validate(
    need(input$plot_click4$x != "", "Click a point for a interval/box plot")
  )
  PCA.Anal()
  # Loadings of the two user-selected PCs, rounded to 5 significant digits
  # to match the plotted coordinates.
  b <- signif(as.numeric(analSet$pca$rotation[, loadingPara1()]), 5)
  c <- signif(as.numeric(analSet$pca$rotation[, loadingPara2()]), 5)
  n <- seq_along(b)
  # nearPoints() matches the click against the (b, c) coordinates; column 1
  # of the matched row is the feature index `n`.
  x <- as.numeric(nearPoints(data.frame(n, b, c), input$plot_click4,
                             xvar = "b", yvar = "c", maxpoints = 1)[1])
  return(x)
})
# Interval plot for the compound selected by clicking the PCA loading plot.
# BUG FIX: the "sd" branch tested input$tIntCalc (the t-test panel's radio);
# this panel's radio is lIntCalc (see the "se" branch), so "sd" could only be
# chosen by coincidence of another panel's state.
observeEvent(input$plot_click4, output$loadingCmp1 <-renderPlot({
  # Comma-separated color list typed into the color4 text input.
  colString=input$colorVector4
  col=strsplit(colString,",")[[1]]
  # Map the radio choice to the error-bar statistic (default: none).
  stat=" "
  if(input$lIntCalc==1){
    stat="se"
  }else if (input$lIntCalc==2){
    stat="sd"
  }
  IntervalPlot(cmpdNm = click4(), dpi=200, colors=col,calc=stat)
}))
# Box-plot view of the clicked compound, using the same color list.
observeEvent(input$plot_click4, output$loadingCmp2<-renderPlot({
colString=input$colorVector4
colr=strsplit(colString,",")[[1]]
PlotCmpdBoxView(cmpdNm=click4(), dpi=200, col=colr)
}))
# Echo the raw click coordinates under the plot.
output$info3 <- renderText({
paste0("x=", input$plot_click4$x, "\ny=", input$plot_click4$y)
})
###########################################################################################
##################################### BiPlot #############################################
###########################################################################################
# ---- PCA biplot: PC indices chosen in the UI, applied on Update (update5) ----
biplotPara1 <- eventReactive(input$update5, {
  input$biplotPCX
})
biplotPara2 <- eventReactive(input$update5, {
  input$biplotPCY
})
# Re-run the PCA and draw the biplot for the selected component pair.
output$biplot <- renderPlot({
  PCA.Anal()
  MyPlotPCABiplot(inx1 = biplotPara1(), inx2 = biplotPara2())
})
######################################################################################
################################## PLS Summary Plot ##################################
######################################################################################
# ---- PLS pair-summary plot: number of components, gated on update6 ----
plsSummPara1 <- eventReactive(input$update6,{
pcNo=input$plsNum
})
# Plot container sized from the width (%) / height inputs.
output$overPlotPls <- renderUI({
plotOutput("overPPls", width = paste0(input$PLSOWidth, "%"),
height = input$PLSOHeight)
})
# Run PLS regression and draw the pairwise component summary.
output$overPPls <- renderPlot({
PLSR.Anal()
MyPlotPLSPairSummary(
pc.num=plsSummPara1()
)
})
#######################################################################################
################################## 2d pLS plot ########################################
#######################################################################################
# Component index for the x-axis.
pls2dPara1 <- eventReactive(input$update7,{
return(input$plsX)
})
# Component index for the y-axis.
pls2dPara2 <- eventReactive(input$update7,{
return(input$plsY)
})
# 95% confidence region when the checkbox is ticked, otherwise none (0).
pls2dPara3 <- eventReactive(input$update7,{
if(input$plsConf==TRUE){
reg=0.95
} else {
reg=0
}
return(reg)
})
# Show sample names? (1/0 flag expected by MyPlotPLS2DScore.)
pls2dPara4 <- eventReactive(input$update7,{
if(input$plsName==TRUE){
s=1
} else {
s=0
}
return(s)
})
# Grey-scale rendering? (1/0 flag.)
pls2dPara5 <- eventReactive(input$update7,{
if(input$plsgscale==TRUE){
grey.scale=1
} else {
grey.scale=0
}
return(grey.scale)
})
# Plot container sized from the width (%) / height inputs.
output$pls2dP <- renderUI({
plotOutput("pls2dPlot", width = paste0(input$dplsWidth, "%"),
height = input$dplsHeight)
})
# Run PLS regression and draw the 2D score plot.
output$pls2dPlot <- renderPlot({
PLSR.Anal()
MyPlotPLS2DScore(inx1=pls2dPara1(),
inx2=pls2dPara2(),
reg=pls2dPara3(),
show=pls2dPara4(),
grey.scale=pls2dPara5()
)
})
###########################################################################################
################################## PLSDA Trajectory plot ##################################
###########################################################################################
# ---- PLSDA trajectory plot: inputs gated on update17 (mirrors the PCA
# trajectory section above) ----
# Component index for the x-axis.
plsTraPara1 <- eventReactive(input$update17,{
return(input$plsdaTraX)
})
# Component index for the y-axis.
plsTraPara2 <- eventReactive(input$update17,{
return(input$plsdaTraY)
})
# Plot title.
plsTraPara3 <- eventReactive(input$update17,{
return(input$PlsdaTraTitle)
})
# Point size.
plsTraPara4 <- eventReactive(input$update17,{
return(input$plsdatraPtS)
})
# Axis-extension limit, percentage -> fraction.
plsTraPara5 <- eventReactive(input$update17,{
decPer=input$plsdatraLimit/100
return(decPer)
})
# Error-bar width.
plsTraPara7 <- eventReactive(input$update17,{
return(input$plserrBarWidth)
})
# Seed the color input with one random color per class level.
output$PLSDATraCol <- renderUI({
colorV=sample(colorList, dataLvls())
textInput("PLStraColor", h5("Colors for the graph"), value = toString(colorV))
})
# Comma-separated color string -> character vector.
plsTraPara6 <- eventReactive(input$update17,{
col=strsplit(input$PLStraColor, ",")[[1]]
return(col)
})
# Plot container sized from the width (%) / height inputs.
output$plsdaTrajPlot <- renderUI({
plotOutput("plsTRAP", width = paste0(input$plsTRAWidth, "%"),
height = input$plsTRAHeight)
})
# Run PLS regression and draw the trajectory plot.
output$plsTRAP<- renderPlot({
PLSR.Anal()
PlotTraPLSDA(inx1=plsTraPara1(),
inx2=plsTraPara2(),
title=plsTraPara3(),
ptsSize=plsTraPara4(),
extPer=plsTraPara5(),
colors=plsTraPara6(),
errW=plsTraPara7())
})
#######################################################################################
################################## 3d plsda plot ########################################
#######################################################################################
# ---- 3D PLSDA plot (rgl device), gated on plot3dpls1 ----
# Point size.
PLS3DPara1 <- eventReactive(input$plot3dpls1,{
return(input$plsPSize)
})
# Point transparency.
PLS3DPara2 <- eventReactive(input$plot3dpls1,{
return(input$plsTransparency)
})
# Draw confidence ellipsoids?
PLS3DPara3 <- eventReactive(input$plot3dpls1,{
return(input$plsEll)
})
# Draw the grid?
PLS3DPara4 <- eventReactive(input$plot3dpls1,{
return(input$plsGrid)
})
# Plot title.
PLS3DPara5 <- eventReactive(input$plot3dpls1,{
return(input$PlsTiltle)
})
# Seed the color input with one random color per class level.
output$plsCol <- renderUI({
textInput("plsColor", h5("Colors for PLSDA"), value=toString(sample(colorList, length(levels(dataSet$proc.cls)))))
})
# Comma-separated color string -> character vector.
PLS3DPara6 <- eventReactive(input$plot3dpls1,{
return(strsplit(input$plsColor, ",")[[1]])
})
# Open an rgl window at a fixed position/size and render the 3D PLSDA plot.
observeEvent(input$plot3dpls1,{
PLSR.Anal()
open3d()
par3d(windowRect = c(216,30, 958, 695))
Graphs3DPLSDA(pointSize = PLS3DPara1(),
transparency = PLS3DPara2(),
ell = PLS3DPara3(),
grd = PLS3DPara4(),
Title = PLS3DPara5(),
group.col = PLS3DPara6())
})
# Save a PNG snapshot of the current rgl scene.
observeEvent(input$snapShot3, {
snapshot3d( filename="3D Plot.png", fmt = "png", top = TRUE)
})
# ---- Alternative 3D PLSDA via pca3d, fed with precomputed scores ----
# Show the axis scale?
PLS3D2para1 <- eventReactive(input$plot3dpls2,{
return(input$showScalePls)
})
# Sample labels: checkbox value when TRUE, otherwise a blank label.
PLS3D2para2 <- eventReactive(input$plot3dpls2,{
if(input$showlabelsPls==TRUE){
return(input$showlabelsPls)
} else {
return(" ")
}
})
# Show the base plane?
PLS3D2para3 <- eventReactive(input$plot3dpls2,{
return(input$showPPls)
})
# Show point shadows?
PLS3D2para4 <- eventReactive(input$plot3dpls2,{
return(input$shadowPls)
})
# Show group ellipses?
PLS3D2para5 <- eventReactive(input$plot3dpls2,{
return(input$ell2Pls)
})
# Show group labels?
PLS3D2para6 <- eventReactive(input$plot3dpls2,{
return(input$showGrpLabPls)
})
# Build a score matrix from the first three PLS components and hand it to
# pca3d as if it were a PCA result.
# NOTE(review): assumes analSet$plsr has at least 3 components -- confirm
# upstream PLSR.Anal guarantees that.
observeEvent(input$plot3dpls2,{
PC1=analSet$plsr$scores[,1]
PC2=analSet$plsr$scores[,2]
PC3=analSet$plsr$scores[,3]
d=data.frame(PC1,PC2,PC3)
m <- as.matrix(d)
row.names(m)<-row.names(dataSet$norm)
pca3d(pca=m,
group=dataSet$cls,
show.scale=PLS3D2para1(),
show.labels=PLS3D2para2(),
show.plane=PLS3D2para3(),
show.shadows= PLS3D2para4(),
show.ellipses=PLS3D2para5(),
show.group.labels=PLS3D2para6(),
new=TRUE)
})
# Snapshot helper provided by pca3d.
observeEvent(input$snapShot4, {
snapshotPCA3d("3D Plot 2.png")
})
################################################################################################
################################### PLSDA Loading ##############################################
################################################################################################
# ---- PLSDA loading plot: axes, labels, scatter/bar renderings, table ----
# Component index for the x-axis.
plsloadingPara1 <- eventReactive(input$update8,{
return(input$plsloadingX)
})
# Component index for the y-axis.
plsloadingPara2 <- eventReactive(input$update8,{
return(input$plsloadingY)
})
# Label features on the plot?
plsloadingPara3 <- eventReactive(input$update8,{
return(input$plsloadingFeat)
})
# Seed the color input with one random color per class level.
output$color5 <- renderUI({
colorV= sample(colorList, dataLvls())
textInput("colorVector5", h5("Colors for the interval plot graph"), value = toString(colorV))
})
# Scatter-style loading plot.
# NOTE(review): as with the PCA section, second index is passed as `inx=`
# while the first is `inx1=` -- confirm against MyPlotPLSLoading's signature.
output$loadingPLS1 <- renderPlot({
PLSR.Anal()
MyPlotPLSLoading(imgName="PLSDA loading", format="png", dpi=72, width=NA,
inx1=plsloadingPara1(),
inx=plsloadingPara2(),
plotType="scatter",
lbl.feat=plsloadingPara3())
})
# Bar-style loading plot with the same settings.
output$loadingPLS2 <- renderPlot({
PLSR.Anal()
MyPlotPLSLoading(imgName="PLSDA loading", format="png", dpi=72, width=NA,
inx1=plsloadingPara1(),
inx=plsloadingPara2(),
plotType="bar",
lbl.feat=plsloadingPara3())
})
# Loadings table for the selected component pair.
output$Loadingtable2 <- renderDataTable({
PLSR.Anal()
MyLOADTable2(inx=plsloadingPara1(),
iny=plsloadingPara2())
},options = list(lengthMenu = c(10, 20,50,100), pageLength = 20)
)
# Resolve a click on the PLSDA loading scatter to a feature index.
# Returns the row number of the loading point nearest the click, or NA when
# nothing is close enough.
#
# Rewritten from three element-wise `for (i in 1:length(...))` loops that
# grew `n`, `b`, `c` one element at a time: `signif()` and `as.numeric()`
# are vectorized, so the frame can be built directly.
click5 <- reactive({
  validate(
    need(input$plot_click5$x != "", "Click a point for a interval plot")
  )
  PLSR.Anal()
  # Loadings of the two user-selected components, rounded to 5 significant
  # digits to match the plotted coordinates.
  b <- signif(as.numeric(analSet$plsr$loadings[, plsloadingPara1()]), 5)
  c <- signif(as.numeric(analSet$plsr$loadings[, plsloadingPara2()]), 5)
  n <- seq_along(b)
  # nearPoints() matches the click against (b, c); column 1 of the matched
  # row is the feature index `n`.
  x <- as.numeric(nearPoints(data.frame(n, b, c), input$plot_click5,
                             xvar = "b", yvar = "c", maxpoints = 1)[1])
  return(x)
})
# Interval plot for the compound selected by clicking the PLSDA loading plot.
# BUG FIX: the "sd" branch tested input$tIntCalc (the t-test panel's radio);
# this panel's radio is l2IntCalc (see the "se" branch), so "sd" could only
# be chosen by coincidence of another panel's state.
observeEvent(input$plot_click5, output$plsloadingCmp1 <-renderPlot({
  # Comma-separated color list typed into the color5 text input.
  colString=input$colorVector5
  col=strsplit(colString,",")[[1]]
  # Map the radio choice to the error-bar statistic (default: none).
  stat=" "
  if(input$l2IntCalc==1){
    stat="se"
  }else if (input$l2IntCalc==2){
    stat="sd"
  }
  IntervalPlot(cmpdNm = click5(), dpi=200, colors=col,calc=stat)
}))
# Box-plot view of the clicked compound, using the same color list.
observeEvent(input$plot_click5, output$plsloadingCmp2<-renderPlot({
colString=input$colorVector5
colr=strsplit(colString,",")[[1]]
PlotCmpdBoxView(cmpdNm=click5(), dpi=200, col=colr)
}))
# Echo the raw click coordinates under the plot.
output$info5 <- renderText({
paste0("x=", input$plot_click5$x, "\ny=", input$plot_click5$y)
})
################################################################################################
################################### PLSDA CV ###################################################
################################################################################################
# ---- PLSDA cross-validation: component count, CV method, metric ----
# Suggested maximum number of components (recomputed when a new file loads).
defultCompNo <- reactive({
PLSR.Anal()
input$file1
no=GetDefaultPLSCVComp()
return(no)
})
# Numeric input pre-filled with the suggested component count.
output$CVcompNo <- renderUI({
numericInput("crossVNo",
label=h5("Maximum components to search:"),
value=defultCompNo())
})
# Number of components to search.
CVpara1 <- eventReactive(input$update9,{
return(input$crossVNo)
})
# CV method: 'T' (10-fold) vs 'L' (leave-one-out) -- codes consumed by
# PLSDA.CV.
CVpara2 <- eventReactive(input$update9,{
meth='L'
if(input$CVMethod==1){
meth='T'
} else {
meth='L'
}
return(meth)
})
# Performance measure: Q2, R2, or accuracy (" " sentinel).
CVpara3 <- eventReactive(input$update9,{
per=""
if(input$performMea==1){
per="Q2"
} else if (input$performMea==3){
per="R2"
} else {
per=" "
}
return(per)
})
# Plot container sized from the width (%) / height inputs.
output$PLSDACVPlot <- renderUI({
plotOutput("PLSDACVP", width = paste0(input$plsCVWidth, "%"),
height = input$plsCVHeight)
})
# Run the CV and draw the classification-performance plot.
output$PLSDACVP <- renderPlot({
PLSDA.CV(methodName= CVpara2(), compNum=CVpara1(), choice=CVpara3())
MyPlotPLS.Classification()
})
# CV result table.
# NOTE(review): `options=` is a renderDataTable-style argument; renderTable
# forwards extras to its formatter -- confirm this does what was intended.
output$CVTab <- renderTable({
PLSDA.CV(methodName= CVpara2(), compNum=CVpara1(), choice=CVpara3())
MyCVTable()
},options = list(lengthMenu = c(10, 20,50,100), pageLength = 20)
)
################################################################################################
################################### PLSDA IMP ##################################################
################################################################################################
# ---- PLSDA importance: VIP / coefficient measure, feature count, plot ----
# Column names available for the VIP measure (refreshed on file load).
vips <- reactive({
input$file1
vipNames=GetPLSSigColNames("vip")
return(vipNames)
})
# Column names available for the coefficient measure.
coefN <- reactive({
input$file1
coefNames=GetPLSSigColNames("coef")
return(coefNames)
})
# Dropdown of VIP columns.
output$vip <- renderUI({
selectInput("impVIP",
label=NULL,
choices=vips())
})
# Dropdown of coefficient columns.
output$coef <- renderUI({
selectInput("impCoef",
label=NULL,
choices=coefN())
})
# Which importance measure is selected ("" when neither checkbox is ticked).
impPara1 <- eventReactive(input$update10,{
m=""
if(input$impMeasure1==TRUE){
m="vip"
} else if (input$impMeasure2==TRUE) {
m="coef"
}
return(m)
})
# The column name for the chosen measure (" " sentinel when none chosen).
impPara2 <- eventReactive(input$update10,{
impNm=" "
if(impPara1()=='vip'){
impNm = input$impVIP
} else if (impPara1()=="coef"){
impNm = input$impCoef
}
return(impNm)
})
# Number of top features to display.
impPara3 <- eventReactive(input$update10,{
return(input$topFeatNo)
})
# Black-and-white rendering?
impPara4 <- eventReactive(input$update10,{
return(input$BW)
})
# Importance plot for the chosen measure/column.
output$PLSDAImp <- renderPlot({
MyPlotPLS.Imp(type=impPara1(),
feat.nm=impPara2(),
feat.num=impPara3(),
color.BW=impPara4())
})
# Matching importance table (VIP or coefficient).
output$ImpFeatTab <- renderDataTable({
if(impPara1()=="vip"){
VIPTab()
}else if(impPara1()=="coef"){
COEFTab()
}
},options = list(lengthMenu = c(10, 20,50,100), pageLength = 20)
)
################################################################################################
################################### PLSDA Perm #################################################
################################################################################################
# ---- PLSDA permutation test: statistic, repetitions, plot ----
# Test statistic: "accu" (accuracy) or the " " sentinel (separation).
permuPara1 <- eventReactive(input$update11,{
testst=" "
if(input$permTest==1){
testst="accu"
} else {
testst=" "
}
return(testst)
})
# Number of permutations: 100 / 1000 / 2000.
permuPara2 <- eventReactive(input$update11,{
reps=100
if(input$permNumber==1){
reps=100
} else if (input$permNumber==2){
reps=1000
} else {
reps=2000
}
return(reps)
})
# Run the permutation test and draw the result.
# NOTE(review): warnings are globally silenced around the run and restored
# after; a targeted suppressWarnings() would be safer if this ever errors
# mid-render.
output$permPlot <- renderPlot({
options(warn=-1)
PLSDA.Permut(num=permuPara2(), type=permuPara1())
MyPlotPLS.Permutation()
options(warn=0)
})
###############################################################################
############################### OPLSDA 2D Score ###############################
###############################################################################
# ---- OPLSDA 2D score plot: confidence region, labels, grey scale ----
# 95% confidence region when ticked, otherwise none (0).
oplsSPara1 <- eventReactive(input$update12,{
conf=0.95
if(input$opls95==TRUE){
conf=0.95
} else {
conf=0
}
return(conf)
})
# Show sample names? (1/0 flag.)
oplsSPara2 <- eventReactive(input$update12,{
na=1
if(input$oplsSmpNam==TRUE){
na=1
} else {
na=0
}
return(na)
})
# Grey-scale rendering? (1/0 flag.)
oplsSPara3 <- eventReactive(input$update12,{
sca=0
if(input$oplsgScale==TRUE){
sca=1
} else {
sca=0
}
return(sca)
})
# Plot container sized from the width (%) / height inputs.
output$oplsScore <- renderUI({
plotOutput("oplsSc", width = paste0(input$OPlsWidth, "%"),
height = input$OPlsHeight)
})
# Run OPLS regression and draw the 2D score plot.
output$oplsSc <- renderPlot({
OPLSR.Anal()
MyPlotOPLS2DScore(reg=oplsSPara1(),
show=oplsSPara2(),
grey.scale=oplsSPara3())
})
###############################################################################
############################### OPLSDA S-plot #################################
###############################################################################
# ---- OPLSDA S-plot, click-to-inspect, and result table ----
# Seed the color input with one random color per class level.
output$color6 <- renderUI({
colorV= sample(colorList, dataLvls())
textInput("colorVector6", h5("Colors for the interval plot graph"), value = toString(colorV))
})
# Draw the S-plot on demand (update13).
observeEvent(input$update13, output$oplsSPlot <- renderPlot({
OPLSR.Anal()
MyPlotOPLS.Splot(plotType="custom")
}))
# Resolve a click on the S-plot to a feature index.
# Recomputes the S-plot coordinates: b = covariance of each feature with the
# predictive score vector, c = the corresponding correlation, then matches
# the click against (b, c) with nearPoints().
click6 <- reactive({
validate(
need(input$plot_click6$x != "", "Click a point for a interval plot")
)
OPLSR.Anal()
s <- as.matrix(dataSet$norm);
T <- as.matrix(analSet$oplsda$scoreMN)
n <- c()
b <- c()
c <- c()
for(i in 1:ncol(s)){
n[i]=i
}
# Covariance of each feature column with the score matrix.
# NOTE(review): b and c are grown by re-wrapping in matrix() each pass --
# O(n^2) and fragile, but the final one-column layout is what nearPoints
# consumes; left as-is to avoid disturbing the exact ordering.
for (i in 1:ncol(s)) {
scov <- cov(s[,i], T)
b <- matrix(c(b, scov), ncol=1)
}
# Correlation derived from the covariance and the two std deviations.
for (i in 1:nrow(b)) {
den <- apply(T, 2, sd)*sd(s[,i])
corr1 <- b[i,]/den
c <- matrix(c(c, corr1), ncol=1)
}
x=as.numeric(nearPoints(data.frame(n,b,c), input$plot_click6, xvar="b", yvar="c", maxpoints=1)[1])
return(x)
})
# Interval plot for the clicked compound.
observeEvent(input$plot_click6, output$oplsCmp1 <-renderPlot({
colString=input$colorVector6
col=strsplit(colString,",")[[1]]
IntervalPlot(cmpdNm = click6(), dpi=200, colors=col)
}))
# Box-plot view of the clicked compound.
observeEvent(input$plot_click6, output$oplsCmp2 <-renderPlot({
colString=input$colorVector6
colr=strsplit(colString,",")[[1]]
PlotCmpdBoxView(cmpdNm=click6(), dpi=200,col=colr)
}))
# Echo the raw click coordinates under the plot.
output$info6 <- renderText({
paste0("x=", input$plot_click6$x, "\ny=", input$plot_click6$y)
})
# S-plot data table (the plot call populates the values the table reads).
output$OPLSDATab <- renderDataTable({
OPLSR.Anal()
MyPlotOPLS.Splot(plotType="custom")
OPLSTab()
},options = list(lengthMenu = c(10, 20,50,100), pageLength = 20)
)
###############################################################################
############################### OPLSDA Overview Plot ##########################
###############################################################################
# OPLSDA model-overview plot, drawn on demand (update14).
observeEvent(input$update14, output$OPLSOver <- renderPlot({
MyPlotOPLS.MDL()
}))
###############################################################################
############################### OPLSDA Permutation plot #######################
###############################################################################
# Number of permutations, coerced from the text/select input.
oplsdaPermPara1 <- eventReactive(input$update15,{
return(as.numeric(input$oplsdaPer))
})
# OPLSDA permutation plot.
output$OPLSDAPerm <- renderPlot({
MyPlotOPLS.Permutation(num=oplsdaPermPara1())
})
###############################################################################
########################### Feature Identification SAM ########################
###############################################################################
# ---- SAM (Significance Analysis of Microarrays) feature identification ----
# Was the uploaded data declared paired? (Refreshed on file load.)
ispaired5 <- reactive({
input$file1
return(dataSet$paired)
})
# Multi-class data: show the F-statistic notice instead of the options.
output$SAMtxt<-renderUI({
if (numOfcls()==TRUE){
textOutput("SAMinfo")
}
})
output$SAMinfo <- renderText({
"Perform multi-class SAM based on F-statistic"
})
# Two-class data only: offer the non-parametric checkbox.
output$SAMPara <- renderUI({
if (numOfcls()==FALSE){
checkboxInput("sampara",
label = h5("Non-parametric tests:"), value = FALSE)
}
})
# Two-class data only: paired analysis offered only when the data is paired.
output$SAMAnlTyp <- renderUI({
if (numOfcls()==FALSE){
if(ispaired5()==TRUE){
selectInput("samanltyp",
label = h5("Analysis Type"),
choices = list("Unpaired" = 1, "Paired" = 2),
selected = 1)
} else {
selectInput("samanltyp",
label = h5("Analysis Type"),
choices = list("Unpaired" = 1),
selected = 1)
}
}
})
# Two-class data only: equal vs unequal group variance.
output$SAMVar <- renderUI({
if(numOfcls()==FALSE){
selectInput("samvar",
label =h5("Group variance"),
choices = list("Equal"=1, "Unequal"=2),
selected =1)
}
})
# Test statistic: d.stat (default) or non-parametric (wilc).
# NOTE(review): input$sampara is NULL until the UI above renders; confirm
# compute1 cannot fire before that.
SAMPara1 <- eventReactive(input$compute1,{
method="d.stat"
if(input$sampara==FALSE){
method="d.stat"
} else {
method="non-parametric"
}
return(method)
})
# Paired analysis flag.
SAMPara2 <- eventReactive(input$compute1,{
paired=FALSE
if(input$samanltyp==1){
paired=FALSE
} else {
paired= TRUE
}
return(paired)
})
# Equal-variance flag.
SAMPara3 <- eventReactive(input$compute1,{
grpVar=TRUE
if(input$samvar==1){
grpVar=TRUE
} else {
grpVar=FALSE
}
return(grpVar)
})
# Run SAM: multi-class uses defaults (F-statistic); two-class passes the
# chosen options. Rendered as an (empty) plot purely to trigger the analysis
# inside a reactive context.
output$emptyPlot<-renderPlot({
if(numOfcls()==TRUE){
SAM.Anal()
#MyPlotSAM.FDR(delta=input$samfdr)
} else {
SAM.Anal(method=SAMPara1(),
paired=SAMPara2(),
varequal=SAMPara3())
}
# MyPlotSAM.FDR(delta=input$samfdr)
})
# After computing, expose a delta input seeded with the suggested value.
observeEvent(input$compute1,{
output$SAMFDR <- renderUI({
numericInput("samfdr",
label = h5("Update the delta to control FDR:"), value = GetSuggestedSAMDelta())
})
})
#FDRPara <- eventReactive(input$compute1,{
# return(dv)
#})
#output$SAMFDRPLOT<-renderPlot({
# MyPlotSAM.FDR(delta=input$samfdr)
#})
# FDR plot, result plot, and table all track the live delta input once
# compute1 has fired.
observeEvent(input$compute1,{
output$SAMFDRPLOT<-renderPlot({
MyPlotSAM.FDR(delta=input$samfdr)
})
})
observeEvent(input$compute1,{
output$SAMResultPlot<-renderPlot({
SAMResPlot(delta=input$samfdr)
})
})
observeEvent(input$compute1,{
output$SAMTab<-renderDataTable({
SAMTable(del=input$samfdr)
})
})
##########################################################################
################################# Dendrogram #############################
##########################################################################
# ---- Dendrogram: sample distance measure and clustering algorithm ----
# Distance measure: euclidean / spearman / pearson.
dendroPara1 <- eventReactive(input$update18,{
smplDist='euclidean'
if(input$DendroDist==1){
smplDist='euclidean'
} else if (input$DendroDist==2){
smplDist='spearman'
} else {
smplDist='pearson'
}
return(smplDist)
})
# Agglomeration method: ward / average / complete / single.
dendroPara2 <- eventReactive(input$update18,{
clstDist="ward"
if(input$DendroAlgor==1){
clstDist="ward"
} else if (input$DendroAlgor==2){
clstDist="average"
} else if (input$DendroAlgor==3){
clstDist="complete"
} else {
clstDist="single"
}
return(clstDist)
})
# Draw the hierarchical-clustering tree with the chosen settings.
output$DendroPlot <- renderPlot({
MyPlotHCTree(smplDist=dendroPara1(),
clstDist=dendroPara2())
})
##############################################################################
################################# Cluster Heatmap ############################
##############################################################################
# ---- Cluster heatmap: all settings gated on update19 ----
# Distance measure: euclidean / pearson / minkowski.
CAHeatPara1 <- eventReactive(input$update19,{
DistMeasure='euclidean'
if(input$CAHeatDist==1){
DistMeasure='euclidean'
} else if (input$CAHeatDist==2){
DistMeasure='pearson'
} else {
DistMeasure='minkowski'
}
return(DistMeasure)
})
# Agglomeration method: ward / average / complete / single.
CAHeatPara2 <- eventReactive(input$update19,{
clstMethod = 'ward'
if(input$CAHeatAlgor==1){
clstMethod='ward'
} else if (input$CAHeatAlgor==2){
clstMethod='average'
} else if (input$CAHeatAlgor==3){
clstMethod='complete'
} else {
clstMethod='single'
}
return(clstMethod)
})
# Color palette keyword consumed by the plotting helper.
CAHeatPara3 <- eventReactive(input$update19,{
colorContrast='default'
if(input$CAHeatCC==1){
colorContrast='default'
} else if (input$CAHeatCC==2){
colorContrast ='gbr'
} else if (input$CAHeatCC==3){
colorContrast = 'heat'
} else if (input$CAHeatCC==4){
colorContrast ='topo'
} else if (input$CAHeatCC==5){
colorContrast ='gray'
} else if (input$CAHeatCC==6){
colorContrast ='rwb'
} else if (input$CAHeatCC==7){
colorContrast ='rwg'
} else if (input$CAHeatCC==8){
colorContrast ='wnvyb'
}
return(colorContrast)
})
# View mode: overview vs detail.
CAHeatPara4 <- eventReactive(input$update19,{
ovrVOpt="overview"
if(input$CAHeatView==1){
ovrVOpt="overview"
} else {
ovrVOpt="detail"
}
return(ovrVOpt)
})
# "Do not reorganise" master checkbox.
CAHeatPara5 <- eventReactive(input$update19,{
return(input$CAHeatView01)
})
# Reorder rows? FALSE when the user froze rows (option 1) or both (option 3).
CAHeatPara6 <- eventReactive(input$update19,{
organiseBy1=TRUE
if(CAHeatPara5()==TRUE){
if(input$CAHeatView02==1){
organiseBy1=FALSE
} else if (input$CAHeatView02==3) {
organiseBy1=FALSE
} else {
organiseBy1=TRUE
}
return(organiseBy1)
} else {
return(organiseBy1)
}
})
# Reorder columns? FALSE when the user froze columns (option 2) or both (3).
CAHeatPara7 <- eventReactive(input$update19,{
organiseBy2=TRUE
if(CAHeatPara5()==TRUE){
if(input$CAHeatView02==2){
organiseBy2=FALSE
} else if (input$CAHeatView02==3){
organiseBy2=FALSE
} else {
organiseBy2=TRUE
}
return(organiseBy2)
} else {
return(organiseBy2)
}
})
# Restrict to top features?
CAHeatPara8 <- eventReactive(input$update19,{
return(input$CAHeatView03)
})
# How many top features.
CAHeatPara9 <- eventReactive(input$update19,{
return(input$CAHeatView04)
})
# Ranking method for the top features: t/ANOVA, VIP, or random forest.
CAHeatPara10 <- eventReactive(input$update19,{
methodnm="tanova"
if(input$CAHeatView05==1){
methodnm="tanova"
} else if (input$CAHeatView05==2){
methodnm="vip"
} else {
methodnm="rf"
}
return(methodnm)
})
# Draw cell borders?
CAHeatPara11<-eventReactive(input$update19,{
return(input$CAHeatView06)
})
# Data source: normalised vs original.
CAHeatPara12 <- eventReactive(input$update19,{
dataOpt="norm"
if(input$CAHeatData01==1){
dataOpt="norm"
} else {
dataOpt="org"
}
return(dataOpt)
})
# Autoscale direction: row / column / none.
CAHeatPara13 <-eventReactive(input$update19,{
scaleOpt="column"
if(input$CAHeatData02==1){
scaleOpt="row"
} else if (input$CAHeatData02==2){
scaleOpt="column"
} else {
scaleOpt="none"
}
return(scaleOpt)
})
# Plot container sized from the width (%) / height inputs.
output$plot.ui <- renderUI({
plotOutput("CAHeatPlot", width = paste0(input$HeatWidth, "%"), height = input$HeatHeight)
})
# Render either the top-features sub-heatmap or the full heatmap.
output$CAHeatPlot <-renderPlot({
if(CAHeatPara8()==TRUE){
MyPlotSubHeatMap(dataOpt=CAHeatPara12(),
scaleOpt=CAHeatPara13(),
smplDist=CAHeatPara1(),
clstDist= CAHeatPara2(),
palette=CAHeatPara3(),
method.nm=CAHeatPara10(),
top.num=CAHeatPara9(),
viewOpt=CAHeatPara4(),
rowV=CAHeatPara6(),
colV=CAHeatPara7(),
border=CAHeatPara11())
} else {
MyPlotHeatMap(dataOpt=CAHeatPara12(),
scaleOpt=CAHeatPara13(),
smplDist=CAHeatPara1(),
clstDist=CAHeatPara2(),
palette=CAHeatPara3(),
viewOpt=CAHeatPara4(),
rowV=CAHeatPara6(),
colV=CAHeatPara7(),
var.inx=NA,
border=CAHeatPara11())
}
})
#############################################################################
############################## K-means ######################################
#############################################################################
# ---- K-means: cluster count from the UI, applied on Update (update20) ----
KMPara1 <- eventReactive(input$update20, {
  input$KMclstNm
})
# Run K-means with the chosen cluster count and draw the cluster plot.
output$KMPlot <- renderPlot({
  Kmeans.Anal(clust.num = KMPara1())
  MyPlotKmeans()
})
# Cluster-membership table for the same cluster count.
output$mytable1 <- renderTable({
  Kmeans.Anal(clust.num = KMPara1())
  MyGetAllKMClusterMembers()
})
#############################################################################
############################## SOM ##########################################
#############################################################################
# ---- SOM (self-organising map): grid dimensions, init, neighbourhood ----
# Grid x dimension.
SOMPara1 <- eventReactive(input$update21,{
return(as.numeric(input$SOMXD))
})
# Grid y dimension.
SOMPara2 <- eventReactive(input$update21,{
return(as.numeric(input$SOMYD))
})
# Initialisation method: linear / random / sample.
SOMPara3 <- eventReactive(input$update21,{
intMet='linear'
if(input$SOMInit==1){
intMet='linear'
} else if (input$SOMInit==2){
intMet='random'
} else {
intMet='sample'
}
return(intMet)
})
# Neighbourhood function: gaussian / bubble.
SOMPara4 <- eventReactive(input$update21,{
SOMN='gaussian'
if(input$SOMNeigh==1){
SOMN='gaussian'
} else {
SOMN='bubble'
}
return(SOMN)
})
# Run the SOM and draw the map.
output$SOMPlot <- renderPlot({
SOM.Anal(x.dim=SOMPara1(),
y.dim=SOMPara2(),
initMethod=SOMPara3(),
neigb = SOMPara4())
MyPlotSOM()
})
# Cluster-membership table (re-runs the SOM with the same settings).
output$SOMTab <- renderTable({
SOM.Anal(x.dim=SOMPara1(),
y.dim=SOMPara2(),
initMethod=SOMPara3(),
neigb = SOMPara4())
MyGetAllSOMClusterMembers()
})
#############################################################################
############################## Random Forest ################################
#############################################################################
# ---- Random forest: classification, VIP, outlier detection ----
# Number of trees: 500 / 1000 / 2000 / 5000.
RFPara1 <- eventReactive(input$update22,{
Nu=500
if(input$RFTreesNu==1){
Nu=500
} else if (input$RFTreesNu==2){
Nu=1000
} else if (input$RFTreesNu==3){
Nu=2000
} else {
Nu=5000
}
return(Nu)
})
# Number of predictors tried at each split (mtry).
RFPara2 <- eventReactive(input$update22,{
return(as.numeric(input$RFPredNu))
})
# Fit the forest and draw the classification plot.
output$RFPlot <- renderPlot({
RF.Anal(treeNum=RFPara1(), tryNum=RFPara2())
MyPlotRF.Classify()
})
# Report the out-of-bag error after a run.
observeEvent(input$update22,{
output$RFOOB<-renderText({
paste("The OOB Error is: ",toString(GetRFOOB()))
})
})
# Confusion-matrix table (re-fits with the same settings).
output$RFTab <- renderTable({
RF.Anal(treeNum=RFPara1(), tryNum=RFPara2())
MyGetRFConf.Table()
})
#################################################################################
######################### Random Forest VIP Plot ################################
#################################################################################
# Variable-importance plot, help text, and table, each drawn on update23.
observeEvent(input$update23,{
output$RFVipPlot <- renderPlot({
MyPlotRF.VIP()
})
})
observeEvent(input$update23,{
output$RFVipHelp<-renderText({
paste("Features ranked by their contributions
to classification accuracy (Mean Dicrease Accuracy) ")
})
})
observeEvent(input$update23,{
output$RFDaTTab = renderDataTable({
MyRFVipTab()
})
})
#################################################################################
######################### Random Forest Outlier detection #######################
#################################################################################
# Outlier plot and note, drawn on update24.
observeEvent(input$update24,{
output$RFOutTxt<-renderText({
paste("Only top 5 potential outliers are labeled ")
})
})
observeEvent(input$update24,{
output$RFOutPlot = renderPlot({
MyPlotRF.Outlier()
})
})
########################################################################################################
################################## SVM Plot ############################################################
########################################################################################################
# ---- SVM (recursive feature elimination) classification and importance ----
# Cross-validation type: 10-fold, leave-one-out, or bootstrap.
# NOTE(review): "bootstrape" matches MetaboAnalyst's historical keyword
# spelling -- confirm against RSVM.Anal before "fixing" it.
SVMPara1 <- eventReactive(input$update25,{
cvType=10
if(input$SVMVMet==1){
cvType=10
} else if (input$SVMVMet==2){
cvType="LOO"
} else if (input$SVMVMet==3){
cvType="bootstrape"
}
return(cvType)
})
# Run R-SVM with the chosen CV type and plot classification performance.
output$SVMPlot <- renderPlot({
RSVM.Anal(cvType=SVMPara1())
MyPlotRSVM.Classification()
})
############################################################################################
####################################### SVM Importance plot ###################################
############################################################################################
# Compound-importance plot, drawn on update26.
observeEvent(input$update26,{
output$SVMImportPlot = renderPlot({
MyPlotRSVM.Cmpd()
})
})
# ---- PLSR module: second upload path (file2) and its helpers ----
# Number of class levels (refreshed on file2 load).
PLSRdataLvls <- reactive({
input$file2
lvls = length(levels(dataSet$proc.cls))
return(lvls)
})
# TRUE when the data has more than two classes.
PLSRnumOfcls <- reactive({
input$file2
if(dataSet$cls.num>2){
return(TRUE)
} else {
return(FALSE)
}
})
# Was the data declared paired?
PLSRispaired <- reactive({
input$file2
return(dataSet$paired)
})
# Read the uploaded CSV, run the sanity check, and echo its message.
# NOTE(review): lbl.type is fixed to "cont" (continuous labels) for the
# regression module -- confirm that matches Read.TextData's expectations.
output$Pcontents <- renderText({
inFile2 <- input$file2
if (is.null(inFile2)){
return(NULL)
}
datatype <- "conc"
format1 <- "rowu"
format2 <- "cont"
ispaired=FALSE
if(input$plsrRadioButton1==1){
datatype <- "conc"
} else if (input$plsrRadioButton1== 2){
datatype <- "specbin"
} else {
datatype <- "pktable"
}
if (input$plsrSelect1==1){
format1="rowu"
} else if (input$plsrSelect1==2){
format1 <- "colu"
} else if (input$plsrSelect1==3){
format1 <- "rowp"
} else {
format1 <- "colp"
}
# "rowp"/"colp" formats imply paired data.
if(format1=="rowp"|format1=="colp"){
ispaired=TRUE
}
InitDataObjects(datatype, "stat", paired=ispaired)
Read.TextData(inFile2$datapath, format=format1, lbl.type=format2)
SanityCheckData()
ReplaceMin()
dataSet$check.msg
})
# ---- PLSR missing-value options, gated on the Calculate button (Pcalc1) ----
# Remove features with too many missing values?
PLSRMissVPara1 <- eventReactive(input$Pcalc1,{
return(input$PmissValue1)
})
# Missing-percentage cutoff, converted from percent to a fraction.
PLSRMissVPara2 <- eventReactive(input$Pcalc1,{
return(input$PmissValue2/100)
})
# Replace by column minimum?
PLSRMissVPara3 <- eventReactive(input$Pcalc1,{
return(input$PmissValue3)
})
# Exclude variables with missing values?
PLSRMissVPara4 <- eventReactive(input$Pcalc1,{
return(input$PmissValue4)
})
# Map the replace-by-statistic select (2=mean, 3=median, 4=min) to an
# ImputeVar method keyword; defaults to "min".
# BUG FIX: the guard was `input$PmissValue5==2|3|4`, which parses as
# `(input$PmissValue5==2) | 3 | 4` and is therefore always TRUE because the
# bare literals 3 and 4 are truthy. `%in%` expresses the intended set test.
PLSRMissVPara5 <- eventReactive(input$Pcalc1,{
  MVmet="min"
  if(input$PmissValue5 %in% c(2, 3, 4)){
    if(input$PmissValue5==2){
      MVmet="mean"
    } else if (input$PmissValue5==3){
      MVmet="median"
    } else if (input$PmissValue5==4){
      MVmet="min"
    }
  }
  return(MVmet)
})
# Map the estimation-method select (2=knn, 3=ppca, 4=bpca, 5=svdImpute) to
# an ImputeVar method keyword; defaults to "knn".
# BUG FIX: same always-TRUE `==2|3|4|5` guard as above, replaced by %in%.
PLSRMissVPara6 <- eventReactive(input$Pcalc1,{
  MVmet="knn"
  if(input$PmissValue6 %in% c(2, 3, 4, 5)){
    if(input$PmissValue6==2){
      MVmet="knn"
    } else if (input$PmissValue6==3){
      MVmet="ppca"
    } else if (input$PmissValue6==4){
      MVmet="bpca"
    } else if (input$PmissValue6==5){
      MVmet="svdImpute"
    }
  }
  return(MVmet)
})
# Apply the chosen missing-value treatment(s) and report completion.
# BUG FIX: the two method-dispatch branches were crossed -- PLSRMissVPara5()
# yields "mean"/"median"/"min" but was tested against "knn"/"ppca"/"bpca",
# and PLSRMissVPara6() yields "knn"/"ppca"/"bpca"/"svdImpute" but was tested
# against "mean"/"median"/"min"/"svdImpute" -- so most methods could never
# be applied. Each reactive is now matched against its own value set.
# NOTE(review): Para5 defaults to "min", so the statistic branch shadows the
# estimation branch whenever neither checkbox (Para3/Para4) is ticked --
# confirm the intended precedence between the two selects.
output$PMVtext1<-renderText({
  # Optional pre-filter: drop features above the missing-percentage cutoff.
  if(PLSRMissVPara1()==TRUE){
    RemoveMissingPercent(int.mat=dataSet$preproc, percent=PLSRMissVPara2())
  }
  if(PLSRMissVPara3()==TRUE){
    ImputeVar(int.mat=dataSet$preproc, method="colmin")
  }else if(PLSRMissVPara4()==TRUE){
    ImputeVar(int.mat=dataSet$preproc, method="exclude")
  } else if (PLSRMissVPara5()=="mean"|PLSRMissVPara5()=="median"|PLSRMissVPara5()=="min"){
    ImputeVar(int.mat=dataSet$preproc, method=PLSRMissVPara5())
  } else if (PLSRMissVPara6()=="knn"| PLSRMissVPara6()=="ppca"| PLSRMissVPara6()=="bpca"| PLSRMissVPara6()=="svdImpute"){
    ImputeVar(int.mat=dataSet$preproc, method=PLSRMissVPara6())
  }
  # Any remaining missing values fall back to the half-minimum default.
  ReplaceMin()
  paste("Missing value calculations complete")
})
# Sample (row) names of the processed data; re-evaluated on each upload.
PLSRsampleGroups <- reactive({
  input$file2  # dependency only: refresh when a new file arrives
  rownames(dataSet$proc)
})

# Feature (column) names of the processed data; re-evaluated on each upload.
PLSRsampleFeatures <- reactive({
  input$file2  # dependency only
  colnames(dataSet$proc)
})
# Group names of the processed data, one per class level; re-evaluated on
# each upload. Fixed: the original copied levels() element-by-element into
# a growing vector, and its `for(i in 1:numOfLvl)` loop misbehaved when
# there were no levels (1:0 iterates over c(1, 0), yielding c(NA) instead
# of an empty vector). levels() returns the same values directly.
PLSRpooledSamples <- reactive({
  input$file2  # dependency only: refresh when a new file arrives
  levels(dataSet$proc.cls)
})
# Drop-down selectors populated from the uploaded data: a reference
# sample, a pooled-sample group, and a reference feature.
output$Prefsample <- renderUI({
  selectInput("Prsmpl",
              label = h4("Specific reference samples"),
              choices = PLSRsampleGroups())
})
output$Ppoolsample <- renderUI({
  selectInput("Ppsmpl",
              label = h4("Pooled samples from group"),
              choices = PLSRpooledSamples())
})
output$PrefFeat <- renderUI({
  selectInput("Prf",
              label = h4("Reference features"),
              choices = PLSRsampleFeatures())
})
# Maps the row-normalisation radio choice (input$PradioButtons2) to the
# method keyword understood by Normalization().
# Bug fixed: the final branch assigned to a misspelled variable
# (`rownNorm="CompNorm"`); the correct value was only produced by accident
# because an assignment's value is the value assigned. The result is now
# computed and returned explicitly.
PLSRnormMethod1 <- eventReactive(input$Pgo, {
  switch(as.character(input$PradioButtons2),
    "1" = 'Nothing',
    "2" = 'SumNorm',
    "3" = 'MedianNorm',
    "4" = "ProbNorm",
    "5" = "ProbNorm2",
    "CompNorm")  # choice 6: normalisation to a reference feature
})
# Data-transformation choice: 1 = none, 2 = log, anything else = cube root.
PLSRnormMethod2 <- eventReactive(input$Pgo, {
  switch(as.character(input$PradioButtons3),
    "1" = 'Nothing',
    "2" = 'LogNorm',
    'CrNorm')
})
# Scaling choice: 1 = none, 2 = mean-centre, 3 = auto (unit variance),
# 4 = Pareto, 5 = range, anything else = vast scaling.
PLSRnormMethod3 <- eventReactive(input$Pgo, {
  switch(as.character(input$PradioButtons4),
    "1" = 'Nothing',
    "2" = 'MeanCenter',
    "3" = 'AutoNorm',
    "4" = 'ParetoNorm',
    "5" = 'RangeNorm',
    'VastNorm')
})
# Reference selections, captured when the user presses "Pgo":
# a reference sample, a pooled group, and a reference feature.
PLSRnormMethod4 <- eventReactive(input$Pgo, {
  toString(input$Prsmpl)
})
PLSRnormMethod5 <- eventReactive(input$Pgo, {
  toString(input$Ppsmpl)
})
PLSRnormMethod6 <- eventReactive(input$Pgo, {
  toString(input$Prf)
})
# Runs MetaboAnalyst's Normalization() with the selected options and draws
# the before/after summary plot.
# Refactor: the four original branches repeated the full Normalization()
# call, differing only in the rowNorm keyword and the reference argument
# (specific sample / pooled group / reference feature / none). The mapping
# is computed once and Normalization() is invoked a single time, which
# keeps the transNorm/scaleNorm/ratio arguments from drifting apart.
output$PnormPlot <- renderPlot({
  rowChoice <- PLSRnormMethod1()
  if (rowChoice == "ProbNorm") {
    # probabilistic quotient normalisation vs a specific reference sample
    rowNorm <- "ProbNormF"; ref <- PLSRnormMethod4()
  } else if (rowChoice == "ProbNorm2") {
    # probabilistic quotient normalisation vs a pooled group average
    rowNorm <- "ProbNormT"; ref <- PLSRnormMethod5()
  } else if (rowChoice == "CompNorm") {
    # normalisation to a single reference feature
    rowNorm <- "CompNorm"; ref <- PLSRnormMethod6()
  } else {
    rowNorm <- rowChoice; ref <- NULL
  }
  Normalization(
    rowNorm=rowNorm,
    transNorm=PLSRnormMethod2(),
    scaleNorm=PLSRnormMethod3(),
    ref=ref,
    ratio=FALSE,
    ratioNum=20)
  PlotNormSum()
})
# Choice list for the number of PLS components: 2..comp.num, where
# comp.num = (#samples - 1), capped at 8. Returns NULL when fewer than
# two components are available (matching the original's empty result).
# Bug fixed: the original loop `for(i in 1:max-1)` relied on a precedence
# quirk — `1:max-1` is `(1:max)-1`, i.e. 0..(max-1) — and its i=0
# iteration silently wrote to n[0] (a no-op in R) while growing the
# vector element by element. The net result was the sequence 2..comp.num,
# produced here directly.
plrNumbers1 <- reactive({
  input$file2  # dependency only: refresh when a new file arrives
  comp.num <- dim(dataSet$norm)[1] - 1
  if (comp.num > 8) {
    comp.num <- 8
  }
  if (comp.num >= 2) seq(2, comp.num) else NULL
})
# Default component count suggested by MetaboAnalyst for paired PLS.
plrNumbers2 <- reactive({
  input$file2  # dependency only
  GetDefaultPLSPairComp()
})

# Component-count selector, pre-set to the suggested default.
output$plsrNum <- renderUI({
  selectInput("plsrCNo",
              label = h5("Select the number of components to be taken into account by the PLSR model"),
              selected = plrNumbers2(),
              choices = plrNumbers1())
})
# Once the PLSR button has been pressed, expose the plot-styling controls
# (two colour text boxes and a point-size spinner).
observeEvent(input$PLSRButton, output$PLSRcolor1 <- renderUI({
  textInput("PLSRC1", label=h5("Color for regression plot:"), value="green")
}))
observeEvent(input$PLSRButton, output$PLSRcolor2 <- renderUI({
  textInput("PLSRC2", label=h5("Color for CV plot:"), value="red")
}))
observeEvent(input$PLSRButton, output$PLSRptSize <- renderUI({
  numericInput("PLSRpts", label=h5("Point Size"), value=0.5)
}))
# Current values of the PLSR plotting controls.
PLSRPARA1 <- reactive({
  as.numeric(input$plsrCNo)  # number of components
})
PLSRPARA2 <- reactive({
  input$PLSRC1  # regression plot colour
})
PLSRPARA3 <- reactive({
  input$PLSRC2  # cross-validation plot colour
})
PLSRPARA4 <- reactive({
  as.numeric(input$PLSRpts)  # point size
})
# Next, declare reactive expressions for these input parameters, then plot the graphs.
# Finally, build the results table.
# Once the PLSR button is pressed, render the regression plot, the
# cross-validation plot, the prediction-overlay plot and the summary table.
# PlsRegPlot/plsRegPlotCV/predOvrlyPlt/PLSR.Table are defined elsewhere in
# this app; each takes the chosen component count, colour(s) and point size.
observeEvent(input$PLSRButton,output$plsrModel <- renderPlot({
PlsRegPlot(no=PLSRPARA1(),
color=PLSRPARA2(),
ptsize=PLSRPARA4())
}))
observeEvent(input$PLSRButton,output$plsrCV <- renderPlot({
plsRegPlotCV(no=PLSRPARA1(),
color=PLSRPARA3(),
ptsize=PLSRPARA4())
}))
observeEvent(input$PLSRButton,output$plsrOverLay <- renderPlot({
predOvrlyPlt(no=PLSRPARA1(),
color1=PLSRPARA2(),
color2=PLSRPARA3(),
ptsize=PLSRPARA4())
}))
observeEvent(input$PLSRButton,output$PLSRTab <- renderTable({
# prepend the row names as a first, blank-titled column so they survive
# renderTable's default display
m=PLSR.Table(no=PLSRPARA1())
k=cbind(row.names(m),m)
colnames(k)[1] <- c(" ")
return(k)
}))
#################################################################################################
#################################################################################################
#################################################################################################
#################################################################################################
################################### #############################
################################### ORIGINAL METABOANALYST R SCRIPTS ############################
################################### ############################
#################################################################################################
#################################################################################################
#################################################################################################
#################################################################################################
##################################################
## R script for MetaboAnalyst
## Description: data I/O
##
## Author: Jeff Xia, jeff.xia@mcgill.ca
## McGill University, Canada
##
## License: GNU GPL (>= 2)
###################################################
# create objects for storing data
# data type: list, conc, specbin, pktable, nmrpeak, mspeak, msspec
# anal type: stat, pathora, pathqea, msetora, msetssp, msetqea, ts, cmpdmap, smpmap
# Initialise the global state shared by all MetaboAnalyst routines:
# dataSet (uploaded data + metadata), analSet (analysis results),
# imgSet (generated images) and the error-message vector.
# Must be called before any data is read.
#   dataType: one of list/conc/specbin/pktable/nmrpeak/mspeak/msspec
#   analType: analysis type, e.g. "stat" (see comment above)
#   paired:   TRUE when samples are paired
# Note: everything is assigned with <<- into the global environment —
# this is how the original MetaboAnalyst scripts pass state around.
InitDataObjects <- function(dataType, analType, paired=F){
dataSet <<- list();
dataSet$type <<- dataType;
dataSet$design.type <<- "regular"; # one factor to two factor
dataSet$cls.type <<- "disc"; # default until specified otherwise
dataSet$format <<- "rowu";
dataSet$paired <<- paired;
analSet <<- list();
analSet$type <<- analType;
imgSet <<- list();
msg.vec <<- vector(mode="character");
current.msetlib <<- NULL;
conc.db <<- NULL;
cmpd.db <<- NULL;
# record the current name(s) to be transferred to client
require('Cairo'); # plotting required by all
# fix Mac font issue
CairoFonts("Arial:style=Regular","Arial:style=Bold","Arial:style=Italic","Helvetica","Symbol")
print("R objects intialized ...");
}
# for two factor time series only
# For two-factor time-series data only: record the experimental design
# type (stored lower-cased) in the global dataSet.
SetDesignType <- function(design) {
  dataSet$design.type <<- tolower(design)
}
# Read in the user uploaded CSV or TXT data,
# format: rowp, rowu, colp, colu
# label type: disc (for discrete) or cont (for continuous)
# Read the user-uploaded data table (CSV or TXT) into the global dataSet.
#   filePath: path to the uploaded file
#   format:   rowu/colu (unpaired) or rowp/colp (paired), optionally with a
#             "ts" suffix for two-factor time-series layouts
#   lbl.type: "disc" (discrete classes), "cont" (continuous) or "qc"
# Returns 1 on success, 0 on any validation failure (details via AddErrMsg).
# Side effects: fills dataSet$orig, dataSet$orig.cls, dataSet$read.msg and,
# for paired data, dataSet$pairs.
Read.TextData<-function(filePath, format="rowu", lbl.type="disc"){
dataSet$cls.type <<- lbl.type;
dataSet$format <<- format;
# crude extension sniff: last three characters of the path
formatStr <- substr(filePath, nchar(filePath)-2, nchar(filePath))
if(formatStr == "txt"){
dat <-try(read.table(filePath,header=TRUE,check.names=F, as.is=T));
}else{ # note, read.csv is more than read.table with sep=","
dat <-try(read.csv(filePath,header=TRUE,check.names=F, as.is=T));
}
# try to guess column numers and class labels (starts with #) from the top 20 rows
if(class(dat) == "try-error") {
AddErrMsg("Data format error. Failed to read in the data!");
AddErrMsg("Please check the followings: ");
AddErrMsg("Either sample or feature names must in UTF-8 encoding; Latin, Greek letters are not allowed.");
AddErrMsg("We recommend using a combination of English letters, underscore, and numbers for naming purpose");
AddErrMsg("Make sure sample names and feature (peak, compound) names are unique;");
AddErrMsg("Missing values should be blank or NA without quote.");
return(0);
}
# a single-column parse almost always means the wrong separator was used
if(ncol(dat) == 1){
AddErrMsg("Error: Make sure the data table is saved as comma separated values (.csv) format!");
AddErrMsg("Please also check the followings: ");
AddErrMsg("Either sample or feature names must in UTF-8 encoding; Latin, Greek letters are not allowed.");
AddErrMsg("We recommend to use a combination of English letters, underscore, and numbers for naming purpose.");
AddErrMsg("Make sure sample names and feature (peak, compound) names are unique.");
AddErrMsg("Missing values should be blank or NA without quote.");
return(0);
}
msg <- NULL;
# --- extract sample names, class labels and the concentration matrix ---
if(substring(format,4,5)=="ts"){
# two factor time series data
if(substring(format,1,3)=="row"){ # sample in row
msg<-c(msg, "Samples are in rows and features in columns");
smpl.nms <-dat[,1];
all.nms <- colnames(dat);
facA.lbl <- all.nms[2];
cls.lbl<-facA <- dat[,2]; # default assign facA to cls.lbl in order for one-factor analysis
facB.lbl <- all.nms[3];
facB <- dat[,3];
conc <- dat[,-c(1:3)];
var.nms <- colnames(conc);
}else{ # sample in col
msg<-c(msg, "Samples are in columns and features in rows.");
all.nms <- dat[,1];
facA.lbl <- all.nms[1];
cls.lbl <- facA <- dat[1,-1];
facB.lbl <- all.nms[2];
facB <- dat[2,-1];
var.nms <- dat[-c(1:2),1];
conc<-t(dat[-c(1:2),-1]);
smpl.nms <- rownames(conc);
}
facA <- as.factor(as.character(facA));
facB <- as.factor(as.character(facB));
if(dataSet$design.type =="time" | dataSet$design.type =="time0"){
# determine time factor
if(!(tolower(facA.lbl) == "time" | tolower(facB.lbl) == "time")){
AddErrMsg("No time points found in your data");
AddErrMsg("The time points group must be labeled as Time");
return(0);
}
}
}else{
if(substring(format,1,3)=="row"){ # sample in row
msg<-c(msg, "Samples are in rows and features in columns");
smpl.nms <-dat[,1];
dat[,1] <- NULL;
if(lbl.type == "qc"){
# QC uploads carry no class labels: store and return immediately
rownames(dat) <- smpl.nms;
dataSet$orig<<-dat;
dataSet$cmpd<<-colnames(dat);
return(1);
}
cls.lbl <- dat[,1];
conc <- dat[,-1];
var.nms <- colnames(conc);
}else{ # sample in col
msg<-c(msg, "Samples are in columns and features in rows.");
var.nms <- dat[-1,1];
dat[,1] <- NULL;
smpl.nms <- colnames(dat);
cls.lbl <- dat[1,];
conc<-t(dat[-1,]);
}
}
# free memory
dat <- NULL;
msg<-c(msg, "The uploaded file is in comma separated values (.csv) format.");
# try to remove empty line if present
# identified if no sample names provided
empty.inx <- is.na(smpl.nms) | smpl.nms == ""
if(sum(empty.inx) > 0){
msg<-c(msg, paste("", sum(empty.inx), "empty rows were detected and excluded from your data."));
smpl.nms <- smpl.nms[!empty.inx];
cls.lbl <- cls.lbl[!empty.inx];
conc <- conc[!empty.inx, ];
}
# try to check & remove empty lines if class label is empty
# Added by B. Han
empty.inx <- is.na(cls.lbl) | cls.lbl == ""
if(sum(empty.inx) > 0){
if(analSet$type != "roc"){
msg<-c(msg, paste("", sum(empty.inx), "empty labels were detected and excluded from your data."));
smpl.nms <- smpl.nms[!empty.inx];
cls.lbl <- cls.lbl[!empty.inx];
conc <- conc[!empty.inx, ];
}else{
# force all NA to empty string, otherwise NA will become "NA" class label
cls.lbl[is.na(cls.lbl)] <- "";
msg<-c(msg, paste("", sum(empty.inx), "new samples were detected from your data."));
}
}
if(analSet$type == "roc"){
if(length(unique(cls.lbl[!empty.inx])) > 2){
AddErrMsg("ROC analysis is only defined for two-group comparisions!");
return(0);
}
}
# try to remove check & remove empty line if sample name is empty
empty.inx <- is.na(smpl.nms) | smpl.nms == "";
if(sum(empty.inx) > 0){
msg<-c(msg,paste("", sum(empty.inx), "empty samples were detected and excluded from your data."));
smpl.nms <- smpl.nms[!empty.inx];
cls.lbl <- cls.lbl[!empty.inx];
conc <- conc[!empty.inx, ];
}
# check for uniqueness of dimension name
if(length(unique(smpl.nms))!=length(smpl.nms)){
dup.nm <- paste(smpl.nms[duplicated(smpl.nms)], collapse=" ");;
AddErrMsg("Duplicate sample names are not allowed!");
AddErrMsg(dup.nm);
return(0);
}
# try to remove check & remove empty line if feature name is empty
empty.inx <- is.na(var.nms) | var.nms == "";
if(sum(empty.inx) > 0){
msg<-c(msg,paste("", sum(empty.inx), "empty features were detected and excluded from your data."));
var.nms <- var.nms[!empty.inx];
conc <- conc[,!empty.inx];
}
if(length(unique(var.nms))!=length(var.nms)){
dup.nm <- paste(var.nms[duplicated(var.nms)], collapse=" ");
AddErrMsg("Duplicate feature names are not allowed!");
AddErrMsg(dup.nm);
return(0);
}
# now check for special characters in the data labels
# (iconv returns NA for names that cannot be represented)
if(sum(is.na(iconv(smpl.nms)))>0){
na.inx <- is.na(iconv(smpl.nms));
nms <- paste(smpl.nms[na.inx], collapse="; ");
AddErrMsg(paste("No special letters (i.e. Latin, Greek) are allowed in sample names!", nms, collapse=" "));
return(0);
}
if(sum(is.na(iconv(var.nms)))>0){
na.inx <- is.na(iconv(var.nms));
nms <- paste(var.nms[na.inx], collapse="; ");
AddErrMsg(paste("No special letters (i.e. Latin, Greek) are allowed in feature names!", nms, collapse=" "));
return(0);
}
# only keep alphabets, numbers, ",", "." "_", "-" "/"
smpl.nms <- gsub("[^[:alnum:]./_-]", "", smpl.nms);
var.nms <- gsub("[^[:alnum:][:space:],'./_-]", "", var.nms); # allow space, comma and period
cls.lbl <- ClearStrings(as.vector(cls.lbl));
# now assgin the dimension names
rownames(conc) <- smpl.nms;
colnames(conc) <- var.nms;
# check if paired or not
if(dataSet$paired){
# paired labels are signed integers: +i / -i mark the two members of pair i
label<-as.numeric(cls.lbl);
dataSet$orig.cls<<-as.factor(ifelse(label>0,1,0));
dataSet$pairs<<-label;
}else{
if(lbl.type == "disc"){
# check for class labels at least two replicates per class
if(min(table(cls.lbl)) < 3){
AddErrMsg(paste ("A total of", length(levels(as.factor(cls.lbl))), "groups found with", length(smpl.nms), "samples."));
AddErrMsg("At least three replicates are required in each group!");
AddErrMsg("Or maybe you forgot to specify the data format?");
return(0);
}
dataSet$orig.cls <<-dataSet$cls <<-as.factor(as.character(cls.lbl));
if(substring(format,4,5)=="ts"){
dataSet$orig.facA <<-dataSet$facA <<- as.factor(as.character(facA));
dataSet$facA.lbl <<- facA.lbl;
dataSet$orig.facB <<-dataSet$facB <<- as.factor(as.character(facB));
dataSet$facB.lbl <<- facB.lbl;
}
}else{ # continuous
dataSet$orig.cls <<- dataSet$cls <<- as.numeric(cls.lbl);
}
}
# for the current being to support MSEA and MetPA
if(dataSet$type == "conc"){
dataSet$cmpd <<- var.nms;
}
dataSet$orig<<-conc; # copy to be processed in the downstream
dataSet$read.msg<<-c(msg, paste("The uploaded data file contains ", nrow(conc),
" (samples) by ", ncol(conc), " (", tolower(GetVariableLabel()), ") data matrix.", sep=""));
return(1);
}
# Read peak list files
# NMR peak list input should be two-column numeric value (ppm, int), change ppm to mz and add dummy 'rt'
# MS peak list can be 2-col (mz, int), add dummy 'rt'
# MS can also be 3-col (mz, rt, int)
# Read a folder of peak-list CSV files (one subfolder per sample group)
# into dataSet$peakSet, in a layout compatible with xcmsSet@peaks so the
# xcms grouping algorithm can be applied directly.
# Accepted file shapes: 2 columns (ppm/mz, int — a dummy rt of 1000 is
# inserted) or 3 columns (mz, rt, int). Returns 1 on success, 0 on error.
Read.PeakList<-function(foldername){
suppressMessages(require(xcms));
msg <- c("The uploaded files are peak lists and intensities data.");
# the "upload" folder should contain several subfolders (groups)
# each of the subfolder contains samples (.csv files)
files<-dir(foldername, pattern=".[Cc][Ss][Vv]$", recursive=T, full.name=TRUE)
if (length(files) == 0) {
AddErrMsg("No peak list files (.csv) were found.");
return(0);
}
snames <- gsub("\\.[^.]*$", "", basename(files));
msg<-c(msg, paste("A total of ", length(files), "samples were found."));
sclass <- gsub("^\\.$", "sample", dirname(files));
# strip the common leading path prefix so only the group folder name
# remains as the class label (same approach as xcms' own file scanning)
scomp <- strsplit(substr(sclass, 1, min(nchar(sclass))), "");
scomp <- matrix(c(scomp, recursive = TRUE), ncol = length(scomp));
i <- 1
while(all(scomp[i,1] == scomp[i,-1]) && i < nrow(scomp)){
i <- i + 1;
}
i <- min(i, tail(c(0, which(scomp[1:i,1] == .Platform$file.sep)), n = 1) + 1)
if (i > 1 && i <= nrow(scomp)){
sclass <- substr(sclass, i, max(nchar(sclass)))
}
# some sanity check before proceeds
sclass <- as.factor(sclass);
if(length(levels(sclass))<2){
AddErrMsg("You must provide classes labels (at least two classes)!");
return(0);
}
# check for class labels at least three replicates per class
if(min(table(sclass)) < 3){
AddErrMsg("At least three replicates are required in each group!");
return(0);
}
# check for unique sample names
if(length(unique(snames))!=length(snames)){
AddErrMsg("Duplcate sample names are not allowed!");
dup.nm <- paste(snames[duplicated(snames)], collapse=" ");;
AddErrMsg("Duplicate sample names are not allowed!");
AddErrMsg(dup.nm);
return(0);
}
# change sample names to numbers
samp.num<-seq(1:length(snames));
names(samp.num)<-snames;
# create a matrix all.peaks compatible with xcmsSet@peaks matrix, so that grouping algorithm can be used directly
# the matrix should have "mz", "rt", "into", "sample" 4 columns used for grouping
# check 2 or 3 column
############## use try block to catch any error ##############
pks<-try(as.matrix(read.csv(files[1], header=T)));
if(class(pks) == "try-error") {
AddErrMsg("The CSV file is not formatted correctly!");
return(0);
};
########################################################
n.col<-ncol(pks);
if(n.col==2){
add=TRUE;
}else if(n.col==3){
add=FALSE;
}else{
AddErrMsg("Peak list file can only be 2 or 3 columns.");
return(0);
}
# read every file; all files must share the column count of the first
all.peaks<-NULL;
for(i in 1:length(files)){
print(files[i]);
pks<-as.matrix(read.csv(files[i], header=T));
if(ncol(pks)!=n.col){
AddErrMsg("Columns in each file are not the same!");
return(0);
}
if(add){ # NMR ppm+int or MS mz+int
pks<-cbind(pks[,1], 1000, pks[,2],samp.num[i]);
}else{
pks<-cbind(pks,samp.num[i]);
}
all.peaks<-rbind(all.peaks, pks);
}
msg<-c(msg, paste("These samples contain a total of ", dim(all.peaks)[1], "peaks." ));
msg<-c(msg, paste("with an average of ", round(dim(all.peaks)[1]/length(files), 1), "peaks per sample" ));
colnames(all.peaks)<-c("mz","rt","int","sample");
peakSet<-list(
peaks = all.peaks,
ncol = n.col,
sampclass = sclass,
sampnames = snames
);
dataSet$peakSet<<-peakSet;
dataSet$read.msg<<-msg;
return (1);
}
# read LC/GC-MS spectra(.netCDF, .mzXML, mzData)
# use functions in XCMS package
# Read raw LC/GC-MS spectra (.netCDF, .mzXML, .mzData) from a folder tree
# using xcms, then group the detected peaks. The parent folder of each
# file is taken as its class label. Stores the grouped xcmsSet in
# dataSet$xset.orig; returns 1 on success, 0 on validation failure
# (reason placed in dataSet$read.msg).
#   profmethod, fwhm: passed to xcmsSet(); bw: passed to group()
Read.MSspec<-function(folderName, profmethod='bin', fwhm=30, bw=30){
suppressMessages(require(xcms));
msfiles <- list.files(folderName, recursive=T, full.names=TRUE);
# first do some sanity check b4 spending more time on that
# note the last level is the file names, previous one should be the class label
dir.table <- t(data.frame(strsplit(msfiles, "/")));
cls.all<-dir.table[,ncol(dir.table)-1];
smpl.all <- dir.table[,ncol(dir.table)];
# check for groups
if(length(levels(as.factor(cls.all))) < 2){
dataSet$read.msg <<- "At least two groups are required!";
return(0);
}
# check for min samples in each group
if(min(table(cls.all)) < 3){
dataSet$read.msg <<- "At least three replicates are required in each group!";
return(0);
}
# check for unique sample names
if(length(unique(smpl.all))!=length(smpl.all)){
dataSet$read.msg <<- "Duplcate sample names are not allowed!";
return(0);
}
# peak detection (slow step), then retention-time grouping
xset <- xcmsSet(msfiles, profmethod = profmethod, fwhm=fwhm);
msg<-c(paste("In total,", length(xset@filepaths), "sample files were detected. "),
paste("They are divided into ", length(levels(xset@phenoData[,1]))," classes: ", paste(levels(xset@phenoData[,1]), collapse=', '), ".", sep=""));
xset<-group(xset, bw=bw);
dataSet$xset.orig<<-xset;
dataSet$read.msg<<-msg;
return(1);
}
# peak list or spectra files can be paired, the pair information
# is stored in a file with each line is a pair and names are separated by :,
# Read sample-pair definitions. Each whitespace-separated token in the
# file is one pair, written "name1:name2". Returns a named numeric
# vector: pair i contributes label i (first member) and -i (second
# member), named by the sample names.
# Fixes: replaced `1:length(all.pairs)` (which yields c(1, 0) for an
# empty file) with seq_len(), and replaced the name-accumulating loop —
# which grew a vector one pair at a time — with a single vectorised
# strsplit/unlist.
ReadPairFile <- function(filePath="pairs.txt"){
  all.pairs <- scan(filePath, what='character', strip.white = T);
  n.pairs <- length(all.pairs);
  # interleave +i / -i column-wise: 1, -1, 2, -2, ...
  labels <- as.vector(rbind(seq_len(n.pairs), -seq_len(n.pairs)));
  names(labels) <- unlist(strsplit(all.pairs, ":"), use.names=FALSE);
  labels;
}
# save the processed data with class names
# Write the original, processed and normalized data tables (whichever
# exist) to CSV in the working directory, each prefixed with the class
# labels (or both factor columns for two-factor time-series data).
# Tables wider than 200 columns are transposed before writing so they
# stay readable in spreadsheet software.
SaveTransformedData<-function(){
if(!is.null(dataSet$orig)){
lbls <- NULL;
tsFormat <- substring(dataSet$format,4,5)=="ts";
if(tsFormat){
lbls <- cbind(as.character(dataSet$orig.facA),as.character(dataSet$orig.facB));
colnames(lbls) <- c(dataSet$facA.lbl, dataSet$facB.lbl);
}else{
lbls <- cbind("Label"= as.character(dataSet$orig.cls));
}
orig.data<-cbind(lbls, dataSet$orig);
if(dim(orig.data)[2]>200){
orig.data<-t(orig.data);
}
write.csv(orig.data, file="data_original.csv");
if(!is.null(dataSet$proc)){
if(tsFormat){
lbls <- cbind(as.character(dataSet$proc.facA),as.character(dataSet$proc.facB));
colnames(lbls) <- c(dataSet$facA.lbl, dataSet$facB.lbl);
}else{
lbls <- cbind("Label"= as.character(dataSet$proc.cls));
}
proc.data<-cbind(lbls, dataSet$proc);
if(dim(proc.data)[2]>200){
proc.data<-t(proc.data);
}
write.csv(proc.data, file="data_processed.csv");
if(!is.null(dataSet$norm)){
if(tsFormat){
lbls <- cbind(as.character(dataSet$facA),as.character(dataSet$facB));
colnames(lbls) <- c(dataSet$facA.lbl, dataSet$facB.lbl);
}else{
lbls <- cbind("Label"= as.character(dataSet$cls));
}
# for ms peaks with rt and ms, insert two columns, without labels
# note in memory, features in columns
if(!is.null(dataSet$three.col)){
ids <- matrix(unlist(strsplit(colnames(dataSet$norm), "/")),ncol=2, byrow=T);
colnames(ids) <- c("mz", "rt");
new.data <- data.frame(ids, t(dataSet$norm));
write.csv(new.data, file="peak_normalized_rt_mz.csv");
}
norm.data<-cbind(lbls, dataSet$norm);
if(dim(norm.data)[2]>200){
norm.data<-t(norm.data);
}
write.csv(norm.data, file="data_normalized.csv");
}
}
}
}
# Append one error message to the global message vector, creating the
# vector on first use.
AddErrMsg <- function(msg) {
  if (!exists('msg.vec')) {
    msg.vec <<- vector(mode="character") # store error messages
  }
  msg.vec <<- append(msg.vec, msg)
}

# All error messages accumulated so far.
GetErrMsg <- function() {
  msg.vec
}
# Names of all KEGG pathways in the currently loaded pathway library (metpa).
GetKEGG.PathNames <- function() {
  names(metpa$path.ids)
}
# ID-translation helpers over the global compound database (cmpd.db) and
# the KEGG-to-SMPDB pathway map (path.map). Unmatched IDs yield NA.

# given a vector of KEGG IDs, return the matching compound names
KEGGID2Name <- function(ids) {
  idx <- match(ids, cmpd.db$kegg)
  cmpd.db[idx, 3]  # third column of cmpd.db holds the compound name
}

# given a vector of KEGG pathway IDs, return SMPDB IDs (human only)
KEGGPATHID2SMPDBIDs <- function(ids) {
  idx <- match(ids, path.map[, 1])
  path.map[idx, 3]
}

# given a vector of HMDB IDs, return the matching compound names
HMDBID2Name <- function(ids) {
  idx <- match(ids, cmpd.db$hmdb)
  cmpd.db[idx, "name"]
}

# given a vector of KEGG IDs, return the matching HMDB IDs
KEGGID2HMDBID <- function(ids) {
  idx <- match(ids, cmpd.db$kegg)
  cmpd.db[idx, "hmdb_id"]
}

# given a vector of HMDB IDs, return the matching KEGG IDs
HMDBID2KEGGID <- function(ids) {
  idx <- match(ids, cmpd.db$hmdb)
  cmpd.db[idx, "kegg_id"]
}
# Setters that stash externally supplied data into the global dataSet.

# save compound names for later mapping
Setup.MapData <- function(qvec) {
  dataSet$cmpd <<- qvec
}

# save a ready-made concentration table as the normalized data
Setup.ConcData <- function(conc) {
  dataSet$norm <<- conc
}

# save the biofluid type for single-sample profiling (SSP)
Setup.BiofluidType <- function(type) {
  dataSet$biofluid <<- type
}
# Class label of every processed sample, as plain strings.
GetLiteralGroupNames <- function() {
  as.character(dataSet$proc.cls)
}

# Distinct group names. For ROC analysis, samples with empty labels are
# the unlabeled "new" samples and are dropped before taking levels.
GetGroupNames <- function() {
  cls.lbl <- dataSet$proc.cls
  if (analSet$type != "roc") {
    return(levels(cls.lbl))
  }
  blank.inx <- is.na(cls.lbl) | cls.lbl == ""
  # re-factor so the removed labels do not linger as empty levels
  levels(factor(cls.lbl[!blank.inx]))
}

# Group names of the samples that entered analysis (post-normalisation).
GetNormGroupNames <- function() {
  levels(dataSet$cls)
}
# Record the organism code (e.g. "hsa") used by the pathway modules.
SetOrganism <- function(org) {
  inmex.org <<- org
}
#############################################################################################
#############################################################################################
#############################################################################################
#############################################################################################
#############################################################################################
##################################################
## R script for MetaboAnalyst
## Description: processing raw data types
##
## Author: Jeff Xia, jeff.xia@mcgill.ca
## McGill University, Canada
##
## License: GNU GPL (>= 2)
###################################################
# basic sanity check for the content
# return 1 or 0 based on the result
# Basic sanity check of the uploaded data (dataSet$orig):
#  - reports group structure (one- or two-factor), flags tiny groups;
#  - for paired data, loads/validates the +i/-i pair labels and reorders
#    samples so pair members are adjacent;
#  - coerces the table to numeric (stripping thousand separators),
#    drops constant columns, counts missing values, and records the
#    half-minimum-positive-value threshold (dataSet$minConc).
# Fills dataSet$preproc / proc.cls / check.msg. Returns 1 on success,
# 0 on failure (details via AddErrMsg).
SanityCheckData<-function(){
msg = NULL;
cls=dataSet$orig.cls;
dataSet$small.smpl.size <<- 0;
# check class info
if(dataSet$cls.type == "disc"){
if(substring(dataSet$format,4,5)=="ts"){
if(dataSet$design.type =="time"){
msg<-c(msg, "The data is time-series data.");
}else{
msg<-c(msg, "The data is not time-series data.");
}
clsA.num <- length(levels(dataSet$facA));
clsB.num <- length(levels(dataSet$facB));
msg<-c(msg, paste(clsA.num, "groups were detected in samples for factor", dataSet$facA.lbl));
msg<-c(msg, paste(clsB.num, "groups were detected in samples for factor", dataSet$facB.lbl));
}else{
# checking if too many groups but a few samples in each group
cls.lbl <- dataSet$orig.cls;
min.grp.size <- min(table(cls.lbl));
cls.num <- length(levels(cls.lbl));
if(cls.num/min.grp.size > 3){
dataSet$small.smpl.size <<- 1;
msg <- c(msg, "Too many groups with very small number of replicates!");
msg <- c(msg, "Only a subset of methods will be available for analysis!");
}
msg<-c(msg, paste(cls.num, "groups were detected in samples."));
dataSet$cls.num <<- cls.num;
dataSet$min.grp.size <<- min.grp.size;
if(dataSet$paired){
msg<-c(msg,"Samples are paired.");
# need to first set up pair information if not csv file
if(!(dataSet$type=="conc" | dataSet$type=="specbin" | dataSet$type=="pktable" )){
pairs<-ReadPairFile();
# check if they are of the right length
if(length(pairs)!=nrow(dataSet$orig)){
AddErrMsg("Error: the total paired names are not equal to sample names.");
return(0);
}else{
# matching the names of the files
inx<-match(rownames(dataSet$orig),names(pairs));
#check if all matched exactly
if(sum(is.na(inx))>0){
AddErrMsg("Error: some paired names not match the sample names.");
return(0);
}else{
dataSet$pairs<<-pairs[inx];
}
}
}
pairs<-dataSet$pairs;
# valid pair labels are exactly -n/2..-1 and 1..n/2, each occurring once
lev<-unique(pairs);
uni.cl<-length(lev);
uni.cl.abs<-uni.cl/2;
sorted.pairs<-sort(pairs,index=TRUE);
if(!all(sorted.pairs$x==c(-uni.cl.abs:-1,1:uni.cl.abs))){
AddErrMsg("There are some problems in paired sample labels! ");
if(uni.cl.abs != round(uni.cl.abs)){
AddErrMsg("The total samples must be of even number!");
}else{
AddErrMsg(paste("And class labels between ",-uni.cl.abs,
" and 1, and between 1 and ",uni.cl.abs,".",sep=""));
}
return(0);
}else{
msg<-c(msg,"The labels of paired samples passed sanity check.");
msg<-c(msg, paste("A total of", uni.cl.abs, "pairs were detected."));
# make sure paired samples are sorted 1:n/2 and -1:-n/2
x<-sorted.pairs$ix[(uni.cl.abs+1):uni.cl]
y<-sorted.pairs$ix[uni.cl.abs:1]
index<-as.vector(cbind(x,y));
dataSet$pairs<<-pairs[index];
dataSet$orig.cls<<-cls[index];
dataSet$orig<<-dataSet$orig[index,];
}
}else{
msg<-c(msg,"Samples are not paired.");
}
}
}
msg<-c(msg,"Only English letters, numbers, underscore, hyphen and forward slash (/) are allowed.");
msg<-c(msg,"Other special characters or punctuations (if any) will be stripped off.");
int.mat=dataSet$orig;
# check numerical matrix
rowNms <- rownames(int.mat);
colNms <- colnames(int.mat);
naNms <- sum(is.na(int.mat));
num.mat<-apply(int.mat, 2, as.numeric)
if(sum(is.na(num.mat)) > naNms){
# try to remove "," in thousand seperator if it is the cause
num.mat <- apply(int.mat,2,function(x) as.numeric(gsub(",", "", x)));
if(sum(is.na(num.mat)) > naNms){
msg<-c(msg,"Non-numeric values were found and replaced by NA.");
}else{
msg<-c(msg,"All data values are numeric.");
}
}else{
msg<-c(msg,"All data values are numeric.");
}
int.mat <- num.mat;
rownames(int.mat)<-rowNms;
colnames(int.mat)<-colNms;
# check for columns with all constant (var =0)
varCol <- apply(int.mat, 2, var, na.rm=T);
constCol <- (varCol == 0 | is.na(varCol));
constNum <- sum(constCol, na.rm=T);
if(constNum > 0){
msg<-c(msg, paste("", constNum, "columns with constant or a single value were found and deleted."));
int.mat <- int.mat[,!constCol];
}
# check zero, NA values
totalCount <-nrow(int.mat)*ncol(int.mat);
naCount<-sum(is.na(int.mat));
naPercent<-round(100*naCount/totalCount,1)
msg<-c(msg, paste("A total of ", naCount, " (", naPercent, "%) missing values were detected.", sep=""));
msg<-c(msg, " By default, these values will be replaced by a small value. "
#"Click Skip button if you accept the default practice",
#"Or click Missing value imputation to use other methods"
);
# obtain original half of minimal positive value (threshold)
minConc<-min(int.mat[int.mat>0], na.rm=T)/2;
dataSet$minConc<<-minConc;
dataSet$preproc <<- as.data.frame(int.mat);
dataSet$proc.cls <<- dataSet$orig.cls;
if(substring(dataSet$format,4,5)=="ts"){
dataSet$proc.facA <<- dataSet$orig.facA;
dataSet$proc.facB <<- dataSet$orig.facB;
}
dataSet$check.msg <<- c(dataSet$read.msg, msg);
return(1);
}
# Number of sample groups found during the sanity check.
GetGroupNumber <- function() {
  dataSet$cls.num
}

# 1 when there are many groups with few replicates (restricts the set of
# available analyses), 0 otherwise.
IsSmallSmplSize <- function() {
  dataSet$small.smpl.size
}

# Size of the smallest sample group.
GetMinGroupSize <- function() {
  dataSet$min.grp.size
}

# Whether the processed data still contains negative values.
IsDataContainsNegative <- function() {
  dataSet$containsNegative
}
################################################################
# Note: the following step directly modifies the dataSet$proc
#################################################################
# replace zero/missing values by half of the minimum pos values, this is the default
# also we will call this method after all missing value imputation if conducted
# Default missing-value treatment: replace zeros and NAs with half of the
# smallest positive value in the original data (dataSet$minConc, computed
# by SanityCheckData). This is the last preprocessing step; the result is
# saved as dataSet$proc.
ReplaceMin <- function(int.mat=as.matrix(dataSet$preproc)) {
  lod <- dataSet$minConc
  # negative values are deliberately left unchanged
  fix.inx <- int.mat == 0 | is.na(int.mat)
  int.mat[fix.inx] <- lod
  dataSet$proc <<- as.data.frame(int.mat)
  dataSet$replace.msg <<- paste("Zero or missing variables were replaced with a small value:", lod)
  rm(int.mat)
  gc()
}
# remove variable with over certain percentage values are missing
# NOTE(review): the source file was corrupted here — the tail of
# RemoveMissingPercent and the head of ImputeVar were fused into one
# garbled line ("...nrow(int.mat)0){"). Both functions are reconstructed
# below following the upstream MetaboAnalyst processing script; confirm
# against the original repository.

# Remove features (columns) whose fraction of missing values is at or
# above `percent` (0-1). Updates dataSet$preproc and replace.msg.
RemoveMissingPercent<-function(int.mat=dataSet$preproc, percent=perct){
minConc<-dataSet$minConc;
good.inx <- apply(is.na(int.mat), 2, sum)/nrow(int.mat) < percent;
dataSet$preproc <<- as.data.frame(int.mat[, good.inx]);
msg <- paste(sum(!good.inx), "variables were removed for threshold", round(100*percent, 2), "percent.");
dataSet$replace.msg <<- msg;
}
# Impute the remaining missing values with the selected method and save
# the result as dataSet$proc.
#   method: "exclude" (drop columns with any NA), "colmin" (half of the
#           column minimum), "mean", "median", "knn" (impute package),
#           "bpca"/"ppca"/"svdImpute" (pcaMethods package)
ImputeVar<-function(int.mat=dataSet$preproc, method="min"){
new.mat<-NULL;
msg<-NULL;
if(method=="exclude"){
good.inx<-apply(is.na(int.mat), 2, sum)==0
new.mat<-int.mat[,good.inx];
msg <- c(msg,"Variables with missing values were excluded.");
}else if(method=="colmin"){
new.mat<-apply(int.mat, 2, function(x){
if(sum(is.na(x))>0){
x[is.na(x)]<-min(x,na.rm=T)/2;
}
x;
});
msg <- c(msg,"Missing variables were replaced with the half of minimum values for each feature column.");
}else if (method=="mean"){
new.mat<-apply(int.mat, 2, function(x){
if(sum(is.na(x))>0){
x[is.na(x)]<-mean(x,na.rm=T);
}
x;
});
msg <- c(msg,"Missing variables were replaced with mean.");
}else if (method == "median"){
new.mat<-apply(int.mat, 2, function(x){
if(sum(is.na(x))>0){
x[is.na(x)]<-median(x,na.rm=T);
}
x;
});
msg <- c(msg,"Missing variables were replaced with median.");
}else {
if(method == "knn"){
suppressMessages(require(impute));
#print("loading for KNN...");
new.mat<-t(impute.knn(t(int.mat))$data);
}else{
suppressMessages(require(pcaMethods));
if(method == "bpca"){
new.mat<-pca(int.mat, nPcs =5, method="bpca", center=T)@completeObs;
}else if(method == "ppca"){
new.mat<-pca(int.mat, nPcs =5, method="ppca", center=T)@completeObs;
}else if(method == "svdImpute"){
new.mat<-pca(int.mat, nPcs =5, method="svdImpute", center=T)@completeObs;
}
}
msg <- c(msg, paste("Missing variables were imputated using", toupper(method)));
}
dataSet$proc <<- as.data.frame(new.mat);
dataSet$replace.msg <<- msg;
}
# to deal with negative values; this runs after missing-value handling,
# so operate on dataSet$proc
# Handle negative values in the processed table (dataSet$proc).
#   method "min"  - replace negatives with the small value dataSet$minConc
#   method "abs"  - replace negatives with their absolute values
#   otherwise     - exclude any column that contains a negative value
# Clears the containsNegative flag and appends a message on completion.
ClearNegatives <- function(int.mat=as.matrix(dataSet$proc), method="abs"){
  if(dataSet$containsNegative){
    if(method == "min"){
      int.mat[int.mat < 0] <- dataSet$minConc;
      msg <- paste("Negative variables were replaced with a small value:", dataSet$minConc);
    }else if(method =="abs"){
      int.mat <- abs(int.mat);
      msg <- paste("Negative variables were replaced with their absolute values");
    }else{ # exclude
      good.inx<-apply(int.mat<0, 2, sum)==0
      # bug fix: the filtered matrix was previously assigned to an unused
      # 'new.mat', so the exclusion never reached dataSet$proc below
      int.mat<-int.mat[,good.inx, drop=FALSE];
      msg <- paste("Columns contains negative variables were excluded");
    }
    dataSet$containsNegative <<- 0;
    dataSet$replace.msg <<- c(dataSet$replace.msg, msg);
    dataSet$proc <<- as.data.frame(int.mat);
  }
}
# Group peak list based on position using xcms algorithm (align peaks wrt rt and mz)
# NMR peaks change ppm -> mz and add dummy rt
# 2-col MS need to add dummy rt
# 3-col MS can be used directly
# default mzwid MS 0.25 m/z, NMR 0.03 ppm
# bw 30 for LCMS, 5 for GCMS
# Group peaks across samples (xcms-style "density" grouping): slide
# overlapping m/z slices of width mzwid across the sorted peak list, and
# within each slice find clusters of peaks along retention time via kernel
# density estimation with bandwidth bw.
#   mzwid   - width of an m/z slice
#   bw      - retention-time density bandwidth
#   minfrac - min fraction of samples of one class required to keep a group
#   minsamp - min absolute number of samples required to keep a group
#   max     - max number of groups extracted per m/z slice
# Reads dataSet$peakSet and writes the resulting groups/groupidx back to it.
# Relies on helpers defined elsewhere in the file: findEqualGreaterM,
# descendMin, rectUnique.
GroupPeakList<-function(mzwid = 0.25, bw = 30, minfrac = 0.5, minsamp = 1, max = 50) {
peakSet<-dataSet$peakSet;
# NOTE(review): 'samples' is read but never used below
samples <- peakSet$sampnames;
classlabel <- peakSet$sampclass;
classnames <- levels(classlabel)
classlabel <- as.vector(unclass(classlabel))
# classnum[i] = number of samples in class i
classnum <- integer(max(classlabel))
for (i in seq(along = classnum)){
classnum[i] <- sum(classlabel == i)
}
# sort all peaks by m/z; keep the permutation to map back to original rows
peakmat <- peakSet$peaks
porder <- order(peakmat[,"mz"])
peakmat <- peakmat[porder,,drop=F]
rownames(peakmat) <- NULL
retrange <- range(peakmat[,"rt"])
# a slice must contain at least half the size of the smallest class
minpeakmat <- min(classnum)/2
# overlapping slice boundaries, stepping by half a slice width
mass <- seq(peakmat[1,"mz"], peakmat[nrow(peakmat),"mz"] + mzwid, by = mzwid/2)
masspos <- findEqualGreaterM(peakmat[,"mz"], mass)
# output containers, grown by doubling when full (see below)
groupmat <- matrix(nrow = 512, ncol = 7 + length(classnum))
groupindex <- vector("list", 512)
endidx <- 0
num <- 0
gcount <- integer(length(classnum))
for (i in seq(length = length(mass)-2)) {
startidx <- masspos[i]
endidx <- masspos[i+2]-1
if (endidx - startidx + 1 < minpeakmat)
next
speakmat <- peakmat[startidx:endidx,,drop=FALSE]
# retention-time density of the peaks in this m/z slice
den <- density(speakmat[,"rt"], bw, from = retrange[1]-3*bw, to = retrange[2]+3*bw)
maxden <- max(den$y)
deny <- den$y
# NOTE(review): 'gmat' is allocated but never used
gmat <- matrix(nrow = 5, ncol = 2+length(classnum))
snum <- 0
# repeatedly peel off the highest density peak until it falls below 5%
# of the maximum, or the per-slice group limit is reached
while (deny[maxy <- which.max(deny)] > maxden/20 && snum < max) {
grange <- descendMin(deny, maxy)
deny[grange[1]:grange[2]] <- 0
gidx <- which(speakmat[,"rt"] >= den$x[grange[1]] & speakmat[,"rt"] <= den$x[grange[2]])
# count how many distinct samples of each class fall in this group
gnum <- classlabel[unique(speakmat[gidx,"sample"])]
for (j in seq(along = gcount))
gcount[j] <- sum(gnum == j)
if (! any(gcount >= classnum*minfrac & gcount >= minsamp))
next
snum <- snum + 1
num <- num + 1
### Double the size of the output containers if they're full
if (num > nrow(groupmat)) {
groupmat <- rbind(groupmat, matrix(nrow = nrow(groupmat), ncol = ncol(groupmat)))
groupindex <- c(groupindex, vector("list", length(groupindex)))
}
# summary statistics of the group, plus per-class sample counts
groupmat[num, 1] <- median(speakmat[gidx, "mz"])
groupmat[num, 2:3] <- range(speakmat[gidx, "mz"])
groupmat[num, 4] <- median(speakmat[gidx, "rt"])
groupmat[num, 5:6] <- range(speakmat[gidx, "rt"])
groupmat[num, 7] <- length(gidx)
groupmat[num, 7+seq(along = gcount)] <- gcount
# map slice-local indices back to rows of the original peak table
groupindex[[num]] <- sort(porder[(startidx:endidx)[gidx]])
}
}
colnames(groupmat) <- c("mzmed", "mzmin", "mzmax", "rtmed", "rtmin", "rtmax",
"npeaks", classnames)
groupmat <- groupmat[seq(length = num),]
groupindex <- groupindex[seq(length = num)]
# Remove groups that overlap with more "well-behaved" groups
numsamp <- rowSums(groupmat[,(match("npeaks", colnames(groupmat))+1):ncol(groupmat),drop=FALSE])
uorder <- order(-numsamp, groupmat[,"npeaks"])
uindex <- rectUnique(groupmat[,c("mzmin","mzmax","rtmin","rtmax"),drop=FALSE],
uorder)
peakSet$groups <- groupmat[uindex,];
peakSet$groupidx<- groupindex[uindex];
dataSet$peakSet<<-peakSet;
}
# object is nmr.xcmsSet object
# Build the group-by-sample intensity matrix from the grouped peak list in
# dataSet$peakSet. For each peak group, multiple peaks from the same sample
# are summed; a sample with no peak in a group gets NA. Row names are
# "mz" (2-column input) or "mz/rt" (3-column input). The transposed matrix
# (samples in rows) is stored as dataSet$orig.
SetPeakList.GroupValues<-function() {
peakSet <- dataSet$peakSet;
msg<-dataSet$peakMsg;
peakmat <- peakSet$peaks;
groupmat <- peakSet$groups;
groupindex <- peakSet$groupidx;
sampnum <- seq(length = length(peakSet$sampnames))
intcol <- match("int", colnames(peakmat))
sampcol <- match("sample", colnames(peakmat))
# row is peak, col is sample
values <- matrix(nrow = length(groupindex), ncol = length(sampnum))
for (i in seq(along = groupindex)) {
# for each group, replace multiple peaks from the same sample by their sum
for(m in sampnum){
samp.inx<-which(peakmat[groupindex[[i]], sampcol]==m)
if(length(samp.inx)>0){
values[i, m] <- sum(peakmat[groupindex[[i]][samp.inx], intcol]);
}else{
values[i, m] <- NA;
}
}
}
msg<-c(msg, paste("A total of", length(groupindex), "peak groups were formed. "));
msg<-c(msg, paste("Peaks of the same group were summed if they are from one sample. "));
msg<-c(msg, paste("Peaks appear in less than half of samples in each group were ignored."));
colnames(values) <- peakSet$sampnames;
if(peakSet$ncol==2){
# 2-column (e.g. NMR) input: label rows by median m/z only
rownames(values) <- paste(round(groupmat[,paste("mz", "med", sep="")],5));
}else{
# 3-column MS input: label rows as "mz/rt" and flag three-column mode
rownames(values) <- paste(round(groupmat[,paste("mz", "med", sep="")],5), "/", round(groupmat[,paste("rt", "med", sep="")],2), sep="");
dataSet$three.col <<- T;
}
# transpose so samples are rows, as expected by downstream processing
dataSet$orig<<-t(values);
dataSet$proc.msg<<-msg;
dataSet$orig.cls<<-as.factor(peakSet$sampclass);
}
# retention time correction for LC/GC-MS spectra
# Retention-time correction for LC/GC-MS spectra: correct RT on the original
# xcmsSet, regroup the peaks with the supplied bandwidth, and store the
# result as dataSet$xset.rt.
MSspec.rtCorrection<-function(bw=30){
  corrected <- retcor(dataSet$xset.orig)
  # re-group peaks after retention time correction
  corrected <- group(corrected, bw=bw)
  dataSet$xset.rt <<- corrected;
}
# plot retention time corrected spectra
# Render the retention-time-corrected spectra plot to an image file.
#   imgName - base file name (dpi/format suffix is appended)
#   width   - NA for default size; 0 for default size plus registering the
#             image in imgSet for the report; otherwise the width in inches
PlotMS.RT<-function(imgName, format="png", dpi=72, width=NA){
  imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
  if(is.na(width)){
    w <- 9;
  }else if(width == 0){
    # zero width: use the default size and remember this image for the report
    imgSet$msrt<<-imgName;
    w <- 9;
  }else{
    w <- width;
  }
  # keep a fixed 9:7 aspect ratio
  h <- w*7/9;
  Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
  plotrt(dataSet$xset.rt);
  #dev.off();
}
# fill in missing peaks
# Fill in missing peaks on the RT-corrected xcmsSet, store the filled set as
# dataSet$xset.fill, and record a summary message with the peak counts.
MSspec.fillPeaks<-function(){
  filled <- fillPeaks(dataSet$xset.rt);
  dataSet$xset.fill <<- filled;
  peak.count <- dim(filled@peaks)[1];
  spec.count <- dim(filled@phenoData)[1];
  msg <- paste("A total of", peak.count, "peaks were detected from these samples");
  msg <- c(msg, paste("with an average of", round(peak.count/spec.count, 2), "peaks per spectrum."));
  dataSet$xset.msg <<- msg;
}
# into: integrated area of original (raw) peak
# intf: integrated area of filtered peak
# maxo: maximum intensity of original (raw) peak
# maxf: maximum intensity of filtered peak
# Build the sample-by-group intensity data frame from the filled xcmsSet.
#   intvalue - which peak measure to extract ("into", "maxo" or "intb")
# Stores the table as dataSet$orig with class labels in dataSet$orig.cls.
SetupMSdataMatrix<-function(intvalue = c("into","maxo","intb")){
  values <- groupval(dataSet$xset.fill, "medret", value = intvalue);
  # transpose to make row for samples
  orig<-as.data.frame(t(values));
  # fix: msg was previously fetched from dataSet$xset.msg twice; the second
  # fetch silently discarded nothing but was dead code
  msg<-dataSet$xset.msg;
  msg=c(msg, paste("These peaks were aligned", dim(orig)[2], "groups according to their mass and retention time."));
  msg=c(msg, paste("Please note, some peaks were excluded if they appear in only a few samples."));
  dataSet$xset.msg<<-msg;
  dataSet$orig<<-orig;
  dataSet$orig.cls<<-as.factor(sampclass(dataSet$xset.fill))
}
# Check that all three spectra processing stages produced results:
# reading (xset.orig), RT correction (xset.rt) and peak filling (xset.fill).
# Returns 1 on success; otherwise appends a diagnostic message to
# dataSet$xset.msg and returns 0.
IsSpectraProcessingOK<-function(){
  msg<-dataSet$xset.msg;
  if(is.null(dataSet$xset.orig)){
    dataSet$xset.msg<<-c(msg, "Failed to read in and process the spectra.");
    return(0);
  }
  if(is.null(dataSet$xset.rt)){
    # typo fixed: message previously read "Faiedl in retention time correction"
    dataSet$xset.msg<<-c(msg, "Failed in retention time correction, spectra problem?");
    return(0);
  }
  if(is.null(dataSet$xset.fill)){
    dataSet$xset.msg<<-c(msg, "Failed in filling missing peaks, spectra problem?");
    return(0);
  }
  return(1);
}
####################################################################
### ========= methods for non-specific filtering of variables====###
####################################################################
# the final variable should be less than 5000 for effective computing
# Non-specific feature filtering. Ranks features by a per-column statistic
# chosen via 'filter' and marks the lowest-ranked fraction for removal so that
# downstream computation stays tractable (hard cap of 5000 features).
# The logical keep-vector is stored in dataSet$remain; nothing is removed here.
FilterVariable <- function(filter){
  int.mat <- as.matrix(dataSet$proc);
  feat.num <- ncol(int.mat);
  nm <- NULL;
  if(filter == "none" && feat.num <= 2000){ # only allow for less than 2000
    remain <- rep(TRUE, feat.num);
    msg <- "No data filtering was applied";
  }else{
    # per-feature column statistic used as the ranking value
    col.stat <- function(f) apply(int.mat, 2, f, na.rm=T);
    filter.val <- switch(filter,
      rsd = {
        nm <- "Relative standard deviation";
        abs(col.stat(sd)/col.stat(mean));
      },
      nrsd = {
        nm <- "Non-paramatric relative standard deviation";
        abs(col.stat(mad)/col.stat(median));
      },
      mean = {
        nm <- "mean";
        col.stat(mean);
      },
      sd = {
        nm <- "standard deviation";
        col.stat(sd);
      },
      mad = {
        nm <- "Median absolute deviation";
        col.stat(mad);
      },
      median = {
        nm <- "median";
        col.stat(median);
      },
      { # default: interquartile range
        nm <- "Interquantile Range";
        col.stat(IQR);
      });
    # rank features so the largest statistic gets rank 1; random tie-breaking
    rk <- rank(-filter.val, ties.method='random');
    var.num <- ncol(int.mat);
    # the more features, the more aggressive the cut
    if(var.num < 250){
      keep.frac <- 0.95; pct.lbl <- "5";
    }else if(var.num < 500){
      keep.frac <- 0.9; pct.lbl <- "10";
    }else if(var.num < 1000){
      keep.frac <- 0.75; pct.lbl <- "25";
    }else{
      keep.frac <- 0.6; pct.lbl <- "40";
    }
    remain <- rk < var.num*keep.frac;
    msg <- paste(paste0("Reduce ", pct.lbl, "\\% features ("), sum(!remain), ") based on", nm);
    # hard cap: if still over 5000 after the 40% cut, keep only the top 5000
    if(var.num >= 1000 && sum(remain) > 5000){
      remain <- rk < 5000;
      msg <- paste("Reduced to 5000 features based on", nm);
    }
  }
  dataSet$remain <<- remain;
  dataSet$filter.msg <<- msg;
  print(msg);
}
# create a summary table for each type of uploaded data
# csv table - 5 col: sampleID, feature #, zero, missing #,
# Print an xtable summary of the data processing results, one row per sample.
# The columns depend on the input type:
#   conc/pktable/specbin - positive features, missing/zero count, processed count
#   nmrpeak/mspeak       - raw peak count, missing/zero count, processed count
#   spectra (default)    - raw peak count, filled peak count, processed count
CreateSummaryTable<-function(){
suppressMessages(require(xtable));
sum.dat<-NULL;
# number of features after processing (same for every sample)
plenth<-dim(dataSet$proc)[2];
if(dataSet$type=='conc'| dataSet$type=='pktable'| dataSet$type=='specbin'){
for(i in 1:nrow(dataSet$orig)){
srow<-dataSet$orig[i,];
# positive values, then NA plus non-positive values, then processed count
newrow<-c(sum(srow[!is.na(srow)]>0), (sum(is.na(srow)) + sum(srow[!is.na(srow)]<=0)), plenth);
sum.dat<-rbind(sum.dat, newrow);
}
colnames(sum.dat)<-c("Features (positive)","Missing/Zero","Features (processed)");
rownames(sum.dat)<-row.names(dataSet$orig);
}else if(dataSet$type=="nmrpeak"| dataSet$type=="mspeak"){ # peak list
pkSet<-dataSet$peakSet;
snames<-pkSet$sampnames;
for(i in 1:length(snames)){
# raw peaks belonging to sample i in the peak table
samp.inx<-pkSet$peaks[,"sample"]==i;
srow<-dataSet$orig[i,];
newrow<-c(sum(samp.inx),(sum(is.na(srow)) + sum(srow[!is.na(srow)]<=0)), plenth);
sum.dat<-rbind(sum.dat, newrow);
}
colnames(sum.dat)<-c("Peaks (raw)","Missing/Zero", "Peaks (processed)");
rownames(sum.dat)<-row.names(dataSet$orig);
}else{ # spectra
rawxset<-dataSet$xset.orig;
fillxset<-dataSet$xset.fill;
snames<-row.names(rawxset@phenoData)
for(i in 1:length(snames)){
# per-sample peak counts before and after peak filling
rawno<-sum(rawxset@peaks[,"sample"]==i);
fillno<-sum(fillxset@peaks[,"sample"]==i);
newrow<-c(rawno,fillno,plenth);
sum.dat<-rbind(sum.dat, newrow);
}
colnames(sum.dat)<-c("Peaks (raw)","Peaks (fill)", "Peaks(processed)");
rownames(sum.dat)<-row.names(dataSet$orig);
}
print(xtable(sum.dat, caption="Summary of data processing results"), caption.placement="top", size="\\scriptsize");
}
# mat are log normalized, diff will be ratio
# Compute all pairwise column differences col_i - col_j (for i < j) of a
# log-scale matrix; on log data these differences are log ratios. Result
# columns are named "namei/namej" and values are rounded to 5 decimals.
CalculatePairwiseDiff <- function(mat){
  f <- function(i, mat) {
    z <- mat[, i-1] - mat[, i:ncol(mat), drop = FALSE]
    colnames(z) <- paste(colnames(mat)[i-1], colnames(z), sep = "/")
    z
  }
  # bug fix: sapply() would simplify the single-matrix result of a 2-column
  # input into a plain array, breaking the cbind below; lapply keeps a list
  res <- do.call("cbind", lapply(2:ncol(mat), f, mat));
  round(res,5);
}
##############################################################################################
##############################################################################################
##############################################################################################
##############################################################################################
##################################################
## R script for MetaboAnalyst
## Description: perform various normalization
##
## Author: Jeff Xia, jeff.xia@mcgill.ca
## McGill University, Canada
##
## License: GNU GPL (>= 2)
###################################################
###############################################################
# remove the sample or feature from data
# Note: this should happen after processing and before normalization
# dataSet$proc dataSet$proc.cls (make a copy of this pair for restore)
########################################################
# Restrict the pre-normalization data to the groups named in the global
# vector grp.nm.vec. Returns 1 on success, 0 when the vector is missing.
UpdateGroupItems<-function(){
  if(!exists("grp.nm.vec")){
    current.msg <<- "Cannot find the current group names!";
    return (0);
  }
  keep <- dataSet$proc.cls %in% grp.nm.vec;
  dataSet$prenorm <<- dataSet$proc[keep,];
  dataSet$prenorm.cls <<- factor(dataSet$proc.cls[keep], levels=grp.nm.vec);
  # two-factor (time-series) designs carry both factors along
  if(substring(dataSet$format,4,5)=="ts"){
    dataSet$prenorm.facA <<- factor(dataSet$proc.facA[keep],levels=grp.nm.vec);
    dataSet$prenorm.facB <<- factor(dataSet$proc.facB[keep],levels=grp.nm.vec);
  }
  current.msg <<- "Successfully updated the group items!";
  return (1);
}
# Restrict the pre-normalization data to the samples named in the global
# vector smpl.nm.vec. Returns 1 on success, 0 when the vector is missing.
UpdateSampleItems<-function(){
  if(!exists("smpl.nm.vec")){
    current.msg <<- "Cannot find the current sample names!";
    return (0);
  }
  keep <- rownames(dataSet$proc) %in% smpl.nm.vec;
  dataSet$prenorm <<- dataSet$proc[keep,];
  dataSet$prenorm.cls <<- dataSet$proc.cls[keep];
  # two-factor (time-series) designs carry both factors along
  if(substring(dataSet$format,4,5)=="ts"){
    dataSet$prenorm.facA <<- dataSet$proc.facA[keep];
    dataSet$prenorm.facB <<- dataSet$proc.facB[keep];
  }
  current.msg <<- "Successfully updated the sample items!";
  return (1);
}
# Restrict the pre-normalization data to the features (columns) named in the
# global vector feature.nm.vec. Returns 1 on success, 0 when missing.
UpdateFeatureItems<-function(){
  if(!exists("feature.nm.vec")){
    current.msg <<- "Cannot find the selected feature names!";
    return (0);
  }
  hit.inx <- colnames(dataSet$proc) %in% feature.nm.vec;
  dataSet$prenorm <<- dataSet$proc[,hit.inx];
  dataSet$prenorm.cls <<- dataSet$proc.cls; # samples unchanged, keep classes
  # bug fix: message previously said "sample items"
  current.msg <<- "Successfully updated the feature items!";
  return (1);
}
# Master normalization routine: (1) apply the feature filter recorded in
# dataSet$remain, (2) row-wise (per-sample) normalization, (3) optional
# transformation, (4) column-wise scaling. Results are written to
# dataSet$norm / dataSet$cls plus the method-name bookkeeping fields.
#   rowNorm   - SpecNorm / ProbNormT / ProbNormF / CompNorm / SumNorm /
#               MedianNorm, anything else = none
#   transNorm - LogNorm (generalized log) or CrNorm (cubic root), else none
#   scaleNorm - MeanCenter / AutoNorm / ParetoNorm / RangeNorm / VastNorm,
#               anything else = none
#   ref       - reference sample (ProbNorm*) or reference feature (CompNorm)
#   ratio     - biomarker mode: append top pairwise log-ratio features
#   ratioNum  - number of top ratio features to keep
Normalization<-function(rowNorm, transNorm, scaleNorm, ref=NULL, ratio=FALSE, ratioNum=20){
# now do actual filter if indicated
if(!is.null(dataSet$remain)){
remain <- dataSet$remain;
if(rowNorm == "CompNorm"){
# make sure the ref is there, not filtered out
hit.inx <- match(ref, colnames(dataSet$proc));
remain[hit.inx] <- TRUE;
}
proc <- dataSet$proc[,remain];
}else{
proc <- dataSet$proc;
}
# choose working data: user-edited subset (prenorm) when present, otherwise
# the filtered processed table
if(is.null(dataSet$prenorm)){
data<- proc;
cls <- dataSet$proc.cls;
if(substring(dataSet$format,4,5)=="ts"){
# NOTE(review): these '<-' assignments modify a function-local copy of
# dataSet; the global object is only updated via '<<-' further below
dataSet$facA <- dataSet$proc.facA;
dataSet$facB <- dataSet$proc.facB;
cls <- dataSet$facA;
}
}else{
data<- dataSet$prenorm;
cls <- dataSet$prenorm.cls;
if(substring(dataSet$format,4,5)=="ts"){
dataSet$facA <- dataSet$prenorm.facA;
dataSet$facB <- dataSet$prenorm.facB;
cls <- dataSet$facA;
}
}
# note, samples may not be sorted by group labels
if(substring(dataSet$format,4,5)=="ts"){
nfacA <- dataSet$facA;
nfacB <- dataSet$facB;
if(dataSet$design.type =="time" | dataSet$design.type =="time0"){
# determine time factor and should order first by subject then by each time points
if(tolower(dataSet$facA.lbl) == "time"){
time.fac <- nfacA;
exp.fac <- nfacB;
}else{
time.fac <- nfacB;
exp.fac <- nfacA;
}
# update with new index
ord.inx <- order(exp.fac);
dataSet$time.fac <<- time.fac[ord.inx];
dataSet$exp.fac <<- exp.fac[ord.inx];
}else{
ord.inx <- order(cls);
}
data<-data[ord.inx, ];
cls <-cls[ord.inx];
dataSet$facA <<- dataSet$facA[ord.inx];
dataSet$facB <<- dataSet$facB[ord.inx];
}else{
ord.inx <- order(cls);
data<-data[ord.inx, ];
cls <-cls[ord.inx];
}
colNames <- colnames(data);
rowNames <- rownames(data);
# row-wise normalization
if(rowNorm=="SpecNorm"){
if(!exists("norm.vec")){
norm.vec <- rep(1,nrow(data)); # default all same weight vec to prevent error
print("No sample specific information were given, all set to 1.0");
}
rownm<-"Normalization by sample-specific factor";
data<-data/norm.vec;
}else if(rowNorm=="ProbNormT"){
# reference profile = column means of the group named by 'ref'
grp.inx <- cls == ref;
ref.smpl <- apply(proc[grp.inx, ], 2, mean);
data<-t(apply(data, 1, ProbNorm, ref.smpl));
rownm<-"Probabilistic Quotient Normalization";
}else if(rowNorm=="ProbNormF"){
# reference profile = the single sample named by 'ref'
ref.smpl <- proc[ref,];
data<-t(apply(data, 1, ProbNorm, ref.smpl));
rownm<-"Probabilistic Quotient Normalization";
}else if(rowNorm=="CompNorm"){
data<-t(apply(data, 1, CompNorm, ref));
rownm<-"Normalization by a reference feature";
}else if(rowNorm=="SumNorm"){
data<-t(apply(data, 1, SumNorm));
rownm<-"Normalization to constant sum";
}else if(rowNorm=="MedianNorm"){
data<-t(apply(data, 1, MedianNorm));
rownm<-"Normalization to sample median";
}else{
# nothing to do
rownm<-"N/A";
}
# use apply will lose dimesion info (i.e. row names and colnames)
rownames(data)<-rowNames;
colnames(data)<-colNames;
# note: row-normed data is based on biological knowledge, since the previous
# replacing zero/missing values by half of the min positive (a constant)
# now may become different due to different norm factor, which is artificial
# variance and should be corrected again
#
# stopped, this step cause troubles
# minConc<-round(min(data)/2, 5);
# data[dataSet$fill.inx]<-minConc;
# if the reference by feature, the feature column should be removed, since it is all 1
if(rowNorm=="CompNorm" && !is.null(ref)){
inx<-match(ref, colnames(data));
data<-data[,-inx];
colNames <- colNames[-inx];
}
# record row-normed data for fold change analysis (b/c not applicable for mean-centered data)
dataSet$row.norm<<-as.data.frame(CleanData(data));
# this is for biomarker analysis only (for compound concentraion data)
if(ratio){
# generalized log transform, then keep the ratio features that best
# discriminate the classes (highest F-statistic)
min.val <- min(abs(data[data!=0]))/2;
norm.data <- log2((data + sqrt(data^2 + min.val))/2);
transnm<-"Log Normalization";
ratio.mat <- CalculatePairwiseDiff(norm.data);
fstats <- Get.Fstat(ratio.mat, dataSet$proc.cls);
# NOTE(review): "< ratioNum" keeps ratioNum-1 ratios; confirm "<=" was not intended
hit.inx <- rank(-fstats) < ratioNum; # get top n
ratio.mat <- ratio.mat[, hit.inx];
data <- cbind(norm.data, ratio.mat);
colNames <- colnames(data);
rowNames <- rownames(data);
}
if(!ratio){
# transformation
if(transNorm=='LogNorm'){
min.val <- min(abs(data[data!=0]))/10;
data<-apply(data, 2, LogNorm, min.val);
transnm<-"Log Normalization";
}else if(transNorm=='CrNorm'){
# signed cubic root: preserve the sign of negative values
norm.data <- abs(data)^(1/3);
norm.data[data<0] <- - norm.data[data<0];
data <- norm.data;
transnm<-"Cubic Root Transformation";
}else{
transnm<-"N/A";
}
}
# scaling
if(scaleNorm=='MeanCenter'){
data<-apply(data, 2, MeanCenter);
scalenm<-"MeanCenter";
}else if (scaleNorm=='AutoNorm'){
data<-apply(data, 2, AutoNorm);
scalenm<-"Autoscaling";
}else if(scaleNorm=='ParetoNorm'){
data<-apply(data, 2, ParetoNorm);
scalenm<-"Pareto Scaling";
}else if(scaleNorm=='RangeNorm'){
data<-apply(data, 2, RangeNorm);
scalenm<-"Range Scaling";
}else if(scaleNorm=='VastNorm'){
data<-apply(data, 2, VastNorm);
scalenm<-'Vast Scaling';
}else{
scalenm<-"N/A";
}
# need to do some sanity check, for log there may be Inf values introduced
data <- CleanData(data, T, F);
# note after using "apply" function, all the attribute lost, need to add back
rownames(data)<-rowNames;
colnames(data)<-colNames;
dataSet$norm <<- as.data.frame(data);
dataSet$cls <<- cls;
dataSet$rownorm.method<<-rownm;
dataSet$trans.method<<-transnm;
dataSet$scale.method<<-scalenm;
dataSet$combined.method<<-FALSE;
dataSet$norm.all <<- NULL; # this is only for biomarker ROC analysis
return(1);
}
########################################
###row-wise norm methods, x is a row ###
########################################
# normalize by a sum of each sample, assume constant sum (1000)
# return: normalized data
# Row method: scale the sample so its values sum to a constant total (1000).
SumNorm<-function(x){
  total <- sum(x, na.rm=TRUE);
  1000*x/total;
}
# normalize by median
# Row method: divide each value by the sample median.
MedianNorm<-function(x){
  med <- median(x, na.rm=TRUE);
  x/med;
}
# normalize by a reference sample (probability quotient normalization)
# ref should be the name of the reference sample
# Row method: probabilistic quotient normalization -- divide the sample by
# the median of its quotients against the reference sample profile.
ProbNorm<-function(x, ref.smpl){
  quotients <- as.numeric(x/ref.smpl);
  x/median(quotients, na.rm=TRUE)
}
# normalize by a reference reference (i.e. creatinine)
# ref should be the name of the cmpd
# Row method: normalize by a reference feature (e.g. creatinine); 'ref' is
# the feature's name or index within the sample vector.
CompNorm<-function(x, ref){
  1000*x/x[ref];
}
##############################################
###column-wise norm methods, x is a column ###
##############################################
# generalize log, tolerant to 0 and negative values
# Column method: generalized log transform, tolerant to zeros and negative
# values thanks to the min.val offset inside the square root.
LogNorm<-function(x,min.val){
  shifted <- (x + sqrt(x^2 + min.val^2))/2;
  log2(shifted)
}
# normalize to zero mean and unit variance
# Column method: autoscaling (z-score) -- zero mean, unit variance.
AutoNorm<-function(x){
  centered <- x - mean(x);
  centered/sd(x, na.rm=TRUE);
}
# Pareto scaling: mean-center, divide by the square root of the standard deviation
# Column method: Pareto scaling -- mean-center, then divide by the square
# root of the standard deviation (milder than autoscaling).
ParetoNorm<-function(x){
  centered <- x - mean(x);
  centered/sqrt(sd(x, na.rm=TRUE));
}
# center to zero mean (no scaling)
# Column method: mean-centering only.
MeanCenter<-function(x){
  m <- mean(x);
  x - m;
}
# range scaling: mean-center, divide by the value range (max - min)
# Column method: range scaling -- mean-center and divide by (max - min).
# A constant column is returned unchanged to avoid division by zero.
RangeNorm<-function(x){
  if(max(x) == min(x)){
    return(x);
  }
  (x - mean(x))/(max(x)-min(x));
}
#VastNorm
# Column method: vast scaling -- autoscale, then weight by mean/sd so stable
# (low-variation) features are emphasized.
VastNorm<-function(x){
  m <- mean(x);
  s <- sd(x,na.rm=TRUE);
  ((x-m)/s)*(m/s)
}
#######################################
####### Combined approach #############
#######################################
# Combined approach: glog-transform the processed data, then quantile
# normalize across all samples (preprocessCore works column-wise, hence the
# double transpose). Stores dataSet$norm and flags combined.method.
QuantileNormalize <- function(){
  cls <- dataSet$proc.cls;
  data <- glog(dataSet$proc);
  require('preprocessCore');
  data <- t(normalize.quantiles(t(data), copy=FALSE));
  dataSet$norm <<- as.data.frame(data);
  dataSet$cls <<- cls;
  dataSet$rownorm.method<<-NULL;
  dataSet$colnorm.method<<-NULL;
  dataSet$combined.method<<-TRUE;
}
##############################################
################## Summary plot ##############
##############################################
# plot two summary plot, one b4 normalization, one after
# for each plot top is box plot, bottom is a density plot
# Draw the before/after normalization summary figure: for each state, a
# density plot of the per-feature means (top) and a box plot of up to 50
# randomly selected features (bottom). Output goes to a Cairo image file.
#   width - NA: default large size; 0: default size and register image in
#           imgSet for the report; otherwise width is accepted but the
#           default 7.2x9 size is used
PlotNormSummary<-function(imgName, format="png", dpi=72, width=NA){
imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
if(is.na(width)){
w <- 10.5; h <- 12;
}else if(width == 0){
w <- 7.2;h <- 9;
imgSet$norm<<-imgName;
}else{
w <- 7.2; h <- 9;
}
Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
# 4-row by 2-column layout: fig 1/3 small (density), fig 2/4 tall (box plot)
layout(matrix(c(1,2,2,2,3,4,4,4), 4, 2, byrow = FALSE))
# since there may be too many compounds, only plot a subsets (50) in box plot
# but density plot will use all the data
pre.inx<-GetRandomSubsetIndex(ncol(dataSet$proc), sub.num=50);
namesVec <- colnames(dataSet$proc[,pre.inx]);
# only get common ones
nm.inx <- namesVec %in% colnames(dataSet$norm)
namesVec <- namesVec[nm.inx];
pre.inx <- pre.inx[nm.inx];
norm.inx<-match(namesVec, colnames(dataSet$norm));
namesVec <- substr(namesVec, 1, 12); # use abbreviated name
rangex.pre <- range(dataSet$proc[, pre.inx], na.rm=T);
rangex.norm <- range(dataSet$norm[, norm.inx], na.rm=T);
x.label<-GetValueLabel();
# NOTE(review): y.label is computed but never used below
y.label<-GetVariableLabel();
# fig 1
op<-par(mar=c(4,7,4,0), xaxt="s");
plot(density(apply(dataSet$proc, 2, mean, na.rm=TRUE)), col='darkblue', las =2, lwd=2, main="", xlab="", ylab="");
mtext("Density", 2, 5);
mtext("Before Normalization",3, 1)
# fig 2
op<-par(mar=c(7,7,0,0), xaxt="s");
boxplot(dataSet$proc[,pre.inx], names= namesVec, ylim=rangex.pre, las = 2, col="lightgreen", horizontal=T);
mtext(x.label, 1, 5);
# fig 3
op<-par(mar=c(4,7,4,2), xaxt="s");
plot(density(apply(dataSet$norm, 2, mean, na.rm=TRUE)), col='darkblue', las=2, lwd =2, main="", xlab="", ylab="");
mtext("After Normalization",3, 1);
# fig 4
op<-par(mar=c(7,7,0,2), xaxt="s");
boxplot(dataSet$norm[,norm.inx], names=namesVec, ylim=rangex.norm, las = 2, col="lightgreen", horizontal=T);
mtext(paste("Normalized",x.label),1, 5);
#dev.off();
}
###################################################################################################
###################################################################################################
###################################################################################################
###################################################################################################
#########################################################
## R script for MetaboAnalyst
## Description: perform fold change, t-tests, volcano plot
##
## Author: Jeff Xia, jeff.xia@mcgill.ca
## McGill University, Canada
##
## License: GNU GPL (>= 2)
###################################################
#####################################
########### Fold Change #############
#####################################
# fold change analysis, method can be mean or median
# note: since the interface allow user to change all parameters
# the fold change has to be re-calculated each time
# Unpaired fold-change analysis: compute per-feature group-mean fold changes
# via GetFC, keep the features outside [1/thresh, thresh], write them to
# "fold_change.csv" sorted by |log2 FC|, and store everything in analSet$fc.
FC.Anal.unpaired<-function(fc.thresh=2, cmp.type = 0){
  # ensure the threshold is above 1 (invert a fractional threshold)
  if(fc.thresh <= 1){
    fc.thresh <- 1/fc.thresh;
  }
  max.thresh = fc.thresh;
  min.thresh = 1/fc.thresh;
  res <- GetFC(F, cmp.type);
  fc.all <- res$fc.all;
  fc.log <- res$fc.log;
  # significant if outside the symmetric threshold window
  imp.inx <- fc.all > max.thresh | fc.all < min.thresh;
  sig.mat <- cbind(fc.all[imp.inx, drop=F], fc.log[imp.inx, drop=F]);
  colnames(sig.mat)<-c("Fold Change", "log2(FC)");
  # order by absolute log value (since symmetrical in pos and neg)
  sig.mat <- sig.mat[order(abs(sig.mat[,2]), decreasing=T),,drop=F];
  write.csv(sig.mat, file="fold_change.csv");
  # store the results for downstream plotting/tables
  analSet$fc <<- list(
    paired = FALSE,
    raw.thresh = fc.thresh,
    max.thresh = max.thresh,
    min.thresh = min.thresh,
    fc.all = fc.all, # note a vector
    fc.log = fc.log,
    inx.imp = imp.inx,
    sig.mat = sig.mat
  );
}
# Paired fold-change analysis: count, per feature, how many sample pairs pass
# the log2 fold threshold in each direction; a feature is significant when
# either count reaches percent.thresh of all pairs. Writes the significant
# table to "fold_change.csv" and stores the results in analSet$fc.
FC.Anal.paired<-function(fc.thresh=2, percent.thresh=0.75, cmp.type=0){
  # ensure the threshold is above 1 (invert a fractional threshold)
  if(fc.thresh <= 1){
    fc.thresh <- 1/fc.thresh;
  }
  max.thresh = fc.thresh;
  min.thresh = 1/fc.thresh;
  fc.mat <- GetFC(T, cmp.type);
  # minimum number of pairs that must pass the threshold
  count.thresh <- round(nrow(dataSet$norm)/2*percent.thresh);
  count.up <- apply(fc.mat >= log(max.thresh,2), 2, sum);
  count.down <- apply(fc.mat <= log(min.thresh,2), 2, sum);
  fc.all <- rbind(count.up, count.down);
  inx.up <- count.up >= count.thresh;
  inx.down <- count.down >= count.thresh;
  colnames(fc.all) <- colnames(dataSet$norm);
  rownames(fc.all) <- c("Count (up)", "Count (down)");
  sig.var <- t(fc.all[,(inx.up|inx.down), drop=F]);
  # sort sig.var using absolute difference between count(up)-count(down)
  ord <- order(abs(sig.var[,1]-sig.var[,2]), decreasing=T);
  sig.var <- sig.var[ord,,drop=F];
  write.csv(signif(sig.var,5), file="fold_change.csv");
  # store the results for downstream plotting/tables
  analSet$fc <<- list(
    paired = TRUE,
    fc.mat = fc.mat,
    raw.thresh = fc.thresh,
    max.thresh = count.thresh,
    min.thresh = -count.thresh,
    fc.all = fc.all, # note: a 2-row matrix!
    inx.up = inx.up,
    inx.down = inx.down,
    sig.mat = sig.var
  );
}
# Plot the fold-change results in analSet$fc to a Cairo image file.
#   paired results   -> per-feature up/down pair-count needle plot with
#                       threshold lines
#   unpaired results -> log2(FC) scatter; when raw.thresh <= 0, a side-by-side
#                       plot of the two group means with +/-2 SD bands instead
#   width - NA: default 8in; 0: 7in and register image in imgSet; else width
PlotFC<-function(imgName, format="png", dpi=72, width=NA){
imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
if(is.na(width)){
w <- 8;
}else if(width == 0){
w <- 7;
imgSet$fc<<-imgName;
}else{
w <- width;
}
# fixed 8:6 aspect ratio
h <- w*6/8;
Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
par(mar=c(5,5,2,3));
fc = analSet$fc;
if(fc$paired){
# y axis spans the maximum possible pair count in both directions
ylim<-c(-nrow(dataSet$norm)/2, nrow(dataSet$norm)/2);
xlim<-c(0, ncol(dataSet$norm));
plot(NULL, xlim=xlim, ylim=ylim, xlab = GetVariableLabel(),
ylab=paste("Count with FC >=", fc$max.thresh, "or <=", fc$min.thresh));
# one vertical needle per feature: up counts above zero, down counts below;
# significant features are highlighted in magenta with thicker lines
for(i in 1:ncol(fc$fc.all)){
segments(i,0, i, fc$fc.all[1,i], col= ifelse(fc$inx.up[i],"magenta", "darkgrey"),
lwd= ifelse(fc$inx.up[i], 2, 1));
segments(i,0, i, -fc$fc.all[2,i], col= ifelse(fc$inx.down[i], "magenta", "darkgrey"),
lwd= ifelse(fc$inx.down[i], 2, 1));
}
abline(h=fc$max.thresh, lty=3);
abline(h=fc$min.thresh, lty=3);
abline(h=0, lwd=1);
}else{
if(fc$raw.thresh > 0){
# be symmetrical
topVal <- max(abs(fc$fc.log));
ylim <- c(-topVal, topVal);
plot(fc$fc.log, ylab="Log2 (FC)", ylim = ylim, xlab = GetVariableLabel(), pch=19, axes=F,
col= ifelse(fc$inx.imp, "magenta", "darkgrey"));
axis(2);
axis(4); # added by Beomsoo
abline(h=log(fc$max.thresh,2), lty=3);
abline(h=log(fc$min.thresh,2), lty=3);
abline(h=0, lwd=1);
}else{ # plot side by side
# per-feature means of each group, separated by a 5-point gap, with
# dotted lines at each group mean and +/- 2 SD
dat1 <- dataSet$norm[as.numeric(dataSet$cls) == 1, ];
dat2 <- dataSet$norm[as.numeric(dataSet$cls) == 2, ];
mns1 <- apply(dat1, 2, mean);
mn1 <- mean(mns1);
sd1 <- sd(mns1);
msd1.top <- mn1 + 2*sd1;
msd1.low <- mn1 - 2*sd1;
mns2 <- apply(dat2, 2, mean);
mn2 <- mean(mns2);
sd2 <- sd(mns2);
msd2.top <- mn2 + 2*sd2;
msd2.low <- mn2 - 2*sd2;
ylims <- range(c(mns1, mns2, msd1.top, msd2.top, msd1.low, msd2.low));
new.mns <- c(mns1, rep(NA, 5), mns2);
cols <- c(rep("magenta", length(mns1)), rep(NA, 5), rep("blue", length(mns2)));
pchs <- c(rep(15, length(mns1)), rep(NA, 5), rep(19, length(mns2)));
plot(new.mns, ylim=ylims, pch = pchs, col = cols, cex = 1.25, axes=F, ylab="");
axis(2);
axis(4); # added by Beomsoo
abline(h=mn1, col="magenta", lty=3, lwd=2);
abline(h=msd1.low, col="magenta", lty=3, lwd=1);
abline(h=msd1.top, col="magenta", lty=3, lwd=1);
abline(h=mn2, col="blue", lty=3, lwd=2);
abline(h=msd2.low, col="blue", lty=3, lwd=1);
abline(h=msd2.top, col="blue", lty=3, lwd=1);
# abline(h=mean(all.mns), col="darkgrey", lty=3);
axis(1, at=1:length(new.mns), labels=c(1:length(mns1),rep(NA, 5),1:length(mns2)));
}
}
#dev.off();
}
# Emit the fold-change significant-feature table via the shared report writer.
GetSigTable.FC <- function(){
  sig <- analSet$fc$sig.mat;
  GetSigTable(sig, "fold change analysis");
}
# Fold-change significance matrix, cleaned of non-finite values for display.
GetFCSigMat <- function(){
  sig <- analSet$fc$sig.mat;
  CleanNumber(sig);
}
# Row (feature) names of the fold-change significance matrix.
GetFCSigRowNames <- function(){
  return(rownames(analSet$fc$sig.mat));
}
# Column names of the fold-change significance matrix.
GetFCSigColNames <- function(){
  return(colnames(analSet$fc$sig.mat));
}
# utility method to calculate FC
# Compute fold changes between the two levels of dataSet$cls.
#
# paired  - TRUE: return a per-pair log2 difference matrix (group1 rows
#           minus group2 rows, or the reverse for cmpType != 0).
#           FALSE: return list(fc.all = raw ratio/2^logFC, fc.log = log2 FC),
#           both named by feature.
# cmpType - 0 compares level1 vs level2; any other value flips the direction.
# Data source depends on dataSet$combined.method: the fully normalized table
# (already log-scaled) or the row-normalized table (ratios taken directly).
GetFC <- function(paired=FALSE, cmpType){
  cls <- dataSet$cls;
  grp1 <- which(cls == levels(cls)[1]);
  grp2 <- which(cls == levels(cls)[2]);
  if(paired){
    # paired data: work feature-wise on matched sample rows
    if(dataSet$combined.method){
      mat <- dataSet$norm;
    }else{
      mat <- log(dataSet$row.norm, 2);
    }
    G1 <- mat[grp1, ];
    G2 <- mat[grp2, ];
    if(cmpType == 0){
      return(G1 - G2);
    }
    return(G2 - G1);
  }
  if(dataSet$combined.method){
    # data already on log scale: difference of means is the log2 FC
    mat <- dataSet$norm;
    m1 <- colMeans(mat[grp1, ]);
    m2 <- colMeans(mat[grp2, ]);
    fc.log <- if(cmpType == 0) signif(m1 - m2, 5) else signif(m2 - m1, 5);
    fc.all <- signif(2^fc.log, 5);
  }else{
    # raw-scale data: take the ratio of group means
    mat <- dataSet$row.norm;
    m1 <- colMeans(mat[grp1, ]);
    m2 <- colMeans(mat[grp2, ]);
    ratio <- if(cmpType == 0) m1/m2 else m2/m1;
    fc.all <- signif(ratio, 5);
    fc.log <- signif(log2(ratio), 5);
  }
  names(fc.all) <- names(fc.log) <- colnames(dataSet$norm);
  return(list(fc.all = fc.all, fc.log = fc.log));
}
#####################################
########### t-Tests ################
####################################
# Run column-wise t-tests (or Wilcoxon rank tests) on dataSet$norm, keep
# features with raw p <= threshp, write "t_test.csv", and store the result
# in the global analSet$tt for the plotting/getter functions.
#
# nonpar    - TRUE for Wilcoxon rank test, FALSE for t-tests
# threshp   - raw p-value cutoff for the significant set
# paired    - forwarded to the underlying test
# equal.var - equal-variance assumption for the t-test
Ttests.Anal <- function(nonpar=F, threshp=0.05, paired=FALSE, equal.var=TRUE){
  res <- GetTtestRes(paired, equal.var, nonpar);
  t.stat <- res[,1];
  p.value <- res[,2];
  names(t.stat) <- names(p.value) <- colnames(dataSet$norm);
  p.log <- -log10(p.value);
  fdr.p <- p.adjust(p.value, "fdr");

  inx.imp <- p.value <= threshp;
  sig.t <- t.stat[inx.imp];
  sig.p <- p.value[inx.imp];
  lod <- -log10(sig.p);
  sig.q <- fdr.p[inx.imp];

  sig.mat <- cbind(sig.t, sig.p, lod, sig.q);
  colnames(sig.mat) <- c("t.stat", "p.value", "-log10(p)", "FDR");
  ord.inx <- order(sig.p);
  # BUGFIX: drop=FALSE — with exactly one significant feature the old
  # sig.mat[ord.inx,] collapsed to a bare vector, losing its row name and
  # column names and breaking GetTTSigRowNames/GetTTSigColNames/write.csv.
  sig.mat <- sig.mat[ord.inx, , drop=FALSE];
  sig.mat <- signif(sig.mat, 5);

  if(nonpar){
    tt.nm <- "Wilcoxon Rank Test";
  }else{
    tt.nm <- "T-Tests";
  }
  write.csv(sig.mat, file="t_test.csv");
  tt <- list(
    tt.nm = tt.nm,
    paired = paired,
    raw.thresh = threshp,
    p.value = sort(p.value),  # sorted — consumers expect ascending order
    p.log = p.log,
    thresh = -log10(threshp), # only used for plot threshold line
    inx.imp = inx.imp,
    sig.mat = sig.mat
  );
  analSet$tt <<- tt;
}
# Emit the t-test significant-feature table via the shared report writer.
GetSigTable.TT <- function(){
  sig <- analSet$tt$sig.mat;
  GetSigTable(sig, "t-tests");
}
# return a double matrix with 2 columns - p values and lod
# t-test significance matrix (stat, p, -log10(p), FDR) with non-finite
# entries cleaned for display.
GetTTSigMat <- function(){
  sig <- analSet$tt$sig.mat;
  CleanNumber(sig);
}
# Row (feature) names of the t-test significance matrix.
GetTTSigRowNames <- function(){
  return(rownames(analSet$tt$sig.mat));
}
# Column names of the t-test significance matrix.
GetTTSigColNames <- function(){
  return(colnames(analSet$tt$sig.mat));
}
# (index, -log10 p) coordinates of significant features for the t-test plot.
GetTtUpMat <- function(){
  tt <- analSet$tt;
  red.inx <- which(tt$inx.imp);
  as.matrix(cbind(red.inx, tt$p.log[red.inx]));
}
# (index, -log10 p) coordinates of non-significant features.
GetTtDnMat <- function(){
  tt <- analSet$tt;
  blue.inx <- which(!tt$inx.imp);
  as.matrix(cbind(blue.inx, tt$p.log[blue.inx]));
}
# Two endpoints of the horizontal significance-threshold line.
GetTtLnMat <- function(){
  tt <- analSet$tt;
  n <- length(tt$p.log);
  as.matrix(rbind(c(0, tt$thresh), c(n + 1, tt$thresh)));
}
# Compound names in the order of the stored -log10(p) vector.
GetTtCmpds <- function(){
  return(names(analSet$tt$p.log));
}
# Position of the most significant feature (largest -log10 p).
GetMaxTtInx <- function(){
  return(which.max(analSet$tt$p.log));
}
# utility method to get p values
# Column-wise two-group tests on dataSet$norm.
# Returns a matrix with one row per feature: column 1 = test statistic,
# column 2 = p-value. Features whose test fails yield c(NA, NA).
# For wide tables (>= 1000 columns) the genefilter::rowttests fast path
# is used instead of per-column t.test calls.
#
# Fix: try-error detection now uses inherits(), the documented way to test
# for try() failures; class(x) == "try-error" is fragile when an object
# carries more than one class.
GetTtestRes <- function(paired=FALSE, equal.var=TRUE, nonpar=F){
  if(nonpar){
    inx1 <- which(dataSet$cls == levels(dataSet$cls)[1]);
    inx2 <- which(dataSet$cls == levels(dataSet$cls)[2]);
    res <- apply(as.matrix(dataSet$norm), 2, function(x) {
      tmp <- try(wilcox.test(x[inx1], x[inx2], paired = paired));
      if(inherits(tmp, "try-error")) {
        return(c(NA, NA));
      }else{
        return(c(tmp$statistic, tmp$p.value));
      }
    })
  }else{
    if(ncol(dataSet$norm) < 1000){
      inx1 <- which(dataSet$cls == levels(dataSet$cls)[1]);
      inx2 <- which(dataSet$cls == levels(dataSet$cls)[2]);
      res <- apply(as.matrix(dataSet$norm), 2, function(x) {
        tmp <- try(t.test(x[inx1], x[inx2], paired = paired, var.equal = equal.var));
        if(inherits(tmp, "try-error")) {
          return(c(NA, NA));
        }else{
          return(c(tmp$statistic, tmp$p.value));
        }
      })
    }else{ # use fast version
      require(genefilter);
      res <- try(rowttests(t(as.matrix(dataSet$norm)), dataSet$cls));
      if(inherits(res, "try-error")) {
        res <- c(NA, NA);
      }else{
        res <- t(cbind(res$statistic, res$p.value));
      }
    }
  }
  return(t(res));
}
# utility method to perform the univariate analysis automatically
# Jeff note:
# The approach is computationally expensive,and fails more often
# get around: make it lazy unless users request, otherwise the default t-test will also be affected
# Build a per-feature univariate summary (group means/SDs, p-value with
# automatic test selection, FDR q-value, fold change, direction), write it
# to "univAnalReport.csv", and export significant-feature subsets of the
# original and normalized tables.
# Reads globals: analSet$tt (paired flag and p-value threshold from the last
# t-test run) and dataSet (norm/orig matrices, cls labels). Must be called
# after Ttests.Anal.
GetUnivReport <- function(){
paired <- analSet$tt$paired;
threshp <- analSet$tt$raw.thresh;
inx1 <- which(dataSet$cls==levels(dataSet$cls)[1]);
inx2 <- which(dataSet$cls==levels(dataSet$cls)[2]);
# output list (mean(sd), mean(sd), p-value, FoldChange, Up/Down)
univStat.mat <- apply(as.matrix(dataSet$norm), 2, function(x) {
# normality test for each group
# ks <- ks.test(x[inx1], x[inx2]);
sw.g1 <- shapiro.test(x[inx1]);
sw.g2 <- shapiro.test(x[inx2]);
# if either group fails Shapiro-Wilk normality (p <= 0.05), fall back to
# the non-parametric test; "(W)" tags such p-values in the report
method <- ifelse( ((sw.g1$p.value <= 0.05) | (sw.g2$p.value <= 0.05)), "(W)","")
if (method == "(W)") {
# wilcoxon test
tmp <- try(wilcox.test(x[inx1], x[inx2], paired = paired));
} else {
# t-test
# choose equal/unequal variance form via an F-test (var.test),
# guarded against the degenerate all-constant case
equal.var <- TRUE;
if(var(cbind(x[inx1], x[inx2]), na.rm=TRUE) != 0) {
anal.var <- var.test(x[inx1], x[inx2]);
equal.var <- ifelse(anal.var$p.value <= 0.05, FALSE, TRUE);
}
tmp <- try(t.test(x[inx1], x[inx2], paired = paired, var.equal = equal.var));
}
if(class(tmp) == "try-error") {
return(NA);
}else{
mean1 <- mean(x[inx1]);
mean2 <- mean(x[inx2]);
sd1 <- sd(x[inx1]);
sd2 <- sd(x[inx2]);
# NOTE(review): the trailing ",4" in sprintf appears to be a stray
# unused argument — confirm and remove
p.value <- paste(ifelse(tmp$p.value < 0.0001, "< 0.0001", sprintf("%.4f", tmp$p.value,4))," ", method, sep="");
p.value.origin <- tmp$p.value;
# fold change reported on the raw ratio scale, signed so that values
# below 1 become negative reciprocals (e.g. 0.5 -> -2)
foldChange <- mean1 / mean2;
foldChange <- round(ifelse( foldChange >= 1, foldChange, (-1/foldChange) ), 2);
upDown <- ifelse(mean1 > mean2, "Up","Down");
univStat <- c(
meanSD1 = sprintf("%.3f (%.3f)", mean1, sd1),
meanSD2 = sprintf("%.3f (%.3f)", mean2, sd2),
p.value = p.value,
foldChange = foldChange,
upDown = upDown,
p.value.origin = sprintf("%.5f", p.value.origin)
);
return(univStat);
}
})
univStat.mat <- as.data.frame(t(univStat.mat));
# add FDR/q-value
# NOTE(review): the levels()[...] trick assumes p.value.origin is a factor
# (pre-R 4.0 stringsAsFactors=TRUE behavior); with a character column this
# yields NAs — verify under the running R version
q.value <- sprintf("%.4f", p.adjust(p=as.numeric(levels(univStat.mat$p.value.origin))[univStat.mat$p.value.origin], method='fdr'));
univStat.mat <- cbind(univStat.mat[, c(1,2,3)], q.value, univStat.mat[, c(4,5)], univStat.mat[,6]);
names(univStat.mat)[1] <- paste("Mean (SD) of ", levels(dataSet$cls)[1], sep='');
names(univStat.mat)[2] <- paste("Mean (SD) of ", levels(dataSet$cls)[2], sep='');
names(univStat.mat)[3] <- "p-value";
names(univStat.mat)[4] <- "q-value (FDR)";
names(univStat.mat)[5] <- "Fold Change";
names(univStat.mat)[6] <- paste(levels(dataSet$cls)[1],"/", levels(dataSet$cls)[2], sep='');
names(univStat.mat)[7] <- "p.value.origin";
univStat.mat <- cbind(Name=rownames(univStat.mat), univStat.mat);
rownames(univStat.mat) <- NULL
## generate univariate report file (univAnalReport.csv).
## mixed with t-test and wilcoxon test depend on each metabolite's distribution
univAnal.mat <- univStat.mat;
note.str <- paste("\n Univariate Analysis Result for each variable/metabolite\n\n",
"[NOTE]\n",
" p-value is calculated with t-test as a default.\n",
" p-value with (W) is calculated by the Wilcoxon Mann Whitney test\n\n\n", sep='');
# header note first (append=FALSE truncates), then the table is appended
cat(note.str, file="univAnalReport.csv", append=FALSE);
write.table(univAnal.mat, file="univAnalReport.csv", append=TRUE, sep=",", row.names=FALSE);
## generate subset with the threshold (p-value)
sigones <- which(as.numeric(as.character(univAnal.mat$p.value.origin)) <= threshp);
sigDataSet.orig <- cbind(SampleID=rownames(dataSet$orig), Label=dataSet$cls, dataSet$orig[,c(sigones)])
sigDataSet.norm <- cbind(SampleID=rownames(dataSet$orig), Label=dataSet$cls, dataSet$norm[,c(sigones)])
write.table(sigDataSet.orig, file=paste("data_subset_orig_p", threshp, ".csv", sep=''), append=FALSE, sep=",", row.names=FALSE);
write.table(sigDataSet.norm, file=paste("data_subset_norm_p", threshp, ".csv", sep=''), append=FALSE, sep=",", row.names=FALSE);
}
# Report (as the lowercase strings the client expects) whether the t-test
# significance matrix contains any non-finite values.
ContainInfiniteTT <- function(){
  has.bad <- any(!is.finite(analSet$tt$sig.mat));
  if(has.bad){
    return("true");
  }
  return("false");
}
#####################################
########### Volcano ################
####################################
# Volcano analysis: combine per-feature fold-change (x axis) and test
# p-values (y axis). Significant features must pass BOTH the FC threshold
# (up or down) and the p-value threshold. Writes "volcano.csv" and stores
# the result in the global analSet$volcano.
#
# paired         - paired design: FC becomes per-pair counts above/below the
#                  log2 threshold rather than a single ratio per feature
# fcthresh       - raw fold-change cutoff (values < 1 are inverted)
# cmpType        - comparison direction, forwarded to GetFC
# percent.thresh - (paired only) fraction of pairs that must exceed the FC
#                  threshold for a feature to count as up/down
# nonpar/threshp/equal.var - forwarded to GetTtestRes / p-value cutoff
Volcano.Anal <- function(paired=FALSE,fcthresh,cmpType, percent.thresh, nonpar=F, threshp, equal.var=TRUE){
#### t-tests
t.res <- GetTtestRes(paired, equal.var, nonpar);
p.value <- t.res[,2];
inx.p <- p.value <= threshp;
p.log <- -log10(p.value);
### fold change analysis
# make sure threshold is above 1
fcthresh = ifelse(fcthresh>1, fcthresh, 1/fcthresh);
max.xthresh <- log(fcthresh,2);
min.xthresh <- log(1/fcthresh,2);
if(paired){
# paired mode: count, per feature, how many sample pairs exceed the
# log2 FC threshold in each direction; a feature is up/down when the
# count reaches percent.thresh of all pairs
fc.mat <- GetFC(T, cmpType);
count.thresh<-round(nrow(dataSet$norm)/2*percent.thresh);
mat.up <- fc.mat >= max.xthresh;
mat.down <- fc.mat <= min.xthresh;
count.up<-apply(mat.up, 2, sum);
count.down<-apply(mat.down, 2, sum);
fc.all<-rbind(count.up, count.down);
inx.up <- count.up>=count.thresh;
inx.down <- count.down>=count.thresh;
colnames(fc.all)<-colnames(dataSet$norm);
rownames(fc.all)<-c("Count (up)", "Count (down)");
fc.log <- NULL; # dummy, not applicable for counts
# replace the count.thresh for plot
max.xthresh <- count.thresh;
min.xthresh <- -count.thresh;
}else{
res <- GetFC(F, cmpType);
# create a named matrix of sig vars for display
fc.log <- res$fc.log;
fc.all <- res$fc.all;
inx.up = fc.log > max.xthresh;
inx.down = fc.log < min.xthresh;
}
# create named sig table for display
inx.imp<-(inx.up | inx.down) & inx.p;
if(paired){
# NOTE(review): drop=F inside vector subscripting ( x[i, drop=F] ) —
# these are plain vectors after fc.all[1,], so drop is presumably a
# no-op; confirm this doesn't warn/error on the target R version
sig.var<-cbind(fc.all[1,][inx.imp,drop=F], fc.all[2,][inx.imp, drop=F], p.value[inx.imp, drop=F], p.log[inx.imp, drop=F]);
colnames(sig.var)<-c("Counts (up)","Counts (down)", "p.value", "-log10(p)");
# first order by count difference, then by log(p)
dif.count<-abs(sig.var[,1]-sig.var[,2]);
ord.inx<-order(dif.count, sig.var[,4], decreasing=T);
sig.var<-sig.var[ord.inx,,drop=F];
sig.var[,c(3,4)]<-signif(sig.var[,c(3,4)],5);
}else{
sig.var<-cbind(fc.all[inx.imp,drop=F], fc.log[inx.imp,drop=F], p.value[inx.imp,drop=F], p.log[inx.imp,drop=F]);
colnames(sig.var)<-c("FC", "log2(FC)", "p.value", "-log10(p)");
# first order by log(p), then by log(FC)
ord.inx<-order(sig.var[,4], abs(sig.var[,2]), decreasing=T);
sig.var<-sig.var[ord.inx,,drop=F];
sig.var<-signif(sig.var,5);
}
fileName <- "volcano.csv";
write.csv(signif(sig.var,5),file=fileName);
# NOTE(review): in the paired branch fc.log is NULL, so fc.log.uniq below
# is jitter(NULL) — verify the paired path is exercised / doesn't error
volcano<-list (
raw.threshx = fcthresh,
raw.threshy = threshp,
paired = paired,
max.xthresh = max.xthresh,
min.xthresh = min.xthresh,
thresh.y = -log10(threshp),
fc.all = fc.all,
fc.log = fc.log,
fc.log.uniq = jitter(fc.log), # jittered to break ties for client display
inx.up = inx.up,
inx.down = inx.down,
p.log = p.log,
inx.p = inx.p,
sig.mat = sig.var
);
analSet$volcano<<-volcano;
}
# now try to label the interesting points
# it is defined by the following rules
# need to be signficant (sig.inx) and
# or 2. top 5 p
# or 2. top 5 left
# or 3. top 5 right
# Render the volcano plot from analSet$volcano to an image file via Cairo.
# Significant points are highlighted in magenta; the top-5 points by p-value
# and by fold change in each direction are labeled (rules in the comment
# block above this function). width==0 is the "record for report" mode that
# also stores the file name in imgSet$volcano.
PlotVolcano <- function(imgName, format="png", dpi=72, width=NA){
imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
if(is.na(width)){
w <- 10;
}else if(width == 0){
w <- 8;
imgSet$volcano<<-imgName;
}else{
w <- width;
}
h <- w*6/10;
Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
par(mar=c(5,5,3,4));
vcn<-analSet$volcano;
# semi-transparent grey/magenta for background vs highlighted points
MyGray <- rgb(t(col2rgb("black")), alpha=40, maxColorValue=255);
MyHighlight <- rgb(t(col2rgb("magenta")), alpha=80, maxColorValue=255);
if(vcn$paired){
# paired mode: x axis is the per-feature count of significant pairs
xlim<-c(-nrow(dataSet$norm)/2, nrow(dataSet$norm)/2)*1.2;
# merge fc.all two rows into one, bigger one win
fc.all <- apply(vcn$fc.all, 2, function(x){ if(x[1] > x[2]){return(x[1])}else{return(-x[2])}})
hit.inx <- vcn$inx.p & (vcn$inx.up | vcn$inx.down);
plot(fc.all, vcn$p.log, xlim=xlim, pch=20, cex=ifelse(hit.inx, 1.2, 0.8),
col = ifelse(hit.inx, MyHighlight, MyGray),
xlab="Count of Significant Pairs", ylab="-log10(p)");
# label up-regulated hits that are in the top 5 by both p and count
sig.upInx <- vcn$inx.p & vcn$inx.up;
p.topInx <- GetTopInx(vcn$p.log, 5, T) & vcn$inx.up;
fc.rtInx <- GetTopInx(vcn$fc.all[1,], 5, T);
lblInx <- p.topInx & sig.upInx & fc.rtInx;
if(sum(lblInx, na.rm=T) > 0){
text.lbls<-substr(colnames(dataSet$norm)[lblInx],1,14) # some names may be too long
text(vcn$fc.all[1,lblInx], vcn$p.log[lblInx],labels=text.lbls, pos=4, col="blue", srt=30, xpd=T, cex=0.8);
}
# same for down-regulated hits (plotted at negative counts)
sig.dnInx <- vcn$inx.p & vcn$inx.down;
p.topInx <- GetTopInx(vcn$p.log, 5, T) & vcn$inx.down;
fc.leftInx <- GetTopInx(vcn$fc.all[2,], 5, T) & vcn$inx.down;
lblInx <-p.topInx & sig.dnInx & fc.leftInx;
if(sum(lblInx, na.rm=T) > 0){
text.lbls<-substr(colnames(dataSet$norm)[lblInx],1,14) # some names may be too long
text(-vcn$fc.all[2,lblInx], vcn$p.log[lblInx],labels=text.lbls, pos=2, col="blue", srt=-30, xpd=T, cex=0.8);
}
}else{
# unpaired mode: classic log2(FC) vs -log10(p)
imp.inx<-(vcn$inx.up | vcn$inx.down) & vcn$inx.p;
plot(vcn$fc.log, vcn$p.log, pch=20, cex=ifelse(imp.inx, 1.2, 0.7),
col = ifelse(imp.inx, MyHighlight, MyGray),
xlab="log2 (FC)", ylab="-log10(p)");
# label significant points that are top-5 by p (down side) or leftmost FC
sig.inx <- imp.inx;
p.topInx <- GetTopInx(vcn$p.log, 5, T) & (vcn$inx.down);
fc.leftInx <- GetTopInx(vcn$fc.log, 5, F);
lblInx <- sig.inx & (p.topInx | fc.leftInx);
if(sum(lblInx, na.rm=T) > 0){
text.lbls<-substr(colnames(dataSet$norm)[lblInx],1,14) # some names may be too long
text(vcn$fc.log[lblInx], vcn$p.log[lblInx],labels=text.lbls, pos=2, col="blue", srt=-30, xpd=T, cex=0.8);
}
# and top-5 by p (up side) or rightmost FC
p.topInx <- GetTopInx(vcn$p.log, 5, T) & (vcn$inx.up);
fc.rtInx <- GetTopInx(vcn$fc.log, 5, T);
lblInx <- sig.inx & (p.topInx | fc.rtInx);
if(sum(lblInx, na.rm=T) > 0){
text.lbls<-substr(colnames(dataSet$norm)[lblInx],1,14) # some names may be too long
text(vcn$fc.log[lblInx], vcn$p.log[lblInx],labels=text.lbls, pos=4, col="blue", srt=30, xpd=T, cex=0.8);
}
}
# dotted cutoff lines for FC (vertical) and p (horizontal)
abline (v = vcn$max.xthresh, lty=3);
abline (v = vcn$min.xthresh, lty=3);
abline (h = vcn$thresh.y, lty=3);
axis(4); # added by Beomsoo
#dev.off();
}
# (x, y) coordinates of NON-significant volcano points for the client view.
GetVolcanoDnMat <- function(){
  vcn <- analSet$volcano;
  sig <- (vcn$inx.up | vcn$inx.down) & vcn$inx.p;
  blue.inx <- which(!sig);
  # jittered x values so tied fold changes remain distinguishable
  xs <- vcn$fc.log.uniq[blue.inx];
  ys <- vcn$p.log[blue.inx];
  as.matrix(cbind(xs, ys));
}
# (x, y) coordinates of significant volcano points for the client view.
GetVolcanoUpMat <- function(){
  vcn <- analSet$volcano;
  sig <- (vcn$inx.up | vcn$inx.down) & vcn$inx.p;
  red.inx <- which(sig);
  # jittered x values so tied fold changes remain distinguishable
  xs <- vcn$fc.log.uniq[red.inx];
  ys <- vcn$p.log[red.inx];
  as.matrix(cbind(xs, ys));
}
# Endpoints of the left (negative FC) vertical threshold line.
GetVolcanoVlMat <- function(){
  vcn <- analSet$volcano;
  limy <- GetExtendRange(vcn$fc.log);
  as.matrix(rbind(c(vcn$min.xthresh, limy[1]),
                  c(vcn$min.xthresh, limy[2])));
}
# Endpoints of the right (positive FC) vertical threshold line.
GetVolcanoVrMat <- function(){
  vcn <- analSet$volcano;
  limy <- GetExtendRange(vcn$fc.log);
  as.matrix(rbind(c(vcn$max.xthresh, limy[1]),
                  c(vcn$max.xthresh, limy[2])));
}
# Endpoints of the horizontal p-value threshold line.
GetVolcanoHlMat <- function(){
  vcn <- analSet$volcano;
  limx <- GetExtendRange(vcn$fc.log);
  as.matrix(rbind(c(limx[1], vcn$thresh.y),
                  c(limx[2], vcn$thresh.y)));
}
# Min/max of the jittered log2 FC values (client x-axis range).
GetVolcanoRangeX <- function(){
  return(range(analSet$volcano$fc.log.uniq));
}
# Compound names in the order of the stored log2 FC vector.
GetVolcanoCmpds <- function(){
  return(names(analSet$volcano$fc.log));
}
# Jittered x positions, aligned with GetVolcanoCmpds().
GetVolcanoCmpdInxs <- function(){
  return(analSet$volcano$fc.log.uniq);
}
# get indices of top n largest/smallest number
# Logical mask marking the n largest (dec=TRUE) or smallest (dec=FALSE)
# entries of vec.
#
# Fix: n is clamped to length(vec). The original indexed order(vec)[1:n]
# unconditionally, so when n exceeded the vector length the NA subscripts
# crashed the subsequent assignment ("NAs are not allowed in subscripted
# assignments").
GetTopInx <- function(vec, n, dec=T){
  n <- min(n, length(vec));
  hits <- rep(FALSE, length.out=length(vec));
  if(n > 0){
    hits[order(vec, decreasing = dec)[seq_len(n)]] <- TRUE;
  }
  return(hits);
}
# Emit the volcano significant-feature table via the shared report writer.
GetSigTable.Volcano <- function(){
  sig <- analSet$volcano$sig.mat;
  GetSigTable(sig, "volcano plot");
}
# Volcano significance matrix, cleaned of non-finite values for display.
GetVolcanoSigMat <- function(){
  sig <- analSet$volcano$sig.mat;
  CleanNumber(sig);
}
# Row (feature) names of the volcano significance matrix.
GetVolcanoSigRowNames <- function(){
  return(rownames(analSet$volcano$sig.mat));
}
# Column names of the volcano significance matrix.
GetVolcanoSigColNames <- function(){
  return(colnames(analSet$volcano$sig.mat));
}
# Report (as the lowercase strings the client expects) whether the volcano
# significance matrix contains any non-finite values.
ContainInfiniteVolcano <- function(){
  has.bad <- any(!is.finite(analSet$volcano$sig.mat));
  if(has.bad){
    return("true");
  }
  return("false");
}
#################################################################
################ One-way ANOVA ##################################
#################################################################
# perform anova and only return p values and MSres (for Fisher's LSD)
# Fit a one-way ANOVA of a single feature vector against the class factor
# (defaults to the global class labels).
aof <- function(x, cls = dataSet$cls) {
  return(aov(x ~ cls));
}
# perform Kruskal Wallis Test
# Kruskal-Wallis rank-sum test of a single feature vector against the class
# factor (defaults to the global class labels).
kwtest <- function(x, cls = dataSet$cls) {
  return(kruskal.test(x ~ cls));
}
# Fisher's LSD post-hoc test (agricolae::LSD.test) on the "cls" term of a
# fitted aov object, using thresh as alpha.
FisherLSD <- function(aov.obj, thresh){
  return(LSD.test(aov.obj, "cls", alpha=thresh));
}
# return only the signicant comparison names
# Collapse a TukeyHSD result into a "; "-separated string of the pairwise
# comparisons whose adjusted p-value passes the cutoff.
parseTukey <- function(tukey, cut.off){
  hits <- tukey$cls[, "p adj"] <= cut.off;
  paste(rownames(tukey$cls)[hits], collapse="; ");
}
# return only the signicant comparison names
# Collapse a Fisher's LSD comparison table into a "; "-separated string of
# the comparisons whose p-value passes the cutoff.
parseFisher <- function(fisher, cut.off){
  hits <- fisher[, "pvalue"] <= cut.off;
  paste(rownames(fisher)[hits], collapse="; ");
}
# One-way ANOVA (or Kruskal-Wallis when nonpar=TRUE) across all features of
# dataSet$norm, with post-hoc comparisons (Tukey's HSD or Fisher's LSD) for
# the significant features. Writes "anova_posthoc.csv" and stores the result
# in the global analSet$aov. Returns 1 on completion.
#
# nonpar   - TRUE for Kruskal-Wallis (no post-hoc column values)
# thresh   - raw p-value cutoff; if nothing passes, it is relaxed to the
#            top 20% of features (capped at 50) so some output exists
# post.hoc - "tukey" or anything else for Fisher's LSD
ANOVA.Anal<-function(nonpar=F, thresh=0.05, post.hoc="fisher"){
if(nonpar){
aov.nm <- "Kruskal Wallis Test";
anova.res<-apply(as.matrix(dataSet$norm), 2, kwtest);
#extract all p values
res <- unlist(lapply(anova.res, function(x) {c(x$statistic, x$p.value)}));
res <- data.frame(matrix(res, nrow=length(anova.res), byrow=T), stringsAsFactors=FALSE);
fstat <- res[,1];
p.value <- res[,2];
names(fstat) <- names(p.value)<-colnames(dataSet$norm);
fdr.p <- p.adjust(p.value, "fdr");
inx.imp <- p.value <= thresh;
if(sum(inx.imp) == 0){ # no sig features!
# fall back: keep the top 20% (at most 50) features by rank
# NOTE(review): which(rank(p.value) == cutpt) can be empty under rank
# ties — confirm this path with tied p-values
cutpt <- round(0.2*length(p.value));
cutpt <- ifelse(cutpt>50, 50, cutpt);
inx <- which(rank(p.value) == cutpt);
thresh <- p.value[inx];
inx.imp <- p.value <= thresh;
}
sig.f <- fstat[inx.imp];
sig.p <- p.value[inx.imp];
fdr.p <- fdr.p[inx.imp];
# last column is a placeholder: no post-hoc for the non-parametric test
sig.mat <- data.frame(signif(sig.f,5), signif(sig.p,5), signif(-log10(sig.p),5), signif(fdr.p,5), 'NA');
rownames(sig.mat) <- names(sig.p);
colnames(sig.mat) <- c("chi.squared", "p.value", "-log10(p)", "FDR", "Post-Hoc");
# order the result simultaneously
ord.inx <- order(sig.p, decreasing = FALSE);
sig.mat <- sig.mat[ord.inx,];
fileName <- "anova_posthoc.csv";
my.mat <- sig.mat[,1:4];
colnames(my.mat) <- c("chi_squared", "pval_KW", "-log10(p)", "FDR");
write.csv(my.mat,file=fileName);
}else{
aov.nm <- "One-way ANOVA";
aov.res<-apply(as.matrix(dataSet$norm), 2, aof);
anova.res<-lapply(aov.res, anova);
#extract all p values
res<-unlist(lapply(anova.res, function(x) { c(x["F value"][1,], x["Pr(>F)"][1,])}));
res <- data.frame(matrix(res, nrow=length(aov.res), byrow=T), stringsAsFactors=FALSE);
fstat <- res[,1];
p.value <- res[,2];
names(fstat) <- names(p.value)<-colnames(dataSet$norm);
fdr.p <- p.adjust(p.value, "fdr");
# do post-hoc only for signficant entries
inx.imp <- p.value <= thresh;
if(sum(inx.imp) == 0){ # no sig features with default thresh
# readjust threshold to top 20% or top 50
cutpt <- round(0.2*length(p.value));
cutpt <- ifelse(cutpt>50, 50, cutpt);
inx <- which(rank(p.value) == cutpt);
thresh <- p.value[inx];
inx.imp <- p.value <= thresh;
}
aov.imp <- aov.res[inx.imp];
sig.f <- fstat[inx.imp];
sig.p <- p.value[inx.imp];
fdr.p <- fdr.p[inx.imp];
cmp.res <- NULL;
post.nm <- NULL;
if(post.hoc=="tukey"){
tukey.res<-lapply(aov.imp, TukeyHSD, conf.level=1-thresh);
cmp.res <- unlist(lapply(tukey.res, parseTukey, cut.off=thresh));
post.nm = "Tukey's HSD";
}else{
fisher.res<-lapply(aov.imp, FisherLSD, thresh);
cmp.res <- unlist(lapply(fisher.res, parseFisher, cut.off=thresh));
post.nm = "Fisher's LSD";
}
# create the result dataframe,
# note, the last column is string, not double
sig.mat <- data.frame(signif(sig.f,5), signif(sig.p,5), signif(-log10(sig.p),5), signif(fdr.p,5), cmp.res);
rownames(sig.mat) <- names(sig.p);
colnames(sig.mat) <- c("f.value", "p.value", "-log10(p)", "FDR", post.nm);
# order the result simultaneously
ord.inx <- order(sig.p, decreasing = FALSE);
sig.mat <- sig.mat[ord.inx,];
fileName <- "anova_posthoc.csv";
write.csv(sig.mat,file=fileName);
}
# local `aov` list shadows stats::aov here; intentional, scoped to this call
aov<-list (
aov.nm = aov.nm,
raw.thresh = thresh,
thresh = -log10(thresh), # only used for plot threshold line
p.value = p.value,
p.log = -log10(p.value),
inx.imp = inx.imp,
post.hoc = post.hoc,
sig.mat = sig.mat
);
analSet$aov<<-aov;
return(1);
}
# Scatter plot of per-feature -log10(p) from the ANOVA/KW analysis,
# significant features in red, others in green, with the threshold line.
# width==0 is the report mode that records the file name in imgSet$anova.
PlotANOVA <- function(imgName, format="png", dpi=72, width=NA){
  log.p <- analSet$aov$p.log;
  imgName <- paste(imgName, "dpi", dpi, ".", format, sep="");
  if(is.na(width)){
    w <- 9;
  }else if(width == 0){
    w <- 7;
    imgSet$anova <<- imgName;
  }else{
    w <- width;
  }
  h <- w * 6/9;
  Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
  plot(log.p, ylab="-log10(p)", xlab = GetVariableLabel(), main=analSet$aov$aov.nm, type="n");
  red.inx <- which(analSet$aov$inx.imp);
  blue.inx <- which(!analSet$aov$inx.imp);
  points(red.inx, log.p[red.inx], bg="red", cex=1.2, pch=21);
  points(blue.inx, log.p[blue.inx], bg="green", pch=21);
  abline(h=analSet$aov$thresh, lty=3);
  #dev.off();
}
# Numeric part (first four columns) of the ANOVA table, cleaned for display.
GetAovSigMat <- function(){
  num.part <- as.matrix(analSet$aov$sig.mat[, 1:4]);
  return(CleanNumber(num.part));
}
# Row (feature) names of the ANOVA significance table.
GetAovSigRowNames <- function(){
  return(rownames(analSet$aov$sig.mat));
}
# Column names of the numeric part of the ANOVA significance table.
GetAovSigColNames <- function(){
  return(colnames(analSet$aov$sig.mat[, 1:4]));
}
# The post-hoc comparison strings (fifth column of the ANOVA table).
GetAovPostHocSig <- function(){
  return(analSet$aov$sig.mat[, 5]);
}
# Emit the ANOVA significant-feature table via the shared report writer.
GetSigTable.Anova <- function(){
  sig <- analSet$aov$sig.mat;
  GetSigTable(sig, "One-way ANOVA and post-hoc analysis");
}
# (index, -log10 p) coordinates of significant features for the ANOVA plot.
GetAnovaUpMat <- function(){
  aov.res <- analSet$aov;
  red.inx <- which(aov.res$inx.imp);
  as.matrix(cbind(red.inx, aov.res$p.log[red.inx]));
}
# (index, -log10 p) coordinates of non-significant features.
GetAnovaDnMat <- function(){
  aov.res <- analSet$aov;
  blue.inx <- which(!aov.res$inx.imp);
  as.matrix(cbind(blue.inx, aov.res$p.log[blue.inx]));
}
# Two endpoints of the horizontal significance-threshold line.
GetAnovaLnMat <- function(){
  aov.res <- analSet$aov;
  n <- length(aov.res$p.log);
  as.matrix(rbind(c(0, aov.res$thresh), c(n + 1, aov.res$thresh)));
}
# Compound names in the order of the stored -log10(p) vector.
GetAnovaCmpds <- function(){
  return(names(analSet$aov$p.log));
}
# Position of the most significant feature (largest -log10 p).
GetMaxAnovaInx <- function(){
  return(which.max(analSet$aov$p.log));
}
# Small per-compound boxplot (normalized values by class) for the detail
# view; returns the generated image file name.
PlotCmpdView <- function(cmpdNm, format="png", dpi=72, width=NA){
  # "/" in compound names would break file paths, so replace it
  safeNm <- gsub("\\/", "_", cmpdNm);
  imgName <- paste(safeNm, "_dpi", dpi, ".", format, sep="");
  Cairo(file = imgName, dpi=dpi, width=240, height=240, type=format, bg="transparent");
  par(mar=c(4,3,1,2), oma=c(0,0,1,0));
  boxplot(dataSet$norm[, cmpdNm] ~ dataSet$cls, las=2, col= unique(GetColorSchema()));
  title(main=cmpdNm, out=T);
  #dev.off();
  return(imgName);
}
# change to use dataSet$proc instead of dataSet$orig in
# case of too many NAs
# Per-compound summary figure; layout depends on the experimental design:
# - standard two-group data: barplot of original-scale group means +/- SD
#   next to a boxplot of normalized values
# - "time0" design: delegates to plotProfile()
# - other time-series / two-factor designs: one boxplot-with-jitter panel
#   per level of the outer factor
# Returns the generated image file name. Uses dataSet$proc (not $orig) to
# tolerate missing values (see comment above the function).
PlotCmpd<-function(cmpdNm, format="png", dpi=72, width=NA){
# "/" in compound names would break file paths
imgName <- gsub("\\/", "_", cmpdNm);
imgName <- paste(imgName, "_dpi", dpi, ".", format, sep="");
if(is.na(width)){
w <- 9;
}else{
w <- width;
}
if(substring(dataSet$format,4,5)!="ts"){
# non-time-series: side-by-side original vs normalized panels
Cairo(file = imgName, unit="in", dpi=dpi, width=w, height= w*5/9, type=format, bg="white");
par(mar=c(4,4,2,2), mfrow = c(1,2), oma=c(0,0,2,0));
mns <- by(as.numeric(dataSet$proc[, cmpdNm]), dataSet$proc.cls, mean, na.rm=T);
sds <- by(as.numeric(dataSet$proc[, cmpdNm]), dataSet$proc.cls, sd, na.rm=T);
ups <- mns + sds;
dns <- mns - sds;
# all concentration need start from 0
y <- c(0, dns, mns, ups);
rg <- range(y) + 0.05 * diff(range(y)) * c(-1, 1)
pt <- pretty(y)
# yaxp spec: min tick, max tick within range, number of intervals
axp=c(min(pt), max(pt[pt <= max(rg)]),length(pt[pt <= max(rg)]) - 1);
# ymk <- pretty(c(0,ymax));
x <- barplot(mns, col= unique(GetColorSchema()), las=2, yaxp=axp, ylim=range(pt));
# error bars at mean +/- SD
arrows(x, dns, x, ups, code=3, angle=90, length=.1);
# NOTE(review): axis() documents `col.ticks`; `col.tick` here is passed
# through "..." — confirm it has the intended effect
axis(1, at=x, col="white", col.tick="black", labels=F);
box();
mtext("Original Conc.", line=1);
boxplot(dataSet$norm[, cmpdNm]~dataSet$cls,las=2, col= unique(GetColorSchema()));
mtext("Normalized Conc.", line=1);
title(main=cmpdNm, out=T);
#dev.off();
}else if(dataSet$design.type =="time0"){
Cairo(file = imgName, unit="in", dpi=dpi, width=8, height= 6, type=format, bg="white");
plotProfile(cmpdNm);
#dev.off();
}else{
if(dataSet$design.type =="time"){ # time trend within phenotype
out.fac <- dataSet$exp.fac;
in.fac <- dataSet$time.fac;
xlab="Time";
}else{ # factor a split within factor b
out.fac <- dataSet$facB;
in.fac <- dataSet$facA;
xlab=dataSet$facA.lbl;
}
# two images per row
img.num <- length(levels(out.fac));
row.num <- ceiling(img.num/2)
if(row.num == 1){
h <- w*5/9;
}else{
h <- w*0.5*row.num;
}
Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
par(mar=c(3,4,4,2), mfrow=c(row.num, 2));
# make sure all at the same range
ylim.ext <- GetExtendRange (dataSet$norm[, cmpdNm], 12);
for(lv in levels(out.fac)){
inx <- out.fac == lv;
dat <- dataSet$norm[inx, cmpdNm];
cls <- in.fac[inx];
boxplot(dat ~ cls, col="#0000ff22", ylim=ylim.ext, outline=FALSE, boxwex=c(0.5, 0.5), xlab=xlab, ylab="Abundance", main=lv);
stripchart(dat ~ cls, method = "jitter", ylim=ylim.ext, vertical=T, add = T, pch=19, cex=0.7, names = c("",""));
}
#dev.off();
}
return(imgName);
}
############################################################################################
############################################################################################
############################################################################################
############################################################################################
##################################################
## R script for MetaboAnalyst
## Description: perform PCA/PLS-DA/OPLS-DA
##
## Author: Jeff Xia, jeff.xia@mcgill.ca
## McGill University, Canada
##
## License: GNU GPL (>= 2)
###################################################
############################
########### PCA #############
#############################
# perform PCA analysis
# Principal component analysis on the normalized table (centered, unscaled).
# Stores the prcomp result plus per-PC standard deviation, variance
# explained, and cumulative variance in the global analSet$pca, and writes
# score/loading CSV files.
PCA.Anal <- function(){
  pca <- prcomp(dataSet$norm, center=T, scale=F);
  # importance rows: 1 = standard deviation, 2 = proportion of variance,
  # 3 = cumulative proportion
  imp <- summary(pca)$importance;
  analSet$pca <<- append(pca, list(std=imp[1,], variance=imp[2,], cum.var=imp[3,]));
  write.csv(signif(analSet$pca$x,5), file="pca_score.csv");
  write.csv(signif(analSet$pca$rotation,5), file="pca_loadings.csv");
}
# format: png, tiff, pdf, ps, svg
# Pairwise scatter matrix of the first pc.num principal components, each
# axis labeled with its variance explained. width==0 records the file name
# in imgSet$pca.pair for the report.
PlotPCAPairSummary <- function(imgName, format="png", dpi=72, width=NA, pc.num){
  axis.lbls <- paste("PC", 1:pc.num, "\n", round(100*analSet$pca$variance[1:pc.num],1), "%");
  imgName <- paste(imgName, "dpi", dpi, ".", format, sep="");
  if(is.na(width)){
    w <- 10;
  }else if(width == 0){
    w <- 8;
    imgSet$pca.pair <<- imgName;
  }else{
    w <- width;
  }
  Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=w, type=format, bg="white");
  if(dataSet$cls.type == "disc"){
    # discrete classes: color and shape by group
    pairs(analSet$pca$x[,1:pc.num], col=GetColorSchema(), pch=as.numeric(dataSet$cls)+1, labels=axis.lbls);
  }else{
    pairs(analSet$pca$x[,1:pc.num], labels=axis.lbls);
  }
  #dev.off();
}
# scree plot
# Scree plot: per-PC variance (blue) and cumulative variance (green) for
# the first scree.num components, with percentage labels on each point.
# width==0 records the file name in imgSet$pca.scree for the report.
PlotPCAScree <- function(imgName, format="png", dpi=72, width=NA, scree.num){
  stds <- analSet$pca$std[1:scree.num]; # retained from original; not plotted
  pc.var <- analSet$pca$variance[1:scree.num];
  cum.var <- analSet$pca$cum.var[1:scree.num];
  # pad the y range by 10%, clamped to [0, 1]
  rg <- range(c(pc.var, cum.var));
  pad <- (rg[2] - rg[1]) / 10
  miny <- ifelse(rg[1] - pad > 0, rg[1] - pad, 0);
  maxy <- ifelse(rg[2] + pad > 1, 1.0, rg[2] + pad);
  imgName <- paste(imgName, "dpi", dpi, ".", format, sep="");
  if(is.na(width)){
    w <- 10;
  }else if(width == 0){
    w <- 8;
    imgSet$pca.scree <<- imgName;
  }else{
    w <- width;
  }
  h <- w * 2/3;
  Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
  par(mar=c(5,5,6,3));
  plot(pc.var, type='l', col='blue', main='Scree plot', xlab='PC index', ylab='Variance explained', ylim=c(miny, maxy), axes=F)
  text(pc.var, labels =paste(100*round(pc.var,3),'%'), adj=c(-0.3, -0.5), srt=45, xpd=T)
  points(pc.var, col='red');
  lines(cum.var, type='l', col='green')
  text(cum.var, labels =paste(100*round(cum.var,3),'%'), adj=c(-0.3, -0.5), srt=45, xpd=T)
  points(cum.var, col='red');
  abline(v=1:scree.num, lty=3);
  axis(2);
  axis(1, 1:length(pc.var), 1:length(pc.var));
  #dev.off();
}
# 2D score plot
# 2D PCA score plot for components pcx vs pcy, with per-class confidence
# ellipses (level = reg), optional sample labels (show==1), and an optional
# grey-scale mode for print. width==0 records the file name in
# imgSet$pca.score2d for the report.
PlotPCA2DScore <- function(imgName, format="png", dpi=72, width=NA, pcx, pcy, reg = 0.95, show=1, grey.scale = 0){
xlabel = paste("PC",pcx, "(", round(100*analSet$pca$variance[pcx],1), "%)");
ylabel = paste("PC",pcy, "(", round(100*analSet$pca$variance[pcy],1), "%)");
pc1 = analSet$pca$x[, pcx];
pc2 = analSet$pca$x[, pcy];
text.lbls<-substr(names(pc1),1,14) # some names may be too long
imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
if(is.na(width)){
w <- 9;
}else if(width == 0){
imgSet$pca.score2d<<-imgName;
w <- 7.2;
}else{
w <- width;
}
h <- w;
Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
suppressMessages(require('ellipse'));
op<-par(mar=c(5,5,3,3));
if(dataSet$cls.type == "disc"){
# obtain ellipse points to the scatter plot for each category
# (100 boundary points per group, from the group covariance and mean)
lvs <- levels(dataSet$cls);
pts.array <- array(0, dim=c(100,2,length(lvs)));
for(i in 1:length(lvs)){
inx <-dataSet$cls == lvs[i];
groupVar<-var(cbind(pc1[inx],pc2[inx]), na.rm=T);
groupMean<-cbind(mean(pc1[inx], na.rm=T),mean(pc2[inx], na.rm=T));
pts.array[,,i] <- ellipse(groupVar, centre = groupMean, level = reg, npoints=100);
}
# axis limits extended so the ellipses fit with some margin
xrg <- range (pc1, pts.array[,1,]);
yrg <- range (pc2, pts.array[,2,]);
x.ext<-(xrg[2]-xrg[1])/12;
y.ext<-(yrg[2]-yrg[1])/12;
xlims<-c(xrg[1]-x.ext, xrg[2]+x.ext);
ylims<-c(yrg[1]-y.ext, yrg[2]+y.ext);
cols <- GetColorSchema(grey.scale==1);
uniq.cols <- unique(cols);
# NOTE(review): `color=` is not a plot.default argument (should be
# `col=`); harmless here since type='n' draws nothing — confirm
plot(pc1, pc2, xlab=xlabel, xlim=xlims, ylim=ylims, ylab=ylabel, type='n', main="Scores Plot",
color=cols, pch=as.numeric(dataSet$cls)+1); ## added
grid(col = "lightgray", lty = "dotted", lwd = 1);
# make sure name and number of the same order DO NOT USE levels, which may be different
legend.nm <- unique(as.character(dataSet$cls));
## uniq.cols <- unique(cols);
## BHAN: when same color is choosen; it makes an error
if ( length(uniq.cols) > 1 ) {
names(uniq.cols) <- legend.nm;
}
# draw ellipse
for(i in 1:length(lvs)){
if (length(uniq.cols) > 1) {
polygon(pts.array[,,i], col=adjustcolor(uniq.cols[lvs[i]], alpha=0.25), border=NA);
} else {
polygon(pts.array[,,i], col=adjustcolor(uniq.cols, alpha=0.25), border=NA);
}
if(grey.scale) {
lines(pts.array[,,i], col=adjustcolor("black", alpha=0.5), lty=2);
}
}
pchs <- GetShapeSchema(show, grey.scale);
if(grey.scale) {
cols <- rep("black", length(cols));
}
if(show == 1){
# labeled points
text(pc1, pc2, label=text.lbls, pos=4, xpd=T, cex=0.75);
points(pc1, pc2, pch=pchs, col=cols);
}else{
if(length(uniq.cols) == 1){
points(pc1, pc2, pch=pchs, col=cols, cex=1.0);
}else{
# shapeVec is an optional global set elsewhere in the app
if(grey.scale == 1 | (exists("shapeVec") && all(shapeVec>0))){
points(pc1, pc2, pch=pchs, col=cols, cex=1.8);
}else{
points(pc1, pc2, pch=21, bg=cols, cex=2);
}
}
}
uniq.pchs <- unique(pchs);
if(grey.scale) {
uniq.cols <- "black";
}
legend("topright", legend = legend.nm, pch=uniq.pchs, col=uniq.cols);
}else{
# continuous outcome: plain labeled scatter, no ellipses
plot(pc1, pc2, xlab=xlabel, ylab=ylabel, type='n', main="Scores Plot");
points(pc1, pc2, pch=15, col="magenta");
text(pc1, pc2, label=text.lbls, pos=4, col ="blue", xpd=T, cex=0.8);
}
par(op);
#dev.off();
}
# Axis limits captured (via par("usr")) by PlotPCALoading.
GetPCALoadAxesSpec <- function(){
  return(pca.axis.lims);
}
# Compound names, aligned with the jittered loading x positions.
GetPCALoadCmpds <- function(){
  return(names(analSet$pca$load.x.uniq));
}
# Jittered loading x positions, keyed to GetPCALoadCmpds().
GetPCALoadCmpdInxs <- function(){
  return(analSet$pca$load.x.uniq);
}
# (jittered x, second-axis loading) pairs for the interactive loading view.
GetPCALoadMat <- function(){
  pca <- analSet$pca;
  as.matrix(cbind(pca$load.x.uniq, pca$imp.loads[,2]));
}
# plot PCA loadings and also set up the matrix for display
# Plot PCA loadings for components inx1/inx2 as either a scatter plot or a
# pair of barplots, and cache the loading matrix plus jittered x positions
# in the global analSet$pca for the interactive getters above. In scatter
# mode it also records the axis limits in the global pca.axis.lims.
# lbl.feat > 0 labels each feature in the scatter plot.
PlotPCALoading<-function(imgName, format="png", dpi=72, width=NA, inx1, inx2, plotType, lbl.feat=1){
loadings<-signif(as.matrix(cbind(analSet$pca$rotation[,inx1],analSet$pca$rotation[,inx2])),5);
ldName1<-paste("Loadings", inx1);
ldName2<-paste("Loadings", inx2);
colnames(loadings)<-c(ldName1, ldName2);
# jitter x to break ties for the client-side display
load.x.uniq <- jitter(loadings[,1]);
names(load.x.uniq) <- rownames(loadings);
analSet$pca$load.x.uniq <<- load.x.uniq;
analSet$pca$imp.loads<<-loadings; # set up the loading matrix
imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
if(is.na(width)){
w <- 9;
}else if(width == 0){
w <- 7.2;
imgSet$pca.loading<<-imgName;
}else{
w <- width;
}
h <- w;
Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
if(plotType=="scatter"){
par(mar=c(6,5,2,6));
plot(loadings[,1],loadings[,2], las=2, xlab=ldName1, ylab=ldName2);
# capture current axis limits for GetPCALoadAxesSpec (must follow plot())
pca.axis.lims <<- par("usr"); # x1, x2, y1 ,y2
grid(col = "lightgray", lty = "dotted", lwd = 1);
points(loadings[,1],loadings[,2], pch=19, col="magenta");
if(lbl.feat > 0){
text(loadings[,1],loadings[,2], labels=substr(rownames(loadings), 1, 12), pos=4, col="blue", xpd=T);
}
}else{ # barplot
# top panel (no names) for axis 1, taller bottom panel with names for axis 2
layout(matrix(c(1,1,2,2,2), nrow=5, byrow=T), respect = FALSE)
cmpd.nms <- substr(rownames(loadings), 1, 14);
hlims <- c(min(loadings[,1], loadings[,2]), max(loadings[,1], loadings[,2]));
par(mar=c(1,4,4,1));
barplot(loadings[,1], names.arg=NA, las=2, ylim=hlims, main =ldName1);
par(mar=c(10,4,3,1));
barplot(loadings[,2], names.arg=cmpd.nms, las=2, cex.names=1.0, ylim=hlims, main =ldName2);
}
#dev.off();
}
# Biplot, set xpd = T to plot outside margin
# Draws a PCA biplot of components inx1/inx2: sample scores scaled down by
# lam = sdev * sqrt(n) and variable loadings scaled up by the same factor
# (standard biplot scaling).
# Side effects: writes the image via Cairo; when width == 0 records the
# image name in the global imgSet.
PlotPCABiplot<-function(imgName, format="png", dpi=72, width=NA, inx1, inx2){
choices = c(inx1, inx2);
scores<-analSet$pca$x;
lam <- analSet$pca$sdev[choices]
n <- NROW(scores)
lam <- lam * sqrt(n);
imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
if(is.na(width)){
w <- 9;
}else if(width == 0){
w <- 7.2;
imgSet$pca.biplot<<-imgName;
}else{
w <- width;
}
h <- w;
Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
# t(t(m)/v) divides each column of m by the matching element of v (column-wise scaling)
biplot(t(t(scores[, choices]) / lam), t(t(analSet$pca$rotation[, choices]) * lam), xpd =T, cex=0.9);
#dev.off();
}
# Maximum number of PCA components available for plotting:
# capped at 9, and never more than (samples - 1) or the number of variables.
GetMaxPCAComp<-function(){
n.samples <- dim(dataSet$norm)[1];
n.vars <- dim(dataSet$norm)[2];
return(min(9, n.samples - 1, n.vars));
}
###############################
########### PLS-DA #############
################################
# pls analysis using oscorespls so that VIP can be calculated
# note: the VIP is calculated only after PLSDA-CV is performed
# to determine the best # of comp. used for VIP
# Fits a PLS regression of the scaled numeric-coded class on the normalized
# data (up to 8 components) and stores the model in the global analSet$plsr.
# Side effects: writes plsda_score.csv and plsda_loadings.csv to the working directory.
PLSR.Anal<-function(){
comp.num <- dim(dataSet$norm)[1]-1; # at most n - 1 components
if(comp.num > 8) {
comp.num <- 8;
}
suppressMessages(require('pls'));
# note, standardize the cls, to minimize the impact of categorical to numerical impact
cls<-scale(as.numeric(dataSet$cls))[,1];
datmat<-as.matrix(dataSet$norm);
analSet$plsr<<-plsr(cls~datmat,method='oscorespls', ncomp=comp.num);
write.csv(signif(analSet$plsr$scores,5), row.names=rownames(dataSet$norm), file="plsda_score.csv");
write.csv(signif(analSet$plsr$loadings,5), file="plsda_loadings.csv");
}
# plot pairwise summary
# Draws a pairs() matrix of the first pc.num PLS components; each panel label
# shows the component number and its percentage of explained X-variance.
# Side effects: writes the image via Cairo; when width == 0 records the image
# name in the global imgSet.
PlotPLSPairSummary<-function(imgName, format="png", dpi=72, width=NA, pc.num){
imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
if(is.na(width)){
w <- 9;
}else if(width == 0){
w <- 7.2;
imgSet$pls.pair <<- imgName;
}else{
w <- width;
}
h <- w;
Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
pclabels <- paste("Component", 1:pc.num, "\n", round(100*analSet$plsr$Xvar[1:pc.num]/analSet$plsr$Xtotvar,1), "%");
# pairs(analSet$plsr$scores[,1:pc.num], col=as.numeric(dataSet$cls)+1, pch=as.numeric(dataSet$cls)+1, labels=pclabels)
pairs(analSet$plsr$scores[,1:pc.num], col=GetColorSchema(), pch=as.numeric(dataSet$cls)+1, labels=pclabels)
#dev.off();
}
# score plot
# 2D PLS-DA score plot for components inx1/inx2 with per-group confidence
# ellipses (level = reg), optional sample labels (show == 1) and an optional
# grey-scale rendering (grey.scale == 1) for publication.
# Side effects: writes the image via Cairo; when width == 0 records the image
# name in the global imgSet.
PlotPLS2DScore<-function(imgName, format="png", dpi=72, width=NA, inx1, inx2, reg=0.95, show=1, grey.scale=0){
suppressMessages(require('ellipse'));
imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
if(is.na(width)){
w <- 9;
}else if(width == 0){
w <- 7.2;
imgSet$pls.score2d<<-imgName;
}else{
w <- width;
}
h <- w;
Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
par(mar=c(5,5,3,3));
lv1 <- analSet$plsr$scores[,inx1];
lv2 <- analSet$plsr$scores[,inx2];
xlabel <- paste("Component", inx1, "(", round(100*analSet$plsr$Xvar[inx1]/analSet$plsr$Xtotvar,1), "%)");
ylabel <- paste("Component", inx2, "(", round(100*analSet$plsr$Xvar[inx2]/analSet$plsr$Xtotvar,1), "%)");
text.lbls<-substr(rownames(dataSet$norm),1,12) # some names may be too long
# obtain ellipse points to the scatter plot for each category
lvs <- levels(dataSet$cls);
pts.array <- array(0, dim=c(100,2,length(lvs)));
for(i in 1:length(lvs)){
inx <-dataSet$cls == lvs[i];
groupVar<-var(cbind(lv1[inx],lv2[inx]), na.rm=T);
groupMean<-cbind(mean(lv1[inx], na.rm=T),mean(lv2[inx], na.rm=T));
pts.array[,,i] <- ellipse(groupVar, centre = groupMean, level = reg, npoints=100);
}
# expand the axis range so the ellipses are fully visible
xrg <- range (lv1, pts.array[,1,]);
yrg <- range (lv2, pts.array[,2,]);
x.ext<-(xrg[2]-xrg[1])/12;
y.ext<-(yrg[2]-yrg[1])/12;
xlims<-c(xrg[1]-x.ext, xrg[2]+x.ext);
ylims<-c(yrg[1]-y.ext, yrg[2]+y.ext);
## cols = as.numeric(dataSet$cls)+1;
cols <- GetColorSchema(grey.scale==1);
uniq.cols <- unique(cols);
plot(lv1, lv2, xlab=xlabel, xlim=xlims, ylim=ylims, ylab=ylabel, type='n', main="Scores Plot");
grid(col = "lightgray", lty = "dotted", lwd = 1);
# make sure name and number of the same order DO NOT USE levels, which may be different
legend.nm <- unique(as.character(dataSet$cls));
## uniq.cols <- unique(cols);
## BHAN: when same color is choosen for black/white; it makes an error
# names(uniq.cols) <- legend.nm;
if ( length(uniq.cols) > 1 ) {
names(uniq.cols) <- legend.nm;
}
# draw ellipse
for(i in 1:length(lvs)){
if ( length(uniq.cols) > 1) {
polygon(pts.array[,,i], col=adjustcolor(uniq.cols[lvs[i]], alpha=0.25), border=NA);
} else {
polygon(pts.array[,,i], col=adjustcolor(uniq.cols, alpha=0.25), border=NA);
}
if(grey.scale) {
lines(pts.array[,,i], col=adjustcolor("black", alpha=0.5), lty=2);
}
}
pchs <- GetShapeSchema(show, grey.scale);
if(grey.scale) {
cols <- rep("black", length(cols));
}
if(show==1){ # display sample name set on
text(lv1, lv2, label=text.lbls, pos=4, xpd=T, cex=0.75);
points(lv1, lv2, pch=pchs, col=cols);
}else{
if (length(uniq.cols) == 1) {
points(lv1, lv2, pch=pchs, col=cols, cex=1.0);
} else {
if(grey.scale == 1 | (exists("shapeVec") && all(shapeVec>0))){
points(lv1, lv2, pch=pchs, col=cols, cex=1.8);
}else{
points(lv1, lv2, pch=21, bg=cols, cex=2);
}
}
}
uniq.pchs <- unique(pchs);
if(grey.scale) {
uniq.cols <- "black";
}
legend("topright", legend = legend.nm, pch=uniq.pchs, col=uniq.cols);
#dev.off();
}
# Return the axis limits (usr: x1, x2, y1, y2) recorded by the last PLS loading scatter plot.
GetPLSLoadAxesSpec<-function(){
return(pls.axis.lims);
}
# Return the compound (feature) names associated with the cached PLS loading x-positions.
GetPLSLoadCmpds<- function(){
return(names(analSet$plsr$load.x.uniq));
}
# Return the jittered x-positions cached by PlotPLSLoading (named by compound).
GetPLSLoadCmpdInxs<-function(){
return(analSet$plsr$load.x.uniq);
}
# Return a 2-column matrix of jittered x-positions and the second loading column
# for the interactive PLS loading display.
GetPLSLoadMat <- function(){
mat <- cbind(analSet$plsr$load.x.uniq, analSet$plsr$imp.loads[,2]);
return(as.matrix(mat));
}
# plot loading plot, also set the loading matrix for display
# Renders the PLS loadings of components inx1/inx2 as a scatter plot or paired
# barplots, and caches the loading matrix (analSet$plsr$imp.loads) plus
# jittered x-positions (analSet$plsr$load.x.uniq) for the linked display table.
# lbl.feat > 0 adds feature-name labels in scatter mode.
# Side effects: writes the image via Cairo; when width == 0 records the image
# name in the global imgSet.
PlotPLSLoading<-function(imgName, format="png", dpi=72, width=NA, inx1, inx2, plotType, lbl.feat=1){
# named vector
load1<-analSet$plsr$loadings[,inx1];
load2<-analSet$plsr$loadings[,inx2];
loadings = signif(as.matrix(cbind(load1, load2)),5);
ldName1<-paste("Loadings", inx1);
ldName2<-paste("Loadings", inx2)
colnames(loadings)<-c(ldName1, ldName2);
# jitter so overlapping x values stay individually clickable/selectable
load.x.uniq <- jitter(loadings[,1]);
names(load.x.uniq) <- rownames(loadings);
analSet$plsr$load.x.uniq <<- load.x.uniq;
analSet$plsr$imp.loads<<-loadings; # set up loading matrix
imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
if(is.na(width)){
w <- 9;
}else if(width == 0){
w <- 7.2;
imgSet$pls.loading<<-imgName;
}else{
w <- width;
}
h <- w;
Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
if(plotType == "scatter"){
par(mar=c(6,4,4,5));
plot(loadings[,1],loadings[,2], las=2, xlab=ldName1, ylab=ldName2);
pls.axis.lims <<- par("usr"); # x1, x2, y1 ,y2 -- consumed by GetPLSLoadAxesSpec()
grid(col = "lightgray", lty = "dotted", lwd = 1);
points(loadings[,1],loadings[,2], pch=19, col="magenta");
if(lbl.feat > 0){
text(loadings[,1],loadings[,2], labels=substr(rownames(loadings), 1, 12), pos=4, col="blue", xpd=T);
}
}else{ # barplot: top panel loading 1, larger bottom panel loading 2 with names
cmpd.nms <- substr(rownames(loadings), 1, 14);
hlims <- c(min(loadings[,1], loadings[,2]), max(loadings[,1], loadings[,2]));
layout(matrix(c(1,1,2,2,2), nrow=5, byrow=T))
par(mar=c(1,4,4,1));
barplot(loadings[,1], names.arg=NA, las=2, ylim=hlims, main = ldName1);
par(mar=c(10,4,3,1));
barplot(loadings[,2], names.arg=cmpd.nms, cex.names=1.0, las=2, ylim=hlims, main = ldName2);
}
#dev.off();
}
# classification and feature selection via PLS-DA cross-validation.
# methodName: 'L' for leave-one-out CV, anything else for k-fold CV.
# compNum:    number of components to evaluate.
# choice:     criterion for picking the best component count ("Q2", "R2", else accuracy).
# Side effects: writes plsda_coef.csv and plsda_vip.csv; stores results
# (best.num, choice, coef.mat, vip.mat, fit.info) in the global analSet$plsda.
# Note: VIP is derived from analSet$plsr, so PLSR.Anal() must have run first.
PLSDA.CV<-function(methodName="T", compNum=GetDefaultPLSCVComp(), choice="Q2"){
# get classification accuracy using caret
suppressMessages(require('caret'));
cls<-as.numeric(dataSet$cls)-1;
datmat<-as.matrix(dataSet$norm);
plsda.cls <- train(dataSet$norm, dataSet$cls, "pls", trControl=trainControl(method=ifelse(methodName == 'L', "LOOCV", 'CV')), tuneLength=compNum);
# use the classical regression to get R2 and Q2 measure
plsda.reg <- plsr(cls~datmat,method ='oscorespls', ncomp=compNum, validation= ifelse(methodName == 'L', "LOO", 'CV'));
fit.info <- pls::R2(plsda.reg, estimate = "all")$val[,1,];
# combine accuracy, R2 and Q2
accu <- plsda.cls$results[,2]
all.info <- rbind(accu, fit.info[,-1]);
rownames(all.info) <- c("Accuracy", "R2", "Q2");
# default use best number determined by Q2.
# which.max picks the FIRST maximum; the previous which(x == max(x)) returned
# every tied index, so a tie produced a length > 1 best.num that broke
# downstream calls such as plsda(..., ncomp=best.num).
if(choice == 'Q2'){
best.num <- which.max(all.info[3,]);
}else if(choice == "R2"){
best.num <- which.max(all.info[2,]);
}else{
best.num <- which.max(all.info[1,]);
}
# get coef. table, this can be error when class is very unbalanced
coef.mat <- try(varImp(plsda.cls, scale=T)$importance);
# inherits() is the robust class test (class() may return a vector)
if(inherits(coef.mat, "try-error")) {
coef.mat <- NULL;
}else{
if(dataSet$cls.num > 2){ # add an average coef for multiple class
coef.mean<-apply(coef.mat, 1, mean);
coef.mat <- cbind(coef.mean = coef.mean, coef.mat);
}
# rearrange in decreasing order, keep as matrix, prevent dimension dropping if only 1 col
inx.ord<- order(coef.mat[,1], decreasing=T);
coef.mat <- data.matrix(coef.mat[inx.ord, ,drop=FALSE]);
write.csv(signif(coef.mat,5), file="plsda_coef.csv"); # added 27 Jan 2014
}
# calculate VIP http://mevik.net/work/software/VIP.R
pls<-analSet$plsr;
b <- c(pls$Yloadings)[1:compNum];
score.mat <- pls$scores[,1:compNum, drop = FALSE] # renamed from T: masking TRUE invites subtle bugs
SS <- b^2 * colSums(score.mat^2)
W <- pls$loading.weights[,1:compNum, drop = FALSE]
Wnorm2 <- colSums(W^2);
SSW <- sweep(W^2, 2, SS / Wnorm2, "*")
vips <- sqrt(nrow(SSW) * apply(SSW, 1, cumsum) / cumsum(SS));
if(compNum > 1){
vip.mat <- as.matrix(t(vips));
}else{
vip.mat <- as.matrix(vips);
}
colnames(vip.mat) <- paste("Comp.", 1:ncol(vip.mat));
write.csv(signif(vip.mat,5),file="plsda_vip.csv");
analSet$plsda<<-list(best.num=best.num, choice=choice, coef.mat=coef.mat, vip.mat=vip.mat, fit.info=all.info);
return(1);
}
# perform permutation, using training classification accuracy as
# indicator, for two or multi-groups
# Runs num label permutations; the statistic is either training prediction
# accuracy (type == "accu") or between/within-group sum of squares separation
# (otherwise). The empirical p-value is the fraction of permuted statistics
# that are >= the statistic under the original labels.
# Side effects: stores the p-value string, statistic type and the full
# permutation vector (original value first) in the global analSet$plsda.
PLSDA.Permut<-function(num=100, type="accu"){
orig.cls<-cls<-as.numeric(dataSet$cls);
datmat<-as.matrix(dataSet$norm);
best.num<-analSet$plsda$best.num;
# dummy is not used, for the purpose to maintain lapply API
Get.pls.bw <- function(dummy){
cls <- cls[order(runif(length(cls)))]; # random permutation of the labels
pls <- plsda(datmat, as.factor(cls), ncomp=best.num);
pred <- predict(pls, datmat);
Get.bwss(pred, cls);
}
Get.pls.accu <- function(dummy){
cls <- cls[order(runif(length(cls)))]; # random permutation of the labels
pls <- plsda(datmat, as.factor(cls), ncomp=best.num);
pred <- predict(pls, datmat);
sum(pred == cls)/length(cls);
}
# first calculate the bw values with original labels
pls <- plsda(datmat, as.factor(orig.cls), ncomp=best.num);
pred.orig <- predict(pls, datmat);
if(type=="accu"){
perm.type = "prediction accuracy";
res.orig <- sum(pred.orig == orig.cls)/length(orig.cls);
res.perm <- Perform.permutation(num, Get.pls.accu);
}else{
perm.type = "separation distance";
res.orig <- Get.bwss(pred.orig, orig.cls);
res.perm <- Perform.permutation(num, Get.pls.bw);
}
perm.vec <- c(res.orig, unlist(res.perm, use.names=FALSE));
# check for infinite since with group variance could be zero for perfect classification
inf.found = TRUE;
if(sum(is.finite(perm.vec))==length(perm.vec)){
inf.found = FALSE;
}else {
if(sum(is.finite(perm.vec))==0){ # all are infinite, give a random number 10
perm.vec<-rep(10, length(perm.vec));
}else{ # if not all inf, replace with the 10 fold of non-inf values
perm.vec[!is.finite(perm.vec)]<-10*max(perm.vec[is.finite(perm.vec)]);
}
}
# calculate the significant p value as the proportion of sampled permutations better than or equal to original one
# note, the precision is determined by the permutation number i.e. for 100 time, no better than original
# p value is < 0.01, we can not say it is zero
better.hits <- sum(perm.vec[-1]>=perm.vec[1]);
if(better.hits == 0) {
p <- paste("p < ", 1/num, " (", better.hits, "/", num, ")", sep="");
}else{
p <- better.hits/num;
p <- paste("p = ", signif(p, digits=5), " (", better.hits, "/", num, ")", sep="");
}
analSet$plsda$permut.p<<-p;
analSet$plsda$permut.inf<<-F;
analSet$plsda$permut.type<<- perm.type;
analSet$plsda$permut<<-perm.vec;
return(p);
}
# BHan: added bgcolor parameter for B/W color
# Render the PLS-DA importance plot -- either VIP scores or coefficients for
# the column feat.nm -- showing the top feat.num features via PlotImpVar().
# Side effects: writes the image via Cairo; when width == 0 records the image
# name in the global imgSet; records the chosen importance type in analSet.
PlotPLS.Imp<-function(imgName, format="png", dpi=72, width=NA, type, feat.nm, feat.num, color.BW=FALSE){
imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
if(is.na(width)){
w <- 8;
}else if(width == 0){
imgSet$pls.imp<<-imgName;
w <- 7;
}else{
w <- width;
}
Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=w, type=format, bg="white");
use.vip <- (type == "vip");
analSet$plsda$imp.type <<- if(use.vip) "vip" else "coef";
imp.vec <- if(use.vip) analSet$plsda$vip.mat[,feat.nm] else analSet$plsda$coef.mat[,feat.nm];
imp.lbl <- if(use.vip) "VIP scores" else "Coefficients";
PlotImpVar(imp.vec, imp.lbl, feat.num, color.BW);
#dev.off();
}
# BHan: added bgcolor parameter for B/W color
# Dot chart of the top feat.num entries of imp.vec (an importance measure,
# e.g. VIP or coefficients), annotated on the right with a colored box per
# class showing the relative group means of each feature, plus a High/Low
# color key. color.BW switches to a grey-scale palette.
# Assumes an open graphics device; draws into the current plot region.
PlotImpVar <- function(imp.vec, xlbl, feat.num=15, color.BW=FALSE){
cls.len <- length(levels(dataSet$cls));
# widen the right margin to make room for one box column per class
if(cls.len == 2){
rt.mrg <- 5;
}else if(cls.len == 3){
rt.mrg <- 6;
}else if(cls.len == 4){
rt.mrg <- 7;
}else if(cls.len == 5){
rt.mrg <- 8;
}else if(cls.len == 6){
rt.mrg <- 9;
}else{
rt.mrg <- 11;
}
op <- par(mar=c(5,7,3,rt.mrg)); # set right side margin with the number of class
if(feat.num <= 0){
feat.num = 15;
}
if(feat.num > length(imp.vec)){
feat.num <- length(imp.vec);
}
# first get the top subset
imp.vec <- rev(sort(imp.vec))[1:feat.num];
# reverser the order for display
imp.vec <- sort(imp.vec);
# as data should already be normalized, use mean/median should be the same
# mns is a list contains means of all vars at each level
# conver the list into a matrix with each row contains var averages across different lvls
mns <- by(dataSet$norm[, names(imp.vec)], dataSet$cls,
function(x){ # inner function note, by send a subset of dataframe
apply(x, 2, mean, trim=0.1)
});
mns <- t(matrix(unlist(mns), ncol=feat.num, byrow=TRUE));
# vip.nms <-substr(names(imp.vec), 1, 12);
vip.nms <-substr(names(imp.vec), 1, 14);
names(imp.vec) <- NULL;
# modified for B/W color
dotcolor <- ifelse(color.BW, "darkgrey", "blue");
dotchart(imp.vec, bg=dotcolor, xlab= xlbl, cex=1.3);
mtext(side=2, at=1:feat.num, vip.nms, las=2, line=1)
axis.lims <- par("usr"); # x1, x2, y1 ,y2
# get character width
shift <- 2*par("cxy")[1];
lgd.x <- axis.lims[2] + shift;
x <- rep(lgd.x, feat.num);
y <- 1:feat.num;
par(xpd=T); # allow drawing in the margin for the class boxes and key
suppressMessages(require(RColorBrewer));
nc <- ncol(mns);
# modified for B/W color
colorpalette <- ifelse(color.BW, "Greys", "RdYlGn");
col <- colorRampPalette(brewer.pal(10, colorpalette))(nc); # set colors for each class
if(color.BW) col <- rev(col);
# calculate background: rank the class means per feature and map ranks to colors
bg <- matrix("", nrow(mns), nc);
for (m in 1:nrow(mns)){
bg[m,] <- (col[nc:1])[rank(mns[m,])];
}
cls.lbl <- levels(dataSet$cls);
for (n in 1:ncol(mns)){
points(x,y, bty="n", pch=22, bg=bg[,n], cex=3);
# now add label
text(x[1], axis.lims[4], cls.lbl[n], srt=45, adj=c(0.2,0.5));
# shift x, note, this is good for current size
x <- x + shift/1.25;
}
# now add color key, padding with more intermediate colors for contiuous band
# NOTE(review): brewer.pal palettes max out at 9-11 colors; requesting 25
# emits a warning and returns the palette maximum -- works, but noisy. Confirm intended.
col <- colorRampPalette(brewer.pal(25, colorpalette))(50)
if(color.BW) col <- rev(col);
nc <- length(col);
x <- rep(x[1] + shift, nc);
shifty <- (axis.lims[4]-axis.lims[3])/3;
starty <- axis.lims[3] + shifty;
endy <- axis.lims[3] + 2*shifty;
y <- seq(from = starty, to = endy, length = nc);
points(x,y, bty="n", pch=15, col=rev(col), cex=2);
text(x[1], endy+shifty/8, "High");
text(x[1], starty-shifty/8, "Low");
par(op);
}
# Plot plsda classification performance using different components
# Grouped barplot of Accuracy/R2/Q2 (rows of analSet$plsda$fit.info) per
# component count; a red asterisk marks the best component under the
# criterion stored in analSet$plsda$choice.
# Side effects: writes the image via Cairo; when width == 0 records the image
# name in the global imgSet.
PlotPLS.Classification<-function(imgName, format="png", dpi=72, width=NA){
res<-analSet$plsda$fit.info;
colnames(res) <- 1:ncol(res);
best.num <- analSet$plsda$best.num;
choice <- analSet$plsda$choice;
imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
if(is.na(width)){
w <- 7;
}else if(width == 0){
w <- 7;
imgSet$pls.class<<-imgName;
}else{
w <- width;
}
h <- w*5/7;
Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
par(mar=c(5,5,2,7)); # put legend on the right outside
barplot(res, beside = TRUE, col = c("lightblue", "mistyrose","lightcyan"), ylim= c(0,1.05), xlab="Number of components", ylab="Performance");
# each group occupies 3 bars + 1 gap; offset picks the bar matching the criterion
if(choice == "Q2"){
text((best.num-1)*3 + best.num + 2.5, res[3,best.num]+ 0.02, labels = "*", cex=2.5, col="red");
}else if(choice == "R2"){
text((best.num-1)*3 + best.num + 1.5, res[2,best.num]+ 0.02, labels = "*", cex=2.5, col="red");
}else{
text((best.num-1)*3 + best.num + 0.5, res[1,best.num]+ 0.02, labels = "*", cex=2.5, col="red");
}
# calculate the maximum y position, each bar is 1, place one space between the group
xpos <- ncol(res)*3 + ncol(res) + 1;
legend(xpos, 1.0, rownames(res), fill = c("lightblue", "mistyrose","lightcyan"), xpd=T);
#dev.off();
}
# Plot the PLS-DA permutation test statistics as a histogram, marking the observed value
# Histogram of the permutation statistics computed by PLSDA.Permut(); a red
# arrow marks the statistic under the original labels (first element of the
# permutation vector) and the stored p-value string is printed beside it.
# Side effects: writes the image via Cairo; when width == 0 records the image
# name in the global imgSet.
PlotPLS.Permutation<-function(imgName, format="png", dpi=72, width=NA){
bw.vec<-analSet$plsda$permut;
len<-length(bw.vec);
imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
if(is.na(width)){
w <- 8;
}else if(width == 0){
w <- 7;
imgSet$pls.permut<<-imgName;
}else{
w <- width;
}
h <- w*6/8;
Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
par(mar=c(5,5,2,4));
hst <- hist(bw.vec, breaks = "FD", freq=T,
ylab="Frequency", xlab= 'Permutation test statistics', col="lightblue", main="");
# add the indicator using original label
h <- max(hst$counts)
arrows(bw.vec[1], h/5, bw.vec[1], 0, col="red", lwd=2);
text(bw.vec[1], h/3.5, paste('Observed \n statistic \n', analSet$plsda$permut.p), xpd=T);
#dev.off();
}
# Best number of PLS-DA components as determined by PLSDA.CV(); 0 if CV has not run yet.
GetPLSBestTune<-function(){
best <- analSet$plsda$best.num;
if(is.null(best)){
return (0);
}
return(best);
}
# obtain VIP score
# Return the requested PLS significance matrix ("vip", "coef", or loadings
# otherwise), rounded to 5 significant digits and sanitized via CleanNumber().
GetPLSSigMat<-function(type){
mat <- if(type == "vip"){
analSet$plsda$vip.mat;
}else if(type == "coef"){
analSet$plsda$coef.mat;
}else{
analSet$plsr$imp.loads;
};
return (CleanNumber(signif(as.matrix(mat),5)));
}
# Row (feature) names of the requested PLS significance matrix;
# any type other than "vip"/"coef" falls through to the loading matrix.
GetPLSSigRowNames<-function(type){
switch(type,
vip = rownames(analSet$plsda$vip.mat),
coef = rownames(analSet$plsda$coef.mat),
rownames(analSet$plsr$imp.loads))
}
# Column names of the requested PLS significance matrix;
# any type other than "vip"/"coef" falls through to the loading matrix.
GetPLSSigColNames<-function(type){
switch(type,
vip = colnames(analSet$plsda$vip.mat),
coef = colnames(analSet$plsda$coef.mat),
colnames(analSet$plsr$imp.loads))
}
# Row names (performance measures: Accuracy/R2/Q2) of the PLS-DA CV result matrix.
GetPLS_CVRowNames <- function(){
return(rownames(analSet$plsda$fit.info));
}
# Column names (component counts) of the PLS-DA CV result matrix.
GetPLS_CVColNames <- function(){
return(colnames(analSet$plsda$fit.info));
}
# PLS-DA CV performance matrix rounded to 5 significant digits for display.
GetPLS_CVMat<-function(){
cv.mat <- analSet$plsda$fit.info;
return(signif(cv.mat, 5));
}
# Maximum PLS components for pairwise plots: min(samples - 1, variables).
GetMaxPLSPairComp<-function(){
n <- dim(dataSet$norm)[1];
p <- dim(dataSet$norm)[2];
return (min(n - 1, p));
}
# Maximum PLS components for cross-validation: min(samples - 2, variables).
GetMaxPLSCVComp<-function(){
n <- dim(dataSet$norm)[1];
p <- dim(dataSet$norm)[2];
return (min(n - 2, p));
}
# Default PLS components for pairwise plots: capped at 5.
GetDefaultPLSPairComp<-function(){
n <- dim(dataSet$norm)[1];
p <- dim(dataSet$norm)[2];
return (min(5, n - 1, p));
}
# Default PLS components for CV: capped at 5 and at the smallest group size.
GetDefaultPLSCVComp<-function(){
n <- dim(dataSet$norm)[1];
p <- dim(dataSet$norm)[2];
return (min(5, n - 2, p, dataSet$min.grp.size));
}
##############################
####### OPLS-DA ##############
##############################
# Fits an OPLS-DA model (1 predictive component, orthogonal components chosen
# automatically) of the scaled numeric-coded class on the normalized data via
# perform_opls (project wrapper -- presumably around ropls::opls; confirm).
# Side effects: stores the model in the global analSet$oplsda, writes
# oplsda_score.csv / oplsda_loadings.csv, and resets the custom compound list.
OPLSR.Anal<-function(){
# note, standardize the cls, to minimize the impact of categorical to numerical impact
cls<-scale(as.numeric(dataSet$cls))[,1];
datmat<-as.matrix(dataSet$norm);
cv.num <- min(7, dim(dataSet$norm)[1]-1); # CV segments capped by sample count
analSet$oplsda<<-perform_opls(datmat,cls, predI=1, permI=0, orthoI=NA, crossvalI=cv.num);
score.mat <- cbind(analSet$oplsda$scoreMN[,1], analSet$oplsda$orthoScoreMN[,1]);
colnames(score.mat) <- c("Score (t1)","OrthoScore (to1)");
write.csv(signif(score.mat,5), row.names=rownames(dataSet$norm), file="oplsda_score.csv");
load.mat <- cbind(analSet$oplsda$loadingMN[,1], analSet$oplsda$orthoLoadingMN[,1]);
colnames(load.mat) <- c("Loading (t1)","OrthoLoading (to1)");
write.csv(signif(load.mat,5), file="oplsda_loadings.csv");
custom.cmpds <<- c();
}
# score plot
# 2D OPLS-DA score plot: predictive score t1 vs orthogonal score to1, with
# per-group confidence ellipses (level = reg), optional sample labels
# (show == 1) and optional grey-scale rendering (grey.scale == 1).
# Side effects: writes the image via Cairo; when width == 0 records the image
# name in the global imgSet.
PlotOPLS2DScore<-function(imgName, format="png", dpi=72, width=NA, inx1, inx2, reg=0.95, show=1, grey.scale=0){
suppressMessages(require('ellipse'));
imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
if(is.na(width)){
w <- 9;
}else if(width == 0){
w <- 7.2;
imgSet$opls.score2d<<-imgName;
}else{
w <- width;
}
h <- w;
Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
par(mar=c(5,5,3,3));
lv1 <- analSet$oplsda$scoreMN[,1];
lv2 <- analSet$oplsda$orthoScoreMN[,1];
xlabel <- paste("T score [1]", "(", round(100*analSet$oplsda$modelDF["p1", "R2X"],1), "%)");
ylabel <- paste("Orthogonal T score [1]", "(", round(100*analSet$oplsda$modelDF["o1", "R2X"],1), "%)");
text.lbls<-substr(rownames(dataSet$norm),1,12) # some names may be too long
# obtain ellipse points to the scatter plot for each category
lvs <- levels(dataSet$cls);
pts.array <- array(0, dim=c(100,2,length(lvs)));
for(i in 1:length(lvs)){
inx <-dataSet$cls == lvs[i];
groupVar<-var(cbind(lv1[inx],lv2[inx]), na.rm=T);
groupMean<-cbind(mean(lv1[inx], na.rm=T),mean(lv2[inx], na.rm=T));
pts.array[,,i] <- ellipse(groupVar, centre = groupMean, level = reg, npoints=100);
}
# expand the axis range so the ellipses are fully visible
xrg <- range (lv1, pts.array[,1,]);
yrg <- range (lv2, pts.array[,2,]);
x.ext<-(xrg[2]-xrg[1])/12;
y.ext<-(yrg[2]-yrg[1])/12;
xlims<-c(xrg[1]-x.ext, xrg[2]+x.ext);
ylims<-c(yrg[1]-y.ext, yrg[2]+y.ext);
## cols = as.numeric(dataSet$cls)+1;
cols <- GetColorSchema(grey.scale==1);
uniq.cols <- unique(cols);
plot(lv1, lv2, xlab=xlabel, xlim=xlims, ylim=ylims, ylab=ylabel, type='n', main="Scores Plot");
grid(col = "lightgray", lty = "dotted", lwd = 1);
# make sure name and number of the same order DO NOT USE levels, which may be different
legend.nm <- unique(as.character(dataSet$cls));
## uniq.cols <- unique(cols);
## BHAN: when same color is choosen for black/white; it makes an error
# names(uniq.cols) <- legend.nm;
if ( length(uniq.cols) > 1 ) {
names(uniq.cols) <- legend.nm;
}
# draw ellipse
for(i in 1:length(lvs)){
if ( length(uniq.cols) > 1) {
polygon(pts.array[,,i], col=adjustcolor(uniq.cols[lvs[i]], alpha=0.25), border=NA);
} else {
polygon(pts.array[,,i], col=adjustcolor(uniq.cols, alpha=0.25), border=NA);
}
if(grey.scale) {
lines(pts.array[,,i], col=adjustcolor("black", alpha=0.5), lty=2);
}
}
pchs <- GetShapeSchema(show, grey.scale);
if(grey.scale) {
cols <- rep("black", length(cols));
}
if(show==1){ # display sample name set on
text(lv1, lv2, label=text.lbls, pos=4, xpd=T, cex=0.75);
points(lv1, lv2, pch=pchs, col=cols);
}else{
if (length(uniq.cols) == 1) {
points(lv1, lv2, pch=pchs, col=cols, cex=1.0);
} else {
if(grey.scale == 1 | (exists("shapeVec") && all(shapeVec>0))){
points(lv1, lv2, pch=pchs, col=cols, cex=1.8);
}else{
points(lv1, lv2, pch=21, bg=cols, cex=2);
}
}
}
uniq.pchs <- unique(pchs);
if(grey.scale) {
uniq.cols <- "black";
}
legend("topright", legend = legend.nm, pch=uniq.pchs, col=uniq.cols);
#dev.off();
}
# Clear the global list of user-selected compounds used by the custom S-plot labelling.
ResetCustomCmpds <- function(){
custom.cmpds <<- c();
}
#S-plot for important features from OPLS-DA
# Plots each feature's covariance p[1] (x) against its correlation p(corr)[1]
# (y) with the predictive OPLS-DA score. plotType "all" labels every feature;
# "custom" labels only user-picked compounds; anything else labels none.
# Side effects: writes the image via Cairo, writes oplsda_splot.csv, and
# caches splot.mat (with jittered x) in the global analSet$oplsda.
PlotOPLS.Splot<-function(imgName, format="png", dpi=72, width=NA, plotType){
s <- as.matrix(dataSet$norm);
T <- as.matrix(analSet$oplsda$scoreMN)
# covariance of each feature with the predictive score
p1 <- c()
for (i in 1:ncol(s)) {
scov <- cov(s[,i], T)
p1 <- matrix(c(p1, scov), ncol=1)
}
# correlation of each feature with the predictive score
pcorr1 <- c()
for (i in 1:nrow(p1)) {
den <- apply(T, 2, sd)*sd(s[,i])
corr1 <- p1[i,]/den
pcorr1 <- matrix(c(pcorr1, corr1), ncol=1)
}
imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
if(is.na(width)){
w <- h <- 8;
}else if(width == 0){
# bug fix: w and h were previously left unset in this branch, so Cairo()
# failed; 7.2 matches the default used by the sibling plotting functions.
w <- h <- 7.2;
imgSet$opls.loading<<-imgName;
}else{
w <- h <- width;
}
Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
par(mar=c(5,5,4,7))
plot(p1, pcorr1, pch=19, xlab="p[1]", ylab ="p(corr)[1]", main = "S-plot", col="magenta");
opls.axis.lims <<- par("usr"); # x1, x2, y1, y2 -- consumed by GetOPLSLoadAxesSpec()
if(plotType=="all"){
text(p1, pcorr1, labels=colnames(s), cex=0.8, pos=4, xpd=TRUE, col="blue");
}else if(plotType == "custom"){
if(length(custom.cmpds) > 0){
hit.inx <- colnames(dataSet$norm) %in% custom.cmpds;
text(p1[hit.inx], pcorr1[hit.inx], labels=colnames(s)[hit.inx], pos=4, xpd=TRUE, col="blue");
}
}else{
# do nothing
}
#dev.off();
splot.mat <- cbind(jitter(p1),p1, pcorr1);
rownames(splot.mat) <- colnames(s);
colnames(splot.mat) <- c("jitter", "p[1]","p(corr)[1]");
write.csv(signif(splot.mat[,2:3],5), file="oplsda_splot.csv");
analSet$oplsda$splot.mat <<- splot.mat;
}
# Record a compound clicked on the loading/S-plot (so "custom" labelling can
# highlight it later) and render its detail view via PlotCmpdView.
PlotLoadingCmpd<-function(cmpdNm, format="png", dpi=72, width=NA){
# need to record the clicked compounds
custom.cmpds <<- c(custom.cmpds, cmpdNm);
return(PlotCmpdView(cmpdNm, format, dpi, width));
}
# Barplot of the OPLS-DA model overview: cumulative R2Y and Q2 per component
# (rows of modelDF, excluding the "rot" row), with values printed above bars.
# Side effects: writes the image via Cairo; when width == 0 records the image
# name in imgSet$pls.class -- NOTE(review): this reuses the PLS-DA key rather
# than an OPLS-specific one; looks like a copy-paste slip -- confirm before changing.
PlotOPLS.MDL <- function(imgName, format="png", dpi=72, width=NA){
imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
if(is.na(width)){
w <- 8;
}else if(width == 0){
w <- 8;
imgSet$pls.class<<-imgName;
}else{
w <- width;
}
h <- w*6/8;
Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
# the model R2Y and Q2Y
par(mar=c(5,5,4,7)); # put legend on the right outside
modBarDF <- analSet$oplsda$modelDF[!(rownames(analSet$oplsda$modelDF) %in% c("rot")), ];
mod.dat <- rbind(modBarDF[, "R2Y(cum)"], modBarDF[, "Q2(cum)"]);
# first barplot call only captures bar x-positions; second draws the colored bars
bplt <- barplot(mod.dat,beside=TRUE, names.arg = rownames(modBarDF),xlab = "");
axis(2, lwd.ticks=1);
barplot(mod.dat,add = TRUE, beside = TRUE, col = c("lightblue", "mistyrose"));
text(x=bplt, y=mod.dat+max(mod.dat)/25, labels=as.character(mod.dat), xpd=TRUE)
xpos <- nrow(modBarDF)*2 + nrow(modBarDF) + 0.5;
ypos <- max(mod.dat)/2;
legend(xpos, ypos, legend = c("R2Y", "Q2"), pch=15, col=c("lightblue", "mistyrose"), xpd=T, bty="n");
#dev.off();
}
# Return the axis limits (usr: x1, x2, y1, y2) recorded by the last OPLS-DA S-plot.
GetOPLSLoadAxesSpec<-function(){
return(opls.axis.lims);
}
# Compound (feature) names of the cached OPLS-DA S-plot matrix.
GetOPLSLoadCmpds<- function(){
return(rownames(analSet$oplsda$splot.mat));
}
# Fixed column labels for the OPLS-DA S-plot display table.
GetOPLSLoadColNames<- function(){
cn <- c("p[1]","p(corr)[1]");
return(cn);
}
# Jittered x-positions (first column of the cached S-plot matrix).
GetOPLSLoadCmpdInxs<-function(){
return(analSet$oplsda$splot.mat[,1]);
}
# Two-column matrix (jittered x, p(corr)[1]) for the interactive S-plot display.
GetOPLSLoadMat <- function(){
mat <- analSet$oplsda$splot.mat[,c(1,3)];
return(as.matrix(mat));
}
# Perform an OPLS-DA permutation test using R2Y and Q2 as the indicators,
# plot their null distributions, and report the empirical p-values.
# Refits the OPLS-DA model with num label permutations (via perform_opls),
# overlays histograms of the permuted R2Y(cum) and Q2(cum) values, marks the
# observed values (first element of each vector) with arrows, and returns a
# message string with the empirical p-values for both measures.
# Side effects: writes the image via Cairo; when width == 0 records the image
# name in imgSet$pls.permut (PLS key reused -- presumably intentional; confirm).
PlotOPLS.Permutation<-function(imgName, format="png", dpi=72, num=100, width=NA){
cls<-scale(as.numeric(dataSet$cls))[,1];
datmat<-as.matrix(dataSet$norm);
cv.num <- min(7, dim(dataSet$norm)[1]-1);
#perm.res<-performOPLS(datmat,cls, predI=1, orthoI=NA, permI=num, crossvalI=cv.num);
perm.res<-perform_opls(datmat,cls, predI=1, orthoI=NA, permI=num, crossvalI=cv.num);
r.vec<-perm.res$suppLs[["permMN"]][, "R2Y(cum)"];
q.vec<-perm.res$suppLs[["permMN"]][, "Q2(cum)"];
rng <- range(c(r.vec, q.vec, 1));
imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
if(is.na(width)){
w <- 8;
}else if(width == 0){
w <- 8;
imgSet$pls.permut<<-imgName;
}else{
w <- width;
}
h <- w*6/8;
Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
par(mar=c(5,5,2,7));
# use a common bin size so the two overlaid histograms are comparable
rhst <- hist(r.vec[-1], plot=FALSE);
qhst <- hist(q.vec[-1], plot=FALSE);
h <- max(c(rhst$counts, qhst$counts))+1;
bin.size <- min(c(rhst$breaks[2]-rhst$breaks[1], qhst$breaks[2]-qhst$breaks[1]));
rbins <- seq(min(rhst$breaks),max(rhst$breaks),bin.size);
qbins <- seq(min(qhst$breaks),max(qhst$breaks),bin.size);
hist(r.vec[-1], xlim=rng, ylim=c(0, h), breaks=rbins, border=F, ylab="Frequency", xlab= 'Permutations',
col=adjustcolor("lightblue", alpha=0.6), main="");
hist(q.vec[-1], add=TRUE,breaks=qbins, border=F, col=adjustcolor("mistyrose", alpha=0.6));
arrows(r.vec[1], h/3, r.vec[1], 0, length=0.1,angle=30,lwd=2);
text(r.vec[1], h/2.5, paste('Observed \n R2Y:', r.vec[1]), xpd=TRUE);
arrows(q.vec[1], h/2, q.vec[1], 0, length=0.1,angle=30,lwd=2);
text(q.vec[1], h/1.8, paste('Observed \n Q2:', q.vec[1]), xpd=TRUE);
legend(1, h/3, legend = c("Perm R2Y", "Perm Q2"), pch=15, col=c("lightblue", "mistyrose"), xpd=T, bty="n");
#dev.off();
# empirical p: fraction of permuted values >= the observed value
better.rhits <- sum(r.vec[-1]>=r.vec[1]);
if(better.rhits == 0) {
pr <- paste("p < ", 1/num, " (", better.rhits, "/", num, ")", sep="");
}else{
p <- better.rhits/num;
pr <- paste("p = ", signif(p, digits=5), " (", better.rhits, "/", num, ")", sep="");
}
better.qhits <- sum(q.vec[-1]>=q.vec[1]);
if(better.qhits == 0) {
pq <- paste("p < ", 1/num, " (", better.qhits, "/", num, ")", sep="");
}else{
p <- better.qhits/num;
pq <- paste("p = ", signif(p, digits=5), " (", better.qhits, "/", num, ")", sep="");
}
msg <- paste0("Empirical p-values R2Y: ", pr, " and Q2: ", pq)
return(msg);
}
#############################################################################################################
#############################################################################################################
#############################################################################################################
#############################################################################################################
############################################################################################################
#########################################################
## R script for MetaboAnalyst
## Description: perform RandomForest and SVM
##
## Author: Jeff Xia, jeff.xia@mcgill.ca
## McGill University, Canada
##
## License: GNU GPL (>= 2)
###################################################
#######################################
########### Random Forest #############
#######################################
# random forests
# Trains a random forest classifier (treeNum trees, tryNum variables per
# split) on the normalized data with importance and proximity enabled.
# Side effects: writes randomforests_sigfeatures.csv (features ranked by
# MeanDecreaseAccuracy) and stores the model plus the ranked significance
# matrix in the global analSet.
RF.Anal<-function(treeNum=500, tryNum=10){
suppressMessages(require(randomForest));
rf_out<-randomForest(dataSet$norm, dataSet$cls, ntree = treeNum, mtry = tryNum, importance = TRUE, proximity = TRUE);
# set up named sig table for display
impmat<-rf_out$importance;
impmat<-impmat[rev(order(impmat[,"MeanDecreaseAccuracy"])),] # rank features, most important first
sigmat<-impmat[,"MeanDecreaseAccuracy", drop=F];
sigmat<-signif(sigmat, 5);
write.csv(sigmat,file="randomforests_sigfeatures.csv");
analSet$rf<<-rf_out;
analSet$rf.sigmat<<-sigmat;
}
# plot random forest classification error rates against the number of trees
# Plots the fitted random forest's error-rate traces (overall OOB plus one
# line per class) against the number of trees.
# Side effects: writes the image via Cairo; when width == 0 records the image
# name in the global imgSet.
PlotRF.Classify<-function(imgName, format="png", dpi=72, width=NA){
imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
if(is.na(width)){
w <- 8;
}else if(width == 0){
w <- 8;
imgSet$rf.cls<<-imgName;
}else{
w <- width;
}
h <- w*5/8;
Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
#par(mfrow=c(2,1));
par(mar=c(4,4,3,2));
cols <- rainbow(length(levels(dataSet$cls))+1); # +1 for the overall OOB line
plot(analSet$rf, main="Random Forest classification", col=cols);
legend("topright", legend = c("Overall", levels(dataSet$cls)), lty=2, lwd=1, col=cols);
#PlotConfusion(analSet$rf$confusion);
#dev.off();
}
# plot variable importance ranked by MeanDecreaseAccuracy
# Delegates the rendering of the top features to PlotImpVar().
# Side effects: writes the image via Cairo; when width == 0 records the image
# name in the global imgSet.
PlotRF.VIP<-function(imgName, format="png", dpi=72, width=NA){
imp.scores <- sort(analSet$rf$importance[,"MeanDecreaseAccuracy"], decreasing=TRUE);
imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
if(is.na(width)){
w <- 8;
}else if(width == 0){
imgSet$rf.imp<<-imgName;
w <- 7;
}else{
w <- width;
}
Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=w*7/8, type=format, bg="white");
PlotImpVar(imp.scores,"MeanDecreaseAccuracy");
#dev.off();
}
# Plots the random forest proximity-based outlying measure per sample
# (outlier() from randomForest), labelling the five most extreme samples,
# with a separate legend panel on the right.
# Side effects: writes the image via Cairo; when width == 0 records the image
# name in the global imgSet.
PlotRF.Outlier<-function(imgName, format="png", dpi=72, width=NA){
cols <- GetColorSchema();
uniq.cols <- unique(cols);
legend.nm <- unique(as.character(dataSet$cls));
dist.res <- outlier(analSet$rf);
imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
if(is.na(width)){
w <- 9;
}else if(width == 0){
w <- 7.2;
imgSet$rf.outlier<<-imgName;
}else{
w <- width;
}
h <- w*7/9;
Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
layout(matrix(c(1,2), 1, 2, byrow = TRUE), width=c(4,1)); # main plot + narrow legend panel
op<-par(mar=c(5,5,4,0));
plot(dist.res, type="h", col=cols, xlab="Samples", xaxt="n", ylab="Outlying Measures", bty="n");
# add sample names to top 5
rankres <- rank(-abs(dist.res), ties.method="random");
inx.x <- which(rankres < 6);
inx.y <- dist.res[inx.x];
nms <- names(dist.res)[inx.x];
text(inx.x, inx.y, nms, pos=ifelse(inx.y >= 0, 3, 1), xpd=T)
op<-par(mar=c(5,0,4,1));
plot.new();
plot.window(c(0,1), c(0,1));
legend("center", legend =legend.nm, pch=15, col=uniq.cols);
#dev.off();
}
# get the OOB error for the last signif
# Return the final out-of-bag (OOB) error rate of the random forest stored
# in analSet$rf, rounded to 3 significant digits. The last row of err.rate
# holds the error after all trees; column 1 is the overall OOB error.
GetRFOOB <- function(){
  err.mat <- analSet$rf$err.rate;
  signif(err.mat[nrow(err.mat), 1], 3);
}
# Render the random forest significance matrix as a report table
# via the shared GetSigTable helper.
GetSigTable.RF<-function(){
GetSigTable(analSet$rf.sigmat, "Random Forest");
}
# significance measure, double[][]
# RF significance matrix with non-finite values cleaned for transfer
# to the caller (returns double[][]).
GetRFSigMat<-function(){
return(CleanNumber(analSet$rf.sigmat))
}
# Row (feature) names of the RF significance matrix.
GetRFSigRowNames<-function(){
rownames(analSet$rf.sigmat);
}
# Column names of the RF significance matrix.
GetRFSigColNames<-function(){
colnames(analSet$rf.sigmat);
}
# Print the RF confusion matrix as a LaTeX (xtable) table for the report.
GetRFConf.Table<-function(){
print(xtable(analSet$rf$confusion,
caption="Random Forest Classification Performance"), size="\\scriptsize");
}
# return double[][] confusion matrix
# RF confusion matrix rounded to 3 significant digits (double[][]).
GetRFConfMat<-function(){
signif(analSet$rf$confusion,3);
}
# Row names of the RF confusion matrix.
GetRFConfRowNames<-function(){
rownames(analSet$rf$confusion);
}
# Column names of the RF confusion matrix.
GetRFConfColNames<-function(){
colnames(analSet$rf$confusion);
}
#######################################
########### R-SVM #####################
#######################################
# recursive SVM for feature selection and classification
# Recursive SVM feature selection and classification on dataSet$norm.
# cvType: "LOO", "bootstrape", or an integer N for N-fold CV (forwarded to RSVM).
# Side effects: writes svm_sigfeatures.csv and stores the result in analSet$svm.
RSVM.Anal<-function(cvType){
  # feature-count ladder for recursive elimination
  ladder = CreateLadder(ncol(dataSet$norm));
  svm.out <- RSVM(dataSet$norm, dataSet$cls, ladder, CVtype=cvType);
  # best level = fewest features among those reaching the minimum CV error
  ERInd <- max( which(svm.out$Error == min(svm.out$Error)) )
  MinLevel <- svm.out$ladder[ERInd]
  FreqVec <- svm.out$SelFreq[, ERInd]
  # features kept at the best level, identified by selection-frequency rank
  SelInd <- which(rank(FreqVec) >= (svm.out$ladder[1]-MinLevel));
  FreqInd<-svm.out$SelFreq[SelInd, ERInd]
  names(FreqInd)<-names(dataSet$norm)[SelInd];
  # create a sig table (selection frequencies) for display/download
  sig.var<- rev(sort(FreqInd));
  sig.var<-as.matrix(sig.var); # 1-column matrix
  colnames(sig.var)<-"Frequency"; # BUG FIX: was misspelled "Freqency"
  write.csv(sig.var,file="svm_sigfeatures.csv");
  # add sorted feature frequencies + best level index as importance indicator
  svm.out<-append(svm.out, list(sig.mat=sig.var, best.inx=ERInd));
  analSet$svm<<-svm.out;
}
# Plot R-SVM classification performance across feature-elimination levels
# Plot the recursive SVM cross-validation error rate at each feature level,
# labelling each point with its percentage and highlighting the best level.
PlotRSVM.Classification<-function(imgName, format="png", dpi=72, width=NA){
res<-analSet$svm$Error;
edge<-(max(res)-min(res))/100; # expand y uplimit for text labels
imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
if(is.na(width)){
w <- 8;
}else if(width == 0){
# width 0 = web default; record the image name for the report
w <- 7;
imgSet$svm.class<<-imgName;
}else{
w <- width;
}
h <- w*6/8;
Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
plot(res,type='l',xlab='Number of variables (levels)',ylab='Error Rate',
ylim = c(min(res)-5*edge, max(res)+18*edge), axes=F,
main="Recursive SVM classification")
# percentage labels, rotated to avoid overlap
text(res,labels =paste(100*round(res,3),'%'), adj=c(-0.3, -0.5), srt=45, xpd=T)
# red marks the level with the minimum CV error (best.inx)
points(res, col=ifelse(1:length(res)==analSet$svm$best.inx,"red","blue"));
axis(2);
axis(1, 1:length(res), names(res));
#dev.off();
}
# if too many, plot top 15
# Plot the top features selected by recursive SVM, ranked by how often
# each feature was selected across CV runs.
PlotRSVM.Cmpd <- function(imgName, format="png", dpi=72, width=NA){
  freq.mat <- analSet$svm$sig.mat;
  freq.vec <- freq.mat[,1];
  imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
  if(is.na(width)){
    img.w <- 8;
  }else if(width == 0){
    # width 0 = web default; remember the image name for the report
    img.w <- 7;
    imgSet$svm <<- imgName;
  }else{
    img.w <- width;
  }
  img.h <- img.w*7/8;
  Cairo(file = imgName, unit="in", dpi=dpi, width=img.w, height=img.h, type=format, bg="white");
  PlotImpVar(freq.vec, "Frequency");
  #dev.off();
}
# Render the R-SVM significance matrix as a report table.
GetSigTable.SVM<-function(){
GetSigTable(analSet$svm$sig.mat, "Recursive SVM");
}
# significance measure, double[][]
# R-SVM significance matrix with non-finite values cleaned (double[][]).
GetSVMSigMat<-function(){
return(CleanNumber(analSet$svm$sig.mat));
}
# Row (feature) names of the R-SVM significance matrix.
GetSVMSigRowNames<-function(){
rownames(analSet$svm$sig.mat);
}
# Column names of the R-SVM significance matrix.
GetSVMSigColNames<-function(){
colnames(analSet$svm$sig.mat);
}
### R-code for R-SVM
### use leave-one-out / Nfold or bootstrape to permute data for external CV
### build SVM model and use mean-balanced weight to sort genes on training set
### and recursive elimination of least important genes
### author: Dr. Xin Lu, Research Scientist
### Biostatistics Department, Harvard School of Public Health
## create a decreasing ladder for recursive feature elimination
# Build a strictly decreasing ladder of feature counts for recursive
# feature elimination, starting at Ntotal and stopping before dropping
# below Nmin. Large counts shrink faster (keeping 40-50%) because SVM
# training is expensive at high dimension; small counts shrink gently (75%).
CreateLadder <- function(Ntotal, Nmin=5 ){
  steps <- Ntotal
  repeat{
    cur <- steps[length(steps)]
    # elimination rate depends on how many variables remain
    if(cur > 200){
      keep.ratio <- 0.4
    }else if(cur > 50){
      keep.ratio <- 0.5
    }else if(cur > 25){
      keep.ratio <- 0.6
    }else{
      keep.ratio <- 0.75
    }
    nxt <- round(cur * keep.ratio)
    # guarantee strict decrease even when rounding returns the same count
    if(nxt == cur){
      nxt <- nxt - 1
    }
    if(nxt < Nmin){
      break
    }
    steps <- c(steps, nxt)
  }
  steps
}
## R-SVM core code
## input:
## x: row matrix of data
## y: class label: 1 / -1 for 2 classes
## CVtype:
## integer: N fold CV
## "LOO": leave-one-out CV
## "bootstrape": bootstrape CV
## CVnum: number of CVs
## LOO: defined as sample size
## Nfold and bootstrape: user defined, default as sample size
## output: a named list
## Error: a vector of CV error on each level
## SelFreq: a matrix for the frequency of each gene being selected in each level
## with each column corresponds to a level of selection
## and each row for a gene
## The top important genes in each level are the high-frequency ones
# Recursive SVM core: repeated external CV, and within each CV split a
# linear SVM is trained at every ladder level with the least important
# features eliminated between levels.
# x: sample-by-feature matrix; y: 2-class labels (recoded to +1/-1 below);
# ladder: strictly decreasing feature counts (see CreateLadder);
# CVtype: "LOO", "bootstrape", or integer N for N-fold; CVnum: number of CV
# rounds (0 = one per sample).
# Returns list(ladder, Error=per-level CV error rate, SelFreq=per-gene
# selection counts per level).
RSVM <- function(x, y, ladder, CVtype, CVnum=0 ){
suppressMessages(require(e1071));
## check if y is binary response
Ytype <- names(table(y))
if( length(Ytype) != 2)
{
print("ERROR!! RSVM can only deal with 2-class problem")
return(0)
}
## class mean difference, used later to orient the SVM weight vector
m1 <- apply(x[ which(y==Ytype[1]), ], 2, mean)
m2 <- apply(x[ which(y==Ytype[2]), ], 2, mean)
md <- m1-m2
## recode the two classes as +1 / -1
yy <- vector( length=length(y))
yy[which(y==Ytype[1])] <- 1
yy[which(y==Ytype[2])] <- -1
y <- yy
## check ladder
if( min(diff(ladder)) >= 0 )
{
print("ERROR!! ladder must be monotonously decreasing")
return(0);
}
## ensure the first level uses all features
if( ladder[1] != ncol(x) )
{
ladder <- c(ncol(x), ladder)
}
nSample <- nrow(x)
nGene <- ncol(x)
SampInd <- seq(1, nSample)
if( CVtype == "LOO" )
{
CVnum <- nSample
} else
{
if( CVnum == 0 )
{
CVnum <- nSample
}
}
## vector for test error and number of tests
ErrVec <- vector( length=length(ladder))
names(ErrVec) <- as.character(ladder);
nTests <- 0
SelFreq <- matrix( 0, nrow=nGene, ncol=length(ladder))
colnames(SelFreq) <- paste("Level", ladder);
## for each CV round: split, then run the full elimination ladder
for( i in 1:CVnum )
{
## split data
if( CVtype == "LOO" )
{
TestInd <- i
TrainInd <- SampInd[ -TestInd]
} else {
if( CVtype == "bootstrape" ) {
## out-of-bag samples form the test set
TrainInd <- sample(SampInd, nSample, replace=T);
TestInd <- SampInd[ which(!(SampInd %in% TrainInd ))];
} else {
## Nfold: hold out 1/CVtype of the samples
TrainInd <- sample(SampInd, nSample*(CVtype-1)/CVtype);
TestInd <- SampInd[ which(!(SampInd %in% TrainInd ))];
}
}
nTests <- nTests + length(TestInd)
## in each level, train a SVM model and record test error
xTrain <- x[TrainInd, ]
yTrain <- y[TrainInd]
xTest <- x[TestInd,]
yTest <- y[TestInd]
## index of the genes still in play at the current level
SelInd <- seq(1, nGene)
for( gLevel in 1:length(ladder) )
{
## record the genes selected in this ladder
SelFreq[SelInd, gLevel] <- SelFreq[SelInd, gLevel] +1
## train SVM model and test error
###################################################################################
## note the scale is changed to T or it never returns sometime for unscaled data ###
## note: the classification performance is independent of whether scale is T/F ####
## for "LOO", the test data should be as.data.frame, matrix will trigger error #####
###################################################################################
svmres <- svm(xTrain[, SelInd], yTrain, scale=T, type="C-classification", kernel="linear" )
if( CVtype == "LOO" ){
svmpred <- predict(svmres, as.data.frame(xTest[SelInd], nrow=1) )
}else{
svmpred <- predict(svmres, xTest[, SelInd] )
}
ErrVec[gLevel] <- ErrVec[gLevel] + sum(svmpred != yTest )
## weight vector, signed by the class-mean difference md
W <- t(svmres$coefs*yTrain[svmres$index]) %*% svmres$SV * md[SelInd]
rkW <- rank(W)
## keep only the genes whose weight rank survives into the next level
if( gLevel < length(ladder) ){
SelInd <- SelInd[which(rkW > (ladder[gLevel] - ladder[gLevel+1]))]
}
}
}
## average the accumulated errors over the total number of test predictions
ret <- list(ladder=ladder, Error=ErrVec/nTests, SelFreq=SelFreq);
ret;
}
# Draw a confusion matrix as a colored image with counts overlaid.
# NOTE(review): prior<- is not base R; it comes from an attached package
# (presumably via the RF toolchain) -- confirm it is loaded before use.
# NOTE(review): the text() call below hard-codes a 6x6 grid (1:6, 6:1),
# so counts are placed correctly only for 6-class confusion matrices.
PlotConfusion <- function(clsConf){
prior(clsConf) <- 100
# The above rescales the confusion matrix such that columns sum to 100.
opar <- par(mar=c(5.1, 6.1, 2, 2))
x <- x.orig <- unclass(clsConf)
# log-compress counts into the color range, clamp to [0, 10]
x <- log(x + 0.5) * 2.33
x[x < 0] <- NA
x[x > 10] <- 10
# negate the diagonal so correct predictions map to the opposite color ramp
diag(x) <- -diag(x)
image(1:ncol(x), 1:ncol(x),
-(x[, nrow(x):1]), xlab='Actual', ylab='',
col=colorRampPalette(c(hsv(h = 0, s = 0.9, v = 0.9, alpha = 1),
hsv(h = 0, s = 0, v = 0.9, alpha = 1),
hsv(h = 2/6, s = 0.9, v = 0.9, alpha = 1)))(41),
xaxt='n', yaxt='n', zlim=c(-10, 10))
axis(1, at=1:ncol(x), labels=colnames(x), cex.axis=0.8)
axis(2, at=ncol(x):1, labels=colnames(x), las=1, cex.axis=0.8)
title(ylab='Predicted', line=4.5)
abline(h = 0:ncol(x) + 0.5, col = 'gray')
abline(v = 0:ncol(x) + 0.5, col = 'gray')
# overlay the original (rescaled) counts, hiding zeros
text(1:6, rep(6:1, each=6), labels = sub('^0$', '', round(c(x.orig), 0)))
box(lwd=2)
par(opar) # reset par
}
################################################################################################
################################################################################################
################################################################################################
################################################################################################
################################################################################################
#########################################################
## R script for MetaboAnalyst
## Description: perform correlation analysis
##
## Author: Jeff Xia, jeff.xia@mcgill.ca
## McGill University, Canada
##
## License: GNU GPL (>= 2)
###################################################
#######################################
## Pattern hunter
##########################################
# Run template on all the high region effect genes
# Correlate a feature profile x against a template via cor.test.
# dist.name: correlation method ("pearson", "spearman", "kendall").
# Returns c(correlation estimate, test statistic, p-value).
template.match <- function(x, template, dist.name) {
  res <- cor.test(x, template, method=dist.name);
  c(res$estimate, res$statistic, res$p.value)
}
# Pattern hunter: correlate every feature against a group-level template.
# pattern: "-"-separated values, one per group level (default: increasing
# 1-2-...-k). Returns 1 on success, 0 on a user error (message via AddErrMsg).
# Side effects: writes correlation_pattern.csv and caches results in
# analSet$corr (sig.nm, cor.mat, pattern).
Match.Pattern<-function(dist.name="pearson", pattern=NULL){
if(is.null(pattern)){
pattern <- paste(1:length(levels(dataSet$cls)), collapse="-");
}
templ <- as.numeric(ClearStrings(strsplit(pattern, "-")[[1]]));
# a constant template has zero variance, so correlation is undefined
if(all(templ==templ[1])){
AddErrMsg("Cannot calculate correlation on constant values!");
return(0);
}
new.template <- vector(mode="numeric", length=length(dataSet$cls))
# expand to match each levels in the dataSet$cls
all.lvls <- levels(dataSet$cls);
if(length(templ)!=length(all.lvls)){
AddErrMsg("Wrong template - must the same length as the group number!");
return(0);
}
for(i in 1:length(templ)){
hit.inx <- dataSet$cls == all.lvls[i]
new.template[hit.inx] = templ[i];
}
# per-feature correlation/statistic/p-value against the expanded template
cbtempl.results <- apply(dataSet$norm, 2, template.match, new.template, dist.name);
cor.res<-t(cbtempl.results);
# FDR-adjust the raw p-values (column 3)
fdr.col <- p.adjust(cor.res[,3], "fdr");
cor.res <- cbind(cor.res, fdr.col);
colnames(cor.res)<-c("correlation", "t-stat", "p-value", "FDR");
# sort by p-value, keep 5 significant digits
ord.inx<-order(cor.res[,3]);
sig.mat <- signif(cor.res[ord.inx,],5);
fileName <- "correlation_pattern.csv";
write.csv(sig.mat,file=fileName);
analSet$corr$sig.nm<<-fileName;
analSet$corr$cor.mat<<-sig.mat;
analSet$corr$pattern <<- pattern;
return(1);
}
# Build candidate pattern templates from the group levels of dataSet$cls:
# increasing, decreasing, and (for >2 groups) mid-high / mid-low profiles,
# each encoded as a "-"-separated string. The first returned entry lists
# the actual group labels as a legend.
GenerateTemplates <- function(){
  n.lvl <- length(levels(dataSet$cls));
  incs <- 1:n.lvl;
  desc <- n.lvl:1;
  if(n.lvl > 2){
    # ceiling keeps the peak position correct for an even number of groups
    mid.pos <- ceiling((n.lvl+1)/2);
    mid.high <- c(1:mid.pos, seq(mid.pos-1, by=-1, length.out=n.lvl-mid.pos));
    mid.low <- c(mid.pos:1, seq(2, length.out=n.lvl-mid.pos));
    res <- rbind(incs, desc, mid.high, mid.low);
  }else{
    # with only two groups, just increasing and decreasing
    res <- rbind(incs, desc);
  }
  # encode each template row as "v1-v2-...-vk"
  res <- apply(res, 1, paste, collapse="-");
  # prepend the group-label legend
  res <- c(paste(levels(dataSet$cls), collapse="-"), res);
  return (res);
}
# calculate correlation of all other feature to a given feature name
# Correlate every feature in dataSet$norm against the named feature varName,
# FDR-adjust the p-values, save the sorted table to correlation_feature.csv
# and cache the results in analSet$corr. Returns 1.
FeatureCorrelation<-function(dist.name, varName){
  target <- dataSet$norm[, varName];
  # per-feature correlation/statistic/p-value against the target feature
  cor.res <- t(apply(dataSet$norm, 2, template.match, target, dist.name));
  # FDR-adjust the raw p-values (column 3)
  cor.res <- cbind(cor.res, p.adjust(cor.res[, 3], "fdr"));
  colnames(cor.res) <- c("correlation", "t-stat", "p-value", "FDR");
  # sort by p-value, keep 5 significant digits
  sig.mat <- signif(cor.res[order(cor.res[, 3]), ], 5);
  fileName <- "correlation_feature.csv";
  write.csv(sig.mat, file=fileName);
  analSet$corr$sig.nm <<- fileName;
  analSet$corr$cor.mat <<- sig.mat;
  analSet$corr$pattern <<- varName;
  return(1);
}
# Horizontal bar chart of feature correlations with the current pattern
# (top 25 by p-value when more are available), positive bars in rose,
# negative in blue.
PlotCorr <- function(imgName, format="png", dpi=72, width=NA){
  cor.res <- analSet$corr$cor.mat;
  pattern <- analSet$corr$pattern;
  title <- paste(GetVariableLabel(), "correlated with the", pattern);
  if(nrow(cor.res) > 25){
    # first keep the most significant ones (by p-value)
    ord.inx<-order(cor.res[,3]);
    cor.res <- cor.res[ord.inx, ];
    cor.res <- cor.res[1:25, ];
    # then order by their direction (correlation)
    ord.inx<-order(cor.res[,1]);
    if(sum(cor.res[,1] > 0) == 0){ # all negative correlation
      ord.inx <- rev(ord.inx);
    }
    cor.res <- cor.res[ord.inx, ];
    title <- paste("Top 25", tolower(GetVariableLabel()), "correlated with the", pattern);
  }
  imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
  if(is.na(width)){
    w <- h <- 7.2;
  }else if(width == 0){
    # BUG FIX: h was never assigned in this branch, so Cairo(height=h) failed
    w <- h <- 7.2;
    imgSet$corr<<-imgName;
  }else{
    w <- h <- width;
  }
  Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
  par(mar=c(5,6,4,3))
  rownames(cor.res)<-substr(rownames(cor.res), 1, 18);
  # rose for positive, blue for negative correlations
  cols <- ifelse(cor.res[,1] >0, "mistyrose","lightblue");
  # empty dotchart provides axes/labels; bars are overlaid on top
  dotchart(cor.res[,1], pch="", xlim=c(-1,1), xlab="Correlation coefficients", main=title);
  rownames(cor.res) <- NULL;
  barplot(cor.res[,1], space=c(0.5, rep(0, nrow(cor.res)-1)), xlim=c(-1,1), xaxt="n", col = cols, add=T,horiz=T);
  #dev.off();
}
# Name of the CSV file the correlation results were written to.
GetCorrSigFileName <- function(){
analSet$corr$sig.nm;
}
# Correlation result matrix with non-finite values cleaned (double[][]).
GetCorSigMat<-function(){
as.matrix(CleanNumber(analSet$corr$cor.mat));
}
# Row (feature) names of the correlation result matrix.
GetCorSigRowNames<-function(){
rownames(analSet$corr$cor.mat);
}
# Column names of the correlation result matrix.
GetCorSigColNames<-function(){
colnames(analSet$corr$cor.mat);
}
# Render the correlation result matrix as a report table.
GetSigTable.Corr<-function(){
GetSigTable(analSet$corr$cor.mat, "Pattern search using correlation analysis");
}
# Plot a feature-feature correlation heatmap via pheatmap.
# cor.method: correlation type; colors: palette name; viewOpt: "overview"
# (fixed square) or detail (height grows with feature count); fix.col: fix the
# color scale to [-1, 1]; no.clst: disable clustering; top/topNum: keep only
# the topNum features with the largest total absolute correlation.
# Side effect: writes correlation_table.csv.
PlotCorrHeatMap<-function(imgName, format="png", dpi=72, width=NA, cor.method,
colors, viewOpt, fix.col, no.clst, top, topNum){
main <- xlab <- ylab <- NULL;
data <- dataSet$norm;
# cap at the 1000 most variable (IQR) features to keep cor() tractable
if(ncol(data) > 1000){
filter.val <- apply(data.matrix(data), 2, IQR, na.rm=T);
rk <- rank(-filter.val, ties.method='random');
data <- as.data.frame(data[,rk <=1000]);
print("Data is reduced to 1000 vars ..");
}
colnames(data)<-substr(colnames(data), 1, 18);
corr.mat<-cor(data, method=cor.method);
# use total abs(correlation) to select the most connected features
if(top){
cor.sum <- apply(abs(corr.mat), 1, sum);
cor.rk <- rank(-cor.sum);
var.sel <- cor.rk <= topNum;
corr.mat <- corr.mat[var.sel, var.sel];
}
# set up parameter for heatmap
suppressMessages(require(RColorBrewer));
suppressMessages(require(gplots));
if(colors=="gbr"){
colors <- colorRampPalette(c("green", "black", "red"), space="rgb")(256);
}else if(colors == "heat"){
colors <- heat.colors(256);
}else if(colors == "topo"){
colors <- topo.colors(256);
}else if(colors == "gray"){
colors <- colorRampPalette(c("grey90", "grey10"))(256);
}else{
# default: reversed red-blue diverging palette
colors <- rev(colorRampPalette(brewer.pal(10, "RdBu"))(256));
}
imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
if(viewOpt == "overview"){
if(is.na(width)){
w <- 9;
}else if(width == 0){
# width 0 = web default; record the image name for the report
w <- 7.2;
imgSet$heatmap<<-imgName;
}else{
w <- 7.2;
}
h <- w;
}else{
# detail view: height scales with the number of features (at 72 px/in)
if(ncol(corr.mat) > 50){
myH <- ncol(corr.mat)*12 + 40;
}else if(ncol(corr.mat) > 20){
myH <- ncol(corr.mat)*12 + 60;
}else{
myH <- ncol(corr.mat)*12 + 120;
}
h <- round(myH/72,2);
if(is.na(width)){
w <- h;
}else if(width == 0){
w <- h <- 7.2;
imgSet$corr.heatmap<<-imgName;
}else{
w <- h <- 7.2;
}
}
Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
if(no.clst){
rowv=FALSE;
colv=FALSE;
dendro= "none";
}else{
rowv=TRUE;
colv=TRUE;
dendro= "both";
}
require(pheatmap);
# NOTE(review): cluster_rows is fed colv and cluster_cols is fed rowv --
# looks swapped, but harmless since rowv and colv always hold the same value.
if(fix.col){
# fixed color scale spanning the full correlation range [-1, 1]
breaks <- seq(from = -1, to = 1, length = 257);
pheatmap(corr.mat,
fontsize=8, fontsize_row=8,
cluster_rows = colv,
cluster_cols = rowv,
color = colors,
breaks = breaks
);
}else{
pheatmap(corr.mat,
fontsize=8, fontsize_row=8,
cluster_rows = colv,
cluster_cols = rowv,
color = colors
);
}
#dev.off();
write.csv(signif(corr.mat,5), file="correlation_table.csv")
}
#############################################################################################
#############################################################################################
#############################################################################################
#############################################################################################
####################################################################
## R script for MetaboAnalyst
## Description: perform Dendrogram, Heatmap, Kmeans & SOM analysis
##
## Author: Jeff Xia, jeff.xia@mcgill.ca
## McGill University, Canada
##
## License: GNU GPL (>= 2)
###################################################
####################################
########### Dendrogram ##############
#####################################
# Plot a hierarchical clustering dendrogram of the samples.
# smplDist: "euclidean" or a correlation method (converted to 1-cor distance);
# clstDist: hclust agglomeration method. For discrete classes, leaf labels
# are colored by group and a legend is added.
PlotHCTree<-function(imgName, format="png", dpi=72, width=NA, smplDist, clstDist){
# set up data set
hc.dat<-as.matrix(dataSet$norm);
colnames(hc.dat)<-substr(colnames(hc.dat), 1, 18) # some names are too long
# set up distance matrix
if(smplDist == 'euclidean'){
dist.mat<-dist(hc.dat, method = smplDist);
}else{
# correlation-based distance: 1 - cor between sample profiles
dist.mat<-dist(1-cor(t(hc.dat), method = smplDist));
}
# record the paramters
analSet$tree<<-list(dist.par=smplDist, clust.par=clstDist);
# build the tree
hc_tree<-hclust(dist.mat, method=clstDist);
# plot the tree
imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
if(is.na(width)){
# auto size: height grows with sample count (at 72 px/in), min 630 px
w <- minH <- 630;
myH <- nrow(hc.dat)*10 + 150;
if(myH < minH){
myH <- minH;
}
w <- round(w/72,2);
h <- round(myH/72,2);
}else if(width == 0){
# width 0 = web default; record the image name for the report
w <- h <- 7.2;
imgSet$tree<<-imgName;
}else{
w <- h <- 7.2;
}
Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
par(cex=0.8, mar=c(4,2,2,8));
if(dataSet$cls.type == "disc"){
clusDendro<-as.dendrogram(hc_tree);
cols <- GetColorSchema();
names(cols) <- rownames(hc.dat);
labelColors <- cols[hc_tree$order];
# recolor each leaf label with its sample's group color
colLab <- function(n){
if(is.leaf(n)) {
a <- attributes(n)
labCol <- labelColors[a$label];
attr(n, "nodePar") <-
if(is.list(a$nodePar)) c(a$nodePar, lab.col = labCol,pch=NA) else
list(lab.col = labCol,pch=NA)
}
n
}
clusDendro<-dendrapply(clusDendro, colLab)
plot(clusDendro,horiz=T,axes=T);
par(cex=1);
legend.nm <- as.character(dataSet$cls);
legend("topleft", legend = unique(legend.nm), pch=15, col=unique(cols), bty = "n");
}else{
# continuous class labels: plain dendrogram without group coloring
plot(as.dendrogram(hc_tree), hang=-1, main=paste("Cluster with", clstDist, "method"), xlab=NULL, sub=NULL, horiz=TRUE);
}
#dev.off();
}
# inx has to be 1 or 2
# Return the class label at position inx (inx has to be 1 or 2).
GetClassLabel<-function(inx){
levels(dataSet$cls)[inx]
}
############################
########### SOM #############
#############################
# SOM analysis
# Run self-organizing map (SOM) clustering on dataSet$norm and store the
# result in analSet$som. x.dim/y.dim: grid size; initMethod: initialization;
# neigb: neighborhood function (default gaussian).
SOM.Anal<-function(x.dim, y.dim, initMethod, neigb = 'gaussian'){
require(som);
analSet$som<<-som(as.matrix(dataSet$norm), xdim=x.dim, ydim=y.dim, init=initMethod, neigh=neigb);
}
# get members for given cluster index, return a character string
# Return the sample names mapped to SOM grid cell (i, j) as a single
# comma-separated string (i, j are 0-based grid coordinates).
GetSOMClusterMembers<-function(i, j){
  clust<-analSet$som$visual;
  # samples whose winning unit is this grid cell
  hit.inx <- clust$x == i & clust$y == j;
  # note: an unused GetColorSchema() call was removed from the original
  paste("", rownames(dataSet$norm)[hit.inx], "",collapse =", ");
}
# Print an xtable (LaTeX) listing the sample members of every SOM grid cell,
# one row per (i, j) cell, for the analysis report.
GetAllSOMClusterMembers<-function(){
clust<-analSet$som$visual;
xdim<-analSet$som$xdim;
ydim<-analSet$som$ydim;
clust.df = data.frame();
rowNameVec = c();
i = 0;
# walk the grid in row-major order (0-based SOM coordinates)
while(i < xdim){
j = 0;
while(j < ydim){
xTrue<-clust$x == i;
yTrue<-clust$y == j;
if(i==0 & j==0){ # seed the data.frame: rbind on an empty data.frame misbehaves
clust.df <- rbind(paste(rownames(dataSet$norm)[xTrue & yTrue], collapse = " "));
rowNameVec <- c(paste("Cluster(", i, ",", j,")"));
}else{
clust.df <- rbind(clust.df, paste(rownames(dataSet$norm)[xTrue & yTrue], collapse=" "));
rowNameVec <- c(rowNameVec, paste("Cluster(", i, ",", j,")"));
}
j = j+1;
}
i = i+1;
}
row.names(clust.df)<- rowNameVec;
colnames(clust.df)<-"Samples in each cluster";
print(xtable(clust.df, align="l|p{8cm}", caption="Clustering result using SOM"),caption.placement="top", size="\\scriptsize");
}
# plot SOM map for less than 20 clusters
# Plot the SOM: one panel per grid cell showing its member feature profiles
# in grey with the per-feature median in blue. Skips plotting entirely when
# the grid has more than 20 cells.
PlotSOM <- function(imgName, format="png", dpi=72, width=NA){
xdim<-analSet$som$xdim;
ydim<-analSet$som$ydim;
total<-xdim*ydim;
if(total>20) { return();}
ylabel<-GetValueLabel();
clust<-analSet$som$visual;
imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
if(is.na(width)){
w <- 9;
}else if(width == 0){
# width 0 = web default; record the image name for the report
w <- 7;
imgSet$som<<-imgName;
}else{
w <- width;
}
h <- w*8/9;
Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
par(mfrow = GetXYCluster(total), mar=c(5,4,2,2));
for (i in 0:(xdim-1)) {
xTrue<-clust$x == i;
for (j in 0:(ydim-1)) {
yTrue<-clust$y == j;
sel.inx<-xTrue & yTrue; # selected row
if(sum(sel.inx)>0){ # some cluster may not contain any member
matplot(t(dataSet$norm[sel.inx, ]), type="l", col='grey', axes=F, ylab=ylabel,
main=paste("Cluster(", i, ",", j,")", ", n=", sum(sel.inx), sep=""))
lines(apply(dataSet$norm[sel.inx, ], 2, median), type="l", col='blue', lwd=1);
}else{ # plot a dummy panel to keep the grid layout intact
plot(t(dataSet$norm[1, ]), type="n", axes=F, ylab=ylabel,
main=paste("Cluster(", i, ",", j,")",", n=", sum(sel.inx),sep=""))
}
axis(2);
axis(1, 1:ncol(dataSet$norm), substr(colnames(dataSet$norm), 1, 7), las=2);
}
}
#dev.off();
}
##################################
########### K-means ##############
###################################
# functions for k-means analysis
# Run k-means (100 random starts) on dataSet$norm with clust.num clusters
# and store the result in analSet$kmeans.
Kmeans.Anal<-function(clust.num){
analSet$kmeans<<-kmeans (dataSet$norm, clust.num, nstart=100);
}
# Plot the k-means result: one panel per cluster showing its member profiles
# in grey with the per-feature median in blue. Skips plotting when there are
# more than 20 clusters.
PlotKmeans<-function(imgName, format="png", dpi=72, width=NA){
clust.num <- max(analSet$kmeans$cluster);
if(clust.num>20) return();
# calculate arrangement of panel
ylabel<-GetValueLabel();
imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
if(is.na(width)){
w <- 9;
}else if(width == 0){
# width 0 = web default; record the image name for the report
w <- 7;
imgSet$kmeans<<-imgName;
}else{
w <- width;
}
h <- w*8/9;
Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
par(mfrow = GetXYCluster(clust.num), mar=c(5,4,2,2));
for (loop in 1:clust.num) {
matplot(t(dataSet$norm[analSet$kmeans$cluster==loop,]), type="l", col='grey', ylab=ylabel, axes=F,
main=paste("Cluster ",loop, ", n=", analSet$kmeans$size[loop], sep=""))
lines(apply(dataSet$norm[analSet$kmeans$cluster==loop,], 2, median), type="l", col='blue', lwd=1);
axis(2);
axis(1, 1:ncol(dataSet$norm), substr(colnames(dataSet$norm), 1, 7), las=2);
}
#dev.off();
}
# get cluster member for give index
# add HTML color to the names based on its group membership
# Return the sample names in k-means cluster i as a comma-separated string.
GetKMClusterMembers<-function(i){
  hit.inx <- analSet$kmeans$cluster== i;
  # note: an unused GetColorSchema() call (and a stale commented-out line
  # depending on it) was removed from the original
  paste("", rownames(dataSet$norm)[hit.inx], "",collapse =", ");
}
# Print an xtable (LaTeX) listing the sample members of every k-means
# cluster, one row per cluster, for the analysis report.
GetAllKMClusterMembers<-function(){
clust.df = data.frame();
rowNameVec = c();
i = 1;
clust.num<-max(analSet$kmeans$cluster);
while(i<=clust.num){
if(i==1){ # seed the data.frame: rbind on an empty data.frame misbehaves
clust.df <- rbind(paste(rownames(dataSet$norm)[analSet$kmeans$cluster== i], collapse = " "));
}else{
clust.df <- rbind(clust.df,paste(rownames(dataSet$norm)[analSet$kmeans$cluster== i], collapse = " "));
}
rowNameVec <- c(rowNameVec, paste("Cluster(", i, ")"));
i = i+1;
}
row.names(clust.df)<- rowNameVec;
colnames(clust.df)<-"Samples in each cluster";
print(xtable(clust.df, align="l|p{8cm}", caption="Clustering result using K-means"), caption.placement="top", size="\\scriptsize");
}
# plot a sub heatmap based on results from t-tests/ANOVA, VIP or randomforest
# Plot a heatmap restricted to the top.num most significant features chosen
# by method.nm: 'tanova' (t-test/ANOVA p-values), 'cor' (pattern correlation),
# 'vip' (PLS-DA VIP, first component) or 'rf' (random forest importance).
# Runs the corresponding analysis first if its results are not cached, then
# delegates rendering to PlotHeatMap with the selected feature indices.
PlotSubHeatMap <- function(imgName, format="png", dpi=72, width=NA, dataOpt, scaleOpt, smplDist, clstDist, palette, method.nm, top.num, viewOpt, rowV=T, colV=T, border=T){
var.nms = colnames(dataSet$norm);
if(top.num < length(var.nms)){
if(method.nm == 'tanova'){
# two groups -> t-tests, otherwise ANOVA
if(GetGroupNumber() == 2){
if(is.null(analSet$tt)){
Ttests.Anal();
}
var.nms <- names(sort(analSet$tt$p.value))[1:top.num];
}else{
if(is.null(analSet$aov)){
ANOVA.Anal();
}
var.nms <- names(sort(analSet$aov$p.value))[1:top.num];
}
}else if(method.nm == 'cor'){
if(is.null(analSet$cor.res)){
Match.Pattern();
}
# re-order for pretty view: by p-value, then by correlation direction
cor.res <- analSet$cor.res;
ord.inx<-order(cor.res[,3]);
cor.res <- cor.res[ord.inx, ];
ord.inx<-order(cor.res[,1]);
cor.res <- cor.res[ord.inx, ];
var.nms <- rownames(cor.res)[1:top.num];
}else if(method.nm == 'vip'){
if(is.null(analSet$plsda)){
PLSR.Anal();
PLSDA.CV();
}
vip.vars <- analSet$plsda$vip.mat[,1];# use the first component
var.nms <- names(rev(sort(vip.vars)))[1:top.num];
}else if(method.nm == 'rf'){
if(is.null(analSet$rf)){
RF.Anal();
}
var.nms <- GetRFSigRowNames()[1:top.num];
}
}
# translate selected names to column indices and render
var.inx <- match(var.nms, colnames(dataSet$norm));
PlotHeatMap(imgName, format, dpi, width, dataOpt, scaleOpt, smplDist, clstDist, palette, viewOpt, rowV, colV, var.inx, border);
}
# Plot the main clustered heatmap of the data with per-sample class annotation.
# dataOpt: "norm" plots dataSet$norm, anything else dataSet$proc.
# scaleOpt: scaling mode passed to pheatmap; smplDist/clstDist: distance
# metric and agglomeration method; palette: color scheme; viewOpt "overview"
# caps the image size; rowV/colV: cluster samples/features; var.inx: optional
# feature subset (NA = all); border: draw cell borders.
PlotHeatMap<-function(imgName, format="png", dpi=72, width=NA, dataOpt, scaleOpt, smplDist, clstDist, palette, viewOpt="detail", rowV=T, colV=T, var.inx=NA, border=T){
  # record the paramters
  analSet$htmap<<-list(dist.par=smplDist, clust.par=clstDist);
  # set up data set
  if(dataOpt=="norm"){
    my.data <- dataSet$norm;
  }else{
    my.data <- dataSet$proc;
  }
  # BUG FIX: var.inx may be a vector (e.g. from PlotSubHeatMap);
  # if(is.na(vec)) on length > 1 is an error in R >= 4.2, so use all(is.na())
  if(all(is.na(var.inx))){
    hc.dat<-as.matrix(my.data);
  }else{
    hc.dat<-as.matrix(my.data[,var.inx]);
  }
  colnames(hc.dat)<-substr(colnames(hc.dat),1,18) # some names are too long
  hc.cls <- dataSet$cls;
  # set up colors for heatmap
  if(palette=="gbr"){
    colors <- colorRampPalette(c("green", "black", "red"), space="rgb")(256);
  }else if(palette == "heat"){
    colors <- heat.colors(256);
  }else if(palette == "topo"){
    colors <- topo.colors(256);
  }else if(palette == "gray"){
    colors <- colorRampPalette(c("grey90", "grey10"), space="rgb")(256);
  }else{
    # default: reversed red-blue diverging palette
    suppressMessages(require(RColorBrewer));
    colors <- rev(colorRampPalette(brewer.pal(10, "RdBu"))(256));
  }
  imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
  if(is.na(width)){
    # auto width grows with sample count (at 72 px/in), minimum 630 px
    minW <- 630;
    myW <- nrow(hc.dat)*18 + 150;
    if(myW < minW){
      myW <- minW;
    }
    w <- round(myW/72,2);
  }else if(width == 0){
    # width 0 = web default; record the image name for the report
    w <- 7.2;
    imgSet$heatmap<<-imgName;
  }else{
    w <- 7.2;
  }
  # height grows with feature count
  myH <- ncol(hc.dat)*18 + 150;
  h <- round(myH/72,2);
  if(viewOpt == "overview"){
    # overview: clamp to a compact, roughly square canvas
    if(is.na(width)){
      if(w > 9){
        w <- 9;
      }
    }else if(width == 0){
      if(w > 7.2){
        w <- 7.2;
      }
      imgSet$heatmap<<-imgName;
    }else{
      w <- 7.2;
    }
    if(h > w){
      h <- w;
    }
  }
  if(border){
    border.col<-"grey60";
  }else{
    border.col <- NA;
  }
  Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
  if(dataSet$cls.type == "disc"){
    require(pheatmap);
    annotation <- data.frame(class= hc.cls);
    rownames(annotation) <-rownames(hc.dat);
    # set up color schema for samples
    if(palette== "gray"){
      cols <- GetColorSchema(T);
      uniq.cols <- unique(cols);
    }else{
      cols <- GetColorSchema();
      uniq.cols <- unique(cols);
    }
    names(uniq.cols) <- unique(as.character(dataSet$cls));
    ann_colors <- list(class= uniq.cols);
    pheatmap(t(hc.dat),
             annotation=annotation,
             fontsize=8, fontsize_row=8,
             clustering_distance_rows = smplDist,
             clustering_distance_cols = smplDist,
             clustering_method = clstDist,
             border_color = border.col,
             cluster_rows = colV,
             cluster_cols = rowV,
             scale = scaleOpt,
             color = colors,
             annotation_colors = ann_colors
             );
  }else{
    # BUG FIX: the original referenced undefined rowTree/colTree here; map
    # the rowV/colV flags to heatmap()'s Rowv/Colv (NULL = cluster, NA = off)
    heatmap(hc.dat, Rowv = if(rowV) NULL else NA, Colv = if(colV) NULL else NA, col = colors, scale="column");
  }
  #dev.off();
}
# Two-factor heatmap: samples ordered by facA/facB (sortInx picks which
# factor sorts first), optionally restricted to features significant in the
# two-way ANOVA (useSigFeature). Rows are features, columns samples, with
# both factors shown as column annotations.
PlotHeatMap2<-function(imgName, format="png", dpi=72, width=NA, smplDist='pearson', clstDist='average', colors="bwm", viewOpt="overview", hiRes=FALSE, sortInx = 1, useSigFeature, drawBorder, var.inx=1:ncol(dataSet$norm)){
if(sortInx == 1){
ordInx <- order(dataSet$facA, dataSet$facB);
}else{
ordInx <- order(dataSet$facB, dataSet$facA);
}
new.facA <- dataSet$facA[ordInx];
new.facB <- dataSet$facB[ordInx];
# set up data set. note, need to transpose the data for two way plotting
data <- dataSet$norm[ordInx, ];
# use features from ANOVA2
if(useSigFeature){
hits <- colnames(data) %in% rownames(analSet$aov2$sig.mat);
data <- dataSet$norm[ordInx, hits];
}
hc.dat<-as.matrix(data);
colnames(hc.dat)<-substr(colnames(data), 1, 18) # some names are too long
# set up parameter for heatmap
suppressMessages(require(RColorBrewer));
if(colors=="gbr"){
colors <- colorRampPalette(c("green", "black", "red"), space="rgb")(256);
}else if(colors == "heat"){
colors <- heat.colors(256);
}else if(colors == "topo"){
colors <- topo.colors(256);
}else if(colors == "gray"){
colors <- colorRampPalette(c("grey90", "grey10"), space="rgb")(256);
}else{
# default: reversed red-blue diverging palette
colors <- rev(colorRampPalette(brewer.pal(10, "RdBu"))(256));
}
if(drawBorder){
border.col<-"grey60";
}else{
border.col <- NA;
}
imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
if(viewOpt == "overview"){
if(is.na(width)){
w <- 9;
}else if(width == 0){
# width 0 = web default; record the image name for the report
w <- 7.2;
imgSet$heatmap<<-imgName;
}else{
w <- 7.2;
}
h <- w;
}else{
# detail view: width grows with sample count, height with feature count
if(is.na(width)){
minW <- 650;
myW <- nrow(hc.dat)*11 + 150;
if(myW < minW){
myW <- minW;
}
w <- round(myW/72,2);
}else if(width == 0){
w <- 7.2;
imgSet$heatmap<<-imgName;
}else{
w <- 7.2;
}
if(ncol(hc.dat) >100){
myH <- ncol(hc.dat)*12 + 120;
}else if(ncol(hc.dat) > 50){
myH <- ncol(hc.dat)*12 + 60;
}else{
myH <- ncol(hc.dat)*12 + 20;
}
h <- round(myH/72,2);
}
Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
require(pheatmap);
# both factors as column annotations, keyed by sample name
annotation <- data.frame(new.facB, new.facA);
colnames(annotation) <- c(dataSet$facB.lbl, dataSet$facA.lbl);
rownames(annotation) <-rownames(hc.dat);
# columns (samples) keep their factor ordering; only rows are clustered
pheatmap(t(hc.dat),
annotation=annotation,
fontsize=8, fontsize_row=8,
clustering_distance_rows = smplDist,
clustering_distance_cols = smplDist,
clustering_method = clstDist,
border_color = border.col,
cluster_rows = T,
cluster_cols = F,
scale = 'row',
color = colors);
#dev.off();
analSet$htmap2<<-list(dist.par=smplDist, clust.par=clstDist);
}
############################################################################################
############################################################################################
############################################################################################
############################################################################################
##################################################
## R script for MetaboAnalyst
## Description: perform SAM and EBAM for feature selection
##
## Author: Jeff Xia, jeff.xia@mcgill.ca
## McGill University, Canada
##
## License: GNU GPL (>= 2)
###################################################
##################################
########### SAM ##################
##################################
# SAM analysis
# Run SAM (siggenes) on dataSet$norm and store the result in analSet$sam.
# For two classes: d.stat (optionally paired / unequal variance) or wilc.stat;
# for more than two classes the siggenes multi-class default is used.
# rand=123 fixes the permutation seed for reproducibility.
SAM.Anal<-function(method="d.stat", paired=FALSE, varequal=TRUE){
suppressMessages(require(siggenes));
mat<-t(dataSet$norm); # in sam the column is sample
cl<-as.numeric(dataSet$cls); # change to 0 and 1 for class label
if(dataSet$cls.num==2){
if(paired){
# paired design: use the pair labels instead of class labels
cl<-as.numeric(dataSet$pairs);
}
if(method == "d.stat"){
sam_out<-sam(mat, cl, method=d.stat, var.equal=varequal, R.fold=0, rand=123);
}else{
sam_out<-sam(mat, cl, method=wilc.stat, R.fold=0,rand=123);
}
}else{
sam_out<-sam(mat, cl, rand=123);
}
analSet$sam<<-sam_out;
}
# Plot SAM delta selection: FDR vs delta (left) and number of significant
# features vs delta (right), with the chosen delta marked and summarized.
PlotSAM.FDR<-function(delta, imgName, format="png", dpi=72, width=NA){
  imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
  if(is.na(width)){
    w <- 10;
  }else if(width == 0){
    # width 0 = web default; record the image name for the report
    w <- 7.2;
    imgSet$sam.fdr<<-imgName;
  }else{
    # BUG FIX: this branch was missing, leaving w unassigned (and Cairo()
    # failing) whenever an explicit positive width was supplied
    w <- width;
  }
  h <- w*3/5;
  Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
  par(mfrow=c(1,2), mar=c(5,6,4,1));
  mat.fdr<-analSet$sam@mat.fdr;
  plot(mat.fdr[,"Delta"],mat.fdr[,"FDR"],xlab='Delta',ylab=NA,type="b", col='blue', las=2);
  abline(v = delta, lty=3, col="magenta");
  mtext("FDR", side=2, line=5);
  par(mar=c(5,5,4,2))
  plot(mat.fdr[,"Delta"],mat.fdr[,"Called"],xlab='Delta',ylab="Significant feaure No.",type="b", col='blue', las=2);
  abline(v = delta, lty=3, col="magenta");
  # summarize FDR and feature count at the chosen delta in the outer margin
  hit.inx <- mat.fdr[,"Delta"] <= delta;
  my.fdr <- signif(min(mat.fdr[,"FDR"][hit.inx]), 3);
  my.sigs <- min(mat.fdr[,"Called"][hit.inx]);
  mtext(paste("Delta:", delta, " FDR:", my.fdr, " Sig. cmpds:", my.sigs), line=-2, side = 3, outer = TRUE, font=2)
  #dev.off();
}
# Extract the significant-feature table from the cached SAM result at the
# given delta, write it to sam_sigfeatures.csv, and cache matrix + delta
# in the global analSet.
SetSAMSigMat<-function(delta){
  sig.summary <- summary(analSet$sam, delta);
  full.mat <- sig.summary@mat.sig;
  # drop columns 1 and 6 of the siggenes summary table, keep 5 sig. digits
  sig.mat <- as.matrix(signif(full.mat[, -c(1,6)], 5));
  write.csv(signif(sig.mat, 5), file="sam_sigfeatures.csv");
  analSet$sam.cmpds <<- sig.mat;
  analSet$sam.delta <<- delta;
}
# Return the cached SAM significant-feature matrix with infinite values
# replaced by finite sentinels (see CleanNumber).
GetSAMSigMat<-function(){
  sam.mat <- analSet$sam.cmpds;
  return(CleanNumber(sam.mat));
}
# Feature (row) names of the cached SAM significant-feature matrix.
GetSAMSigRowNames<-function(){
  return(rownames(analSet$sam.cmpds));
}
# Column (statistic) names of the cached SAM significant-feature matrix.
GetSAMSigColNames<-function(){
  return(colnames(analSet$sam.cmpds));
}
# Emit the LaTeX significance table for the cached SAM results.
GetSigTable.SAM<-function(){
  sam.mat <- analSet$sam.cmpds;
  GetSigTable(sam.mat, "SAM");
}
# Render the siggenes SAM plot for the cached result at the stored delta.
# imgName: base name for the image; format/dpi/width control the Cairo device.
PlotSAM.Cmpd<-function(imgName, format="png", dpi=72, width=NA){
  imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
  # square canvas: 8in default, 7in for the report (width==0), else as given
  if(is.na(width)){
    w <- 8;
  }else if(width == 0){
    w <- 7;
    imgSet$sam.cmpd<<-imgName;  # remember image for report generation
  }else{
    w <- width;
  }
  h <- w;
  Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
  plot(analSet$sam, analSet$sam.delta);
  #dev.off();
}
# obtain a default delta with reasonable number
# of sig features and decent FDR
# Suggest a default SAM delta: scan the FDR table and accept the first
# delta with at least one significant feature whose FDR/feature-count
# combination passes the tiered thresholds (looser FDR is tolerated only
# when fewer features are called). Falls back to the first delta.
GetSuggestedSAMDelta<-function(){
  mat.fdr <- analSet$sam@mat.fdr
  deltas <- mat.fdr[,"Delta"];
  fdrs <- mat.fdr[,"FDR"];
  called.nums <- mat.fdr[,"Called"];
  for(i in seq_along(deltas)){
    if(called.nums[i] > 0){  # at least one significant compound
      fdr <- fdrs[i];
      called <- called.nums[i];
      # tiered acceptance: tighten FDR when many features are called
      ok <- (fdr < 0.001) ||
            (fdr < 0.01 && called < 100) ||
            (fdr < 0.05 && called < 50) ||
            (fdr < 0.1 && called < 20) ||
            (called < 10);
      if(ok){
        return(deltas[i]);
      }
    }
  }
  return(deltas[1]);  # nothing significant found; use the first delta
}
#######################################
############# EBAM ####################
#######################################
# determining a0; only applicable for z.ebam (the default method)
# Run siggenes::find.a0 on the normalized data to determine the fudge
# factor a0 for EBAM (z.ebam method) and cache it in analSet$ebam.a0.
EBAM.A0.Init<-function(isPaired, isVarEq){
  suppressMessages(require(siggenes));
  # class labels: pair labels for paired designs, otherwise 0/1 group labels
  if(isPaired){
    cls.vec <- as.numeric(dataSet$pairs);
  }else{
    cls.vec <- as.numeric(dataSet$cls)-1;
  }
  conc.mat <- t(dataSet$norm);  # siggenes expects features x samples
  analSet$ebam.a0 <<- find.a0(conc.mat, cls.vec, var.equal=isVarEq,
                              gene.names = names(dataSet$norm), rand=123);
}
# plot the EBAM a0 selection curve; the suggested a0 can then be read from the cached analSet$ebam.a0 object
# Plot the cached EBAM a0 selection result (from find.a0).
# imgName: base name for the image; format/dpi/width control the Cairo device.
# Bug fix: the original handled only width==NA and width==0, so an explicit
# positive width left `w` undefined and the function failed (the sibling
# PlotSAM.Cmpd/PlotEBAM.Cmpd functions already have this else branch).
PlotEBAM.A0<-function(imgName, format="png", dpi=72, width=NA){
  imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
  if(is.na(width)){
    w <- 8;
  }else if(width == 0){
    w <- 7;
    imgSet$ebam.a0<<-imgName;  # remember image for report generation
  }else{
    w <- width;  # bug fix: honor a caller-specified width
  }
  h <- 3*w/4;
  Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
  plot(analSet$ebam.a0);
  #dev.off();
}
# note: if method is wilcoxon, the A0 and var equal will be ignored
# Run EBAM (via siggenes) on the normalized data and cache the result in
# the global analSet$ebam.
# method "z.ebam" uses the fudge factor A0 and the variance setting;
# any other method falls back to wilc.ebam, which ignores both.
EBAM.Cmpd.Init<-function(method="z.ebam", A0=0, isPaired=FALSE, isVarEq=TRUE){
  if(isPaired){
    cls.vec <- as.numeric(dataSet$pairs);
  }else{
    cls.vec <- as.numeric(dataSet$cls)-1;  # 0/1 group labels
  }
  conc.mat <- t(dataSet$norm);  # siggenes expects features x samples
  if(method == "z.ebam"){
    ebam.res <- ebam(conc.mat, cls.vec, method=z.ebam, a0=A0, var.equal=isVarEq,
                     fast=TRUE, gene.names = names(dataSet$norm), rand=123);
  }else{
    # wilcoxon variant: a0 and variance-equality do not apply
    ebam.res <- ebam(conc.mat, cls.vec, method=wilc.ebam,
                     gene.names = names(dataSet$norm), rand=123);
  }
  analSet$ebam <<- ebam.res;
}
# return double matrix with 3 columns - z.value, posterior, local.fdr
# Extract the significant-feature table from the cached EBAM result at the
# given delta (columns: z.value, posterior, local.fdr), write it to
# ebam_sigfeatures.csv, and cache matrix + delta in the global analSet.
SetEBAMSigMat<-function(delta){
  sig.summary <- summary(analSet$ebam, delta);
  full.mat <- sig.summary@mat.sig;
  sig.mat <- as.matrix(signif(full.mat[, -1], 5));  # drop the first (index) column
  write.csv(signif(sig.mat, 5), file="ebam_sigfeatures.csv");
  analSet$ebam.cmpds <<- sig.mat;
  analSet$ebam.delta <<- delta;
}
# Return the cached EBAM significant-feature matrix with infinite values
# replaced by finite sentinels (see CleanNumber).
GetEBAMSigMat<-function(){
  ebam.mat <- analSet$ebam.cmpds;
  return(CleanNumber(ebam.mat));
}
# Feature (row) names of the cached EBAM significant-feature matrix.
GetEBAMSigRowNames<-function(){
  return(rownames(analSet$ebam.cmpds));
}
# Column (statistic) names of the cached EBAM significant-feature matrix.
GetEBAMSigColNames<-function(){
  return(colnames(analSet$ebam.cmpds));
}
# Emit the LaTeX significance table for the cached EBAM results.
GetSigTable.EBAM<-function(){
  ebam.mat <- analSet$ebam.cmpds;
  GetSigTable(ebam.mat, "EBAM");
}
# plot ebam
# Render the siggenes EBAM plot for the cached result at the stored delta.
# imgName: base name for the image; format/dpi/width control the Cairo device.
PlotEBAM.Cmpd<-function(imgName, format="png", dpi=72, width=NA){
  imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
  # square canvas: 7in for both the default and report sizes, else as given
  if(is.na(width)){
    side <- 7;
  }else if(width == 0){
    side <- 7;
    imgSet$ebam.cmpd<<-imgName;  # remember image for report generation
  }else{
    side <- width;
  }
  Cairo(file = imgName, unit="in", dpi=dpi, width=side, height=side, type=format, bg="white");
  plot(analSet$ebam, analSet$ebam.delta);
  #dev.off();
}
############################################################################################
############################################################################################
############################################################################################
############################################################################################
##################################################
## R script for MetaboAnalyst
## Description: some miscellaneous tasks
##
## Author: Jeff Xia, jeff.xia@mcgill.ca
## McGill University, Canada
##
## License: GNU GPL (>= 2)
###################################################
# linearly transforms a vector or matrix of numbers to a new range
# Linearly transform a vector or matrix of numbers to a new range.
# x: numeric object; newrange: length-2 numeric giving the new min and max.
# NAs in x are preserved; a constant x is returned unchanged (the mapping
# is undefined); non-numeric input triggers a warning and is returned as-is.
# Fix: use short-circuit ||/&& in the scalar `if` conditions (idiom; the
# original used vectorized `|`).
rescale<-function(x,newrange) {
  if(missing(x) || missing(newrange)) {
    usage.string<-paste("Usage: rescale(x,newrange)\n",
                        "\twhere x is a numeric object and newrange is the new min and max\n",
                        sep="",collapse="")
    stop(usage.string)
  }
  if(is.numeric(x) && is.numeric(newrange)) {
    xna<-is.na(x)
    if(all(xna)) return(x)
    # range over the non-missing values only
    xrange <- if(any(xna)) range(x[!xna]) else range(x)
    # if x is constant, just return it
    if(xrange[1] == xrange[2]) return(x)
    mfac<-(newrange[2]-newrange[1])/(xrange[2]-xrange[1])
    return(newrange[1]+(x-xrange[1])*mfac)
  }
  else {
    warning("Only numeric objects can be rescaled")
    return(x)
  }
}
# merge duplicated columns or rows by their mean
# dim 1 => row, dim 2 => column
# Merge duplicated rows (dim=1) or columns (dim=2) by averaging them.
# Duplicates are identified by name, so unnamed data cannot be processed.
# Vectors are merged element-wise by name. Data with all-unique names is
# returned unchanged; otherwise the merged result is returned and a summary
# message is printed.
MergeDuplicates <- function(data, dim=2){
  if(is.null(dim(data))){
    # vector input: merge elements sharing the same name
    if(is.null(names(data))){
      print("Cannot detect duplicate data without names!!!");
      return();
    }
    nm.fac <- as.factor(names(data));
    n.uniq <- length(levels(nm.fac));
    if(n.uniq == length(data)){
      return(data);  # all names unique, nothing to merge
    }
    merged <- vector(mode="numeric", length=n.uniq);
    for(k in seq_len(n.uniq)){
      merged[k] <- mean(data[nm.fac == levels(nm.fac)[k]]);
    }
    names(merged) <- levels(nm.fac);
    rem.len <- length(data) - length(merged);
  }else{
    # matrix input: always operate column-wise, transposing first for rows
    if(dim == 1){
      data <- t(data);
    }
    if(is.null(colnames(data))){
      print("Cannot detect duplicate data without var names!!!");
      return();
    }
    nm.fac <- as.factor(colnames(data));
    n.uniq <- length(levels(nm.fac));
    if(n.uniq == ncol(data)){
      if(dim == 1){
        data <- t(data);  # undo the transpose before returning
      }
      return(data);
    }
    merged <- matrix(nrow=nrow(data), ncol=n.uniq);
    for(k in seq_len(n.uniq)){
      dup.cols <- which(nm.fac == levels(nm.fac)[k]);
      merged[,k] <- apply(data[, dup.cols, drop=F], 1, mean);
    }
    rownames(merged) <- rownames(data);
    colnames(merged) <- levels(nm.fac);
    rem.len <- ncol(data) - ncol(merged);
    if(dim == 1){
      merged <- t(merged);
    }
  }
  print(paste(rem.len, "duplicates are merged to their average"));
  merged;
}
# given data whose row names contain duplicates, collapse the duplicated rows
# Collapse rows with duplicated row names using the chosen statistic
# (lvlOpt: "mean", "median", "max"; anything else means sum). Values are
# coerced to numeric and dimnames are preserved. When quiet is FALSE a note
# is appended to the global current.msg.
RemoveDuplicates <- function(data, lvlOpt="mean", quiet=T){
  row.nms <- rownames(data);
  col.nms <- colnames(data);
  dup.flags <- duplicated(row.nms);
  # coerce to numeric while keeping the matrix shape
  # (apply drops dims when there is only one row/column)
  orig.dim <- dim(data);
  data <- apply(data, 2, as.numeric);
  dim(data) <- orig.dim;
  rownames(data) <- row.nms;
  colnames(data) <- col.nms;
  if(sum(dup.flags) > 0){
    keep.nms <- row.nms[!dup.flags];
    keep.data <- data[!dup.flags, , drop=F];
    # pick the column-wise collapse function once; default is sum
    collapse.fun <- switch(lvlOpt,
                           mean = mean,
                           median = median,
                           max = max,
                           sum);
    for(nm in unique(row.nms[dup.flags])){
      src.inx <- which(row.nms == nm);
      dest.inx <- which(keep.nms == nm);
      # collapse the whole duplicated sub-matrix into one row
      keep.data[dest.inx, ] <- apply(data[src.inx, , drop=F], 2, collapse.fun, na.rm=T);
    }
    if(!quiet){
      current.msg <<- paste(current.msg, paste("A total of ", sum(dup.flags), " of duplicates were replaced by their ", lvlOpt, ".", sep=""), collapse="\n");
    }
    return(keep.data);
  }else{
    if(!quiet){
      current.msg <<- paste(current.msg, "All IDs are unique.", collapse="\n");
    }
    return(data);
  }
}
# from two column input text to data matrix (single column data frame)
# Parse two-column text input into a one-column character matrix: the first
# column becomes the row names, the second the values. A leading "#" line is
# skipped. sep.type "tab" splits columns on tabs; anything else splits on
# runs of whitespace. Single-column input is padded with zeros; extra
# columns are dropped with a note in the global current.msg.
# Bug fixes: (1) the line-splitting regex tested "\r" before "\r\n", so the
# CRLF alternative could never match and CRLF input produced spurious empty
# lines; (2) the "more than two columns" note was assigned with a local `<-`
# and silently discarded - now `<<-` like the sibling functions.
getDataFromTextInput <- function(txtInput, sep.type="space"){
  lines <- unlist(strsplit(txtInput, "\r\n|\r|\n")[[1]]);
  if(substring(lines[1],1,1)=="#"){
    lines <- lines[-1];  # skip the comment header
  }
  lines <- lines[lines != ""];  # guard against blank lines (e.g. trailing newline)
  if(sep.type=="tab"){
    my.lists <- strsplit(lines, "\\t");
  }else{ # from any space
    my.lists <- strsplit(lines, "\\s+");
  }
  my.mat <- do.call(rbind, my.lists);
  if(dim(my.mat)[2] == 1){ # only one column given; add a zero value column
    my.mat <- cbind(my.mat, rep(0, nrow(my.mat)));
  }else if(dim(my.mat)[2] > 2){
    my.mat <- my.mat[,1:2];
    current.msg <<- "More than two columns found in the list. Only first two columns will be used. ";
  }
  rownames(my.mat) <- data.matrix(my.mat[,1]);
  my.mat <- my.mat[,-1, drop=F];
  return(my.mat);
}
# use single core on the public server
# Apply `fun` to permutation indices 2..perm.num and return the results as
# a list. The loop starts at 2 - presumably index 1 is the unpermuted
# original handled by the caller (TODO confirm).
# Multicore support is deliberately disabled so the shared public server
# stays responsive for other users.
Perform.permutation <- function(perm.num, fun){
  print(paste("performing", perm.num, "permutations ..."));
  perm.res <- lapply(2:perm.num, fun);
  perm.res;
}
# Fast membership operator: same contract as %in%, implemented with
# fastmatch::fmatch, which caches the lookup hash table on `table`.
# NOTE(review): fmatch is referenced without fastmatch:: - confirm the
# fastmatch package is attached before this operator is used.
`%fin%` <- function(x, table) {
fmatch(x, table, nomatch = 0L) > 0L
}
# create semitransparant colors for a given class label
# Build one semi-transparent color per sample from a rainbow palette over
# the class levels (the original notes the first color, red, is for QC).
# cls: factor of sample classes. Returns a character vector of colors,
# one per element of cls.
CreateSemiTransColors <- function(cls){
  base.cols <- rainbow(length(levels(cls)));
  trans.cols <- ToSemiTransParent(base.cols);  # add alpha to each level color
  # expand level colors to a one-to-one match with the cls elements
  col.vec <- vector(mode="character", length=length(cls));
  for (lv.inx in seq_along(levels(cls))){
    col.vec[cls == levels(cls)[lv.inx]] <- trans.cols[lv.inx];
  }
  return(col.vec);
}
# convert rgb color i.e. "#00FF00FF" to semi transparent
# Convert color names/hex codes (e.g. "#00FF00") into semi-transparent
# versions with the given alpha (0 = transparent, 1 = opaque).
ToSemiTransParent <- function (col.nms, alpha=0.5){
  # rows = colors, columns = r/g/b scaled into [0, 1]
  rgb.channels <- t(col2rgb(col.nms)) / 255;
  rgb(rgb.channels, alpha=alpha);
}
# col.vec should already been created
# Re-key the global color and shape vectors by the current group names so
# that per-group lookups (see GetColorSchema/GetShapeSchema) can match by
# name. Side effects: overwrites names(colVec) and names(shapeVec) in the
# global environment; assumes both already exist with one entry per group
# returned by GetGroupNames().
UpdateGraphSettings <- function(){
grpnms <- GetGroupNames();
names(colVec) <<- grpnms;
names(shapeVec) <<- grpnms;
}
# Resolve the plotting symbol (pch) for every sample.
# If the global shapeVec (keyed by group name) is fully specified (all > 0),
# use it; otherwise fall back to class-numbered symbols when names or
# grayscale are shown, or uniform solid circles (pch 19).
GetShapeSchema <- function(show.name, grey.scale){
  if(exists("shapeVec") && all(shapeVec > 0)){
    cls.chr <- as.character(dataSet$cls);
    shapes <- rep(0, length=length(dataSet$cls));
    # map each sample to its group's custom shape
    for(i in seq_along(shapeVec)){
      shapes[cls.chr == names(shapeVec)[i]] <- shapeVec[i];
    }
  }else{
    if(show.name | grey.scale){
      shapes <- as.numeric(dataSet$cls)+1;  # distinct open symbols per class
    }else{
      shapes <- rep(19, length(dataSet$cls));  # solid circles
    }
  }
  return(shapes);
}
# Resolve the plot color for every sample.
# Priority: grayscale ramp (grayscale=T) > interpolated 12-color palette
# when there are more than 9 groups > user-customized global colVec (keyed
# by group name, no "#NA" placeholders) > default numeric palette indices.
GetColorSchema <- function(grayscale=F){
  grp.num <- length(levels(dataSet$cls));
  lvs <- levels(dataSet$cls);
  if(grayscale){
    pal <- colorRampPalette(c("grey90", "grey30"))(grp.num);
  }else if(grp.num > 9){
    # stretch the 12-color base palette over all groups
    pal12 <- c("#A6CEE3", "#1F78B4", "#B2DF8A", "#33A02C", "#FB9A99",
               "#E31A1C", "#FDBF6F", "#FF7F00", "#CAB2D6", "#6A3D9A",
               "#FFFF99", "#B15928");
    pal <- colorRampPalette(pal12)(grp.num);
  }else{
    pal <- NULL;  # fall through to user colors / defaults below
  }
  if(!is.null(pal)){
    colors <- vector(mode="character", length=length(dataSet$cls));
    for(i in seq_along(lvs)){
      colors[dataSet$cls == lvs[i]] <- pal[i];
    }
  }else if(exists("colVec") && !any(colVec =="#NA")){
    # user-customized colors, matched per group name
    colors <- vector(mode="character", length=length(dataSet$cls));
    cls.chr <- as.character(dataSet$cls);
    for(i in seq_along(colVec)){
      colors[cls.chr == names(colVec)[i]] <- colVec[i];
    }
  }else{
    colors <- as.numeric(dataSet$cls)+1;  # default palette indices
  }
  return (colors);
}
# unzip the uploaded .zip files, remove the uploaded file, check for success
# Unzip an uploaded .zip archive into outPath using the system unzip tool,
# optionally deleting the uploaded archive afterwards.
# Returns 1 on success, 0 on failure (after queueing user-facing messages).
# Fixes: use inherits() instead of class()== (robust to multi-class
# objects), short-circuit || so `a` is not inspected after an error, and
# replace the precedence trap `!length(a)>0` with an explicit length check.
UnzipUploadedFile<-function(inPath, outPath, rmFile=T){
  # a<-unzip(inPath, exdir=outPath);
  a<-try(system(paste("unzip", "-o", inPath, "-d", outPath), intern=T));
  if(inherits(a, "try-error") || length(a) == 0){
    AddErrMsg("Failed to unzip the uploaded files!");
    AddErrMsg("Possible reason: file name contains space or special characters.");
    AddErrMsg("Use only alphabets and numbers, make sure there is no space in your file name.");
    AddErrMsg("For WinZip 12.x, use \"Legacy compression (Zip 2.0 compatible)\"");
    return (0);
  }
  if(rmFile){
    RemoveFile(inPath);  # discard the uploaded archive once extracted
  }
  return(1);
}
# clean data and remove -Inf, Inf, NA, negative and 0
# Clean a numeric vector/matrix: replace Inf with twice the finite maximum,
# -Inf with half the minimum, and optionally NA and non-positive values
# with half the minimum (a detection-limit style imputation).
# Bug fix: the original tested sum(bdata==Inf)>0 etc., which evaluates to
# NA - and crashes the `if` - whenever bdata contains NA, i.e. exactly the
# input this function is meant to clean. Comparisons now ignore NA.
CleanData <-function(bdata, removeNA=T, removeNeg=T){
  inx <- !is.na(bdata) & bdata == Inf;
  if(any(inx)){
    bdata[inx] <- NA;                    # exclude from the max below
    bdata[inx] <- max(bdata, na.rm=T)*2
  }
  inx <- !is.na(bdata) & bdata == -Inf;
  if(any(inx)){
    bdata[inx] <- NA;                    # exclude from the min below
    bdata[inx] <- min(bdata, na.rm=T)/2
  }
  if(removeNA){
    if(sum(is.na(bdata)) > 0){
      bdata[is.na(bdata)] <- min(bdata, na.rm=T)/2
    }
  }
  if(removeNeg){
    inx <- !is.na(bdata) & bdata <= 0;
    if(any(inx)){
      bdata[inx] <- NA;                  # exclude from the min below
      bdata[inx] <- min(bdata, na.rm=T)/2
    }
  }
  bdata;
}
# replace Inf and -Inf with the finite sentinels 999999 and -999999
# Replace infinite values with large finite sentinels (999999 / -999999)
# so significance matrices can be displayed/serialized; NA is left as-is.
# Bug fix: sum(bdata==Inf) is NA when bdata contains NA, which crashed the
# `if`; the checks now ignore NA. The original's intermediate NA assignment
# was redundant (immediately overwritten) and has been dropped.
CleanNumber <-function(bdata){
  inx <- !is.na(bdata) & bdata == Inf;
  if(any(inx)){
    bdata[inx] <- 999999;
  }
  inx <- !is.na(bdata) & bdata == -Inf;
  if(any(inx)){
    bdata[inx] <- -999999;
  }
  bdata;
}
# remove a folder and its contents
# Recursively delete a folder via the system `rm -r` command.
# Returns 1 on success, 0 (plus an error message) on failure.
# Bug fix: `rm -r` prints nothing to stdout on success, so the original
# check on captured output (length(a) > 0) classified every successful
# removal as a failure. The shell exit status is the reliable signal.
RemoveFolder<-function(folderName){
  status <- system(paste("rm", "-r", folderName));
  if(status != 0){
    AddErrMsg(paste("Could not remove file -", folderName));
    return (0);
  }
  return(1);
}
# remove files
# Delete a file if it exists; the existence guard avoids the warning
# file.remove() emits for missing files.
RemoveFile<-function(fileName){
  if(file.exists(fileName)){
    file.remove(fileName);
  }
}
# clear the current folder and objects in memory
# Reset the user session: delete every file under the current working
# directory (recursively) and re-initialize the global data/analysis/image
# containers. Destructive - NOTE(review): assumes the working directory is
# the per-user scratch folder; confirm before calling elsewhere.
ClearUserDir<-function(){
# remove physical files
unlink(dir(), recursive=T);
dataSet <<- list();
analSet <<- list();
imgSet <<- list();
gc(); # reclaim memory from the dropped objects
# remove objects in the memory
# rm(list=ls(envir=sys.frame(-1)),envir=sys.frame(-1));
}
# utils to remove from
# within, leading and trailing spaces
# Normalize whitespace in a query string: collapse runs of spaces to a
# single space, then strip leading and trailing whitespace.
ClearStrings<-function(query){
  query <- gsub(" +", " ", query);  # collapse internal runs of spaces
  # trim both ends (non-greedy capture keeps the inner text intact)
  query <- sub("^[[:space:]]*(.*?)[[:space:]]*$", "\\1", query, perl=TRUE);
  return (query);
}
# remove HTML tag
# add escape for %
# Make a character vector safe for LaTeX output: strip HTML tags and
# escape percent signs.
PrepareLatex <- function(stringVec){
  stringVec <- gsub("<(.|\n)*?>","",stringVec);  # drop HTML tags (non-greedy)
  stringVec <- gsub("%", "\\\\%", stringVec);    # % starts a LaTeX comment
  stringVec;
}
# get last command from the Rhistory.R file
# Return the most recent command in Rhistory.R that matches the given
# regular expression, or NULL when there is no match.
GetCMD<-function(regexp){
  hist.lines <- readLines("Rhistory.R");
  matches <- grep(regexp, hist.lines, value=T);
  if(length(matches) == 0){
    return(NULL);
  }
  # the last occurrence is the most recent command
  return(matches[length(matches)]);
}
# determine value label for plotting
# Axis/label text for measured values, based on the uploaded data type:
# concentration tables vs. everything else (intensities).
GetValueLabel<-function(){
  if(dataSet$type == "conc") "Concentration" else "Intensity";
}
# determine variable label for plotting
# Axis/label text for features, based on the uploaded data type.
GetVariableLabel<-function(){
  if(dataSet$type == "mspeak"){
    # MS peak lists: 2 columns = mass only, otherwise m/z + retention time
    if(dataSet$peakSet$ncol == 2) "Peaks (mass)" else "Peaks (mz/rt)";
  }else{
    switch(dataSet$type,
           conc = "Compounds",
           specbin = "Spectra Bins",
           nmrpeak = "Peaks (ppm)",
           "Peaks(mz/rt)");  # default for any other type
  }
}
# determine the number of rows and columns for a given total
# number of plots (used by Kmeans and SOM plots)
# Choose a c(nrow, ncol) grid layout for `total` cluster plots
# (used by the K-means and SOM visualizations).
GetXYCluster<-function(total){
  if(total > 16){
    layout <- c(5, 4);
  }else if(total > 12){
    layout <- c(4, 4);
  }else if(total > 9){
    layout <- c(4, 3);
  }else if(total > 6){
    layout <- c(3, 3);
  }else if(total > 4){
    layout <- c(3, 2);
  }else{
    layout <- c(total, 1);  # few plots: single column
  }
  layout;
}
###################################################
### ====== utility classes for peak grouping=== ###
###################################################
# Thin wrapper around the xcms C routine "RectUnique".
# m: numeric matrix of rectangles (one per row); order: evaluation order;
# xdiff/ydiff: tolerances. Returns the 7th .C argument, a logical vector -
# presumably flagging rows that are unique within the tolerances; confirm
# against the xcms C sources.
# NOTE(review): DUP = FALSE has been defunct since R 3.1 (ignored/err);
# confirm compatibility with the installed R/xcms versions.
rectUnique <- function(m, order = seq(length = nrow(m)), xdiff = 0, ydiff = 0) {
nr <- nrow(m)
nc <- ncol(m)
if (!is.double(m))
m <- as.double(m)
.C("RectUnique",
m,
as.integer(order-1), # 0-based indices for C
nr,
nc,
as.double(xdiff),
as.double(ydiff),
logical(nrow(m)), # output buffer: one flag per row
DUP = FALSE, PACKAGE = "xcms")[[7]]
}
# Wrapper for the xcms C routine "FindEqualGreaterM".
# For each element of `values` it returns an index into x - presumably the
# first position in (sorted) x that is >= the value; confirm against the
# xcms C sources. The +1 converts C's 0-based indices to R's 1-based.
# NOTE(review): DUP = FALSE is defunct in modern R.
findEqualGreaterM <- function(x, values) {
if (!is.double(x)) x <- as.double(x)
if (!is.double(values)) values <- as.double(values)
.C("FindEqualGreaterM",
x,
length(x),
values,
length(values),
index = integer(length(values)),
DUP = FALSE, PACKAGE = "xcms")$index + 1
}
# Wrapper for the xcms C routine "DescendMin".
# Starting from position istart (default: the maximum of y), returns the
# 1-based c(ilower, iupper) indices - presumably the enclosing local minima
# on either side (peak boundaries); confirm against the xcms C sources.
# NOTE(review): DUP = FALSE is defunct in modern R.
descendMin <- function(y, istart = which.max(y)) {
if (!is.double(y)) y <- as.double(y)
unlist(.C("DescendMin",
y,
length(y),
as.integer(istart-1), # 0-based start index for C
ilower = integer(1),
iupper = integer(1),
DUP = FALSE, PACKAGE = "xcms")[4:5]) + 1
}
# obtain a random subset of numbers from a total number
# Sample sub.num distinct indices from 1..total, or return all indices
# in order when total is already small enough.
GetRandomSubsetIndex<-function(total, sub.num = 50){
  if(total < sub.num){
    1:total;
  }else{
    sample(1:total, sub.num);  # without replacement
  }
}
# Classification accuracy from a square confusion matrix:
# correctly classified (diagonal) over total counts.
Get.Accuracy <- function(cm) {
  correct <- sum(diag(cm));
  correct / sum(cm);
}
# Get a subsets of data ranked by their p values from t tests
# Select (up to) the sub.num data columns with the smallest t-test
# p-values. Runs Ttests.Anal first when no t-test result is cached in the
# global analSet. Note the asymmetric return kept from the original:
# numeric indices when all columns fit, a logical mask otherwise.
GetTTSubsetIndex<-function(data = dataSet$norm, sub.num=50){
  if(ncol(data) < sub.num){
    1:ncol(data);
  }else{
    if(is.null(analSet$tt)){
      Ttests.Anal(0.75);  # populate analSet$tt (0.75: presumably a lenient threshold - confirm)
    }
    neg.log.p <- -log10(analSet$tt$p.value);  # larger = more significant
    top.inx <- order(neg.log.p, decreasing = T)[1:sub.num];
    1:ncol(data) %in% top.inx;  # logical mask over the columns
  }
}
# generate Latex table
# Print a LaTeX table (via xtable) of the significant features found by the
# given method, truncated to the top 50 rows. Feature names are moved from
# the row names into the first column. Prints a plain message when the
# matrix is empty.
# Bug fix: removed a stray empty argument in the print() call
# (`print(xtable(...), ,caption.placement=...)`).
GetSigTable<-function(mat, method){
  suppressMessages(require(xtable));
  if(!isEmptyMatrix(mat)){ # test if empty
    cap<-"Important features identified by";
    if(nrow(mat)>50){
      smat<-as.matrix(mat[1:50,]); # only print top 50 if too many
      colnames(smat)<-colnames(mat); # make sure column names are also copied
      mat<-smat;
      cap<-"Top 50 features identified by";
    }
    # change the rowname to first column
    col1<-rownames(mat);
    cname<-colnames(mat);
    cname<-c(GetVariableLabel(), cname);
    mat<-cbind(col1, mat);
    rownames(mat)<-NULL;
    colnames(mat)<-cname;
    print(xtable(mat, caption=paste(cap, method)), caption.placement="top", size="\\scriptsize");
  }else{
    print(paste("No significant features were found using the given threshold for", method));
  }
}
# test if a sig table matrix is empty
# Test whether a significance-table matrix is effectively empty:
# NULL or zero-length, zero rows/columns, or a missing leading cell.
# Fix: use short-circuit || in the scalar conditions so the later checks
# are guaranteed not to be evaluated on NULL input (idiom; the vectorized
# `|` evaluated both operands).
isEmptyMatrix<-function(mat){
  if(is.null(mat) || length(mat) == 0){
    return(TRUE);
  }
  if(nrow(mat) == 0 || ncol(mat) == 0){
    return(TRUE);
  }
  if(is.na(mat[1,1])){
    return(TRUE);
  }
  return(FALSE);
}
# Compute BSS/WSS for each row of a matrix which may have NA
# Columns have labels
# x is a numeric vector,
# cl is consecutive integers
# Between/within sum-of-squares ratio for a numeric vector x grouped by
# consecutive integer labels cl, tolerating NA (via var.na/sum.na).
# Returns (TSS - WSS) / WSS, where TSS is the total and WSS the pooled
# within-group sum of squares.
Get.bwss<-function(x, cl){
  n.grp <- max(cl) - min(cl) + 1
  grand.var <- var.na(x);
  grand.n <- sum(!is.na(x));
  grp.var <- grp.n <- numeric(n.grp);
  for(g in seq_len(n.grp)) {
    lab <- g + min(cl) - 1;  # actual group label
    members <- cl == lab;
    if(sum(members) == 1){
      # singleton group: zero variance by convention
      grp.var[g] <- 0;
      grp.n[g] <- 1;
    }
    if(sum(members) > 1) {
      grp.var[g] <- var.na(x[members]);
      grp.n[g] <- sum(!is.na(x[members]));
    }
  }
  WSS <- sum.na(grp.var * (grp.n - 1));
  TSS <- grand.var * (grand.n - 1)
  (TSS - WSS)/WSS;
}
# Compute SSQ for each row of a matrix which may have NA
# Columns have labels cl=consecutive integers
# note: this is designed for ASCA partition data,
# in which the within-group sum of squares (WSS) is
# zero, so we only need TSS
# Total sum of squares per row of matrix x, whose columns are grouped by
# consecutive integer labels cl (NA-tolerant via var.na/sum.na).
# Designed for ASCA partition data where within-group variation is zero,
# so only the TSS vector is returned; the WSS computation is retained for
# parity with Get.bwss but its result is unused.
Get.tss<-function(x, cl){
  n.grp <- max(cl) - min(cl) + 1
  row.var <- apply(x, 1, var.na);
  row.n <- apply(!is.na(x), 1, sum);
  grp.var <- matrix(0, nrow(x), n.grp);
  grp.n <- matrix(0, nrow(x), n.grp);
  for(g in seq_len(n.grp)) {
    lab <- g + min(cl) - 1;  # actual group label
    members <- cl == lab;
    if(sum(members) == 1){
      grp.var[, g] <- 0;  # singleton group: zero variance by convention
      grp.n[, g] <- 1;
    }
    if(sum(members) > 1) {
      grp.var[, g] <- apply(x[, members], 1, var.na);
      grp.n[, g] <- apply(!is.na(x[, members]), 1, sum);
    }
  }
  WSS <- apply(grp.var * (grp.n - 1), 1, sum.na)  # unused; kept from original
  TSS <- row.var * (row.n - 1)
  return(TSS);
}
# Sum of the finite, non-missing elements of x; NA when none exist.
# Extra arguments are accepted (for apply-style callers) but ignored.
sum.na <- function(x,...){
  ok <- !(is.na(x) | is.infinite(x));
  if(sum(ok) > 0) sum(x[ok]) else NA;
}
# Variance of the finite, non-missing elements of x; NA unless at least
# two such values exist.
var.na <- function(x){
  ok <- !(is.na(x) | is.infinite(x));
  if(sum(ok) > 1) var(x[ok]) else NA;
}
#######################################################
## calculate Fisher's Least Significant Difference (LSD)
## adapted from the 'agricolae' package
##############################################
# Fisher's Least Significant Difference (LSD) test for pairwise group
# comparisons, adapted from the 'agricolae' package.
# y: response values, or a fitted aov/lm model (in which case the response,
#    treatment column, residual df and mean squared error are extracted
#    from the model frame).
# trt: treatment/group labels; alpha: two-sided significance level.
# Returns a data frame of pairwise mean differences with p-values,
# significance codes and confidence limits, row-named "groupA - groupB".
LSD.test <- function (y, trt, alpha = 0.05){
clase<-c("aov","lm")
name.y <- paste(deparse(substitute(y)))
name.t <- paste(deparse(substitute(trt)))
# model input: pull response, residual df and MSerror from the fitted model
if("aov"%in%class(y) | "lm"%in%class(y)){
A<-y$model
DFerror<-df.residual(y)
MSerror<-deviance(y)/DFerror
y<-A[,1]
ipch<-pmatch(trt,names(A))
name.t <-names(A)[ipch]
trt<-A[,ipch]
name.y <- names(A)[1]
}
# NOTE(review): when y is a plain vector, DFerror/MSerror are never set,
# so only the aov/lm path can work - confirm callers always pass a model.
junto <- subset(data.frame(y, trt), is.na(y) == FALSE)
# per-group mean, sd and count (via the sibling tapply.stat helper)
means <- tapply.stat(junto[, 1], junto[, 2], stat="mean") #change
sds <- tapply.stat(junto[, 1], junto[, 2], stat="sd") #change
nn <- tapply.stat(junto[, 1], junto[, 2], stat="length") #change
std.err <- sds[, 2]/sqrt(nn[, 2])
Tprob <- qt(1 - alpha/2, DFerror)
# confidence limits of the group means
LCL <- means[,2]-Tprob*std.err
UCL <- means[,2]+Tprob*std.err
means <- data.frame(means, std.err, replication = nn[, 2], LCL, UCL)
names(means)[1:2] <- c(name.t, name.y)
#row.names(means) <- means[, 1]
ntr <- nrow(means)
nk <- choose(ntr, 2)
nr <- unique(nn[, 2])
# all pairwise group combinations
comb <- combn(ntr, 2)
nn <- ncol(comb)
dif <- rep(0, nn)
LCL1<-dif
UCL1<-dif
sig<-NULL
pvalue <- rep(0, nn)
for (k in 1:nn) {
i <- comb[1, k]
j <- comb[2, k]
# order each pair so the group with the larger mean is listed first
if (means[i, 2] < means[j, 2]){
comb[1, k]<-j
comb[2, k]<-i
}
dif[k] <- abs(means[i, 2] - means[j, 2])
# pooled standard error of the difference (column 4 = replication)
sdtdif <- sqrt(MSerror * (1/means[i, 4] + 1/means[j,4]))
# two-sided t test on the difference of means
pvalue[k] <- 2 * (1 - pt(dif[k]/sdtdif, DFerror));
pvalue[k] <- round(pvalue[k],6);
LCL1[k] <- dif[k] - Tprob*sdtdif
UCL1[k] <- dif[k] + Tprob*sdtdif
# conventional significance stars
sig[k]<-" "
if (pvalue[k] <= 0.001) sig[k]<-"***"
else if (pvalue[k] <= 0.01) sig[k]<-"**"
else if (pvalue[k] <= 0.05) sig[k]<-"*"
else if (pvalue[k] <= 0.1) sig[k]<-"."
}
tr.i <- means[comb[1, ],1]
tr.j <- means[comb[2, ],1]
output<-data.frame("Difference" = dif, pvalue = pvalue,sig,LCL=LCL1,UCL=UCL1)
rownames(output)<-paste(tr.i,tr.j,sep=" - ");
output;
}
# Apply a statistic (named by `stat`: "mean", "sd", "length", ...) to y
# within the groups defined by x; adapted from the 'agricolae' package.
# Both inputs are wrapped in data frames with a dummy first column (c1/v1);
# group keys are built by "&"-pasting the x columns and later split apart.
# Returns a data frame: grouping column(s) followed by the per-group
# statistic for each y column.
tapply.stat <-function (y, x, stat = "mean"){
cx<-deparse(substitute(x))
cy<-deparse(substitute(y))
x<-data.frame(c1=1,x)
y<-data.frame(v1=1,y)
nx<-ncol(x)
ny<-ncol(y)
namex <- names(x)
namey <- names(y)
if (nx==2) namex <- c("c1",cx)
if (ny==2) namey <- c("v1",cy)
namexy <- c(namex,namey)
for(i in 1:nx) {
x[,i]<-as.character(x[,i])
}
# composite group key: "&"-joined values of all grouping columns
z<-NULL
for(i in 1:nx) {
z<-paste(z,x[,i],sep="&")
}
# per-group statistic for every y column (including the dummy v1)
w<-NULL
for(i in 1:ny) {
m <-tapply(y[,i],z,stat)
m<-as.matrix(m)
w<-cbind(w,m)
}
nw<-nrow(w)
# split the composite keys back into one column per grouping variable
# (element 1 of the split is empty because every key starts with "&")
c<-rownames(w)
v<-rep("",nw*nx)
dim(v)<-c(nw,nx)
for(i in 1:nw) {
for(j in 1:nx) {
v[i,j]<-strsplit(c[i],"&")[[1]][j+1]
}
}
rownames(w)<-NULL
# drop the dummy c1/v1 columns and restore the original names
junto<-data.frame(v[,-1],w)
junto<-junto[,-nx]
names(junto)<-namexy[c(-1,-(nx+1))]
return(junto)
}
########################################
#### Scatterplot3D
#### adapted for better visualization
#######################################
Plot3D <- function(x, y = NULL, z = NULL, color = par("col"), pch = NULL,
main = NULL, sub = NULL, xlim = NULL, ylim = NULL, zlim = NULL,
xlab = NULL, ylab = NULL, zlab = NULL, scale.y = 1, angle = 40,
axis = TRUE, tick.marks = TRUE, label.tick.marks = TRUE,
x.ticklabs = NULL, y.ticklabs = NULL, z.ticklabs = NULL,
y.margin.add = 0, grid = TRUE, box = TRUE, lab = par("lab"),
lab.z = mean(lab[1:2]), type = "p", highlight.3d = FALSE,
mar = c(5, 3, 4, 3) + 0.1, col.axis = par("col.axis"),
col.grid = "grey", col.lab = par("col.lab"), cex.symbols = par("cex"),
cex.axis = 0.8 * par("cex.axis"), cex.lab = par("cex.lab"),
font.axis = par("font.axis"), font.lab = par("font.lab"),
lty.axis = par("lty"), lty.grid = 2, lty.hide = 1,
lty.hplot = par("lty"), log = "", ...)
# log not yet implemented
{
## Uwe Ligges ,
## http://www.statistik.tu-dortmund.de/~ligges
##
## For MANY ideas and improvements thanks to Martin Maechler!!!
## Parts of the help files are stolen from the standard plotting functions in R.
mem.par <- par(mar = mar)
x.scal <- y.scal <- z.scal <- 1
xlabel <- if (!missing(x)) deparse(substitute(x))
ylabel <- if (!missing(y)) deparse(substitute(y))
zlabel <- if (!missing(z)) deparse(substitute(z))
## verification, init, ...
if(highlight.3d && !missing(color))
warning("color is ignored when highlight.3d = TRUE")
## color as part of `x' (data.frame or list):
if(!is.null(d <- dim(x)) && (length(d) == 2) && (d[2] >= 4))
color <- x[,4]
else if(is.list(x) && !is.null(x$color))
color <- x$color
## convert 'anything' -> vector
xyz <- xyz.coords(x=x, y=y, z=z, xlab=xlabel, ylab=ylabel, zlab=zlabel,
log=log)
if(is.null(xlab)) { xlab <- xyz$xlab; if(is.null(xlab)) xlab <- "" }
if(is.null(ylab)) { ylab <- xyz$ylab; if(is.null(ylab)) ylab <- "" }
if(is.null(zlab)) { zlab <- xyz$zlab; if(is.null(zlab)) zlab <- "" }
if(length(color) == 1)
color <- rep(color, length(xyz$x))
else if(length(color) != length(xyz$x))
stop("length(color) ", "must be equal length(x) or 1")
angle <- (angle %% 360) / 90
yz.f <- scale.y * abs(if(angle < 1) angle else if(angle > 3) angle - 4 else 2 - angle)
yx.f <- scale.y * (if(angle < 2) 1 - angle else angle - 3)
if(angle > 2) { ## switch y and x axis to ensure righthand oriented coord.
temp <- xyz$x; xyz$x <- xyz$y; xyz$y <- temp
temp <- xlab; xlab <- ylab; ylab <- temp
temp <- xlim; xlim <- ylim; ylim <- temp
}
angle.1 <- (1 < angle && angle < 2) || angle > 3
angle.2 <- 1 <= angle && angle <= 3
dat <- cbind(as.data.frame(xyz[c("x","y","z")]), col = color)
## xlim, ylim, zlim -- select the points inside the limits
if(!is.null(xlim)) {
xlim <- range(xlim)
dat <- dat[ xlim[1] <= dat$x & dat$x <= xlim[2] , , drop = FALSE]
}
if(!is.null(ylim)) {
ylim <- range(ylim)
dat <- dat[ ylim[1] <= dat$y & dat$y <= ylim[2] , , drop = FALSE]
}
if(!is.null(zlim)) {
zlim <- range(zlim)
dat <- dat[ zlim[1] <= dat$z & dat$z <= zlim[2] , , drop = FALSE]
}
n <- nrow(dat)
if(n < 1) stop("no data left within (x|y|z)lim")
y.range <- range(dat$y[is.finite(dat$y)])
### 3D-highlighting / colors / sort by y
if(type == "p" || type == "h") {
y.ord <- rev(order(dat$y))
dat <- dat[y.ord, ]
if(length(pch) > 1)
if(length(pch) != length(y.ord))
stop("length(pch) ", "must be equal length(x) or 1")
else pch <- pch[y.ord]
daty <- dat$y
daty[!is.finite(daty)] <- mean(daty[is.finite(daty)])
if(highlight.3d && !(all(diff(daty) == 0)))
dat$col <- rgb(seq(0, 1, length = n) * (y.range[2] - daty) / diff(y.range), g=0, b=0)
}
### optim. axis scaling
p.lab <- par("lab")
## Y
y.range <- range(dat$y[is.finite(dat$y)], ylim)
y.prty <- pretty(y.range, n = lab[2],
min.n = max(1, min(.5 * lab[2], p.lab[2])))
y.scal <- round(diff(y.prty[1:2]), digits = 12)
y.add <- min(y.prty)
dat$y <- (dat$y - y.add) / y.scal
y.max <- (max(y.prty) - y.add) / y.scal
if(!is.null(ylim)) y.max <- max(y.max, ceiling((ylim[2] - y.add) / y.scal))
# if(angle > 2) dat$y <- y.max - dat$y ## turn y-values around
## X
x.range <- range(dat$x[is.finite(dat$x)], xlim)
x.prty <- pretty(x.range, n = lab[1],
min.n = max(1, min(.5 * lab[1], p.lab[1])))
x.scal <- round(diff(x.prty[1:2]), digits = 12)
dat$x <- dat$x / x.scal
x.range <- range(x.prty) / x.scal
x.max <- ceiling(x.range[2])
x.min <- floor(x.range[1])
if(!is.null(xlim)) {
x.max <- max(x.max, ceiling(xlim[2] / x.scal))
x.min <- min(x.min, floor(xlim[1] / x.scal))
}
x.range <- range(x.min, x.max)
## Z
z.range <- range(dat$z[is.finite(dat$z)], zlim)
z.prty <- pretty(z.range, n = lab.z,
min.n = max(1, min(.5 * lab.z, p.lab[2])))
z.scal <- round(diff(z.prty[1:2]), digits = 12)
dat$z <- dat$z / z.scal
z.range <- range(z.prty) / z.scal
z.max <- ceiling(z.range[2])
z.min <- floor(z.range[1])
if(!is.null(zlim)) {
z.max <- max(z.max, ceiling(zlim[2] / z.scal))
z.min <- min(z.min, floor(zlim[1] / z.scal))
}
z.range <- range(z.min, z.max)
### init graphics
plot.new()
if(angle.2) {x1 <- x.min + yx.f * y.max; x2 <- x.max}
else {x1 <- x.min; x2 <- x.max + yx.f * y.max}
plot.window(c(x1, x2), c(z.min, z.max + yz.f * y.max))
temp <- strwidth(format(rev(y.prty))[1], cex = cex.axis/par("cex"))
if(angle.2) x1 <- x1 - temp - y.margin.add
else x2 <- x2 + temp + y.margin.add
plot.window(c(x1, x2), c(z.min, z.max + yz.f * y.max))
if(angle > 2) par("usr" = par("usr")[c(2, 1, 3:4)])
usr <- par("usr") # we have to remind it for use in closures
title(main, sub, ...)
### draw axis, tick marks, labels, grid, ...
xx <- if(angle.2) c(x.min, x.max) else c(x.max, x.min)
if(grid) {
## grids
###################
# XY wall
i <- x.min:x.max;
segments(i, z.min, i + (yx.f * y.max), yz.f * y.max + z.min,
col = col.grid, lty = lty.grid);
i <- 0:y.max;
segments(x.min + (i * yx.f), i * yz.f + z.min,
x.max + (i * yx.f), i * yz.f + z.min,
col = col.grid, lty = lty.grid);
######################
# XZ wall
# verticle lines
temp <- yx.f * y.max;
temp1 <- yz.f * y.max;
i <- (x.min + temp):(x.max + temp);
segments(i, z.min + temp1, i, z.max + temp1,
col = col.grid, lty = lty.grid);
# horizontal lines
i <- (z.min + temp1):(z.max + temp1);
segments(x.min + temp, i, x.max + temp, i,
col = col.grid, lty = lty.grid)
##################
# YZ wall
# horizontal lines
i <- xx[2]:x.min;
mm <- z.min:z.max;
segments(i, mm, i + temp, mm + temp1,
col = col.grid, lty = lty.grid);
# verticle lines
i <- 0:y.max;
segments(x.min + (i * yx.f), i * yz.f + z.min,
xx[2] + (i * yx.f), i * yz.f + z.max,
col = col.grid, lty = lty.grid)
# make the axis into solid line
segments(x.min, z.min, x.min + (yx.f * y.max), yz.f * y.max + z.min,
col = col.grid, lty = lty.hide);
segments(x.max, z.min, x.max + (yx.f * y.max), yz.f * y.max + z.min,
col = col.axis, lty = lty.hide);
segments(x.min + (y.max * yx.f), y.max * yz.f + z.min,
x.max + (y.max* yx.f), y.max * yz.f + z.min,
col = col.grid, lty = lty.hide);
segments(x.min + temp, z.min + temp1, x.min + temp, z.max + temp1,
col = col.grid, lty = lty.hide);
segments(x.max + temp, z.min + temp1, x.max + temp, z.max + temp1,
col = col.axis, lty = lty.hide);
segments(x.min + temp, z.max + temp1, x.max + temp, z.max + temp1,
col = col.axis, lty = lty.hide);
segments(xx[2], z.max, xx[2] + temp, z.max + temp1,
col = col.axis, lty = lty.hide);
}
if(axis) {
if(tick.marks) { ## tick marks
xtl <- (z.max - z.min) * (tcl <- -par("tcl")) / 50
ztl <- (x.max - x.min) * tcl / 50
mysegs <- function(x0,y0, x1,y1)
segments(x0,y0, x1,y1, col=col.axis, lty=lty.axis)
## Y
i.y <- 0:y.max
mysegs(yx.f * i.y - ztl + xx[1], yz.f * i.y + z.min,
yx.f * i.y + ztl + xx[1], yz.f * i.y + z.min)
## X
i.x <- x.min:x.max
mysegs(i.x, -xtl + z.min, i.x, xtl + z.min)
## Z
i.z <- z.min:z.max
mysegs(-ztl + xx[2], i.z, ztl + xx[2], i.z)
if(label.tick.marks) { ## label tick marks
las <- par("las")
mytext <- function(labels, side, at, ...)
mtext(text = labels, side = side, at = at, line = -.5,
col=col.lab, cex=cex.axis, font=font.lab, ...)
## X
if(is.null(x.ticklabs))
x.ticklabs <- format(i.x * x.scal)
mytext(x.ticklabs, side = 1, at = i.x)
## Z
if(is.null(z.ticklabs))
z.ticklabs <- format(i.z * z.scal)
mytext(z.ticklabs, side = if(angle.1) 4 else 2, at = i.z,
adj = if(0 < las && las < 3) 1 else NA)
## Y
temp <- if(angle > 2) rev(i.y) else i.y ## turn y-labels around
if(is.null(y.ticklabs))
y.ticklabs <- format(y.prty)
else if (angle > 2)
y.ticklabs <- rev(y.ticklabs)
text(i.y * yx.f + xx[1],
i.y * yz.f + z.min, y.ticklabs,
pos=if(angle.1) 2 else 4, offset=1,
col=col.lab, cex=cex.axis/par("cex"), font=font.lab)
}
}
## axis and labels
mytext2 <- function(lab, side, line, at)
mtext(lab, side = side, line = line, at = at, col = col.lab,
cex = cex.lab, font = font.axis, las = 0)
## X
lines(c(x.min, x.max), c(z.min, z.min), col = col.axis, lty = lty.axis)
mytext2(xlab, 1, line = 1.5, at = mean(x.range))
## Y
lines(xx[1] + c(0, y.max * yx.f), c(z.min, y.max * yz.f + z.min),
col = col.axis, lty = lty.axis)
mytext2(ylab, if(angle.1) 2 else 4, line= 0.5, at = z.min + y.max * yz.f)
## Z
lines(xx[c(2,2)], c(z.min, z.max), col = col.axis, lty = lty.axis)
mytext2(zlab, if(angle.1) 4 else 2, line= 1.5, at = mean(z.range))
}
### plot points
x <- dat$x + (dat$y * yx.f)
z <- dat$z + (dat$y * yz.f)
col <- as.character(dat$col)
if(type == "h") {
z2 <- dat$y * yz.f + z.min
segments(x, z, x, z2, col = col, cex = cex.symbols, lty = lty.hplot, ...)
points(x, z, type = "p", col = col, pch = pch, cex = cex.symbols, ...)
}
else points(x, z, type = type, col = col, pch = pch, cex = cex.symbols, ...)
### box-lines in front of points (overlay)
if(axis && box) {
lines(c(x.min, x.max), c(z.max, z.max),
col = col.axis, lty = lty.axis)
lines(c(0, y.max * yx.f) + x.max, c(0, y.max * yz.f) + z.max,
col = col.axis, lty = lty.axis)
lines(xx[c(1,1)], c(z.min, z.max), col = col.axis, lty = lty.axis)
}
# par(mem.par) # we MUST NOT set the margins back
### Return Function Object
ob <- ls() ## remove all unused objects from the result's enviroment:
rm(list = ob[!ob %in% c("angle", "mar", "usr", "x.scal", "y.scal", "z.scal", "yx.f",
"yz.f", "y.add", "z.min", "z.max", "x.min", "x.max", "y.max",
"x.prty", "y.prty", "z.prty")])
rm(ob)
invisible(list(
xyz.convert = function(x, y=NULL, z=NULL) {
xyz <- xyz.coords(x, y, z)
if(angle > 2) { ## switch y and x axis to ensure righthand oriented coord.
temp <- xyz$x; xyz$x <- xyz$y; xyz$y <- temp
}
y <- (xyz$y - y.add) / y.scal
return(list(x = xyz$x / x.scal + yx.f * y,
y = xyz$z / z.scal + yz.f * y))
},
points3d = function(x, y = NULL, z = NULL, type = "p", ...) {
xyz <- xyz.coords(x, y, z)
if(angle > 2) { ## switch y and x axis to ensure righthand oriented coord.
temp <- xyz$x; xyz$x <- xyz$y; xyz$y <- temp
}
y2 <- (xyz$y - y.add) / y.scal
x <- xyz$x / x.scal + yx.f * y2
y <- xyz$z / z.scal + yz.f * y2
mem.par <- par(mar = mar, usr = usr)
on.exit(par(mem.par))
if(type == "h") {
y2 <- z.min + yz.f * y2
segments(x, y, x, y2, ...)
points(x, y, type = "p", ...)
}
else points(x, y, type = type, ...)
},
plane3d = function(Intercept, x.coef = NULL, y.coef = NULL,
lty = "dashed", lty.box = NULL, ...){
if(!is.atomic(Intercept) && !is.null(coef(Intercept))) Intercept <- coef(Intercept)
if(is.null(lty.box)) lty.box <- lty
if(is.null(x.coef) && length(Intercept) == 3){
x.coef <- Intercept[if(angle > 2) 3 else 2]
y.coef <- Intercept[if(angle > 2) 2 else 3]
Intercept <- Intercept[1]
}
mem.par <- par(mar = mar, usr = usr)
on.exit(par(mem.par))
x <- x.min:x.max
ltya <- c(lty.box, rep(lty, length(x)-2), lty.box)
x.coef <- x.coef * x.scal
z1 <- (Intercept + x * x.coef + y.add * y.coef) / z.scal
z2 <- (Intercept + x * x.coef +
(y.max * y.scal + y.add) * y.coef) / z.scal
segments(x, z1, x + y.max * yx.f, z2 + yz.f * y.max, lty = ltya, ...)
y <- 0:y.max
ltya <- c(lty.box, rep(lty, length(y)-2), lty.box)
y.coef <- (y * y.scal + y.add) * y.coef
z1 <- (Intercept + x.min * x.coef + y.coef) / z.scal
z2 <- (Intercept + x.max * x.coef + y.coef) / z.scal
segments(x.min + y * yx.f, z1 + y * yz.f,
x.max + y * yx.f, z2 + y * yz.f, lty = ltya, ...)
},
wall3d = function(Intercept, x.coef = NULL, y.coef = NULL,
lty = "dashed", lty.box = NULL, ...){
if(!is.atomic(Intercept) && !is.null(coef(Intercept))) Intercept <- coef(Intercept)
if(is.null(lty.box)) lty.box <- lty
if(is.null(x.coef) && length(Intercept) == 3){
x.coef <- Intercept[if(angle > 2) 3 else 2]
y.coef <- Intercept[if(angle > 2) 2 else 3]
Intercept <- Intercept[1]
}
mem.par <- par(mar = mar, usr = usr)
on.exit(par(mem.par))
x <- x.min:x.max
ltya <- c(lty.box, rep(lty, length(x)-2), lty.box)
x.coef <- x.coef * x.scal
z1 <- (Intercept + x * x.coef + y.add * y.coef) / z.scal
z2 <- (Intercept + x * x.coef +
(y.max * y.scal + y.add) * y.coef) / z.scal
segments(x, z1, x + y.max * yx.f, z2 + yz.f * y.max, lty = ltya, ...)
y <- 0:y.max
ltya <- c(lty.box, rep(lty, length(y)-2), lty.box)
y.coef <- (y * y.scal + y.add) * y.coef
z1 <- (Intercept + x.min * x.coef + y.coef) / z.scal
z2 <- (Intercept + x.max * x.coef + y.coef) / z.scal
segments(x.min + y * yx.f, z1 + y * yz.f,
x.max + y * yx.f, z2 + y * yz.f, lty = ltya, ...)
},
box3d = function(...){
mem.par <- par(mar = mar, usr = usr)
on.exit(par(mem.par))
lines(c(x.min, x.max), c(z.max, z.max), ...)
lines(c(0, y.max * yx.f) + x.max, c(0, y.max * yz.f) + z.max, ...)
lines(c(0, y.max * yx.f) + x.min, c(0, y.max * yz.f) + z.max, ...)
lines(c(x.max, x.max), c(z.min, z.max), ...)
lines(c(x.min, x.min), c(z.min, z.max), ...)
lines(c(x.min, x.max), c(z.min, z.min), ...)
}
))
}
###################################################
## Utilities for create pathway maps for MetPA
#################################################
# a function to deal with long string names
# Wrap long names at natural break points (spaces or hyphens) so they fit in
# limited display space (e.g. plot labels).
#
# cName    : character vector of names.
# wrap.len : target line length before wrapping.
# tol.len  : tolerance added to wrap.len; names no longer than
#            wrap.len + tol.len are returned unchanged.
#
# Returns the input vector with over-long entries re-flowed by
# CheckMergeSplittedNames(); all other entries are untouched.
Wrap.Names <- function(cName, wrap.len=10, tol.len=5){
  nc <- nchar(cName);
  long.inx <- nc > (wrap.len + tol.len);
  # Guard: with no over-long names the original 1:length() loop ran on an
  # empty list and failed with a subscript-out-of-bounds error.
  if(any(long.inx)){
    long.nms <- cName[long.inx];
    # positions of the natural breaks (space or hyphen) in each long name
    pos.list <- gregexpr("[ -]", long.nms);
    for(i in seq_along(pos.list)){
      current.nm <- long.nms[i];
      pos <- pos.list[[i]] + 1;
      start.pos <- c(0, pos);
      end.pos <- c(pos, nchar(current.nm) + 1);
      # split the name into its natural fragments, then re-merge/wrap them
      splits <- vapply(seq_along(start.pos),
                       function(x) substring(current.nm, start.pos[x], end.pos[x] - 1),
                       character(1));
      long.nms[i] <- CheckMergeSplittedNames(splits);
    }
    cName[long.inx] <- long.nms;
  }
  return(cName);
}
# given a vector of naturally split string elements,
# check whether a particular element is too long and needs to be
# broken by brute force
CheckMergeSplittedNames<-function(nms, wrap.len=10, tol.len=5){
clean.nm <- "";
current.nm <- "";
for(i in 1:length(nms)){
current.nm <- paste(current.nm, nms[i], sep="");
current.len <- nchar(current.nm);
# if too long, break into halves
if(current.len > wrap.len + tol.len){
break.pt <- round(current.len/2);
current.nm <- paste(substr(current.nm, 0, break.pt), "-", "\n",
substr(current.nm, break.pt+1, current.len), sep="");
clean.nm <- paste(clean.nm, "\n", current.nm, sep="");
current.nm <- "";
}else if(current.len > tol.len){
clean.nm <- paste(clean.nm, "\n", current.nm, sep="");
current.nm <- "";
}else{
if(i == length(nms)){
clean.nm <- paste(clean.nm, current.nm, sep=ifelse(nchar(current.nm) 0){
rbc <- round(rbc/sum(rbc),5);
}
dgr <- degree(g)$outDegree;
if(sum(dgr) >0){
dgr <- round(dgr/sum(dgr),5);
}
rbc.list[[i]] <- rbc;
dgr.list[[i]] <- dgr;
ms.list[[i]] <- nds;
}
names(ms.list) <- names(graph.list) <- names(dgr.list) <- names(rbc.list) <- substr(files, 0, nchar(files)-4);
# the variables that will be saved
metpa <- list();
metpa$mset.list <- ms.list;
metpa$rbc.list <- rbc.list;
metpa$dgr.list <- dgr.list;
metpa$uniq.count <- length(unique(unlist(ms.list)));
metpa$graph.list <- graph.list;
metpa$path.ids <- path.ids;
save(metpa, file=paste(nm.cp, ".rda", sep=""));
}
# given a vector of KEGGID, return a vector of KEGG compound names
# Map a vector of KEGG compound IDs to compound names using the global
# cmpd.map lookup table (column "kegg" holds the IDs, column 3 the names).
# IDs without a match are printed for inspection and yield NA in the result.
KEGGID2Name <- function(ids){
  hit.inx <- match(ids, cmpd.map$kegg);
  no.hit <- is.na(hit.inx);
  if(any(no.hit)){
    print(ids[no.hit]);
  }
  return(cmpd.map[hit.inx, 3]);
}
# get all the KEGG compounds from the pathway databases
# Collect the unique KEGG compound IDs from every KGML pathway file found
# under dirName (one sub-folder per organism) and write them to
# "kegg_uniq.csv" in the working directory.
# Requires the KEGGgraph Bioconductor package.
getCmpdID <- function(dirName){
  require(KEGGgraph);
  folds <- dir(dirName);
  all.nms <- "";
  # seq_along() (not 1:length()) so empty directories are skipped cleanly
  for(m in seq_along(folds)){
    fold.path <- paste(dirName, "/", folds[m], sep="");
    files <- dir(fold.path);
    cmpd.nms <- "";
    for(i in seq_along(files)){
      f <- paste(fold.path, "/", files[i], sep="");
      print(f);
      # parse the KGML file and extract the reaction-graph node names
      g <- KEGGpathway2reactionGraph(parseKGML(f));
      nms <- nodes(g);
      # node names look like "cpd:C00022" -- strip everything up to ":"
      start.pos <- unlist(gregexpr(":", nms)) + 1;
      nms <- substr(nms, start.pos, nchar(nms));
      cmpd.nms <- c(cmpd.nms, nms);
    }
    all.nms <- c(all.nms, unique(cmpd.nms));
  }
  write.csv(unique(all.nms), file="kegg_uniq.csv", row.names=F)
}
# Build a two-column table mapping each KGML file under dirName to its
# pathway title, and write it to saveName as CSV.
# Requires the KEGGgraph Bioconductor package.
getPathName <- function(dirName, saveName){
  require(KEGGgraph);
  files <- dir(dirName);
  nm.mat <- matrix("NA", nrow=length(files), ncol=2);
  # seq_along() (not 1:length()) so an empty directory yields an empty table
  for(i in seq_along(files)){
    f <- files[i];
    print(f);
    path <- parseKGML(paste(dirName, "/", f, sep=""));
    # column 1: file name, column 2: human-readable pathway title
    nm.mat[i,] <- c(f, path@pathwayInfo@title);
  }
  write.csv(nm.mat, file=saveName);
}
# Scan a compound-library CSV (columns include "synonym", a "; "-separated
# list) and report the rows whose synonym list contains duplicates.
#
# cmpdFile : path to the CSV file.
#
# Prints the 1-based row index of each offending entry and invisibly returns
# the vector of those indices (integer(0) when none).
#
# Bug fixed: the original iterated over an undefined variable `syns` (the
# computed `syns.list` was never used), so the function always errored.
IdentifyDuplicateCmpdInMsets <- function(cmpdFile){
  cmpd.db <- read.csv(cmpdFile, as.is=T, header=T);
  syns.list <- strsplit(cmpd.db$synonym, "; *");
  dup.inx <- integer(0);
  for(i in seq_along(syns.list)){
    current <- syns.list[[i]];
    if(length(current) > length(unique(current))){
      print(i);
      dup.inx <- c(dup.inx, i);
    }
  }
  invisible(dup.inx);
}
# extend the axis range at both ends
# vec is the values for that axis
# unit is the width to extend; 10 will extend by 1/10 of the range
# Extend a numeric range at both ends by 1/unit of its width.
#
# vec  : numeric values defining the range (NAs ignored).
# unit : divisor of the range width; unit=10 extends by 10% on each side.
#
# Returns c(min - ext, max + ext).
GetExtendRange <- function(vec, unit=10){
  # TRUE/FALSE spelled out (T is reassignable and unsafe)
  var.max <- max(vec, na.rm=TRUE);
  var.min <- min(vec, na.rm=TRUE);
  exts <- (var.max - var.min)/unit;
  c(var.min - exts, var.max + exts);
}
# return shortened names:
# break long names at spaces and append "..." to indicate
# the abbreviation
# Abbreviate names longer than max.len characters.
# Multi-word names are truncated at a word boundary and suffixed with "...";
# single-word names are cut to their first 21 characters plus "...".
# Names within the limit pass through unchanged.
#
# Bug fixed: the original 1:length() loop errored on an empty input vector;
# seq_along() returns character(0) cleanly.
GetShortNames <- function(nm.vec, max.len= 45){
  new.nms <- vector(mode="character", length=length(nm.vec));
  for(i in seq_along(nm.vec)){
    nm <- nm.vec[i];
    if(nchar(nm) <= max.len){
      new.nms[i] <- nm;
      next;
    }
    wrds <- strsplit(nm, "[[:space:]]+")[[1]];
    if(length(wrds) > 1){
      # accumulate words while the running length (plus room for " ..." )
      # stays within max.len, then stop and append the ellipsis
      new.nm <- "";
      for(m in seq_along(wrds)){
        wrd <- wrds[m];
        if(nchar(new.nm) + 4 + nchar(wrd) <= max.len){
          new.nm <- paste(new.nm, wrd);
        }else{
          new.nms[i] <- paste(new.nm, "...", sep="");
          break;
        }
      }
    }else{
      # single long word: hard cut (21 chars kept, as in the original)
      new.nms[i] <- paste(substr(nm, 0, 21), "...", sep="");
    }
  }
  return(new.nms);
}
# count the number of decimal places in the values
# Count the number of decimal places needed to represent x within tolerance
# tol. Warns when the count exceeds what tol can distinguish.
getndp <- function(x, tol=2*.Machine$double.eps){
  ndp <- 0
  # increase decimal places until round(x, ndp) matches x within tol;
  # `tolerance` spelled out (the original relied on partial matching of `tol=`)
  while(!isTRUE(all.equal(x, round(x, ndp), tolerance = tol))) ndp <- ndp + 1
  if(ndp > -log10(tol)) {
    warning("Tolerance reached, ndp possibly underestimated.")
  }
  ndp
}
### convert usr coords (as used in current plot) to pixels in a png
## adapted from the imagemap package
# Convert user coordinates of the current plot to pixel coordinates in a png
# image `im` (a list with Width/Height). The y axis is flipped because image
# pixel rows grow downwards. Adapted from the imagemap package.
usr2png <- function(xy,im){
  dev.xy <- usr2dev(xy, dev.cur())
  px <- ceiling(dev.xy[,1] * im$Width)
  py <- ceiling((1 - dev.xy[,2]) * im$Height)
  cbind(px, py)
}
# Convert user coordinates to normalized plot-region coordinates, reading
# par("usr") from the requested device and restoring the active device after.
usr2plt <- function(xy,dev=dev.cur()){
  prev.dev <- dev.cur()
  dev.set(dev)
  usr <- par("usr")
  dev.set(prev.dev)
  xytrans(xy, usr)
}
# Convert normalized plot-region coordinates to figure-region coordinates,
# reading par("plt") from the requested device and restoring the active device.
plt2fig <- function(xy,dev=dev.cur()){
  prev.dev <- dev.cur()
  dev.set(dev)
  plt <- par("plt")
  dev.set(prev.dev)
  xytrans2(xy, plt)
}
# Convert figure-region coordinates to whole-device coordinates, reading
# par("fig") from the requested device and restoring the active device after.
fig2dev <- function(xy,dev=dev.cur()){
  prev.dev <- dev.cur()
  dev.set(dev)
  fig <- par("fig")
  dev.set(prev.dev)
  xytrans2(xy, fig)
}
# Convert user coordinates all the way to device coordinates by chaining
# usr -> plt -> fig -> dev.
usr2dev <- function(xy,dev=dev.cur()){
  plt.xy <- usr2plt(xy, dev)
  fig.xy <- plt2fig(plt.xy, dev)
  fig2dev(fig.xy, dev)
}
# Map normalized [0,1] x/y coordinates (2-column matrix xy) into the
# rectangle par = c(x0, x1, y0, y1) by linear interpolation on each axis.
xytrans2 <- function(xy,par){
  dx <- par[2] - par[1]
  dy <- par[4] - par[3]
  cbind(par[1] + dx * xy[,1],
        par[3] + dy * xy[,2])
}
# Inverse of xytrans2: map x/y coordinates from the rectangle
# par = c(x0, x1, y0, y1) back to normalized [0,1] coordinates.
xytrans <- function(xy,par){
  dx <- par[2] - par[1]
  dy <- par[4] - par[3]
  cbind((xy[,1] - par[1]) / dx,
        (xy[,2] - par[3]) / dy)
}
# VENN DIAGRAM COUNTS AND PLOTS
# Count the members of every Venn region for up to k classification columns.
# x is a matrix whose columns mark membership (nonzero / >0 / <0 depending on
# `include`). Returns a "VennCounts" matrix: one row per 0/1 outcome pattern
# plus a Counts column. (limma-style implementation.)
getVennCounts <- function(x,include="both") {
  x <- as.matrix(x)
  include <- match.arg(include, c("both", "up", "down"))
  # reduce to a 0/1 membership matrix according to the requested direction
  x <- sign(switch(include,
                   both = abs(x),
                   up   = x > 0,
                   down = x < 0))
  ncontrasts <- ncol(x)
  grp.names <- colnames(x)
  if (is.null(grp.names)) grp.names <- paste("Group", 1:ncontrasts)
  # enumerate all 2^k outcome patterns; first column varies slowest
  noutcomes <- 2^ncontrasts
  outcomes <- matrix(0, noutcomes, ncontrasts)
  colnames(outcomes) <- grp.names
  for (j in seq_len(ncontrasts)) {
    outcomes[, j] <- rep(0:1, times = 2^(j - 1), each = 2^(ncontrasts - j))
  }
  # cross-tabulate membership; factors are taken in reverse column order so
  # that table()'s fastest-varying dimension lines up with `outcomes` rows
  xlist <- lapply(seq_len(ncontrasts),
                  function(i) factor(x[, ncontrasts - i + 1], levels = c(0, 1)))
  counts <- as.vector(table(xlist))
  structure(cbind(outcomes, Counts = counts), class = "VennCounts")
}
# Plot Venn diagram
# Gordon Smyth, James Wettenhall.
# Capabilities for multiple counts and colors by Francois Pepin.
# 4 July 2003. Last modified 12 March 2010.
plotVennDiagram <- function(object,include="both",names,mar=rep(0,4),cex=1.2,lwd=1,circle.col,counts.col,show.include,...)
{
if (!is(object, "VennCounts")){
if (length(include)>2) stop("Cannot plot Venn diagram for more than 2 sets of counts")
if (length(include)==2) object.2 <- getVennCounts(object, include = include[2])
object <- getVennCounts(object, include = include[1])
}
else if(length(include==2)) include <- include[1]
nsets <- ncol(object)-1
if(nsets > 3) stop("Can't plot Venn diagram for more than 3 sets")
if(missing(names)) names <- colnames(object)[1:nsets]
counts <- object[,"Counts"]
if(length(include)==2) counts.2 <- object.2[, "Counts"]
if(missing(circle.col)) circle.col <- par('col')
if(length(circle.col) max ) { # don't move top
tmp2 <- rev( as.logical( cumprod( rev(tmp) ) ) )
tmp <- tmp & !tmp2
}
x[ tmp ] <- x[ tmp] + stp
df <- x[-1] - x[-length(x)]
i <- i + 1
if( i > maxiter ) {
warning("Maximum iterations reached")
break
}
}
x[unsort]
}
# borrowed from
### http://www.r-statistics.com/2011/01/how-to-label-all-the-outliers-in-a-boxplot/
# Draw a boxplot of y (a vector or a formula) and label each outlier point
# with the corresponding entry of label_name.
# Adapted from:
# http://www.r-statistics.com/2011/01/how-to-label-all-the-outliers-in-a-boxplot/
# NOTE(review): depends on spread.labs() defined elsewhere in this file and on
# the plyr package (is.formula, ddply). Draws on the current graphics device.
boxplot.with.outlier.label <- function(y, label_name, ..., spread_text = T, data, plot = T, range = 1.5, label.col = "blue", push_text_right = 1.0, # enlarge push_text_right in order to push the text labels further from their point
segement_width_as_percent_of_label_dist = .45, # Change this if you want to have the line closer to the label (range should be between 0 to 1
jitter_if_duplicate = T, jitter_only_positive_duplicates = F)
{
require(plyr) # for is.formula and ddply
# draw (or just compute) the boxplot, with or without an explicit data frame
if(missing(data)) {
boxdata <- boxplot(y, plot = plot,range = range ,...)
} else {
boxdata <- boxplot(y, plot = plot,data = data, range = range ,...)
}
# creating a data.frame with information from the boxplot output about the outliers (location and group)
boxdata_group_name <- factor(boxdata$group)
levels(boxdata_group_name) <- boxdata$names[as.numeric(levels(boxdata_group_name))] # the subseting is for cases where we have some sub groups with no outliers
if(!is.null(list(...)$at)) { # if the user chose to use the "at" parameter, then we would like the function to still function (added on 19.04.2011)
boxdata$group <- list(...)$at[boxdata$group]
}
boxdata_outlier_df <- data.frame(group = boxdata_group_name, y = boxdata$out, x = boxdata$group)
# Let's extract the x,y variables from the formula:
if(is.formula(y))
{
model_frame_y <- model.frame(y)
y <- model_frame_y[,1]
x <- model_frame_y[,-1]
if(!is.null(dim(x))) { # then x is a matrix/data.frame of the type x1*x2*..and so on - and we should merge all the variations...
x <- apply(x,1, paste, collapse = ".")
}
} else {
# if(missing(x)) x <- rep(1, length(y))
x <- rep(1, length(y)) # we do this in case y comes as a vector and without x
}
# and put all the variables (x, y, and outlier label name) into one data.frame
DATA <- data.frame(label_name, x ,y)
if(!is.null(list(...)$names)) { # if the user chose to use the names parameter, then we would like the function to still function (added on 19.04.2011)
DATA$x <- factor(DATA$x, levels = unique(DATA$x))
levels(DATA$x) = list(...)$names # enable us to handle when the user adds the "names" parameter # fixed on 19.04.11 # notice that DATA$x must be of the "correct" order (that's why I used split above
}
# NOTE(review): detach() without a matching attach() in this function --
# assumes the caller attached `data` beforehand; confirm before relying on it.
if(!missing(data)) detach(data) # we don't need to have "data" attached anymore.
# let's only keep the rows with our outliers
boxplot.outlier.data <- function(xx, y_name = "y"){
y <- xx[,y_name]
boxplot_range <- range(boxplot.stats(y, coef = range )$stats)
ss <- (y < boxplot_range[1]) | (y > boxplot_range[2])
return(xx[ss,])
}
outlier_df <-ddply(DATA, .(x), boxplot.outlier.data)
# create proper x/y locations to handle overlapping dots...
if(spread_text) {
# credit: Greg Snow
temp_x <- boxdata_outlier_df[,"x"]
temp_y1 <- boxdata_outlier_df[,"y"]
temp_y2 <- temp_y1
# spread the label y-positions within each group so labels don't collide
for(i in unique(temp_x)){
tmp <- temp_x == i
temp_y2[tmp] <- spread.labs( temp_y2[ tmp ], 1.3*strheight('A'), maxiter=6000, stepsize = 0.05) #, min=0 )
}
}
# plotting the outlier labels :) (I wish there was a non-loop wise way for doing this)
for(i in seq_len(dim(boxdata_outlier_df)[1]))
{
# match this outlier back to its row in outlier_df to recover its label
ss <- (outlier_df[,"x"] %in% boxdata_outlier_df[i,]$group) & (outlier_df[,"y"] %in% boxdata_outlier_df[i,]$y);
current_label <- outlier_df[ss,"label_name"]
temp_x <- boxdata_outlier_df[i,"x"]
temp_y <- boxdata_outlier_df[i,"y"]
if(spread_text) {
temp_y_new <- temp_y2[i] # not ss
move_text_right <- strwidth(current_label) * push_text_right
text( temp_x+move_text_right, temp_y_new, current_label, col = label.col)
# draw a leader segment from the point to the (possibly moved) label
segments( temp_x+(move_text_right/6), temp_y, temp_x+(move_text_right*segement_width_as_percent_of_label_dist), temp_y_new )
} else {
text(temp_x, temp_y, current_label, pos = 4, col = label.col)
}
}
}
# borrowed from Hmisc
# Test whether a character vector contains only numbers (after trimming
# surrounding whitespace), ignoring empty strings and the tokens in `extras`.
# what = "test"   : return TRUE/FALSE.
# what = "vector" : return as.numeric(x) when all numeric, else x unchanged.
# Borrowed from Hmisc.
all.numeric <- function (x, what = c("test", "vector"), extras = c(".", "NA")){
  what <- match.arg(what)
  # silence the coercion warnings as.numeric raises on non-numeric strings
  old <- options(warn = -1)
  on.exit(options(old));
  x <- sub("[[:space:]]+$", "", x);
  x <- sub("^[[:space:]]+", "", x);
  keep <- !(x %in% c("", extras));
  isnum <- !any(is.na(as.numeric(x[keep])))
  if (what == "test") {
    isnum
  } else if (isnum) {
    as.numeric(x)
  } else {
    x
  }
}
# cleaning up the "cmpd_name.csv"
# Clean up the compound-name library ("cmpd_name.csv"):
# 1) drop duplicated KEGG entries that carry no PubChem/ChEBI annotation;
# 2) flag lipid-class names (by prefix) so downstream matching treats them as
#    exact-match-only;
# 3) write the result to "cmpd_name_update9.csv" in the working directory.
clean_cmpds_lib <- function(dat){
  # remove rarely used compounds: duplicated KEGG id with no other annotation
  rminx <- duplicated(dat$kegg_id) & is.na(dat$pubchem_id) & is.na(dat$chebi_id)
  dat <- dat[!rminx,];
  # lipid class name prefixes (exact, including the opening parenthesis)
  lipid.prefix <- c("CE(", "CL(", "DG(", "MG(", "PC(","PE(","PA(", "PG(", "PGP(", "PS(", "TG(", "SM(", "Cer(", "CerP(", "CPA(", "LPA(", "PIP(", "PIP2(", "CDP-DG(",
                    "Ganglioside GD1a (", "Ganglioside GD1b (", "Ganglioside GD2 (", "Ganglioside GD3 (", "Ganglioside GM1 (", "Ganglioside GM2 (", "Ganglioside GM3 (",
                    "Ganglioside GQ1c (", "Ganglioside GT1b (", "Ganglioside GT1c (", "Ganglioside GT2 (", "Ganglioside GT3 (",
                    "Glucosylceramide (", "Lactosyceramide (", "Lactosylceramide (", "Galactosylceramide (",
                    "Tetrahexosylceramide (", "LysoPC(", "LysoPE(", "Ganglioside GM1 (", "Ganglioside GM3 (", "Ganglioside GM2 (",
                    "Trihexosylceramide (", "3-O-Sulfogalactosylceramide (", "Galabiosylceramide (",
                    "Ganglioside GD3 (", "Ganglioside GA2 (", "Ganglioside GA1 (");
  # mark every compound whose name starts with one of the lipid prefixes
  hit <- rep(FALSE, length(dat$name));
  for(name in lipid.prefix){
    hit <- hit | (substring(dat$name, 1, nchar(name)) == name);
  }
  # lipid flag: duplicated or missing KEGG id AND a lipid-prefix name
  exactinx <- (duplicated(dat$kegg_id) | is.na(dat$kegg_id)) & hit
  dat <- cbind(dat, lipid=as.numeric(exactinx));
  write.csv(dat, file="cmpd_name_update9.csv", row.names=FALSE)
}
# Read an entire text file and return it as one newline-joined string.
GetFileContentAsString <- function(file.nm){
  all.lines <- readLines(file.nm);
  return(paste(all.lines, collapse="\n"));
}
# Replace non-finite entries with sentinel codes used by the front end:
# NA -> -777, Inf -> -999, -Inf -> -111.
ClearNumerics <- function(dat.mat){
  # NA must be handled first: comparing NA against Inf would yield NA masks
  na.inx <- is.na(dat.mat)
  dat.mat[na.inx] <- -777
  pos.inf <- dat.mat == Inf
  dat.mat[pos.inf] <- -999
  neg.inf <- dat.mat == -Inf
  dat.mat[neg.inf] <- -111
  dat.mat
}
# adapted from ropls package
# Fit an OPLS-DA model (adapted from the ropls package).
#
# x         : numeric matrix of predictors (samples x features).
# y         : response vector (class coding), stored as a one-column matrix.
# predI     : number of predictive components (NA = automatic in .coreOPLS).
# orthoI    : number of orthogonal components (0/NA handled by .coreOPLS).
# crossvalI : number of cross-validation folds.
# log10L    : accepted but not used in this adaptation -- TODO confirm.
# permI     : number of permutations for the significance test.
# scaleC    : column scaling; default is "standard" (unit variance).
#
# Returns (invisibly) the .coreOPLS result list, augmented with permutation
# p-values (pR2Y, pQ2), the top-variance feature subset and, for <=100
# features, a sparsified correlation matrix used for plotting.
perform_opls <- function (x, y = NULL, predI = NA, orthoI = 0, crossvalI = 7, log10L = FALSE, permI = 20,
scaleC = c("none", "center", "pareto", "standard")[4], ...) {
xMN <- x
# response as a one-column matrix with row/column names matching x
yMCN <- matrix(y, ncol = 1);
rownames(yMCN) <- rownames(xMN)
colnames(yMCN) <- paste0("y", 1:ncol(yMCN))
yLevelVc <- NULL;
xZeroVarVi <- NULL;
epsN <- .Machine[["double.eps"]]
# core model fit (NIPALS with cross-validation)
opl <- .coreOPLS(xMN = xMN, yMCN = yMCN, orthoI = orthoI, predI = predI,
scaleC = scaleC, crossvalI = crossvalI);
opl$suppLs[["y"]] <- y
opl$typeC <- "OPLS-DA";
## Permutation testing (Szymanska et al, 2012)
if(permI > 0) {
modSumVc <- colnames(opl$summaryDF)
# row 1 holds the unpermuted model; rows 2..permI+1 the permuted refits
permMN <- matrix(0,
nrow = 1 + permI,
ncol = length(modSumVc),
dimnames = list(NULL, modSumVc))
perSimVn <- numeric(1 + permI)
perSimVn[1] <- 1
permMN[1, ] <- as.matrix(opl$summaryDF)
for(k in 1:permI) {
# refit the model with a randomly permuted response
yVcn <- drop(opl$suppLs[["yMCN"]])
yPerVcn <- sample(yVcn)
yPerMCN <- matrix(yPerVcn, ncol = 1)
perOpl <- .coreOPLS(xMN = xMN,
yMCN = yPerMCN,
orthoI = opl$summaryDF[, "ort"],
predI = opl$summaryDF[, "pre"],
scaleC = scaleC,
crossvalI = crossvalI)
permMN[1 + k, ] <- as.matrix(perOpl$summaryDF);
# similarity between the permuted and original responses
perSimVn[1 + k] <- .similarityF(opl$suppLs[["yMCN"]], yPerMCN)
}
permMN <- cbind(permMN, sim = perSimVn);
# empirical p-values: fraction of permutations matching or beating the model
perPvaVn <- c(pR2Y = (1 + length(which(permMN[-1, "R2Y(cum)"] >= permMN[1, "R2Y(cum)"]))) / (nrow(permMN) - 1),
pQ2 = (1 + length(which(permMN[-1, "Q2(cum)"] >= permMN[1, "Q2(cum)"]))) / (nrow(permMN) - 1));
opl$summaryDF[, "pR2Y"] <- perPvaVn["pR2Y"];
opl$summaryDF[, "pQ2"] <- perPvaVn["pQ2"];
opl$suppLs[["permMN"]] <- permMN;
}
##------------------------------------
## Numerical results
##------------------------------------
totN <- length(c(xMN))
nasN <- sum(is.na(c(xMN)))
if(!is.null(opl$suppLs[["yMCN"]])) {
totN <- totN + length(c(opl$suppLs[["yMCN"]]))
nasN <- nasN + sum(is.na(c(opl$suppLs[["yMCN"]])))
}
## Raw summary
##------------
# keep a small subset of features spanning the variance range (for plots)
opl$suppLs[["topLoadI"]] <- 3
if(ncol(xMN) > opl$suppLs[["topLoadI"]]) {
xVarVn <- apply(xMN, 2, var)
names(xVarVn) <- 1:length(xVarVn)
xVarVn <- sort(xVarVn)
xVarSorVin <- as.numeric(names(xVarVn[seq(1, length(xVarVn), length = opl$suppLs[["topLoadI"]])]))
opl$suppLs[["xSubIncVarMN"]] <- xMN[, xVarSorVin, drop = FALSE]
} else{
opl$suppLs[["xSubIncVarMN"]] <- xMN
}
# for small feature sets, keep only the strongest pairwise correlations
if(ncol(xMN) <= 100) {
xCorMN <- cor(xMN, use = "pairwise.complete.obs")
xCorMN[lower.tri(xCorMN, diag = TRUE)] <- 0
if(ncol(xMN) > opl$suppLs[["topLoadI"]]) {
xCorNexDF <- which(abs(xCorMN) >= sort(abs(xCorMN), decreasing = TRUE)[opl$suppLs[["topLoadI"]] + 1],
arr.ind = TRUE);
xCorDisMN <- matrix(0,
nrow = nrow(xCorNexDF),
ncol = nrow(xCorNexDF),
dimnames = list(colnames(xMN)[xCorNexDF[, "row"]],
colnames(xMN)[xCorNexDF[, "col"]]))
for(k in 1:nrow(xCorDisMN)){
xCorDisMN[k, k] <- xCorMN[xCorNexDF[k, "row"], xCorNexDF[k, "col"]]
}
} else {
xCorDisMN <- xCorMN
}
opl$suppLs[["xCorMN"]] <- xCorDisMN
rm(xCorDisMN)
}
return(invisible(opl))
}
# Core OPLS/OPLS-DA engine (NIPALS with k-fold cross-validation), adapted
# from the ropls package.
#
# xMN       : predictor matrix (samples x features).
# yMCN      : response matrix (one column here).
# orthoI    : number of orthogonal components (NA = automatic selection).
# predI     : number of predictive components (NA = automatic).
# scaleC    : "none" | "center" | "pareto" | "standard" column scaling.
# crossvalI : number of cross-validation folds.
#
# Returns a list with the model data frame (R2X/R2Y/Q2 per component),
# summary statistics, scores/loadings/weights for predictive and orthogonal
# components, VIP values, regression coefficients and supporting data.
# Statement order below is load-bearing; code is unchanged from the original.
.coreOPLS <- function (xMN, yMCN, orthoI, predI, scaleC, crossvalI) {
epsN <- .Machine[["double.eps"]]
# placeholders for every model component, filled in as fitting proceeds
varVn <- NULL
yMeanVn <- NULL
ySdVn <- NULL
wMN <- NULL
cMN <- NULL
uMN <- NULL
rMN <- NULL
bMN <- NULL
vipVn <- NULL
yPreMN <- NULL
yTesMN <- NULL
toMN <- NULL
poMN <- NULL
woMN <- NULL
coMN <- NULL
orthoVipVn <- NULL
# missing-value bookkeeping for X (y assumed complete here)
naxVi <- which(is.na(c(xMN)))
naxL <- length(naxVi) > 0
nayVi <- integer()
nayL <- FALSE;
yMN <- yMCN;
obsNamVc <- rownames(xMN)
# automatic component-number selection (capped at autMaxN)
autNcoL <- autNcpL <- FALSE
autMaxN <- min(c(10, dim(xMN)))
if (is.na(orthoI)) {
if (autMaxN == 1) {
orthoI <- 0
predI <- 1
warning("The data contain a single variable (or sample): A PLS model with a single component will be built",
call. = FALSE)
}
else {
orthoI <- autMaxN - 1
predI <- 1
autNcoL <- TRUE
}
}
if (is.na(predI)) {
if (orthoI > 0) {
if (autMaxN == 1) {
orthoI <- 0
warning("The data contain a single variable (or sample): A PLS model with a single component will be built",
call. = FALSE)
}
else warning("OPLS(-DA): The number of predictive component is set to 1 for a single response model",
call. = FALSE)
predI <- 1
if ((predI + orthoI) > min(dim(xMN)))
stop("The sum of 'predI' (", predI, ") and 'orthoI' (",
orthoI, ") exceeds the minimum dimension of the 'x' data matrix (",
min(dim(xMN)), ")", call. = FALSE)
}
else {
predI <- autMaxN
autNcpL <- TRUE
}
}
# column-wise centering/scaling of X according to scaleC
xVarVn <- apply(xMN, 2, function(colVn) var(colVn, na.rm = TRUE))
xMeanVn <- apply(xMN, 2, function(colVn) mean(colVn, na.rm = TRUE))
switch(scaleC, none = {
xMeanVn <- rep(0, ncol(xMN))
xSdVn <- rep(1, times = ncol(xMN))
}, center = {
xSdVn <- rep(1, times = ncol(xMN))
}, pareto = {
xSdVn <- apply(xMN, 2, function(colVn) sqrt(sd(colVn,
na.rm = TRUE)))
}, standard = {
xSdVn <- apply(xMN, 2, function(colVn) sd(colVn, na.rm = TRUE))
})
xMN <- scale(xMN, center = xMeanVn, scale = xSdVn)
if (!is.null(colnames(xMN))) {
xvaNamVc <- colnames(xMN)
}
else xvaNamVc <- paste("x", 1:ncol(xMN), sep = "")
preNamVc <- paste("p", 1:predI, sep = "")
pMN <- matrix(0, nrow = ncol(xMN), ncol = predI, dimnames = list(xvaNamVc,
preNamVc))
tMN <- uMN <- matrix(0, nrow = nrow(xMN), ncol = predI, dimnames = list(obsNamVc,
preNamVc))
ssxTotN <- sum(xMN^2, na.rm = TRUE)
# NOTE(review): yMeanVn is computed then immediately overwritten with zeros,
# so y is effectively not centered -- looks intentional for class-coded y;
# confirm against the ropls source.
yMeanVn <- apply(yMN, 2, function(colVn) mean(colVn,na.rm = TRUE))
yMeanVn <- rep(0, times = ncol(yMN))
ySdVn <- rep(1, times = ncol(yMN))
yMN <- scale(yMN, center = yMeanVn, scale = ySdVn)
yvaNamVc <- paste("y", 1:ncol(yMN), sep = "")
wMN <- pMN
uMN <- tMN
cMN <- matrix(0, nrow = ncol(yMN), ncol = predI, dimnames = list(yvaNamVc, preNamVc))
# cross-validation fold setup: observations dealt round-robin into folds
cvfNamVc <- paste("cv", 1:crossvalI, sep = "")
cvfOutLs <- split(1:nrow(xMN), rep(1:crossvalI, length = nrow(xMN)))
prkVn <- numeric(crossvalI)
# significance threshold for Q2 used by the automated component selection
ru1ThrN <- ifelse(orthoI == 0, ifelse(nrow(xMN) > 100, yes = 0, no = 0.05), 0.01)
ssyTotN <- rs0N <- sum(yMN^2, na.rm = TRUE)
hN <- 1
orthoNamVc <- paste("o", 1:orthoI, sep = "");
toMN <- matrix(0, nrow = nrow(xMN), ncol = orthoI,
dimnames = list(obsNamVc, orthoNamVc));
woMN <- poMN <- matrix(0, nrow = ncol(xMN), ncol = orthoI,
dimnames = list(xvaNamVc, orthoNamVc));
coMN <- matrix(0, nrow = ncol(yMN), ncol = orthoI,
dimnames = list(yvaNamVc, orthoNamVc));
# per-component model statistics table (R2X, R2Y, Q2, significance)
modelDF <- as.data.frame(matrix(NA, nrow = 1 + orthoI +
1, ncol = 7, dimnames = list(c("p1", orthoNamVc,
"sum"), c("R2X", "R2X(cum)", "R2Y", "R2Y(cum)",
"Q2", "Q2(cum)", "Signif."))));
for (j in 1:ncol(modelDF)){
mode(modelDF[, j]) <- ifelse(colnames(modelDF)[j] == "Signif.", "character", "numeric")
}
# training/test splits per fold; the extra last slot holds the full data
xcvTraLs <- lapply(cvfOutLs, function(obsVi) xMN[-obsVi, , drop = FALSE])
xcvTesLs <- lapply(cvfOutLs, function(obsVi) xMN[obsVi, , drop = FALSE])
ycvTraLs <- lapply(cvfOutLs, function(obsVi) yMN[-obsVi, , drop = FALSE])
ycvTesLs <- lapply(cvfOutLs, function(obsVi) yMN[obsVi, , drop = FALSE])
xcvTraLs <- c(xcvTraLs, list(xMN))
ycvTraLs <- c(ycvTraLs, list(yMN))
breL <- FALSE
# main loop: one orthogonal component per iteration (plus final pass);
# inner loop runs each CV fold, then the full-data fit (cvN > crossvalI)
for (noN in 1:(orthoI + 1)) {
if (breL){
break
}
for (cvN in 1:length(xcvTraLs)) {
xcvTraMN <- xcvTraLs[[cvN]]
ycvTraMN <- ycvTraLs[[cvN]]
if (ncol(ycvTraMN) > 1) {
# multi-response starting weights via SVD of the weight matrix
wwMN <- apply(ycvTraMN, 2, function(colVn) crossprod(xcvTraMN, colVn)/drop(crossprod(colVn)))
wwSvdLs <- svd(wwMN)
wwNcpVin <- which(wwSvdLs[["d"]]^2 > epsN * sum(wwSvdLs[["d"]]^2))
twMN <- wwSvdLs[["u"]][, wwNcpVin, drop = FALSE] %*% diag(wwSvdLs[["d"]][wwNcpVin], nrow = length(wwNcpVin))
}
# NIPALS inner loop: iterate weights/scores until convergence
uOldVn <- ycvTraMN[, 1, drop = FALSE]
repeat {
wVn <- crossprod(xcvTraMN, uOldVn)/drop(crossprod(uOldVn))
wVn <- wVn/sqrt(drop(crossprod(wVn)))
tVn <- xcvTraMN %*% wVn
cVn <- crossprod(ycvTraMN, tVn)/drop(crossprod(tVn))
uVn <- ycvTraMN %*% cVn/drop(crossprod(cVn))
dscN <- drop(sqrt(crossprod((uVn - uOldVn)/uVn)))
if (ncol(ycvTraMN) == 1 || dscN < 1e-10) {
break
}else {
uOldVn <- uVn
}
}
pVn <- crossprod(xcvTraMN, tVn)/drop(crossprod(tVn))
# orthogonal component: part of the loading orthogonal to the weights
if (ncol(ycvTraMN) > 1){
for (j in 1:ncol(twMN)) {
woVn <- pVn - drop(crossprod(twMN[,
j, drop = FALSE], pVn))/drop(crossprod(twMN[,
j, drop = FALSE])) * twMN[, j, drop = FALSE];
}
} else {
woVn <- pVn - drop(crossprod(wVn, pVn))/drop(crossprod(wVn)) * wVn
}
woVn <- woVn/sqrt(drop(crossprod(woVn)))
toVn <- xcvTraMN %*% woVn
coVn <- crossprod(ycvTraMN, toVn)/drop(crossprod(toVn))
poVn <- crossprod(xcvTraMN, toVn)/drop(crossprod(toVn))
if (cvN <= crossvalI) {
# CV fold: accumulate prediction error for Q2
xcvTesMN <- xcvTesLs[[cvN]]
ycvTesMN <- ycvTesLs[[cvN]]
if (any(is.na(xcvTesMN))) {
# row-wise projection using only the complete entries
prxVn <- numeric(nrow(xcvTesMN))
for (r in 1:length(prxVn)) {
comVl <- complete.cases(xcvTesMN[r, ])
prxVn[r] <- crossprod(xcvTesMN[r, comVl], wVn[comVl])/drop(crossprod(wVn[comVl]))
}
prkVn[cvN] <- sum((ycvTesMN - prxVn %*% t(cVn))^2, na.rm = TRUE)
} else {
prkVn[cvN] <- sum((ycvTesMN - xcvTesMN %*% wVn %*% t(cVn))^2, na.rm = TRUE)
}
# deflate the test matrix by the orthogonal component
toTesVn <- xcvTesMN %*% woVn
xcvTesLs[[cvN]] <- xcvTesMN - tcrossprod(toTesVn, poVn)
if (cvN == crossvalI) {
# all folds done: convert accumulated error to (incremental) Q2
q2N <- 1 - sum(prkVn)/rs0N
if (noN == 1) {
modelDF["p1", "Q2(cum)"] <- modelDF["p1", "Q2"] <- q2N
} else {
modelDF[noN, "Q2(cum)"] <- q2N - modelDF["p1", "Q2"]
modelDF[noN, "Q2"] <- q2N - sum(modelDF[1:(noN - 1), "Q2"], na.rm = TRUE)
}
}
} else {
# full-data fit: record R2Y and store the orthogonal component
r2yN <- sum(tcrossprod(tVn, cVn)^2)/ssyTotN
if (noN == 1) {
modelDF["p1", "R2Y(cum)"] <- modelDF["p1", "R2Y"] <- r2yN
} else {
modelDF[noN, "R2Y(cum)"] <- r2yN - modelDF["p1", "R2Y"]
modelDF[noN, "R2Y"] <- r2yN - sum(modelDF[1:(noN - 1), "R2Y"], na.rm = TRUE)
}
if (noN <= orthoI) {
modelDF[paste0("o", noN), "R2X"] <- sum(tcrossprod(toVn,poVn)^2)/ssxTotN
poMN[, noN] <- poVn
toMN[, noN] <- toVn
woMN[, noN] <- woVn
coMN[, noN] <- coVn
}
# classify this component's contribution (significance labels)
if (modelDF[noN, "R2Y"] < 0.01) {
modelDF[noN, "Signif."] <- "N4"
} else if (modelDF[noN, "Q2"] < ru1ThrN) {
modelDF[noN, "Signif."] <- "NS"
} else {
modelDF[noN, "Signif."] <- "R1"
}
# automated mode: stop adding components once they stop being significant
if (autNcoL && modelDF[noN, "Signif."] != "R1" && noN > 2) {
breL <- TRUE
break
} else {
cMN[, 1] <- cVn
pMN[, 1] <- pVn
tMN[, 1] <- tVn
uMN[, 1] <- uVn
wMN[, 1] <- wVn
}
}
if (breL) {
break;
}
if (noN < orthoI + 1){
# deflate the training matrix by the orthogonal component for next round
xcvTraLs[[cvN]] <- xcvTraMN - tcrossprod(toVn, poVn);
}
}
}
rm(xcvTraLs)
rm(xcvTesLs)
rm(ycvTraLs)
# cumulative R2X over predictive + orthogonal components
modelDF["p1", "R2X(cum)"] <- modelDF["p1", "R2X"] <- sum(tcrossprod(tMN, pMN)^2)/ssxTotN
modelDF[1:(1 + orthoI), "R2X(cum)"] <- cumsum(modelDF[1:(1 + orthoI), "R2X"]);
if (autNcoL) {
# trim to the number of components actually retained by the automation
if (all(modelDF[, "Signif."] == "R1", na.rm = TRUE)) {
orthoI <- noN - 1
}else{
orthoI <- noN - 3
}
if (orthoI == autMaxN - 1){
warning("The maximum number of orthogonal components in the automated mode (",
autMaxN - 1, ") has been reached whereas R2Y (",
round(modelDF[1 + orthoI, "R2Y"] * 100),
"%) is above 1% and Q2Y (", round(modelDF[1 +
orthoI, "Q2"] * 100), "%) is still above ",
round(ru1ThrN * 100), "%.", call. = FALSE)
}
poMN <- poMN[, 1:orthoI, drop = FALSE]
toMN <- toMN[, 1:orthoI, drop = FALSE]
woMN <- woMN[, 1:orthoI, drop = FALSE]
coMN <- coMN[, 1:orthoI, drop = FALSE]
orthoNamVc <- orthoNamVc[1:orthoI]
modelDF <- modelDF[c(1:(orthoI + 1), nrow(modelDF)), ]
}
# totals row and summary table
modelDF["sum", "R2X(cum)"] <- modelDF[1 + orthoI, "R2X(cum)"]
modelDF["sum", "R2Y(cum)"] <- sum(modelDF[, "R2Y"], na.rm = TRUE)
modelDF["sum", "Q2(cum)"] <- sum(modelDF[, "Q2"], na.rm = TRUE)
summaryDF <- modelDF["sum", c("R2X(cum)", "R2Y(cum)", "Q2(cum)")]
# regression coefficients and back-scaled fitted values
rMN <- wMN
bMN <- tcrossprod(rMN, cMN)
yPreScaMN <- tcrossprod(tMN, cMN)
yPreMN <- scale(scale(yPreScaMN, FALSE, 1/ySdVn), -yMeanVn, FALSE)
attr(yPreMN, "scaled:center") <- NULL
attr(yPreMN, "scaled:scale") <- NULL
yActMCN <- yMCN
yActMN <- yActMCN
summaryDF[, "RMSEE"] <- sqrt(.errorF(yActMN, yPreMN)^2 * nrow(yActMN)/(nrow(yActMN) - (1 + predI + orthoI)))
yTestMCN <- NULL
# VIP computation: explained sums of squares per component, X and Y sides
sxpVn <- sapply(1:ncol(tMN), function(h) sum(drop(tcrossprod(tMN[, h], pMN[, h])^2)))
sxpCumN <- sum(sxpVn)
sxoVn <- sapply(1:ncol(toMN), function(h) sum(drop(tcrossprod(toMN[, h], poMN[, h])^2)))
sxoCumN <- sum(sxoVn)
ssxCumN <- sxpCumN + sxoCumN
sypVn <- sapply(1:ncol(tMN), function(h) sum(drop(tcrossprod(tMN[, h], cMN[, h])^2)))
sypCumN <- sum(sypVn)
syoVn <- sapply(1:ncol(toMN), function(h) sum(drop(tcrossprod(toMN[,
h], coMN[, h])^2)))
syoCumN <- sum(syoVn)
ssyCumN <- sypCumN + syoCumN
# predictive VIP
kpN <- nrow(wMN)/(sxpCumN/ssxCumN + sypCumN/ssyCumN)
pNorMN <- sweep(pMN, 2, sqrt(colSums(pMN^2)), "/")
vipVn <- sqrt(kpN * (rowSums(sweep(pNorMN^2, 2, sxpVn,
"*"))/ssxCumN + rowSums(sweep(pNorMN^2, 2, sypVn,
"*"))/ssyCumN))
# orthogonal VIP
koN <- nrow(wMN)/(sxoCumN/ssxCumN + syoCumN/ssyCumN)
poNorMN <- sweep(poMN, 2, sqrt(colSums(poMN^2)),"/")
orthoVipVn <- sqrt(koN * (rowSums(sweep(poNorMN^2,
2, sxoVn, "*"))/ssxCumN + rowSums(sweep(poNorMN^2,
2, syoVn, "*"))/ssyCumN))
summaryDF[, "pre"] <- predI
summaryDF[, "ort"] <- orthoI
rownames(summaryDF) <- "Total"
# round the reported statistics to 3 significant digits
sigNamVc <- c("R2X", "R2X(cum)", "R2Y", "R2Y(cum)", "Q2",
"Q2(cum)", "RMSEE", "RMSEP")
for (namC in intersect(colnames(modelDF), sigNamVc)) modelDF[,
namC] <- signif(modelDF[, namC], 3)
for (namC in intersect(colnames(summaryDF), sigNamVc)) summaryDF[,
namC] <- signif(summaryDF[, namC], 3)
# assemble the full result list (ropls-compatible layout)
retLs <- list(typeC = NULL, modelDF = modelDF,
summaryDF = summaryDF, pcaVarVn = varVn,
vipVn = vipVn, orthoVipVn = orthoVipVn, fitted = NULL,
tested = NULL, coefficients = bMN, residuals = NULL,
xMeanVn = xMeanVn, xSdVn = xSdVn, yMeanVn = yMeanVn,
ySdVn = ySdVn, xZeroVarVi = NULL, scoreMN = tMN, loadingMN = pMN,
weightMN = wMN, orthoScoreMN = toMN, orthoLoadingMN = poMN,
orthoWeightMN = woMN, cMN = cMN, uMN = uMN, weightStarMN = rMN,
coMN = coMN, suppLs = list(yLevelVc = NULL, naxL = naxL, nayL = nayL, nayVi = nayVi,
permMN = NULL, scaleC = scaleC, topLoadI = NULL,
yMCN = yMCN, xSubIncVarMN = NULL, xCorMN = NULL,
xModelMN = xMN, yModelMN = yMN, yPreMN = yPreMN,
yTesMN = yTesMN))
}
# Root-mean-square error between two numeric arrays (NAs ignored).
.errorF <- function(x, y){
  sq.diff <- drop((x - y)^2)
  sqrt(mean(sq.diff, na.rm = TRUE))
}
# Pairwise-complete Pearson correlation, used as the similarity measure
# between original and permuted responses in the permutation test.
.similarityF <- function(x, y) {
  cor(x, y, use = "pairwise.complete.obs")
}
#################################################################################################
#################################################################################################
#################################################################################################
#################################################################################################
################################### ############################
################################### Altered/Wrote Scripts ############################
################################### ############################
#################################################################################################
#################################################################################################
#################################################################################################
#################################################################################################
##############################################
################## Summary plot ##############
##############################################
# Draw a 2x2 normalization summary on the current graphics device:
#   left column  : before normalization (boxplot of up to 50 features + density)
#   right column : after normalization (same two panels)
# Relies on the globals dataSet ($proc raw, $norm normalized) and the helper
# functions GetRandomSubsetIndex / GetValueLabel / GetVariableLabel defined
# elsewhere in this file. imgName/format/dpi/width are currently unused
# because the Cairo file-output code below is commented out (Shiny renders
# the active device directly).
PlotNormSum<-function(imgName, format="png", dpi=72, width=NA){
#imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
#if(is.na(width)){
# w <- 10.5; h <- 12;
#}else if(width == 0){
# w <- 7.2;h <- 9;
#imgSet$norm<<-imgName;
#}else{
# w <- 7.2; h <- 9;
#}
#Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
# 4 panels, filled column-wise: tall boxplots on top, densities underneath
layout(matrix(c(1,1,1,2,3,3,3,4), 4, 2, byrow = FALSE))
# since there may be too many compounds, only plot a subset (50) in the box
# plot; the density plot still uses all the data
pre.inx<-GetRandomSubsetIndex(ncol(dataSet$proc), sub.num=50);
namesVec <- colnames(dataSet$proc[,pre.inx]);
# keep only features present in both the raw and normalized tables
nm.inx <- namesVec %in% colnames(dataSet$norm)
namesVec <- namesVec[nm.inx];
pre.inx <- pre.inx[nm.inx];
norm.inx<-match(namesVec, colnames(dataSet$norm));
namesVec <- substr(namesVec, 1, 12); # use abbreviated name
rangex.pre <- range(dataSet$proc[, pre.inx], na.rm=T);
rangex.norm <- range(dataSet$norm[, norm.inx], na.rm=T);
x.label<-GetValueLabel();
y.label<-GetVariableLabel();
# fig 1: boxplot of raw values
op<-par(mar=c(0,7,4,0), xaxt="n");
boxplot(dataSet$proc[,pre.inx], names= namesVec, ylim=rangex.pre, las = 2, col="lightgreen", horizontal=T);
mtext("Before Normalization",3, 1)
# fig 2: density of per-feature means, raw
op<-par(mar=c(7,7,0,0), xaxt="s");
plot(density(apply(dataSet$proc, 2, mean, na.rm=TRUE)), col='darkblue', las =2, lwd=2, main="", xlab="", ylab="");
mtext("Density", 2, 5);
mtext(x.label, 1, 5);
# fig 3: boxplot of normalized values
op<-par(mar=c(0,7,4,2), xaxt="n");
boxplot(dataSet$norm[,norm.inx], names=namesVec, ylim=rangex.norm, las = 2, col="lightgreen", horizontal=T);
mtext("After Normalization",3, 1);
# fig 4: density of per-feature means, normalized
op<-par(mar=c(7,7,0,2), xaxt="s");
plot(density(apply(dataSet$norm, 2, mean, na.rm=TRUE)), col='darkblue', las=2, lwd =2, main="", xlab="", ylab="");
mtext(paste("Normalized",x.label),1, 5);
#dev.off();
}
################################
######## Anova test ############
################################
# One-way ANOVA (or Kruskal-Wallis when nonpar=TRUE) across the groups in
# dataSet$cls, applied column-wise to the normalized data dataSet$norm.
#
# nonpar   - TRUE for Kruskal-Wallis (via kwtest), FALSE for one-way ANOVA
#            (via aof) with a post-hoc test on significant features
# thresh   - raw p-value significance cutoff; if nothing passes, it is
#            relaxed to keep the top 20% of features (capped at 50)
# post.hoc - "tukey" for Tukey's HSD, anything else for Fisher's LSD
#            (parametric branch only)
#
# Side effects: writes "anova_posthoc.csv" to the working directory and
# stores the full result under the global analSet$aov. Returns 1.
ANOVA.Anal<-function(nonpar=F, thresh=0.05, post.hoc="fisher"){
if(nonpar){
aov.nm <- "Kruskal Wallis Test";
anova.res<-apply(as.matrix(dataSet$norm), 2, kwtest);
#extract all p values
p.value<-unlist(lapply(anova.res, function(x) {x$p.value}));
names(p.value)<-colnames(dataSet$norm);
# FDR is computed over ALL features before subsetting to significant ones
fdr.p <- p.adjust(p.value, "fdr");
inx.imp <- p.value <= thresh;
if(sum(inx.imp) == 0){ # no sig features!
# relax the threshold to retain the top 20% (at most 50) features
cutpt <- round(0.2*length(p.value));
cutpt <- ifelse(cutpt>50, 50, cutpt);
inx <- which(rank(p.value) == cutpt);
thresh <- p.value[inx];
inx.imp <- p.value <= thresh;
}
sig.p <- p.value[inx.imp];
fdr.p <- fdr.p[inx.imp];
# no post-hoc test in the non-parametric branch; last column is the
# literal string 'NA' as a placeholder
sig.mat <- data.frame(signif(sig.p,5), signif(-log10(sig.p),5), signif(fdr.p,5), 'NA');
rownames(sig.mat) <- names(sig.p);
colnames(sig.mat) <- c("p.value", "-log10(p)", "FDR", "Post-Hoc");
# order the result simultaneously
ord.inx <- order(sig.p, decreasing = FALSE);
sig.mat <- sig.mat[ord.inx,];
fileName <- "anova_posthoc.csv";
my.mat <- sig.mat[,1:3];
colnames(my.mat) <- c("pval_KW", "-log10(p)", "FDR");
write.csv(my.mat,file=fileName);
}else{
aov.nm <- "One-way ANOVA";
aov.res<-apply(as.matrix(dataSet$norm), 2, aof);
anova.res<-lapply(aov.res, anova);
#extract all p values
p.value<-unlist(lapply(anova.res, function(x) { x["Pr(>F)"][1,]}));
names(p.value)<-colnames(dataSet$norm);
fdr.p <- p.adjust(p.value, "fdr");
# do post-hoc only for signficant entries
inx.imp <- p.value <= thresh;
if(sum(inx.imp) == 0){ # no sig features with default thresh
# readjust threshold to top 20% or top 50
cutpt <- round(0.2*length(p.value));
cutpt <- ifelse(cutpt>50, 50, cutpt);
inx <- which(rank(p.value) == cutpt);
thresh <- p.value[inx];
inx.imp <- p.value <= thresh;
}
aov.imp <- aov.res[inx.imp];
sig.p <- p.value[inx.imp];
fdr.p <- fdr.p[inx.imp];
cmp.res <- NULL;
post.nm <- NULL;
if(post.hoc=="tukey"){
tukey.res<-lapply(aov.imp, TukeyHSD, conf.level=1-thresh);
cmp.res <- unlist(lapply(tukey.res, parseTukey, cut.off=thresh));
post.nm = "Tukey's HSD";
}else{
fisher.res<-lapply(aov.imp, FisherLSD, thresh);
cmp.res <- unlist(lapply(fisher.res, parseFisher, cut.off=thresh));
post.nm = "Fisher's LSD";
}
# create the result dataframe,
# note, the last column is string, not double
sig.mat <- data.frame(signif(sig.p,5), signif(-log10(sig.p),5), signif(fdr.p,5), cmp.res);
rownames(sig.mat) <- names(sig.p);
colnames(sig.mat) <- c("p.value", "-log10(p)", "FDR", post.nm);
# order the result simultaneously
ord.inx <- order(sig.p, decreasing = FALSE);
sig.mat <- sig.mat[ord.inx,];
fileName <- "anova_posthoc.csv";
write.csv(sig.mat,file=fileName);
}
# NOTE(review): this local list shadows stats::aov inside the function body
aov<-list (
aov.nm = aov.nm,
raw.thresh = thresh,
thresh = -log10(thresh), # only used for plot threshold line
p.value = p.value,
p.log = -log10(p.value),
inx.imp = inx.imp,
post.hoc = post.hoc,
sig.mat = sig.mat
);
analSet$aov<<-aov;
return(1);
}
################################
###### ANOVA Table##############
#################################
# ANOVA result table for display: the feature names stored as row names of
# analSet$aov$sig.mat (set by ANOVA.Anal) are promoted into a leading
# "Name" column so Shiny table widgets can show them.
MyANOVATable <-function(){
sig <- analSet$aov$sig.mat
out <- cbind(Name = row.names(sig), sig)
return(out)
}
#########################
# ANOVA plot
#########################
# Live ANOVA overview plot: -log10(p) of every feature from the stored
# result (global analSet$aov). Significant features (inx.imp) are drawn
# with fill colour c1 and enlarged, the rest with c2; a dotted horizontal
# line marks the -log10 significance threshold.
PlotLiveANOVA<-function(c1,c2){
log.p <- analSet$aov$p.log
AnovaPlot = plot(log.p, ylab="-log10(p)", xlab = GetVariableLabel(), main=analSet$aov$aov.nm, type="n")
grid()
sig.idx <- which(analSet$aov$inx.imp)
ns.idx <- which(!analSet$aov$inx.imp)
points(sig.idx, log.p[sig.idx], bg=c1, cex=1.2, pch=21)
points(ns.idx, log.p[ns.idx], bg=c2, pch=21)
abline (h=analSet$aov$thresh, lty=3)
return(AnovaPlot)
}
#################################
###### t-test plot#############
################################
# Live t-test overview plot: -log10(p) of every feature from the stored
# t-test result (global analSet$tt). Significant features (inx.imp) are
# filled with colour c1 and enlarged, the rest with c2; a dotted line
# marks the -log10 threshold. Returns the value of plot() (NULL).
MyPlotTT<-function(c1,c2){
lod=analSet$tt$p.log;
#Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
# empty frame first, points layered on top by significance
ttestgraph=plot(lod, ylab="-log10(p)", xlab=GetVariableLabel(), main=analSet$tt$tt.nm, type="n");
grid();
red.inx<- which(analSet$tt$inx.imp);
blue.inx <- which(!analSet$tt$inx.imp);
points(red.inx, lod[red.inx], bg=c1, cex=1.2, pch=21);
points(blue.inx, lod[blue.inx], bg=c2, pch=21);
abline (h=analSet$tt$thresh, lty=3);
axis(4); # duplicate axis on the right for readability
return(ttestgraph)
#dev.off();
}
##################################
############ T-test ##############
##################################
# t-test result table for display: row names of analSet$tt$sig.mat become
# a leading "Name" column; remaining columns are relabelled explicitly.
MyTTTable <-function(){
sig <- analSet$tt$sig.mat
out <- data.frame(Name = row.names(sig), sig, check.names = FALSE, stringsAsFactors = FALSE)
colnames(out) <- c("Name","t.stat","p.value","-log10(p)","FDR")
return(out)
}
##############################################
#Interval plot methods of individual compound
################################################
# Interval plot of a single compound across the groups in the global
# dataSet: group means with error bars of either one standard error or
# one standard deviation.
#
# cmpdNm - compound (column) name in dataSet$norm
# dpi    - kept for interface compatibility with the (disabled) ggsave
#          image-saving workflow; not used for on-screen rendering
# colors - vector of colours, one per group level
# calc   - "se" for standard-error bars, "sd" for standard-deviation bars
#
# Returns a ggplot object.
#
# Fixes vs previous version: the six scalar accumulation loops are
# vectorized with vapply (the as.data.frame calls also passed a misspelled
# 'StringsAsFactors' argument that was silently ignored); the duplicated
# se/sd ggplot code is merged; an unknown 'calc' now raises an error
# instead of accidentally returning the base graphics plot() function.
IntervalPlot<-function(cmpdNm, dpi=200, colors, calc){
# split the compound's normalized values by group (level order)
cmpDat = split(dataSet$norm[,cmpdNm], dataSet$cls)
cmpName = colnames(dataSet$norm[cmpdNm])
# per-group summary statistics, one entry per factor level
groups = levels(dataSet$cls)
N = unname(vapply(cmpDat, length, numeric(1)))
means = unname(vapply(cmpDat, mean, numeric(1)))
sd = unname(vapply(cmpDat, stats::sd, numeric(1)))
se = sd/sqrt(N)
# half-width of the 95% confidence interval: t quantile (df = N-1) * SE
ci = qt(.95/2 + .5, N - 1)*se
dfp = data.frame(groups, N, means, sd, se, ci)
# choose the error-bar half-width
if(calc=="se"){
delta = as.numeric(dfp$se)
}else if(calc=="sd"){
delta = as.numeric(dfp$sd)
}else{
stop("calc must be either 'se' or 'sd'")
}
err.up = as.numeric(dfp$means) + delta
err.dn = as.numeric(dfp$means) - delta
plot=ggplot(dfp, aes(x=groups, y=means, group=groups, color=groups))+
theme_bw()+
theme(panel.grid.major = element_blank(), panel.grid.minor=element_blank(),panel.background=element_blank(),
axis.line = element_line(size = 0.3,colour = "black"),
axis.text=element_text(size=12,colour="black"),axis.title=element_text(size=14,face="bold"))+
geom_errorbar(aes(ymin=err.dn, ymax=err.up), width=.2) +
geom_point(size=3.5) + scale_colour_manual(name = "Groups",values=colors) +
xlab(" ") + ylab(" ") + ggtitle(cmpName)
return(plot)
#ggsave(plot = plot, imgName, h = 9/3, w = 16/3, dpi=dpi, type = "cairo-png")
}
####################################################################
###################OPLSDA 2D scrore plot############################
####################################################################
# 2D OPLS-DA score plot on the active device: predictive score component
# pcx on x versus orthogonal score component ocy on y, with one shaded
# confidence ellipse (level = reg) per group.
#
# Reads the globals exoplsda (scoreMN/orthoScoreMN/modelDF) and dataSet.
# show=1 adds sample labels; grey.scale=1 renders in greyscale.
#
# Fixes vs previous version: the Cairo() file device was opened while the
# matching dev.off() below was commented out, sending the plot to an
# orphaned file device that never closed (and never rendered live); the
# device call is now disabled like in the sibling live-plot functions.
# Also, plot() has no 'color' argument -- replaced with 'col'.
PlotOPLSDA2DScore <- function(imgName, format="png", dpi=72, width=NA, pcx, ocy, reg = 0.95, show=1, grey.scale = 0){
# NOTE(review): axis labels always report modelDF[1,1]/[2,1] (the first
# predictive/orthogonal components) even when pcx/ocy select other
# columns -- confirm modelDF layout before plotting other components.
xlabel = paste("PC",1, "(", round(100*exoplsda$modelDF[1,1],1), "%)");
ylabel = paste("OC",1, "(", round(100*exoplsda$modelDF[2,1],1), "%)");
pc1 = exoplsda$scoreMN[, pcx];
pc2 = exoplsda$orthoScoreMN[, ocy];
text.lbls<-substr(names(pc1),1,14) # some names may be too long
imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
if(is.na(width)){
w <- 9;
}else if(width == 0){
imgSet$pca.score2d<<-imgName;
w <- 7.2;
}else{
w <- width;
}
h <- w;
#Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
suppressMessages(require('ellipse'));
op<-par(mar=c(5,5,3,3));
if(dataSet$cls.type == "disc"){
# obtain ellipse points to the scatter plot for each category
lvs <- levels(dataSet$cls);
pts.array <- array(0, dim=c(100,2,length(lvs)));
for(i in 1:length(lvs)){
inx <-dataSet$cls == lvs[i];
groupVar<-var(cbind(pc1[inx],pc2[inx]), na.rm=T);
groupMean<-cbind(mean(pc1[inx], na.rm=T),mean(pc2[inx], na.rm=T));
pts.array[,,i] <- ellipse(groupVar, centre = groupMean, level = reg, npoints=100);
}
# axis limits: data plus ellipses, padded by 1/12 of the range
xrg <- range (pc1, pts.array[,1,]);
yrg <- range (pc2, pts.array[,2,]);
x.ext<-(xrg[2]-xrg[1])/12;
y.ext<-(yrg[2]-yrg[1])/12;
xlims<-c(xrg[1]-x.ext, xrg[2]+x.ext);
ylims<-c(yrg[1]-y.ext, yrg[2]+y.ext);
cols <- GetColorSchema(grey.scale==1);
uniq.cols <- unique(cols);
plot(pc1, pc2, xlab=xlabel, xlim=xlims, ylim=ylims, ylab=ylabel, type='n', main="OPLSDA Scores Plot",
col=cols, pch=as.numeric(dataSet$cls)+1); ## added
grid(col = "lightgray", lty = "dotted", lwd = 1);
# make sure name and number of the same order DO NOT USE levels, which may be different
legend.nm <- unique(as.character(dataSet$cls));
## uniq.cols <- unique(cols);
## BHAN: when same color is choosen; it makes an error
if ( length(uniq.cols) > 1 ) {
names(uniq.cols) <- legend.nm;
}
# draw ellipse
for(i in 1:length(lvs)){
if (length(uniq.cols) > 1) {
polygon(pts.array[,,i], col=adjustcolor(uniq.cols[lvs[i]], alpha=0.25), border=NA);
} else {
polygon(pts.array[,,i], col=adjustcolor(uniq.cols, alpha=0.25), border=NA);
}
if(grey.scale) {
lines(pts.array[,,i], col=adjustcolor("black", alpha=0.5), lty=2);
}
}
pchs <- GetShapeSchema(show, grey.scale);
if(grey.scale) {
cols <- rep("black", length(cols));
}
if(show == 1){
# label every sample and overlay the points
text(pc1, pc2, label=text.lbls, pos=4, xpd=T, cex=0.75);
points(pc1, pc2, pch=pchs, col=cols);
}else{
if(length(uniq.cols) == 1){
points(pc1, pc2, pch=pchs, col=cols, cex=1.0);
}else{
if(grey.scale == 1 | (exists("shapeVec") && all(shapeVec>0))){
points(pc1, pc2, pch=pchs, col=cols, cex=1.8);
}else{
points(pc1, pc2, pch=21, bg=cols, cex=2);
}
}
}
uniq.pchs <- unique(pchs);
if(grey.scale) {
uniq.cols <- "black";
}
legend("topright", legend = legend.nm, pch=uniq.pchs, col=uniq.cols);
}else{
# continuous class: plain labelled scatter, no ellipses
plot(pc1, pc2, xlab=xlabel, ylab=ylabel, type='n', main="Scores Plot");
points(pc1, pc2, pch=15, col="magenta");
text(pc1, pc2, label=text.lbls, pos=4, col ="blue", xpd=T, cex=0.8);
}
par(op);
#dev.off();
}
##########################################################
################ 3Dplot for PCA #########################
##########################################################
# Interactive 3D PCA score plot (rgl): samples drawn as spheres on the
# three selected PCs, optionally with a 95% confidence ellipsoid per
# group and a background grid. Reads globals analSet$pca and dataSet$cls.
#
# inx1/inx2/inx3 - indices of the PCs to plot
# pointSize      - sphere size passed to plot3d()
# transparency   - alpha of the group ellipsoids
# Title          - plot title
# grd / ell      - toggle the grid / the ellipsoids
# group.col      - one colour per group level
Graphs3DPCA <- function(inx1=1, inx2=2, inx3=3, pointSize=0.2,
transparency=0.1,
Title="PCA PLOT",
grd=TRUE,
ell=TRUE,
group.col) {
scores <- analSet$pca$x
xs <- scores[, inx1]
ys <- scores[, inx2]
zs <- scores[, inx3]
grp <- dataSet$cls
levs <- levels(grp)
# axis label: "PC <i> ( <variance%> %)"
axisLab <- function(i) paste("PC", i, "(", round(100*analSet$pca$variance[i], 1), "%)")
plot3d(xs, ys, zs, xlab = axisLab(inx1), ylab = axisLab(inx2), zlab = axisLab(inx3),
col = group.col[as.numeric(grp)], size = pointSize, type = 's')
if (isTRUE(ell)) {
# one 95% ellipsoid per group, in the group's colour
for (i in seq_along(levs)) {
sel <- grp == levs[i]
shell <- ellipse3d(cov(cbind(xs[sel], ys[sel], zs[sel])),
centre = c(mean(xs[sel]), mean(ys[sel]), mean(zs[sel])), level = 0.95)
plot3d(shell, col = group.col[i], add = TRUE, alpha = transparency)
}
}
decorate3d(main = Title, box = FALSE)
legend3d("topright", legend = levs, pch = 16, col = group.col, cex = 1, inset = c(0.02))
if (isTRUE(grd)) {
grid3d(side = c("x", "y", "z"), at = NULL, col = "gray", lwd = 1, lty = 1, n = 5)
}
aspect3d(1, 1, 1)
rgl.material(color = "blue")
}
##########################################################
################ 3Dplot for PLSDA #######################
##########################################################
# Interactive 3D PLS-DA score plot (rgl): samples drawn as spheres on the
# three selected components of analSet$plsr$score, optionally with a 95%
# confidence ellipsoid per group and a background grid.
#
# inx1/inx2/inx3 - indices of the components to plot
# pointSize      - sphere size passed to plot3d()
# transparency   - alpha of the group ellipsoids
# Title          - plot title
# grd / ell      - toggle the grid / the ellipsoids
# group.col      - one colour per group level
Graphs3DPLSDA <- function(inx1=1,
inx2=2,
inx3=3,
pointSize=0.2,
transparency=0.1,
Title="PLSDA 3D PLOT",
grd=TRUE,
ell=TRUE,
group.col) {
scores <- analSet$plsr$score
xs <- scores[, inx1]
ys <- scores[, inx2]
zs <- scores[, inx3]
grp <- dataSet$cls
levs <- levels(grp)
# axis label: "Component <i> ( <X-variance%> %)"
axisLab <- function(i) paste("Component", i, "(", round(100*analSet$plsr$Xvar[i]/analSet$plsr$Xtotvar, 1), "%)")
plot3d(xs, ys, zs, xlab = axisLab(inx1), ylab = axisLab(inx2), zlab = axisLab(inx3),
col = group.col[as.numeric(grp)], size = pointSize, type = 's')
if (isTRUE(ell)) {
# one 95% ellipsoid per group, in the group's colour
for (i in seq_along(levs)) {
sel <- grp == levs[i]
shell <- ellipse3d(cov(cbind(xs[sel], ys[sel], zs[sel])),
centre = c(mean(xs[sel]), mean(ys[sel]), mean(zs[sel])), level = 0.95)
plot3d(shell, col = group.col[i], add = TRUE, alpha = transparency)
}
}
decorate3d(main = Title, box = FALSE)
legend3d("topright", legend = levs, pch = 16, col = group.col, cex = 1, inset = c(0.02))
if (isTRUE(grd)) {
grid3d(side = c("x", "y", "z"), at = NULL, col = "gray", lwd = 1, lty = 1, n = 5)
}
aspect3d(1, 1, 1)
rgl.material(color = "blue")
}
###############################################################################
############################### Cmpd plot #####################################
###############################################################################
# Box plot of a single compound's normalized values split by group.
# cmpdNm - compound (column) name in dataSet$norm; dpi is kept for the
# (disabled) image-saving workflow; colr - box fill colour(s).
# Returns the list produced by boxplot() (stats, n, names, ...).
PlotCmpdBoxView<-function(cmpdNm, dpi=200, colr){
# sanitize the compound name for use in a file name (device call disabled)
safeNm <- gsub("\\/", "_", cmpdNm)
imgName <- paste0(safeNm, "_dpi", dpi, ".")
par(mar = c(4, 3, 1, 2), oma = c(0, 0, 1, 0))
bplot = boxplot(dataSet$norm[, cmpdNm] ~ dataSet$cls, las = 2, col = colr)
title(main = cmpdNm, outer = TRUE)
return(bplot)
}
#################################################################################
############################## FC Plot ##########################################
#################################################################################
# Fold-change overview plot from the stored result analSet$fc, drawn on
# the active device (Cairo/dev.off device code is commented out).
# Three modes:
#  - paired data: per-variable up/down significant-pair counts as segments
#  - unpaired with a positive threshold: log2(FC) scatter with cutoffs
#  - unpaired, no threshold: group means of both classes side by side
# imgName/format/dpi/width are kept for interface compatibility.
MyPlotFC<-function(imgName="Fold Change Plot", format="png", dpi=72, width=NA){
imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
if(is.na(width)){
w <- 8;
}else if(width == 0){
w <- 7;
imgSet$fc<<-imgName;
}else{
w <- width;
}
h <- w*6/8;
par(mar=c(5,5,2,3));
fc = analSet$fc;
if(fc$paired){
# paired analysis: fc.all row 1 = up counts, row 2 = down counts;
# significant variables drawn thicker and in magenta
ylim<-c(-nrow(dataSet$norm)/2, nrow(dataSet$norm)/2);
xlim<-c(0, ncol(dataSet$norm));
plot(NULL, xlim=xlim, ylim=ylim, xlab = GetVariableLabel(),
ylab=paste("Count with FC >=", fc$max.thresh, "or <=", fc$min.thresh));
for(i in 1:ncol(fc$fc.all)){
segments(i,0, i, fc$fc.all[1,i], col= ifelse(fc$inx.up[i],"magenta", "darkgrey"),
lwd= ifelse(fc$inx.up[i], 2, 1));
segments(i,0, i, -fc$fc.all[2,i], col= ifelse(fc$inx.down[i], "magenta", "darkgrey"),
lwd= ifelse(fc$inx.down[i], 2, 1));
}
abline(h=fc$max.thresh, lty=3);
abline(h=fc$min.thresh, lty=3);
abline(h=0, lwd=1);
}else{
if(fc$raw.thresh > 0){
# be symmetrical
topVal <- max(abs(fc$fc.log));
ylim <- c(-topVal, topVal);
plot(fc$fc.log, ylab="Log2 (FC)", ylim = ylim, xlab = GetVariableLabel(), pch=19, axes=F,
col= ifelse(fc$inx.imp, "magenta", "darkgrey"));
axis(2);
axis(4); # added by Beomsoo
abline(h=log(fc$max.thresh,2), lty=3);
abline(h=log(fc$min.thresh,2), lty=3);
abline(h=0, lwd=1);
}else{ # plot side by side
# per-variable means of each class, separated by a 5-point NA gap;
# dotted lines mark each class mean +/- 2 SD
dat1 <- dataSet$norm[as.numeric(dataSet$cls) == 1, ];
dat2 <- dataSet$norm[as.numeric(dataSet$cls) == 2, ];
mns1 <- apply(dat1, 2, mean);
mn1 <- mean(mns1);
sd1 <- sd(mns1);
msd1.top <- mn1 + 2*sd1;
msd1.low <- mn1 - 2*sd1;
mns2 <- apply(dat2, 2, mean);
mn2 <- mean(mns2);
sd2 <- sd(mns2);
msd2.top <- mn2 + 2*sd2;
msd2.low <- mn2 - 2*sd2;
ylims <- range(c(mns1, mns2, msd1.top, msd2.top, msd1.low, msd2.low));
new.mns <- c(mns1, rep(NA, 5), mns2);
cols <- c(rep("magenta", length(mns1)), rep(NA, 5), rep("blue", length(mns2)));
pchs <- c(rep(15, length(mns1)), rep(NA, 5), rep(19, length(mns2)));
plot(new.mns, ylim=ylims, pch = pchs, col = cols, cex = 1.25, axes=F, ylab="");
axis(2);
axis(4); # added by Beomsoo
abline(h=mn1, col="magenta", lty=3, lwd=2);
abline(h=msd1.low, col="magenta", lty=3, lwd=1);
abline(h=msd1.top, col="magenta", lty=3, lwd=1);
abline(h=mn2, col="blue", lty=3, lwd=2);
abline(h=msd2.low, col="blue", lty=3, lwd=1);
abline(h=msd2.top, col="blue", lty=3, lwd=1);
# abline(h=mean(all.mns), col="darkgrey", lty=3);
axis(1, at=1:length(new.mns), labels=c(1:length(mns1),rep(NA, 5),1:length(mns2)));
}
}
#dev.off();
}
#################################################################################
############################## FC Table ##########################################
#################################################################################
# Fold-change result table for display: row names of analSet$fc$sig.mat
# become a leading "Name" column; remaining columns relabelled explicitly.
MyFCTable <-function(){
sig <- analSet$fc$sig.mat
out <- data.frame(Name = row.names(sig), sig, check.names = FALSE, stringsAsFactors = FALSE)
colnames(out) <- c("Name","Fold Change","Log2(FC)")
return(out)
}
###########################################################################################################
############################################# my volcano plot #############################################
###########################################################################################################
# Volcano plot from the stored result analSet$volcano on the active
# device. Paired data: significant-pair counts vs -log10(p); unpaired:
# log2(FC) vs -log10(p). Significant features (passing both the fold-
# change and p-value criteria) are highlighted and the top hits in each
# direction are labelled. Dotted lines mark the x/y cutoffs.
# imgName/format/dpi/width are kept for interface compatibility.
MyPlotVolcano2<-function(imgName="Volcano Plot", format="png", dpi=72, width=NA){
vcn<-analSet$volcano;
# semi-transparent grey/magenta for background vs highlighted points
MyGray <- rgb(t(col2rgb("black")), alpha=40, maxColorValue=255);
MyHighlight <- rgb(t(col2rgb("magenta")), alpha=80, maxColorValue=255);
if(vcn$paired){
xlim<-c(-nrow(dataSet$norm)/2, nrow(dataSet$norm)/2)*1.2;
# merge fc.all two rows into one, bigger one win
fc.all <- apply(vcn$fc.all, 2, function(x){ if(x[1] > x[2]){return(x[1])}else{return(-x[2])}})
hit.inx <- vcn$inx.p & (vcn$inx.up | vcn$inx.down);
plot(fc.all, vcn$p.log, xlim=xlim, pch=20, cex=ifelse(hit.inx, 1.2, 0.8),
col = ifelse(hit.inx, MyHighlight, MyGray),
xlab="Count of Significant Pairs", ylab="-log10(p)");
# label the top-5 up-regulated significant features (right side)
sig.upInx <- vcn$inx.p & vcn$inx.up;
p.topInx <- GetTopInx(vcn$p.log, 5, T) & vcn$inx.up;
fc.rtInx <- GetTopInx(vcn$fc.all[1,], 5, T);
lblInx <- p.topInx & sig.upInx & fc.rtInx;
if(sum(lblInx, na.rm=T) > 0){
text.lbls<-substr(colnames(dataSet$norm)[lblInx],1,14) # some names may be too long
text(vcn$fc.all[1,lblInx], vcn$p.log[lblInx],labels=text.lbls, pos=4, col="blue", srt=30, xpd=T, cex=0.8);
}
# label the top-5 down-regulated significant features (left side)
sig.dnInx <- vcn$inx.p & vcn$inx.down;
p.topInx <- GetTopInx(vcn$p.log, 5, T) & vcn$inx.down;
fc.leftInx <- GetTopInx(vcn$fc.all[2,], 5, T) & vcn$inx.down;
lblInx <-p.topInx & sig.dnInx & fc.leftInx;
if(sum(lblInx, na.rm=T) > 0){
text.lbls<-substr(colnames(dataSet$norm)[lblInx],1,14) # some names may be too long
text(-vcn$fc.all[2,lblInx], vcn$p.log[lblInx],labels=text.lbls, pos=2, col="blue", srt=-30, xpd=T, cex=0.8);
}
}else{
# unpaired: classic volcano; hits are red, the rest blue
imp.inx<-(vcn$inx.up | vcn$inx.down) & vcn$inx.p;
plot(vcn$fc.log, vcn$p.log, pch=20, cex=ifelse(imp.inx, 1.2, 0.7),
col = ifelse(imp.inx, "Red", "blue"),
xlab="log2 (FC)", ylab="-log10(p)");
grid();
sig.inx <- imp.inx;
# label top hits on the down-regulated (left) side
p.topInx <- GetTopInx(vcn$p.log, 5, T) & (vcn$inx.down);
fc.leftInx <- GetTopInx(vcn$fc.log, 5, F);
lblInx <- sig.inx & (p.topInx | fc.leftInx);
if(sum(lblInx, na.rm=T) > 0){
text.lbls<-substr(colnames(dataSet$norm)[lblInx],1,14) # some names may be too long
text(vcn$fc.log[lblInx], vcn$p.log[lblInx],labels=text.lbls, pos=2, col="blue", srt=-30, xpd=T, cex=0.8);
}
# label top hits on the up-regulated (right) side
p.topInx <- GetTopInx(vcn$p.log, 5, T) & (vcn$inx.up);
fc.rtInx <- GetTopInx(vcn$fc.log, 5, T);
lblInx <- sig.inx & (p.topInx | fc.rtInx);
if(sum(lblInx, na.rm=T) > 0){
text.lbls<-substr(colnames(dataSet$norm)[lblInx],1,14) # some names may be too long
text(vcn$fc.log[lblInx], vcn$p.log[lblInx],labels=text.lbls, pos=4, col="blue", srt=30, xpd=T, cex=0.8);
}
}
# cutoff guide lines (apply to both modes)
abline (v = vcn$max.xthresh, lty=3);
abline (v = vcn$min.xthresh, lty=3);
abline (h = vcn$thresh.y, lty=3);
axis(4); # added by Beomsoo
}
###########################################################################################################
############################################# volcano Table #############################################
###########################################################################################################
# Volcano result table for display: row names of analSet$volcano$sig.mat
# become a leading "Name" column; remaining columns relabelled explicitly.
MyVOLTable <-function(){
sig <- analSet$volcano$sig.mat
out <- data.frame(Name = row.names(sig), sig, check.names = FALSE, stringsAsFactors = FALSE)
colnames(out) <- c("Name","FC","log2(FC)","p.value","-log10(p)")
return(out)
}
##################################################################################################################
######################################### CorrHeatMap ############################################################
##################################################################################################################
# Correlation heat map of the normalized variables (dataSet$norm) using
# pheatmap on the active device.
#
# cor.method - correlation type passed to cor() (e.g. "pearson")
# colors     - palette keyword (gbr/wnvyb/rwg/rwb/heat/topo/gray/default)
# viewOpt    - "overview" uses a square canvas; otherwise height scales
#              with the number of variables (only used for w/h bookkeeping
#              since the Cairo device call is disabled)
# fix.col    - TRUE fixes the colour scale to the full [-1, 1] range
# no.clst    - TRUE disables row/column clustering
# top/topNum - if top, keep only the topNum variables with the largest
#              total absolute correlation
#
# Side effect: writes "correlation_table.csv" with the correlation matrix.
MyPlotCorrHeatMap<-function(imgName, format="png", dpi=72, width=NA, cor.method,
colors, viewOpt, fix.col, no.clst, top, topNum){
main <- xlab <- ylab <- NULL;
data <- dataSet$norm;
# cap at the 1000 most variable columns (by IQR) to keep cor() tractable
if(ncol(data) > 1000){
filter.val <- apply(data.matrix(data), 2, IQR, na.rm=T);
rk <- rank(-filter.val, ties.method='random');
data <- as.data.frame(data[,rk <=1000]);
print("Data is reduced to 1000 vars ..");
}
colnames(data)<-substr(colnames(data), 1, 18);
corr.mat<-cor(data, method=cor.method);
# use total abs(correlation) to select
if(top){
cor.sum <- apply(abs(corr.mat), 1, sum);
cor.rk <- rank(-cor.sum);
var.sel <- cor.rk <= topNum;
corr.mat <- corr.mat[var.sel, var.sel];
}
# set up parameter for heatmap
suppressMessages(require(RColorBrewer));
suppressMessages(require(gplots));
if(colors=="gbr"){
colors <- colorRampPalette(c("green", "black", "red"), space="rgb")(256);
}else if (colors=="wnvyb"){
colors <- colorRampPalette(c("white","navyblue"), space="Lab")(256);
}else if (colors=="rwg"){
colors <- colorRampPalette(c("red", "white", "green"), space="rgb")(256);
}else if (colors=="rwb"){
colors <- colorRampPalette(c("red","white","blue"),space="rgb")(256);
}else if(colors == "heat"){
colors <- heat.colors(256);
}else if(colors == "topo"){
colors <- topo.colors(256);
}else if(colors == "gray"){
colors <- colorRampPalette(c("grey90", "grey10"))(256);
}else{
colors <- rev(colorRampPalette(brewer.pal(10, "RdBu"))(256));
}
# width/height bookkeeping for the (disabled) file device
imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
if(viewOpt == "overview"){
if(is.na(width)){
w <- 9;
}else if(width == 0){
w <- 7.2;
imgSet$heatmap<<-imgName;
}else{
w <- 7.2;
}
h <- w;
}else{
if(ncol(corr.mat) > 50){
myH <- ncol(corr.mat)*12 + 40;
}else if(ncol(corr.mat) > 20){
myH <- ncol(corr.mat)*12 + 60;
}else{
myH <- ncol(corr.mat)*12 + 120;
}
h <- round(myH/72,2);
if(is.na(width)){
w <- h;
}else if(width == 0){
w <- h <- 7.2;
imgSet$corr.heatmap<<-imgName;
}else{
w <- h <- 7.2;
}
}
#Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
if(no.clst){
rowv=FALSE;
colv=FALSE;
dendro= "none";
}else{
rowv=TRUE;
colv=TRUE;
dendro= "both";
}
require(pheatmap);
# NOTE(review): cluster_rows is fed colv and cluster_cols is fed rowv --
# the names look swapped, but both flags always hold the same value
# (!no.clst), so the output is unaffected. dendro is unused by pheatmap.
if(fix.col){
# fixed scale: map the full correlation range [-1, 1] onto the palette
breaks <- seq(from = -1, to = 1, length = 257);
pheatmap(corr.mat,
fontsize=8, fontsize_row=8,
cluster_rows = colv,
cluster_cols = rowv,
color = colors,
breaks = breaks
);
}else{
pheatmap(corr.mat,
fontsize=8, fontsize_row=8,
cluster_rows = colv,
cluster_cols = rowv,
color = colors
);
}
#dev.off();
write.csv(signif(corr.mat,5), file="correlation_table.csv")
}
######################################################################################
###############################correlation plot for pattern search ###################
######################################################################################
# Horizontal bar chart of the correlations from a pattern search
# (analSet$corr$cor.mat, column 1 = correlation, column 3 = p-value).
# If more than 25 features exist, the 25 most significant are kept and
# re-ordered by correlation sign/size. Positive correlations are drawn
# in mistyrose, negative in lightblue, on the active device.
MyPlotCorr <- function(imgName="corr2", format="png", dpi=72, width=NA){
cor.res <- analSet$corr$cor.mat;
pattern <- analSet$corr$pattern;
title <- paste(GetVariableLabel(), "correlated with the", pattern);
if(nrow(cor.res) > 25){
# first get most signficant ones (p value)
ord.inx<-order(cor.res[,3]);
cor.res <- cor.res[ord.inx, ];
cor.res <- cor.res[1:25, ];
# then order by their direction (correlation)
ord.inx<-order(cor.res[,1]);
if(sum(cor.res[,1] > 0) == 0){ # all negative correlation
ord.inx <- rev(ord.inx);
}
cor.res <- cor.res[ord.inx, ];
title <- paste("Top 25", tolower(GetVariableLabel()), "correlated with the", pattern);
}
# width/height bookkeeping for the (disabled) file device
imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
if(is.na(width)){
w <- h <- 7.2;
}else if(width == 0){
# NOTE(review): h is not set in this branch; harmless while the Cairo
# call below stays commented out, but would error if re-enabled.
w <- 7.2;
imgSet$corr<<-imgName;
}else{
w <- h <- width;
}
# Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
par(mar=c(5,6,4,3))
rownames(cor.res)<-substr(rownames(cor.res), 1, 18);
cols <- ifelse(cor.res[,1] >0, "mistyrose","lightblue");
# dotchart draws the labelled frame; barplot overlays the actual bars
dotchart(cor.res[,1], pch="", xlim=c(-1,1), xlab="Correlation coefficients", main=title);
rownames(cor.res) <- NULL;
barplot(cor.res[,1], space=c(0.5, rep(0, nrow(cor.res)-1)), xlim=c(-1,1), xaxt="n", col = cols, add=T,horiz=T);
#dev.off();
}
######################################################################################
###############################correlation Table ####################################
######################################################################################
# Pattern-search correlation table for display: the feature names stored
# as row names of analSet$corr$cor.mat become a leading "Name" column.
MyCORRTable <-function(){
cm <- analSet$corr$cor.mat
out <- cbind(Name = row.names(cm), cm)
return(out)
}
##############################################################################################
#################################### PCA Summry Plot #########################################
##############################################################################################
# Pairs-plot matrix of the first pc.num principal components (global
# analSet$pca), each panel labelled with the percent variance explained.
# For discrete classes, points are coloured/shaped by group. Image-size
# bookkeeping is retained although the Cairo device call is disabled.
MyPlotPCAPairSummary<-function(imgName="PCA Summary", format="png", dpi=72, width=NA, pc.num){
pclabels <- paste("PC", 1:pc.num, "\n", round(100*analSet$pca$variance[1:pc.num], 1), "%")
imgName = paste0(imgName, "dpi", dpi, ".", format)
if (is.na(width)) {
w <- 10
} else if (width == 0) {
w <- 8
imgSet$pca.pair <<- imgName
} else {
w <- width
}
h <- w
#Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
pc.scores <- analSet$pca$x[, 1:pc.num]
if (dataSet$cls.type == "disc") {
pairs(pc.scores, col = GetColorSchema(), pch = as.numeric(dataSet$cls) + 1, labels = pclabels)
} else {
pairs(pc.scores, labels = pclabels)
}
#dev.off();
}
#####################################################################################################
###################################################### Scree Plot ###################################
#####################################################################################################
# Scree plot of the first scree.num PCs from the global analSet$pca:
# blue line = per-PC variance explained, green line = cumulative
# variance, both annotated with percentage labels. Drawn on the active
# device; imgName/format/dpi/width are kept for interface compatibility.
MyPlotPCAScree<-function(imgName="PCA Scree Plot", format="png", dpi=72, width=NA, scree.num){
stds <-analSet$pca$std[1:scree.num]; # NOTE(review): stds is not used below
pcvars<-analSet$pca$variance[1:scree.num];
cumvars<-analSet$pca$cum.var[1:scree.num];
# y-range padded by 10%, clamped to [0, 1]
ylims <- range(c(pcvars,cumvars));
extd<-(ylims[2]-ylims[1])/10
miny<- ifelse(ylims[1]-extd>0, ylims[1]-extd, 0);
maxy<- ifelse(ylims[2]+extd>1, 1.0, ylims[2]+extd);
imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
if(is.na(width)){
w <- 10;
}else if(width == 0){
w <- 8;
imgSet$pca.scree<<-imgName;
}else{
w <- width;
}
h <- w*2/3;
#Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
par(mar=c(5,5,6,3));
plot(pcvars, type='l', col='blue', main='Scree plot', xlab='PC index', ylab='Variance explained', ylim=c(miny, maxy), axes=F)
text(pcvars, labels =paste(100*round(pcvars,3),'%'), adj=c(-0.3, -0.5), srt=45, xpd=T)
points(pcvars, col='red');
lines(cumvars, type='l', col='green')
text(cumvars, labels =paste(100*round(cumvars,3),'%'), adj=c(-0.3, -0.5), srt=45, xpd=T)
points(cumvars, col='red');
abline(v=1:scree.num, lty=3);
axis(2);
axis(1, 1:length(pcvars), 1:length(pcvars));
#dev.off();
}
##################################################################################################
######################################### 2D PCA #################################################
##################################################################################################
#' 2D PCA scores plot with per-group confidence ellipses.
#'
#' @param imgName,format,dpi,width shared image-naming convention; width == 0
#'   registers the image name in the global imgSet (web mode).
#' @param pcx,pcy component indices for the x and y axes.
#' @param reg confidence level for the group ellipses (default 0.95).
#' @param show 1 = label every sample with its (truncated) name.
#' @param grey.scale 1 = greyscale rendering (black points, dashed ellipses).
#' Reads analSet$pca and dataSet (cls, cls.type); uses the 'ellipse' package
#' and the project helpers GetColorSchema()/GetShapeSchema().
MyPlotPCA2DScore <- function(imgName="2D PCA PLOT", format="png", dpi=72, width=NA, pcx, pcy, reg = 0.95, show=1, grey.scale = 0){
xlabel = paste("PC",pcx, "(", round(100*analSet$pca$variance[pcx],1), "%)");
ylabel = paste("PC",pcy, "(", round(100*analSet$pca$variance[pcy],1), "%)");
pc1 = analSet$pca$x[, pcx];
pc2 = analSet$pca$x[, pcy];
text.lbls<-substr(names(pc1),1,14) # some names may be too long
imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
if(is.na(width)){
w <- 9;
}else if(width == 0){
imgSet$pca.score2d<<-imgName;
w <- 7.2;
}else{
w <- width;
}
h <- w;
# Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
suppressMessages(require('ellipse'));
op<-par(mar=c(5,5,3,3));
if(dataSet$cls.type == "disc"){
# obtain ellipse points to the scatter plot for each category
lvs <- levels(dataSet$cls);
pts.array <- array(0, dim=c(100,2,length(lvs)));
for(i in 1:length(lvs)){
inx <-dataSet$cls == lvs[i];
groupVar<-var(cbind(pc1[inx],pc2[inx]), na.rm=T);
groupMean<-cbind(mean(pc1[inx], na.rm=T),mean(pc2[inx], na.rm=T));
pts.array[,,i] <- ellipse(groupVar, centre = groupMean, level = reg, npoints=100);
}
# extend the plot range so the ellipses are not clipped
xrg <- range (pc1, pts.array[,1,]);
yrg <- range (pc2, pts.array[,2,]);
x.ext<-(xrg[2]-xrg[1])/12;
y.ext<-(yrg[2]-yrg[1])/12;
xlims<-c(xrg[1]-x.ext, xrg[2]+x.ext);
ylims<-c(yrg[1]-y.ext, yrg[2]+y.ext);
cols <- GetColorSchema(grey.scale==1);
uniq.cols <- unique(cols);
# fix: type='n' draws nothing, so the former color=/pch= arguments here only
# produced "not a graphical parameter" warnings ('color' is not a base-graphics
# parameter); dropped to match the equivalent PLS-DA score plot in this file
plot(pc1, pc2, xlab=xlabel, xlim=xlims, ylim=ylims, ylab=ylabel, type='n', main="Scores Plot");
grid(col = "lightgray", lty = "dotted", lwd = 1);
# make sure name and number of the same order DO NOT USE levels, which may be different
legend.nm <- unique(as.character(dataSet$cls));
## BHAN: when same color is choosen; it makes an error
if ( length(uniq.cols) > 1 ) {
names(uniq.cols) <- legend.nm;
}
# draw ellipse
for(i in 1:length(lvs)){
if (length(uniq.cols) > 1) {
polygon(pts.array[,,i], col=adjustcolor(uniq.cols[lvs[i]], alpha=0.25), border=NA);
} else {
polygon(pts.array[,,i], col=adjustcolor(uniq.cols, alpha=0.25), border=NA);
}
if(grey.scale) {
lines(pts.array[,,i], col=adjustcolor("black", alpha=0.5), lty=2);
}
}
pchs <- GetShapeSchema(show, grey.scale);
if(grey.scale) {
cols <- rep("black", length(cols));
}
if(show == 1){
# label every sample, then overlay the points
text(pc1, pc2, label=text.lbls, pos=4, xpd=T, cex=0.75);
points(pc1, pc2, pch=pchs, col=cols);
}else{
if(length(uniq.cols) == 1){
points(pc1, pc2, pch=pchs, col=cols, cex=1.0);
}else{
if(grey.scale == 1 | (exists("shapeVec") && all(shapeVec>0))){
points(pc1, pc2, pch=pchs, col=cols, cex=1.8);
}else{
points(pc1, pc2, pch=21, bg=cols, cex=2);
}
}
}
uniq.pchs <- unique(pchs);
if(grey.scale) {
uniq.cols <- "black";
}
legend("topright", legend = legend.nm, pch=uniq.pchs, col=uniq.cols);
}else{
# continuous class label: simple labelled scatter, no ellipses
plot(pc1, pc2, xlab=xlabel, ylab=ylabel, type='n', main="Scores Plot");
points(pc1, pc2, pch=15, col="magenta");
text(pc1, pc2, label=text.lbls, pos=4, col ="blue", xpd=T, cex=0.8);
}
par(op);
#dev.off();
}
##################################################################################################
######################################### PCA Loading Plot #######################################
##################################################################################################
# plot PCA loadings and also set up the matrix for display
# plot PCA loadings and also set up the matrix for display
#' @param inx1,inx2 component indices whose loadings are plotted.
#' @param plotType "scatter" for a 2D loading scatter, anything else for
#'   two stacked barplots (one per component).
#' @param lbl.feat scatter mode only: > 0 labels each feature (truncated name).
#' Side effects: caches the loading matrix and a jittered x-coordinate vector
#' in analSet$pca (used by the interactive display), and sets the global
#' pca.axis.lims in scatter mode.
MyPlotPCALoading<-function(imgName="Loading Plot", format="png", dpi=72, width=NA, inx1, inx2, plotType, lbl.feat=1){
loadings<-signif(as.matrix(cbind(analSet$pca$rotation[,inx1],analSet$pca$rotation[,inx2])),5);
ldName1<-paste("Loadings", inx1);
ldName2<-paste("Loadings", inx2);
colnames(loadings)<-c(ldName1, ldName2);
# jittered copy so overlapping features remain clickable in the web UI
load.x.uniq <- jitter(loadings[,1]);
names(load.x.uniq) <- rownames(loadings);
analSet$pca$load.x.uniq <<- load.x.uniq;
analSet$pca$imp.loads<<-loadings; # set up the loading matrix
imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
if(is.na(width)){
w <- 9;
}else if(width == 0){
w <- 7.2;
imgSet$pca.loading<<-imgName;
}else{
w <- width;
}
h <- w;
#Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
if(plotType=="scatter"){
#par(mar=c(6,5,2,6));
plot(loadings[,1],loadings[,2], las=2, xlab=ldName1, ylab=ldName2);
pca.axis.lims <<- par("usr"); # x1, x2, y1 ,y2
grid(col = "lightgray", lty = "dotted", lwd = 1);
points(loadings[,1],loadings[,2], pch=19, col="blue");
if(lbl.feat > 0){
text(loadings[,1],loadings[,2], labels=substr(rownames(loadings), 1, 12), pos=4, col="blue", xpd=T);
}
}else{ # barplot
# top panel: component 1 bars (no names); bottom panel: component 2 bars
# with feature names; shared y-limits so the panels are comparable
layout(matrix(c(1,1,2,2,2), nrow=5, byrow=T), respect = FALSE)
cmpd.nms <- substr(rownames(loadings), 1, 14);
hlims <- c(min(loadings[,1], loadings[,2]), max(loadings[,1], loadings[,2]));
par(mar=c(1,4,4,1));
barplot(loadings[,1], names.arg=NA, las=2, ylim=hlims, main =ldName1);
par(mar=c(10,4,3,1));
barplot(loadings[,2], names.arg=cmpd.nms, las=2, cex.names=1.0, ylim=hlims, main =ldName2);
}
#dev.off();
}
##################################################################################################
################################ PCA loading table ##############################################
##################################################################################################
# Build the PCA loading table for display: feature names in the first
# column, followed by the loadings of the two selected components
# (5 significant digits). x and y are component indices.
MyLOADTable <- function(x, y){
  rot <- analSet$pca$rotation;
  load.mat <- signif(as.matrix(cbind(rot[, x], rot[, y])), 5);
  colnames(load.mat) <- c(paste("Loadings", x), paste("Loadings", y));
  out <- cbind(row.names(load.mat), load.mat);
  colnames(out)[1] <- "Name";
  return(out)
}
##################################################################################################
######################################### Biplot #################################################
##################################################################################################
# Classic PCA biplot of the two chosen components (inx1, inx2): scores are
# down-scaled and loadings up-scaled by sdev * sqrt(n), as in stats::biplot.
# width == 0 registers the image name in the global imgSet (web mode).
MyPlotPCABiplot <- function(imgName="biplot", format="png", dpi=72, width=NA, inx1, inx2){
  comp.inx <- c(inx1, inx2);
  score.mat <- analSet$pca$x;
  scale.fac <- analSet$pca$sdev[comp.inx] * sqrt(NROW(score.mat));
  imgName <- paste(imgName, "dpi", dpi, ".", format, sep="");
  if(is.na(width)){
    w <- 9;
  }else if(width == 0){
    w <- 7.2;
    imgSet$pca.biplot <<- imgName;
  }else{
    w <- width;
  }
  h <- w;  # square canvas (only relevant when the Cairo call is enabled)
  #Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
  biplot(t(t(score.mat[, comp.inx]) / scale.fac),
         t(t(analSet$pca$rotation[, comp.inx]) * scale.fac),
         xpd = T, cex = 0.9);
  #dev.off();
}
#################################################################################################
####################################### PLSDA Summary plot ######################################
#################################################################################################
# Pairs plot of the first pc.num PLS components, with the percentage of
# X-variance explained in each panel label. Colors/shapes come from the
# project helpers; width == 0 registers the image name globally (web mode).
MyPlotPLSPairSummary <- function(imgName="PLSDA Summary Plot", format="png", dpi=72, width=NA, pc.num){
  imgName <- paste(imgName, "dpi", dpi, ".", format, sep="");
  if(is.na(width)){
    w <- 9;
  }else if(width == 0){
    w <- 7.2;
    imgSet$pls.pair <<- imgName;
  }else{
    w <- width;
  }
  h <- w;
  #Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
  # percentage of X-variance explained by each component
  expl.var <- round(100 * analSet$plsr$Xvar[1:pc.num] / analSet$plsr$Xtotvar, 1);
  pclabels <- paste("Component", 1:pc.num, "\n", expl.var, "%");
  pairs(analSet$plsr$scores[, 1:pc.num],
        col=GetColorSchema(), pch=as.numeric(dataSet$cls)+1, labels=pclabels)
  #dev.off();
}
##################################################################################################
################################ PLSDA 2D plot ###################################################
##################################################################################################
#' 2D PLS-DA scores plot with per-group confidence ellipses.
#' @param inx1,inx2 component indices for the x and y axes.
#' @param reg confidence level for the group ellipses (default 0.95).
#' @param show 1 = label each sample with its (truncated) row name.
#' @param grey.scale 1 = greyscale rendering (black points, dashed ellipses).
#' Reads analSet$plsr and dataSet (norm, cls); uses the 'ellipse' package and
#' the project helpers GetColorSchema()/GetShapeSchema().
MyPlotPLS2DScore<-function(imgName="PLSDA2D Plot", format="png", dpi=72, width=NA, inx1, inx2, reg=0.95, show=1, grey.scale=0){
suppressMessages(require('ellipse'));
imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
if(is.na(width)){
w <- 9;
}else if(width == 0){
w <- 7.2;
# width == 0 is web mode: remember the generated image name globally
imgSet$pls.score2d<<-imgName;
}else{
w <- width;
}
h <- w;
#Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
par(mar=c(5,5,3,3));
lv1 <- analSet$plsr$scores[,inx1];
lv2 <- analSet$plsr$scores[,inx2];
# axis labels carry the % of X-variance explained by each component
xlabel <- paste("Component", inx1, "(", round(100*analSet$plsr$Xvar[inx1]/analSet$plsr$Xtotvar,1), "%)");
ylabel <- paste("Component", inx2, "(", round(100*analSet$plsr$Xvar[inx2]/analSet$plsr$Xtotvar,1), "%)");
text.lbls<-substr(rownames(dataSet$norm),1,12) # some names may be too long
# obtain ellipse points to the scatter plot for each category
lvs <- levels(dataSet$cls);
pts.array <- array(0, dim=c(100,2,length(lvs)));
for(i in 1:length(lvs)){
inx <-dataSet$cls == lvs[i];
groupVar<-var(cbind(lv1[inx],lv2[inx]), na.rm=T);
groupMean<-cbind(mean(lv1[inx], na.rm=T),mean(lv2[inx], na.rm=T));
pts.array[,,i] <- ellipse(groupVar, centre = groupMean, level = reg, npoints=100);
}
# extend the axis ranges so the ellipses are not clipped
xrg <- range (lv1, pts.array[,1,]);
yrg <- range (lv2, pts.array[,2,]);
x.ext<-(xrg[2]-xrg[1])/12;
y.ext<-(yrg[2]-yrg[1])/12;
xlims<-c(xrg[1]-x.ext, xrg[2]+x.ext);
ylims<-c(yrg[1]-y.ext, yrg[2]+y.ext);
## cols = as.numeric(dataSet$cls)+1;
cols <- GetColorSchema(grey.scale==1);
uniq.cols <- unique(cols);
plot(lv1, lv2, xlab=xlabel, xlim=xlims, ylim=ylims, ylab=ylabel, type='n', main="Scores Plot");
grid(col = "lightgray", lty = "dotted", lwd = 1);
# make sure name and number of the same order DO NOT USE levels, which may be different
legend.nm <- unique(as.character(dataSet$cls));
## uniq.cols <- unique(cols);
## BHAN: when same color is choosen for black/white; it makes an error
# names(uniq.cols) <- legend.nm;
if ( length(uniq.cols) > 1 ) {
names(uniq.cols) <- legend.nm;
}
# draw ellipse
for(i in 1:length(lvs)){
if ( length(uniq.cols) > 1) {
polygon(pts.array[,,i], col=adjustcolor(uniq.cols[lvs[i]], alpha=0.25), border=NA);
} else {
polygon(pts.array[,,i], col=adjustcolor(uniq.cols, alpha=0.25), border=NA);
}
if(grey.scale) {
lines(pts.array[,,i], col=adjustcolor("black", alpha=0.5), lty=2);
}
}
pchs <- GetShapeSchema(show, grey.scale);
if(grey.scale) {
cols <- rep("black", length(cols));
}
if(show==1){ # display sample name set on
text(lv1, lv2, label=text.lbls, pos=4, xpd=T, cex=0.75);
points(lv1, lv2, pch=pchs, col=cols);
}else{
if (length(uniq.cols) == 1) {
points(lv1, lv2, pch=pchs, col=cols, cex=1.0);
} else {
# larger plain symbols in greyscale / custom-shape mode, filled circles otherwise
if(grey.scale == 1 | (exists("shapeVec") && all(shapeVec>0))){
points(lv1, lv2, pch=pchs, col=cols, cex=1.8);
}else{
points(lv1, lv2, pch=21, bg=cols, cex=2);
}
}
}
uniq.pchs <- unique(pchs);
if(grey.scale) {
uniq.cols <- "black";
}
legend("topright", legend = legend.nm, pch=uniq.pchs, col=uniq.cols);
#dev.off();
}
##################################################################################################
################################ PLSDA loading plot ##############################################
##################################################################################################
#' Plot PLS-DA loadings for two components and cache the loading matrix.
#' @param inx1,inx2 component indices whose loadings are plotted.
#' @param plotType "scatter" for a 2D loading scatter, anything else for two
#'   stacked barplots (one per component).
#' @param lbl.feat scatter mode only: > 0 labels each feature (truncated name).
#' Side effects: caches loadings and a jittered x-coordinate vector in
#' analSet$plsr, and sets the global pls.axis.lims in scatter mode.
MyPlotPLSLoading<-function(imgName="PLSDA Loading plot", format="png", dpi=72, width=NA, inx1, inx2, plotType, lbl.feat=1){
# named vector
load1<-analSet$plsr$loadings[,inx1];
load2<-analSet$plsr$loadings[,inx2];
loadings = signif(as.matrix(cbind(load1, load2)),5);
ldName1<-paste("Loadings", inx1);
ldName2<-paste("Loadings", inx2)
colnames(loadings)<-c(ldName1, ldName2);
# jittered copy so overlapping features remain distinguishable in the web UI
load.x.uniq <- jitter(loadings[,1]);
names(load.x.uniq) <- rownames(loadings);
analSet$plsr$load.x.uniq <<- load.x.uniq;
analSet$plsr$imp.loads<<-loadings; # set up loading matrix
imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
if(is.na(width)){
w <- 9;
}else if(width == 0){
w <- 7.2;
imgSet$pls.loading<<-imgName;
}else{
w <- width;
}
h <- w;
#Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
if(plotType == "scatter"){
par(mar=c(6,4,4,5));
plot(loadings[,1],loadings[,2], las=2, xlab=ldName1, ylab=ldName2);
pls.axis.lims <<- par("usr"); # x1, x2, y1 ,y2
grid(col = "lightgray", lty = "dotted", lwd = 1);
points(loadings[,1],loadings[,2], pch=19, col="magenta");
if(lbl.feat > 0){
text(loadings[,1],loadings[,2], labels=substr(rownames(loadings), 1, 12), pos=4, col="blue", xpd=T);
}
}else{ # barplot
# two stacked bar panels sharing y-limits; names only on the bottom panel
cmpd.nms <- substr(rownames(loadings), 1, 14);
hlims <- c(min(loadings[,1], loadings[,2]), max(loadings[,1], loadings[,2]));
layout(matrix(c(1,1,2,2,2), nrow=5, byrow=T))
par(mar=c(1,4,4,1));
barplot(loadings[,1], names.arg=NA, las=2, ylim=hlims, main = ldName1);
par(mar=c(10,4,3,1));
barplot(loadings[,2], names.arg=cmpd.nms, cex.names=1.0, las=2, ylim=hlims, main = ldName2);
}
#dev.off();
}
##############################################################################################
################################ PLSDA Loading Table #########################################
##############################################################################################
# Build the PLS-DA loading table for display: feature names in the first
# column, followed by the loadings of the two selected components
# (5 significant digits). inx and iny are component indices.
MyLOADTable2 <- function(inx, iny){
  lds <- analSet$plsr$loadings;
  load.mat <- signif(as.matrix(cbind(lds[, inx], lds[, iny])), 5);
  colnames(load.mat) <- c(paste("Loadings", inx), paste("Loadings", iny));
  out <- cbind(row.names(load.mat), load.mat);
  colnames(out)[1] <- "Name";
  return(out)
}
##################################################################################################
################################ PLSDA CV Plot ###################################################
##################################################################################################
#' Grouped barplot of PLS-DA cross-validation performance (one group of bars
#' per number of components), with a red star over the measure/model selected
#' by analSet$plsda$choice ("Q2", "R2", or accuracy).
#' Reads analSet$plsda (fit.info matrix, best.num, choice).
MyPlotPLS.Classification<-function(imgName="PLSCV", format="png", dpi=200, width=NA){
res<-analSet$plsda$fit.info;
colnames(res) <- 1:ncol(res);
best.num <- analSet$plsda$best.num;
choice <- analSet$plsda$choice;
imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
if(is.na(width)){
w <- 7;
}else if(width == 0){
w <- 7;
# width == 0 is web mode: remember the generated image name globally
imgSet$pls.class<<-imgName;
}else{
w <- width;
}
h <- w*5/7;
#Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
par(mar=c(5,5,2,7)); # put legend on the right outside
barplot(res, beside = TRUE, col = c("lightblue", "mistyrose","lightcyan"), ylim= c(0,1.05), xlab="Number of components", ylab="Performance");
# bar x-positions: each group occupies 3 bars + 1 gap, so the k-th bar of
# group best.num sits at (best.num-1)*3 + best.num + (k - 0.5)
if(choice == "Q2"){
text((best.num-1)*3 + best.num + 2.5, res[3,best.num]+ 0.02, labels = "*", cex=2.5, col="red");
}else if(choice == "R2"){
text((best.num-1)*3 + best.num + 1.5, res[2,best.num]+ 0.02, labels = "*", cex=2.5, col="red");
}else{
text((best.num-1)*3 + best.num + 0.5, res[1,best.num]+ 0.02, labels = "*", cex=2.5, col="red");
}
# calculate the maximum y position, each bar is 1, place one space between the group
xpos <- ncol(res)*3 + ncol(res) + 1;
legend(xpos, 1.0, rownames(res), fill = c("lightblue", "mistyrose","lightcyan"), xpd=T);
#dev.off();
}
##################################################################################################
################################ PLSDA CV Table ###################################################
##################################################################################################
# PLS-DA cross-validation performance table for display: row labels in the
# first (unnamed) column, values rounded to 5 significant digits.
# Fix: the original applied signif() twice — once merely to read the row
# names, which signif() does not change; compute it once instead.
MyCVTable <- function(){
  fit.info <- signif(analSet$plsda$fit.info, 5)
  m <- cbind(row.names(fit.info), fit.info)
  colnames(m)[1] <- " "
  return(m)
}
##################################################################################################
################################ PLSDA Imp Plot ##################################################
##################################################################################################
#' Plot PLS-DA variable importance via the shared PlotImpVar() helper.
#' @param type "vip" plots VIP scores, anything else plots coefficients.
#' @param feat.nm column of the VIP/coefficient matrix to plot (component or
#'   class, depending on the matrix layout — determined elsewhere).
#' @param feat.num number of top features to display.
#' @param color.BW TRUE for black-and-white rendering.
#' Side effect: records the chosen importance type in analSet$plsda$imp.type.
MyPlotPLS.Imp<-function(imgName="PLSDA Imp", format="png", dpi=72, width=NA, type, feat.nm, feat.num, color.BW=FALSE){
imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
if(is.na(width)){
w <- 8;
}else if(width == 0){
w <- 7;
# width == 0 is web mode: remember the generated image name globally
imgSet$pls.imp<<-imgName;
}else{
w <- width;
}
h <- w;
#Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
if(type=="vip"){
analSet$plsda$imp.type<<-"vip";
vips<-analSet$plsda$vip.mat[,feat.nm];
PlotImpVar(vips, "VIP scores", feat.num, color.BW);
}else{
analSet$plsda$imp.type<<-"coef";
data<-analSet$plsda$coef.mat[,feat.nm];
PlotImpVar(data, "Coefficients", feat.num, color.BW);
}
#dev.off();
}
##################################################################################################
################################ PLSDA Imp table ##################################################
##################################################################################################
# VIP-score table for display: feature names in a "Name" column followed by
# the VIP matrix rounded to 5 significant digits.
VIPTab <- function(){
  vip <- signif(as.matrix(analSet$plsda$vip.mat), 5)
  out <- cbind(row.names(vip), vip)
  colnames(out)[1] <- "Name"
  return(out)
}
# PLS-DA coefficient table for display: feature names in a "Name" column
# followed by the coefficient matrix rounded to 5 significant digits.
COEFTab <- function(){
  coefs <- signif(as.matrix(analSet$plsda$coef.mat), 5)
  out <- cbind(row.names(coefs), coefs)
  colnames(out)[1] <- "Name"
  return(out)
}
##################################################################################################
################################ PLSDA Perm Plot #################################################
##################################################################################################
#' Histogram of the PLS-DA permutation-test statistics, with an arrow marking
#' the observed statistic (first element of analSet$plsda$permut) and the
#' permutation p-value (analSet$plsda$permut.p) as annotation.
MyPlotPLS.Permutation<-function(imgName="PLSDA Permutation", format="png", dpi=72, width=NA){
bw.vec<-analSet$plsda$permut;
len<-length(bw.vec);
imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
if(is.na(width)){
w <- 8;
}else if(width == 0){
w <- 7;
# width == 0 is web mode: remember the generated image name globally
imgSet$pls.permut<<-imgName;
}else{
w <- width;
}
h <- w*6/8;
#Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
par(mar=c(5,5,2,4));
hst <- hist(bw.vec, breaks = "FD", freq=T,
ylab="Frequency", xlab= 'Permutation test statistics', col="lightblue", main="");
# add the indicator using original label
# NOTE(review): h (plot height) is reused here as the max bar count; harmless
# while the Cairo device call above stays commented out
h <- max(hst$counts)
arrows(bw.vec[1], h/5, bw.vec[1], 0, col="red", lwd=2);
text(bw.vec[1], h/3.5, paste('Observed \n statistic \n', analSet$plsda$permut.p), xpd=T);
#dev.off();
}
##################################################################################################
################################ OPLSDA Score Plot ###############################################
##################################################################################################
#' OPLS-DA scores plot: predictive T score [1] vs orthogonal T score [1],
#' with per-group confidence ellipses.
#' @param inx1,inx2 accepted for interface symmetry with the PCA/PLS variants;
#'   NOTE(review): not used — the plot is fixed to components p1/o1.
#' @param reg confidence level for the group ellipses (default 0.95).
#' @param show 1 = label each sample with its (truncated) row name.
#' @param grey.scale 1 = greyscale rendering (black points, dashed ellipses).
#' Reads analSet$oplsda (scoreMN, orthoScoreMN, modelDF) and dataSet.
MyPlotOPLS2DScore<-function(imgName="OPLSDA Score", format="png", dpi=72, width=NA, inx1, inx2, reg=0.95, show=1, grey.scale=0){
suppressMessages(require('ellipse'));
imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
if(is.na(width)){
w <- 9;
}else if(width == 0){
w <- 7.2;
# width == 0 is web mode: remember the generated image name globally
imgSet$opls.score2d<<-imgName;
}else{
w <- width;
}
h <- w;
#Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
par(mar=c(5,5,3,3));
lv1 <- analSet$oplsda$scoreMN[,1];
lv2 <- analSet$oplsda$orthoScoreMN[,1];
# axis labels carry the R2X of the predictive (p1) and orthogonal (o1) parts
xlabel <- paste("T score [1]", "(", round(100*analSet$oplsda$modelDF["p1", "R2X"],1), "%)");
ylabel <- paste("Orthogonal T score [1]", "(", round(100*analSet$oplsda$modelDF["o1", "R2X"],1), "%)");
text.lbls<-substr(rownames(dataSet$norm),1,12) # some names may be too long
# obtain ellipse points to the scatter plot for each category
lvs <- levels(dataSet$cls);
pts.array <- array(0, dim=c(100,2,length(lvs)));
for(i in 1:length(lvs)){
inx <-dataSet$cls == lvs[i];
groupVar<-var(cbind(lv1[inx],lv2[inx]), na.rm=T);
groupMean<-cbind(mean(lv1[inx], na.rm=T),mean(lv2[inx], na.rm=T));
pts.array[,,i] <- ellipse(groupVar, centre = groupMean, level = reg, npoints=100);
}
# extend the axis ranges so the ellipses are not clipped
xrg <- range (lv1, pts.array[,1,]);
yrg <- range (lv2, pts.array[,2,]);
x.ext<-(xrg[2]-xrg[1])/12;
y.ext<-(yrg[2]-yrg[1])/12;
xlims<-c(xrg[1]-x.ext, xrg[2]+x.ext);
ylims<-c(yrg[1]-y.ext, yrg[2]+y.ext);
## cols = as.numeric(dataSet$cls)+1;
cols <- GetColorSchema(grey.scale==1);
uniq.cols <- unique(cols);
plot(lv1, lv2, xlab=xlabel, xlim=xlims, ylim=ylims, ylab=ylabel, type='n', main="Scores Plot");
grid(col = "lightgray", lty = "dotted", lwd = 1);
# make sure name and number of the same order DO NOT USE levels, which may be different
legend.nm <- unique(as.character(dataSet$cls));
## uniq.cols <- unique(cols);
## BHAN: when same color is choosen for black/white; it makes an error
# names(uniq.cols) <- legend.nm;
if ( length(uniq.cols) > 1 ) {
names(uniq.cols) <- legend.nm;
}
# draw ellipse
for(i in 1:length(lvs)){
if ( length(uniq.cols) > 1) {
polygon(pts.array[,,i], col=adjustcolor(uniq.cols[lvs[i]], alpha=0.25), border=NA);
} else {
polygon(pts.array[,,i], col=adjustcolor(uniq.cols, alpha=0.25), border=NA);
}
if(grey.scale) {
lines(pts.array[,,i], col=adjustcolor("black", alpha=0.5), lty=2);
}
}
pchs <- GetShapeSchema(show, grey.scale);
if(grey.scale) {
cols <- rep("black", length(cols));
}
if(show==1){ # display sample name set on
text(lv1, lv2, label=text.lbls, pos=4, xpd=T, cex=0.75);
points(lv1, lv2, pch=pchs, col=cols);
}else{
if (length(uniq.cols) == 1) {
points(lv1, lv2, pch=pchs, col=cols, cex=1.0);
} else {
# larger plain symbols in greyscale / custom-shape mode, filled circles otherwise
if(grey.scale == 1 | (exists("shapeVec") && all(shapeVec>0))){
points(lv1, lv2, pch=pchs, col=cols, cex=1.8);
}else{
points(lv1, lv2, pch=21, bg=cols, cex=2);
}
}
}
uniq.pchs <- unique(pchs);
if(grey.scale) {
uniq.cols <- "black";
}
legend("topright", legend = legend.nm, pch=uniq.pchs, col=uniq.cols);
#dev.off();
}
##################################################################################################
################################ OPLSDA S-Plot ###################################################
##################################################################################################
#' OPLS-DA S-plot: covariance p[1] vs correlation p(corr)[1] of each feature
#' with the predictive score vector.
#' @param plotType "all" labels every feature; "custom" labels only the
#'   features in the global custom.cmpds; anything else adds no labels.
#' Side effects: writes oplsda_splot.csv and caches the matrix (with a
#' jittered column for the web display) in analSet$oplsda$splot.mat.
MyPlotOPLS.Splot<-function(imgName='OPLSDA S-Plot', format="png", dpi=72, width=NA, plotType){
s <- as.matrix(dataSet$norm);
T <- as.matrix(analSet$oplsda$scoreMN)
# covariance of each feature with the score(s); grown column-wise as in the
# original implementation (assumes a single predictive component — TODO confirm)
p1 <- c()
for (i in 1:ncol(s)) {
scov <- cov(s[,i], T)
p1 <- matrix(c(p1, scov), ncol=1)
}
# corresponding correlations: covariance divided by the product of sds
pcorr1 <- c()
for (i in 1:nrow(p1)) {
den <- apply(T, 2, sd)*sd(s[,i])
corr1 <- p1[i,]/den
pcorr1 <- matrix(c(pcorr1, corr1), ncol=1)
}
imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
if(is.na(width)){
w <- h <- 8;
}else if(width == 0){
# fix: w and h were previously left unset in this branch; use the same
# default web size (7.2) as the sibling plot functions in this file
w <- h <- 7.2;
imgSet$opls.loading<<-imgName;
}else{
w <- h <- width;
}
#Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
par(mar=c(5,5,4,7))
plot(p1, pcorr1, pch=19, xlab="p[1]", ylab ="p(corr)[1]", main = "S-plot", col="magenta");
opls.axis.lims <<- par("usr");
if(plotType=="all"){
text(p1, pcorr1, labels=colnames(s), cex=0.8, pos=4, xpd=TRUE, col="blue");
}else if(plotType == "custom"){
if(length(custom.cmpds) > 0){
hit.inx <- colnames(dataSet$norm) %in% custom.cmpds;
text(p1[hit.inx], pcorr1[hit.inx], labels=colnames(s)[hit.inx], pos=4, xpd=TRUE, col="blue");
}
}else{
# do nothing
}
#dev.off();
splot.mat <- cbind(jitter(p1),p1, pcorr1);
rownames(splot.mat) <- colnames(s);
colnames(splot.mat) <- c("jitter", "p[1]","p(corr)[1]");
write.csv(signif(splot.mat[,2:3],5), file="oplsda_splot.csv");
analSet$oplsda$splot.mat <<- splot.mat;
}
##################################################################################################
################################ OPLSDA Tab ######################################################
##################################################################################################
# S-plot coordinate table for display: feature names in a "Name" column plus
# the p[1] and p(corr)[1] columns (drops the jitter column used by the plot).
OPLSTab <- function(){
  coords <- as.matrix(analSet$oplsda$splot.mat[, c(2, 3)])
  out <- cbind(row.names(coords), coords)
  colnames(out)[1] <- "Name"
  return(out)
}
##################################################################################################
################################ OPLSDA Overview Plot ############################################
##################################################################################################
#' OPLS-DA model overview: grouped barplot of R2X, R2Y and Q2 per component
#' (the "sum" row of modelDF is excluded), with value labels above the bars
#' and a legend placed to the right of the plot region.
MyPlotOPLS.MDL <- function(imgName="OPLSDA OverView", format="png", dpi=72, width=NA){
imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
if(is.na(width)){
w <- 8;
}else if(width == 0){
w <- 8;
# NOTE(review): stores under imgSet$pls.class, overwriting the PLS-DA CV
# image name — probably meant imgSet$opls.class (copy-paste); confirm
# against the code that reads imgSet before changing
imgSet$pls.class<<-imgName;
}else{
w <- width;
}
h <- w*6/8;
#Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
# the model R2Y and Q2Y
par(mar=c(5,5,4,7)); # put legend on the right outside
modBarDF <- analSet$oplsda$modelDF[!(rownames(analSet$oplsda$modelDF) %in% c("sum")), ];
mod.dat <- rbind(modBarDF[, "R2X"] ,modBarDF[, "R2Y"], modBarDF[, "Q2"]);
# first barplot captures the bar x-positions; second draws the colored bars
bplt <- barplot(mod.dat,beside=TRUE, names.arg = rownames(modBarDF),xlab = "");
axis(2, lwd.ticks=1);
barplot(mod.dat,add = TRUE, beside = TRUE, col = c("lightblue", "mistyrose","lightgreen"));
text(x=bplt, y=mod.dat+max(mod.dat)/25, labels=as.character(mod.dat), xpd=TRUE)
# legend just beyond the last bar group (3 bars + 1 gap per group)
xpos <- nrow(modBarDF)*3 + nrow(modBarDF) + 0.5;
ypos <- max(mod.dat)/2;
legend(xpos, ypos, legend = c("R2X","R2Y", "Q2"), pch=15, col=c("lightblue", "mistyrose","lightgreen"), xpd=TRUE, bty="n");
#dev.off();
}
##################################################################################################
################################ OPLSDA permutation Plot #########################################
##################################################################################################
#' OPLS-DA permutation test: re-fits the model with `num` permutations,
#' overlays histograms of the permuted R2Y and Q2 values, marks the observed
#' values with arrows, and returns a message with the empirical p-values.
#' @param num number of permutations.
#' @return character message with the empirical p-values for R2Y and Q2.
MyPlotOPLS.Permutation<-function(imgName="OPLSDA permutation Plot", format="png", dpi=72, num, width=NA){
cls<-scale(as.numeric(dataSet$cls))[,1];
datmat<-as.matrix(dataSet$norm);
# cap cross-validation segments at 7 or n-1, whichever is smaller
cv.num <- min(7, dim(dataSet$norm)[1]-1);
#perm.res<-performOPLS(datmat,cls, predI=1, orthoI=NA, permI=num, crossvalI=cv.num);
perm.res<-perform_opls(datmat,cls, predI=1, orthoI=NA, permI=num, crossvalI=cv.num);
# first entry of each vector is the observed (unpermuted) statistic
r.vec<-perm.res$suppLs[["permMN"]][, "R2Y(cum)"];
q.vec<-perm.res$suppLs[["permMN"]][, "Q2(cum)"];
rng <- range(c(r.vec, q.vec, 1));
imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
if(is.na(width)){
w <- 8;
}else if(width == 0){
w <- 8;
# NOTE(review): stores under imgSet$pls.permut, shadowing the PLS-DA
# permutation image name — probably meant an opls.* slot; confirm first
imgSet$pls.permut<<-imgName;
}else{
w <- width;
}
h <- w*6/8;
#Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
par(mar=c(5,5,2,7));
# build both histograms with a common bin size so they overlay cleanly
rhst <- hist(r.vec[-1], plot=FALSE);
qhst <- hist(q.vec[-1], plot=FALSE);
# NOTE(review): h (plot height) is reused as the max bar count; harmless
# while the Cairo device call above stays commented out
h <- max(c(rhst$counts, qhst$counts))+1;
bin.size <- min(c(rhst$breaks[2]-rhst$breaks[1], qhst$breaks[2]-qhst$breaks[1]));
rbins <- seq(min(rhst$breaks),max(rhst$breaks),bin.size);
qbins <- seq(min(qhst$breaks),max(qhst$breaks),bin.size);
hist(r.vec[-1], xlim=rng, ylim=c(0, h), breaks=rbins, border=F, ylab="Frequency", xlab= 'Permutations',
col=adjustcolor("lightblue", alpha=0.6), main="");
hist(q.vec[-1], add=TRUE,breaks=qbins, border=F, col=adjustcolor("mistyrose", alpha=0.6));
arrows(r.vec[1], h/3, r.vec[1], 0, length=0.1,angle=30,lwd=2);
text(r.vec[1], h/2.5, paste('Observed \n R2Y:', r.vec[1]), xpd=TRUE);
arrows(q.vec[1], h/2, q.vec[1], 0, length=0.1,angle=30,lwd=2);
text(q.vec[1], h/1.8, paste('Observed \n Q2:', q.vec[1]), xpd=TRUE);
legend(1, h/3, legend = c("Perm R2Y", "Perm Q2"), pch=15, col=c("lightblue", "mistyrose"), xpd=T, bty="n");
#dev.off();
# empirical p-value: fraction of permutations at least as good as observed;
# report "p < 1/num" when no permutation reaches the observed value
better.rhits <- sum(r.vec[-1]>=r.vec[1]);
if(better.rhits == 0) {
pr <- paste("p < ", 1/num, " (", better.rhits, "/", num, ")", sep="");
}else{
p <- better.rhits/num;
pr <- paste("p = ", signif(p, digits=5), " (", better.rhits, "/", num, ")", sep="");
}
better.qhits <- sum(q.vec[-1]>=q.vec[1]);
if(better.qhits == 0) {
pq <- paste("p < ", 1/num, " (", better.qhits, "/", num, ")", sep="");
}else{
p <- better.qhits/num;
pq <- paste("p = ", signif(p, digits=5), " (", better.qhits, "/", num, ")", sep="");
}
msg <- paste0("Empirical p-values R2Y: ", pr, " and Q2: ", pq)
return(msg);
}
###################################################################################################
################################ PCA Trajectory Plot ##############################################
###################################################################################################
# Trajectory plot of PCA scores: one point per group at the mean of the two
# selected PCs, with +/- standard-error bars in both directions (ggplot2).
# pc1/pc2: component indices; title: plot title; ptsSize: point size;
# extPer: fractional axis padding; colors: manual group colors; errW: error
# bar cap width/height. Reads analSet$pca$x and dataSet$cls.
PlotTraPCA <- function(pc1, pc2, title, ptsSize, extPer, colors, errW){
  xs <- analSet$pca$x[, pc1]
  ys <- analSet$pca$x[, pc2]
  groups <- dataSet$cls
  levs <- levels(groups)
  n.lev <- length(levs)
  grp.sizes <- summary(dataSet$cls)
  gr <- character(n.lev)
  meanX <- sdX <- seX <- numeric(n.lev)
  meanY <- sdY <- seY <- numeric(n.lev)
  # one pass per group: mean, sd and standard error for both coordinates
  for (i in 1:n.lev) {
    gr[i] <- levs[i]
    sel <- groups == levs[i]
    x1 <- xs[sel]
    y1 <- ys[sel]
    meanX[i] <- mean(x1)
    sdX[i] <- sapply(as.data.frame(x1), sd)
    seX[i] <- sdX[i] / sqrt(as.numeric(grp.sizes[i]))
    meanY[i] <- mean(y1)
    sdY[i] <- sapply(as.data.frame(y1), sd)
    seY[i] <- sdY[i] / sqrt(as.numeric(grp.sizes[i]))
  }
  dFrame <- data.frame(gr, meanX, meanY, seX, seY)
  # error-bar endpoints (mean +/- SE), also used to size the axis limits
  SEX.up <- meanX + seX
  SEX.dn <- meanX - seX
  SEY.up <- meanY + seY
  SEY.dn <- meanY - seY
  all.x <- c(meanX, SEX.up, SEX.dn)
  all.y <- c(meanY, SEY.up, SEY.dn)
  x.pad <- abs(diff(range(all.x))) * extPer
  y.pad <- abs(diff(range(all.y))) * extPer
  xlims <- c(min(all.x) - x.pad, max(all.x) + x.pad)
  ylims <- c(min(all.y) - y.pad, max(all.y) + y.pad)
  ggplot(data = dFrame, aes(x = meanX, y = meanY, group = gr, color = gr)) +
    theme_bw() +
    theme(panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
          panel.background = element_blank(),
          axis.line = element_line(size = 0.3, colour = "black"),
          axis.text = element_text(size = 12, colour = "black"),
          axis.title = element_text(size = 14, face = "bold")) +
    geom_point(size = ptsSize) +
    scale_colour_manual(name = "Groups", values = colors) +
    ggtitle(title) +
    xlim(xlims[1], xlims[2]) + ylim(ylims[1], ylims[2]) +
    xlab(paste("PC", pc1)) + ylab(paste("PC", pc2)) +
    geom_errorbar(aes(ymin = SEY.dn, ymax = SEY.up), width = errW) +
    geom_errorbarh(aes(xmin = SEX.dn, xmax = SEX.up), height = errW)
}
###################################################################################################
############################### PLSDA Trajectory Plot ###########################################
###################################################################################################
# Trajectory plot of PLS-DA scores: one point per group at the mean of the
# two selected components, with +/- standard-error bars in both directions.
# inx1/inx2: component indices; remaining arguments as in PlotTraPCA.
# Reads analSet$plsr$scores and dataSet$cls.
PlotTraPLSDA <- function(inx1, inx2, title, ptsSize, extPer, colors, errW){
  xs <- analSet$plsr$scores[, inx1]
  ys <- analSet$plsr$scores[, inx2]
  groups <- dataSet$cls
  levs <- levels(groups)
  n.lev <- length(levs)
  grp.sizes <- summary(dataSet$cls)
  gr <- character(n.lev)
  meanX <- sdX <- seX <- numeric(n.lev)
  meanY <- sdY <- seY <- numeric(n.lev)
  # one pass per group: mean, sd and standard error for both coordinates
  for (i in 1:n.lev) {
    gr[i] <- levs[i]
    sel <- groups == levs[i]
    x1 <- xs[sel]
    y1 <- ys[sel]
    meanX[i] <- mean(x1)
    sdX[i] <- sapply(as.data.frame(x1), sd)
    seX[i] <- sdX[i] / sqrt(as.numeric(grp.sizes[i]))
    meanY[i] <- mean(y1)
    sdY[i] <- sapply(as.data.frame(y1), sd)
    seY[i] <- sdY[i] / sqrt(as.numeric(grp.sizes[i]))
  }
  dFrame <- data.frame(gr, meanX, meanY, seX, seY)
  # error-bar endpoints (mean +/- SE), also used to size the axis limits
  SEX.up <- meanX + seX
  SEX.dn <- meanX - seX
  SEY.up <- meanY + seY
  SEY.dn <- meanY - seY
  all.x <- c(meanX, SEX.up, SEX.dn)
  all.y <- c(meanY, SEY.up, SEY.dn)
  x.pad <- abs(diff(range(all.x))) * extPer
  y.pad <- abs(diff(range(all.y))) * extPer
  xlims <- c(min(all.x) - x.pad, max(all.x) + x.pad)
  ylims <- c(min(all.y) - y.pad, max(all.y) + y.pad)
  ggplot(data = dFrame, aes(x = meanX, y = meanY, group = gr, color = gr)) +
    theme_bw() +
    theme(panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
          panel.background = element_blank(),
          axis.line = element_line(size = 0.3, colour = "black"),
          axis.text = element_text(size = 12, colour = "black"),
          axis.title = element_text(size = 14, face = "bold")) +
    geom_point(size = ptsSize) +
    scale_colour_manual(name = "Groups", values = colors) +
    ggtitle(title) +
    xlim(xlims[1], xlims[2]) + ylim(ylims[1], ylims[2]) +
    xlab(paste("Component", inx1)) + ylab(paste("Component", inx2)) +
    geom_errorbar(aes(ymin = SEY.dn, ymax = SEY.up), width = errW) +
    geom_errorbarh(aes(xmin = SEX.dn, xmax = SEX.up), height = errW)
}
###################################################################################################
##################################### SAM Plot ###################################################
###################################################################################################
#' SAM FDR characteristics: FDR vs delta (left panel) and significant-feature
#' count vs delta (right panel), with the chosen delta marked in magenta and
#' a summary line (delta / FDR / count) across the top.
#' @param delta chosen SAM delta threshold.
#' Reads analSet$sam@mat.fdr (columns "Delta", "FDR", "Called").
MyPlotSAM.FDR<-function(delta, imgName="SAM FDR Plot", format="png", dpi=72, width=NA){
imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
if(is.na(width)){
w <- 10;
}else if(width == 0){
w <- 7.2;
imgSet$sam.fdr<<-imgName;
}else{
# fix: this branch was missing, leaving w undefined whenever an explicit
# width was passed (every sibling plot function in this file has it)
w <- width;
}
h <- w*3/5;
#Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
par(mfrow=c(1,2), mar=c(5,6,4,1));
mat.fdr<-analSet$sam@mat.fdr;
plot(mat.fdr[,"Delta"],mat.fdr[,"FDR"],xlab='Delta',ylab=NA,type="b", col='blue', las=2);
abline(v = delta, lty=3, col="magenta");
mtext("FDR", side=2, line=5);
par(mar=c(5,5,4,2))
# fix: corrected "feaure" typo in the axis label
plot(mat.fdr[,"Delta"],mat.fdr[,"Called"],xlab='Delta',ylab="Significant feature No.",type="b", col='blue', las=2);
abline(v = delta, lty=3, col="magenta");
# summarize FDR / feature count at the chosen delta across both panels
hit.inx <- mat.fdr[,"Delta"] <= delta;
my.fdr <- signif(min(mat.fdr[,"FDR"][hit.inx]), 3);
my.sigs <- min(mat.fdr[,"Called"][hit.inx]);
mtext(paste("Delta:", delta, " FDR:", my.fdr, " Sig. cmpds:", my.sigs), line=-2, side = 3, outer = TRUE, font=2)
#dev.off();
}
###################################################################################################
#################################### SAM plot 2 ####################################################
###################################################################################################
# Draw the standard SAM significance plot (observed vs. expected d-values)
# for the fitted SAM object at the supplied delta threshold.
# Relies on the global analSet populated by a prior SAM analysis step.
SAMResPlot <- function(delta){
sam.plot2(analSet$sam,delta)
}
###################################################################################################
#################################### SAM Table ####################################################
###################################################################################################
# Table of SAM-significant compounds at the supplied delta threshold.
# Recomputes the significant-feature matrix via SetSAMSigMat, then prepends
# the compound names as a visible "Name" column for table display.
SAMTable <- function(del){
SetSAMSigMat(delta=del)
sig.mat <- analSet$sam.cmpds
out <- cbind(rownames(sig.mat), sig.mat)
colnames(out)[1] <- "Name"
out
}
###################################################################################################
##################################### Dendrogram Plot ############################################
###################################################################################################
# Hierarchical clustering dendrogram of the samples in dataSet$norm, with
# leaf labels coloured by class when the outcome is discrete.
# NOTE(review): the default imgName "Dendrpgram" looks like a typo for
# "Dendrogram"; it only affects the (currently disabled) output file name.
#
# Args:
#   smplDist: 'euclidean' for a euclidean distance matrix; otherwise treated
#             as a correlation method and used as dist(1 - cor(...)).
#   clstDist: agglomeration method passed to hclust().
#   width:    NA -> height derived from sample count; 0 -> shiny preview size
#             (records the image name in the global imgSet); else 7.2in.
MyPlotHCTree<-function(imgName="Dendrpgram", format="png", dpi=600, width=NA, smplDist, clstDist){
# set up data set
hc.dat<-as.matrix(dataSet$norm);
colnames(hc.dat)<-substr(colnames(hc.dat), 1, 18) # some names are too long
# set up distance matrix
if(smplDist == 'euclidean'){
dist.mat<-dist(hc.dat, method = smplDist);
}else{
# correlation-based distance: 1 - cor turns similarity into dissimilarity
dist.mat<-dist(1-cor(t(hc.dat), method = smplDist));
}
# record the paramters
analSet$tree<<-list(dist.par=smplDist, clust.par=clstDist);
# build the tree
hc_tree<-hclust(dist.mat, method=clstDist);
# plot the tree
imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
if(is.na(width)){
w <- minH <- 630;
# scale height with the number of samples, floored at minH pixels
myH <- nrow(hc.dat)*10 + 150;
if(myH < minH){
myH <- minH;
}
w <- round(w/72,2);
h <- round(myH/72,2);
}else if(width == 0){
w <- h <- 7.2;
imgSet$tree<<-imgName;
}else{
w <- h <- 7.2;
}
#Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
par(cex=0.8, mar=c(4,2,2,8));
if(dataSet$cls.type == "disc"){
# discrete classes: colour each leaf label by its group colour
clusDendro<-as.dendrogram(hc_tree);
cols <- GetColorSchema();
names(cols) <- rownames(hc.dat);
labelColors <- cols[hc_tree$order];
# dendrapply callback: attach the precomputed label colour to each leaf
colLab <- function(n){
if(is.leaf(n)) {
a <- attributes(n)
labCol <- labelColors[a$label];
attr(n, "nodePar") <-
if(is.list(a$nodePar)) c(a$nodePar, lab.col = labCol,pch=NA) else
list(lab.col = labCol,pch=NA)
}
n
}
clusDendro<-dendrapply(clusDendro, colLab)
plot(clusDendro,horiz=T,axes=T);
par(cex=1);
legend.nm <- as.character(dataSet$cls);
legend("topleft", legend = unique(legend.nm), pch=15, col=unique(cols), bty = "n");
}else{
# continuous outcome: plain dendrogram without class colour coding
plot(as.dendrogram(hc_tree), hang=-1, main=paste("Cluster with", clstDist, "method"), xlab=NULL, sub=NULL, horiz=TRUE);
}
#dev.off();
}
###################################################################################################
##################################### Cluster Analysis Heatmap ####################################
###################################################################################################
# Heatmap restricted to the top `top.num` features ranked by the chosen
# method, then delegated to MyPlotHeatMap for the actual drawing.
#
# method.nm ranking sources (each analysis is run lazily if its result is
# not yet present in the global analSet):
#   'tanova' -> t-test p-values (2 groups) or ANOVA p-values (>2 groups)
#   'cor'    -> pattern-matching correlation results
#   'vip'    -> PLS-DA VIP scores (first component)
#   'rf'     -> random-forest importance
MyPlotSubHeatMap <- function(imgName="Sub Heat Map", format="png", dpi=600, width=NA, dataOpt, scaleOpt, smplDist, clstDist, palette, method.nm, top.num, viewOpt, rowV=T, colV=T, border=T){
var.nms = colnames(dataSet$norm);
# only rank/subset when fewer features were requested than exist
if(top.num < length(var.nms)){
if(method.nm == 'tanova'){
if(GetGroupNumber() == 2){
if(is.null(analSet$tt)){
Ttests.Anal();
}
var.nms <- names(sort(analSet$tt$p.value))[1:top.num];
}else{
if(is.null(analSet$aov)){
ANOVA.Anal();
}
var.nms <- names(sort(analSet$aov$p.value))[1:top.num];
}
}else if(method.nm == 'cor'){
if(is.null(analSet$cor.res)){
Match.Pattern();
}
# re-order for pretty view
cor.res <- analSet$cor.res;
ord.inx<-order(cor.res[,3]);
cor.res <- cor.res[ord.inx, ];
ord.inx<-order(cor.res[,1]);
cor.res <- cor.res[ord.inx, ];
var.nms <- rownames(cor.res)[1:top.num];
}else if(method.nm == 'vip'){
if(is.null(analSet$plsda)){
PLSR.Anal();
PLSDA.CV();
}
vip.vars <- analSet$plsda$vip.mat[,1];# use the first component
var.nms <- names(rev(sort(vip.vars)))[1:top.num];
}else if(method.nm == 'rf'){
if(is.null(analSet$rf)){
RF.Anal();
}
var.nms <- GetRFSigRowNames()[1:top.num];
}
}
# map the selected feature names back to column indices of the data matrix
var.inx <- match(var.nms, colnames(dataSet$norm));
MyPlotHeatMap(imgName, format, dpi, width, dataOpt, scaleOpt, smplDist, clstDist, palette, viewOpt, rowV, colV, var.inx, border);
}
# Plot a clustered heatmap of the normalised or processed data.
#
# Args:
#   imgName/format/dpi: used to build the (currently disabled) image file name.
#   width:    NA -> width derived from sample count; 0 -> shiny preview size
#             (records the image name in the global imgSet); else 7.2in.
#   dataOpt:  "norm" -> dataSet$norm, anything else -> dataSet$proc.
#   scaleOpt: passed through to pheatmap's `scale`.
#   smplDist/clstDist: distance and clustering method names.
#   palette:  one of the named colour schemes below (default: reversed RdBu).
#   viewOpt:  "overview" caps the canvas size.
#   rowV/colV: whether to cluster samples / features.
#   var.inx:  optional feature (column) subset; NA means use all columns.
#   border:   draw grey cell borders when TRUE.
MyPlotHeatMap<-function(imgName="Heat Map", format="png", dpi=72, width=NA, dataOpt, scaleOpt, smplDist, clstDist, palette, viewOpt="detail", rowV=T, colV=T, var.inx=NA, border=T){
# record the paramters
analSet$htmap<<-list(dist.par=smplDist, clust.par=clstDist);
# set up data set
if(dataOpt=="norm"){
my.data <- dataSet$norm;
}else{
my.data <- dataSet$proc;
}
# BUG FIX: var.inx is an index vector when called from MyPlotSubHeatMap;
# a bare is.na(vector) inside if() warns (and errors in R >= 4.2). all()
# preserves the original "scalar NA means use everything" behaviour.
if(all(is.na(var.inx))){
hc.dat<-as.matrix(my.data);
}else{
hc.dat<-as.matrix(my.data[,var.inx]);
}
colnames(hc.dat)<-substr(colnames(hc.dat),1,18) # some names are too long
hc.cls <- dataSet$cls;
# set up colors for heatmap
if(palette=="gbr"){
colors <- colorRampPalette(c("green", "black", "red"), space="rgb")(256);
}else if (palette=="wnvyb"){
colors <- colorRampPalette(c("white","navyblue"), space="Lab")(256);
}else if (palette=="rwg"){
colors <- colorRampPalette(c("red", "white", "green"), space="rgb")(256);
}else if (palette=="rwb"){
colors <- colorRampPalette(c("red","white","blue"),space="rgb")(256);
}else if(palette == "heat"){
colors <- heat.colors(256);
}else if(palette == "topo"){
colors <- topo.colors(256);
}else if(palette == "gray"){
colors <- colorRampPalette(c("grey90", "grey10"), space="rgb")(256);
}else{
# default: reversed RdBu brewer palette (red = high)
suppressMessages(require(RColorBrewer));
colors <- rev(colorRampPalette(brewer.pal(10, "RdBu"))(256));
}
imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
if(is.na(width)){
# width grows with the number of samples, floored at minW pixels
minW <- 630;
myW <- nrow(hc.dat)*18 + 150;
if(myW < minW){
myW <- minW;
}
w <- round(myW/72,2);
}else if(width == 0){
w <- 7.2;
imgSet$heatmap<<-imgName;
}else{
w <- 7.2;
}
myH <- ncol(hc.dat)*18 + 150;
h <- round(myH/72,2);
if(viewOpt == "overview"){
# overview mode caps the canvas so the whole map fits on screen
if(is.na(width)){
if(w > 9){
w <- 9;
}
}else if(width == 0){
if(w > 7.2){
w <- 7.2;
}
imgSet$heatmap<<-imgName;
}else{
w <- 7.2;
}
if(h > w){
h <- w;
}
}
if(border){
border.col<-"grey60";
}else{
border.col <- NA;
}
#Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
if(dataSet$cls.type == "disc"){
require(pheatmap);
# sample annotation strip coloured by class membership
annotation <- data.frame(class= hc.cls);
rownames(annotation) <-rownames(hc.dat);
# set up color schema for samples
if(palette== "gray"){
cols <- GetColorSchema(TRUE);
uniq.cols <- unique(cols);
}else{
cols <- GetColorSchema();
uniq.cols <- unique(cols);
}
names(uniq.cols) <- unique(as.character(dataSet$cls));
ann_colors <- list(class= uniq.cols);
pheatmap(t(hc.dat),
annotation=annotation,
fontsize=8, fontsize_row=8,
clustering_distance_rows = smplDist,
clustering_distance_cols = smplDist,
clustering_method = clstDist,
border_color = border.col,
cluster_rows = colV,
cluster_cols = rowV,
scale = scaleOpt,
color = colors,
annotation_colors = ann_colors
);
}else{
# BUG FIX: rowTree/colTree were referenced here without ever being
# defined, so this branch always errored. Map the logical rowV/colV
# onto stats::heatmap()'s convention: NULL = compute the dendrogram,
# NA = suppress it.
rowTree <- if(rowV) NULL else NA;
colTree <- if(colV) NULL else NA;
heatmap(hc.dat, Rowv = rowTree, Colv=colTree, col = colors, scale="column");
}
#dev.off();
}
#####################################################################################################
################################# K-Means ###########################################################
#####################################################################################################
# One panel per k-means cluster: grey lines for each member sample's
# feature profile, with the per-feature median overlaid in blue.
# Silently returns without plotting when more than 20 clusters exist.
MyPlotKmeans<-function(imgName="K-means Plot", format="png", dpi=72, width=NA){
clust.num <- max(analSet$kmeans$cluster);
if(clust.num>20) return();
# calculate arrangement of panel
ylabel<-GetValueLabel();
imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
if(is.na(width)){
w <- 9;
}else if(width == 0){
w <- 7;
imgSet$kmeans<<-imgName;
}else{
w <- width;
}
h <- w*8/9;
#Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
par(mfrow = GetXYCluster(clust.num), mar=c(5,4,2,2));
for (loop in 1:clust.num) {
matplot(t(dataSet$norm[analSet$kmeans$cluster==loop,]), type="l", col='grey', ylab=ylabel, axes=F,
main=paste("Cluster ",loop, ", n=", analSet$kmeans$size[loop], sep=""))
# blue line: per-feature median profile of the cluster members
lines(apply(dataSet$norm[analSet$kmeans$cluster==loop,], 2, median), type="l", col='blue', lwd=1);
axis(2);
# feature names truncated to 7 chars on the x axis
axis(1, 1:ncol(dataSet$norm), substr(colnames(dataSet$norm), 1, 7), las=2);
}
#dev.off();
}
###############################################################################################################
####################################### K-means Table #########################################################
###############################################################################################################
# Build a one-column character matrix listing, for every k-means cluster,
# the sample names (rownames of dataSet$norm) assigned to it.
# Returns a matrix whose first column holds the cluster labels (blank
# header) so the result can be rendered directly as a table in the UI.
MyGetAllKMClusterMembers<-function(){
clust.num<-max(analSet$kmeans$cluster);
# vapply instead of growing rbind() inside a while loop: same values,
# no repeated copying (and no empty-data.frame first-row special case).
members <- vapply(seq_len(clust.num), function(i){
paste(rownames(dataSet$norm)[analSet$kmeans$cluster == i], collapse = " ")
}, character(1));
clust.df <- matrix(members, ncol = 1);
# same label text as before: paste with default sep gives "Cluster( i )"
row.names(clust.df) <- paste("Cluster(", seq_len(clust.num), ")");
colnames(clust.df)<-"Samples in each cluster";
#xtable(clust.df, align="l|p{8cm}", caption="Clustering result using K-means");
m=cbind(row.names(clust.df),clust.df)
colnames(m)[1] <- c(" ")
return(m)
}
###############################################################################################################
####################################### SOM Plot #########################################################
###############################################################################################################
# One panel per SOM grid cell (xdim x ydim): grey lines for each member
# sample's feature profile with the per-feature median overlaid in blue.
# Cells without members get an empty dummy panel so the grid stays aligned.
# Silently returns without plotting when the grid has more than 20 cells.
MyPlotSOM <- function(imgName="SOM Plot", format="png", dpi=72, width=NA){
xdim<-analSet$som$xdim;
ydim<-analSet$som$ydim;
total<-xdim*ydim;
if(total>20) { return();}
ylabel<-GetValueLabel();
clust<-analSet$som$visual;
imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
if(is.na(width)){
w <- 9;
}else if(width == 0){
w <- 7;
imgSet$som<<-imgName;
}else{
w <- width;
}
h <- w*8/9;
#Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
par(mfrow = GetXYCluster(total), mar=c(5,4,2,2));
# SOM cell coordinates are 0-based in clust$x / clust$y
for (i in 0:(xdim-1)) {
xTrue<-clust$x == i;
for (j in 0:(ydim-1)) {
yTrue<-clust$y == j;
sel.inx<-xTrue & yTrue; # selected row
if(sum(sel.inx)>0){ # some cluster may not contain any member
matplot(t(dataSet$norm[sel.inx, ]), type="l", col='grey', axes=F, ylab=ylabel,
main=paste("Cluster(", i, ",", j,")", ", n=", sum(sel.inx), sep=""))
# blue line: per-feature median profile of the cell members
lines(apply(dataSet$norm[sel.inx, ], 2, median), type="l", col='blue', lwd=1);
}else{ # plot a dummy
plot(t(dataSet$norm[1, ]), type="n", axes=F, ylab=ylabel,
main=paste("Cluster(", i, ",", j,")",", n=", sum(sel.inx),sep=""))
}
axis(2);
# feature names truncated to 7 chars on the x axis
axis(1, 1:ncol(dataSet$norm), substr(colnames(dataSet$norm), 1, 7), las=2);
}
}
#dev.off();
}
###############################################################################################################
####################################### SOM Table #########################################################
###############################################################################################################
# Build a one-column character matrix listing, for every SOM grid cell
# (i, j), the sample names (rownames of dataSet$norm) mapped to that cell.
# Returns a matrix whose first column holds the cluster labels (blank
# header) so the result can be rendered directly as a table in the UI.
MyGetAllSOMClusterMembers<-function(){
clust<-analSet$som$visual;
xdim<-analSet$som$xdim;
ydim<-analSet$som$ydim;
# Enumerate cells in the original order (i outer, j inner); expand.grid
# varies its first column fastest, so list j first.
grid <- expand.grid(j = seq_len(ydim) - 1, i = seq_len(xdim) - 1);
# vapply instead of growing rbind() inside nested while loops: same
# values, no repeated copying (and no empty-data.frame first-row quirk).
members <- vapply(seq_len(nrow(grid)), function(k){
in.cell <- clust$x == grid$i[k] & clust$y == grid$j[k];
paste(rownames(dataSet$norm)[in.cell], collapse = " ")
}, character(1));
clust.df <- matrix(members, ncol = 1);
# same label text as before: "Cluster( i , j )" via paste's default sep
row.names(clust.df) <- paste("Cluster(", grid$i, ",", grid$j, ")");
colnames(clust.df)<-"Samples in each cluster";
#xtable(clust.df, align="l|p{8cm}", caption="Clustering result using SOM");
m=cbind(row.names(clust.df),clust.df)
colnames(m)[1] <- c(" ")
return(m)
}
############################################################################################################
####################################### Random Forest Plot #################################################
#############################################################################################################
# Random-forest error-rate plot: one curve per class level plus the
# overall OOB error, taken from the fit stored in the global analSet$rf.
MyPlotRF.Classify<-function(imgName="Random Forest Classification Plot", format="png", dpi=72, width=NA){
imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
if(!is.na(width) && width == 0){
# shiny preview: remember the image name for later retrieval
imgSet$rf.cls<<-imgName;
}
# NA and 0 both map to the default 8in width; anything else is taken as-is
w <- if(is.na(width) || width == 0) 8 else width;
h <- w*5/8;
# Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
#par(mfrow=c(2,1));
par(mar=c(4,4,3,2));
cls.levels <- levels(dataSet$cls);
line.cols <- rainbow(length(cls.levels)+1);
plot(analSet$rf, main="Random Forest classification", col=line.cols);
legend("topright", legend = c("Overall", cls.levels), lty=2, lwd=1, col=line.cols);
#PlotConfusion(analSet$rf$confusion);
#dev.off();
}
###################################################################################################
#################################### Random Forest Table ##########################################
##################################################################################################
# Random-forest confusion matrix with the class labels exposed as a
# leading display column (blank header) for table rendering.
MyGetRFConf.Table<-function(){
conf <- analSet$rf$confusion
out <- cbind(rownames(conf), conf)
colnames(out)[1] <- " "
out
}
###################################################################################################
#################################### Random Forest VIP ##########################################
##################################################################################################
# Variable-importance plot (MeanDecreaseAccuracy) for the random-forest
# fit in analSet$rf, most important features first.
MyPlotRF.VIP<-function(imgName="Random Forest VIP", format="png", dpi=72, width=NA){
scores <- sort(analSet$rf$importance[,"MeanDecreaseAccuracy"], decreasing=TRUE);
imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
if(!is.na(width) && width == 0){
# shiny preview: remember the image name for later retrieval
imgSet$rf.imp<<-imgName;
}
w <- if(is.na(width)) 8 else if(width == 0) 7 else width;
h <- w*7/8;
#Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
PlotImpVar(scores,"MeanDecreaseAccuracy");
#dev.off();
}
###################################################################################################
#################################### Random Forest VIP Table ##########################################
##################################################################################################
# Table of significant random-forest features with their importance
# scores, feature names exposed as a leading "Names" column.
MyRFVipTab <-function(){
sig <- analSet$rf.sigmat
tab <- cbind(rownames(sig), sig)
colnames(tab) <- c("Names","MeanDecreaseAccuracy")
tab
}
########################################################################################################
################################## Random Forest Outlier Detection Plot#################################
########################################################################################################
# Plot per-sample outlying measures from the random-forest proximity
# (outlier()), coloured by class, with the five most extreme samples
# labelled; a narrow second panel holds the class legend.
# NOTE(review): "Dectection" in the default imgName looks like a typo for
# "Detection"; it only affects the (currently disabled) output file name.
MyPlotRF.Outlier<-function(imgName="Random Forest Outlier Dectection Plot", format="png", dpi=72, width=NA){
cols <- GetColorSchema();
uniq.cols <- unique(cols);
legend.nm <- unique(as.character(dataSet$cls));
dist.res <- outlier(analSet$rf);
imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
if(is.na(width)){
w <- 9;
}else if(width == 0){
w <- 7.2;
imgSet$rf.outlier<<-imgName;
}else{
w <- width;
}
h <- w*7/9;
#Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
# 4:1 split — main plot on the left, legend strip on the right
layout(matrix(c(1,2), 1, 2, byrow = TRUE), width=c(4,1));
op<-par(mar=c(5,5,4,0));
plot(dist.res, type="h", col=cols, xlab="Samples", xaxt="n", ylab="Outlying Measures", bty="n");
# add sample names to top 5
rankres <- rank(-abs(dist.res), ties.method="random");
inx.x <- which(rankres < 6);
inx.y <- dist.res[inx.x];
nms <- names(dist.res)[inx.x];
# place labels above positive bars, below negative ones
text(inx.x, inx.y, nms, pos=ifelse(inx.y >= 0, 3, 1), xpd=T)
op<-par(mar=c(5,0,4,1));
plot.new();
plot.window(c(0,1), c(0,1));
legend("center", legend =legend.nm, pch=15, col=uniq.cols);
#dev.off();
}
########################################################################################################
################################## SVM Plot ############################################################
########################################################################################################
# Recursive-SVM error-rate curve across the variable-elimination levels;
# the best level (analSet$svm$best.inx) is highlighted in red.
MyPlotRSVM.Classification<-function(imgName="SVM Plot", format="png", dpi=72, width=NA){
err <- analSet$svm$Error;
pad <- (max(err)-min(err))/100; # y-axis head-room so the % labels fit
imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
if(!is.na(width) && width == 0){
# shiny preview: remember the image name for later retrieval
imgSet$svm.class<<-imgName;
}
w <- if(is.na(width)) 8 else if(width == 0) 7 else width;
h <- w*6/8;
#Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
plot(err,type='l',xlab='Number of variables (levels)',ylab='Error Rate',
ylim = c(min(err)-5*pad, max(err)+18*pad), axes=F,
main="Recursive SVM classification")
text(err,labels =paste(100*round(err,3),'%'), adj=c(-0.3, -0.5), srt=45, xpd=T)
pt.cols <- ifelse(seq_along(err)==analSet$svm$best.inx,"red","blue")
points(err, col=pt.cols);
axis(2);
axis(1, seq_along(err), names(err));
#dev.off();
}
########################################################################################################
################################## SVM Plot Var.importance ############################################################
########################################################################################################
# Importance (selection frequency) plot for the SVM-significant compounds
# stored in analSet$svm$sig.mat.
MyPlotRSVM.Cmpd<-function(imgName="SVm var.importance plot", format="png", dpi=72, width=NA){
freq <- analSet$svm$sig.mat[,1];
imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
if(!is.na(width) && width == 0){
# shiny preview: remember the image name for later retrieval
imgSet$svm<<-imgName;
}
w <- if(is.na(width)) 8 else if(width == 0) 7 else width;
h <- w*7/8;
#Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
PlotImpVar(freq,"Frequency");
#dev.off();
}
# Fit a PLS regression (O-scores algorithm) with `no` components on the
# normalised data and overlay the fitted predictions on a predplot.
# NOTE: theModel is deliberately assigned with <<- so other parts of the
# app can reuse the fitted model; `cls`/`datmat` names are kept because
# they appear in the model formula (and hence in downstream labels).
PlsRegPlot <- function(no,color,ptsize){
cls<-as.numeric(dataSet$cls);
datmat<-as.matrix(dataSet$norm);
theModel <<-plsr(cls~datmat,method='oscorespls', ncomp=no)
summary(theModel)
# measured-vs-predicted coordinates for the training fit
fit.pts <- predictionFunc(theModel,ncomp=no)
predplot(theModel, ncomp = no, asp = 1, line=TRUE, cex=ptsize)
points(fit.pts, bg=color, cex=ptsize, pch=21);
}
# Fit a PLS regression with leave-one-out cross-validation and overlay
# the cross-validated predictions on a predplot.
# NOTE: cvModel is deliberately assigned with <<- so other parts of the
# app can reuse the fitted model; `cls`/`datmat` names are kept because
# they appear in the model formula (and hence in downstream labels).
plsRegPlotCV <- function(no,color,ptsize){
cls<-as.numeric(dataSet$cls);
datmat<-as.matrix(dataSet$norm);
cvModel <<-plsr(cls~datmat,method='oscorespls', ncomp=no, validation="LOO")
summary(cvModel)
# measured-vs-predicted coordinates for the LOO-validated fit
cv.pts <- predictionFunc(cvModel,ncomp=no)
predplot(cvModel, ncomp = no, asp = 1, line=TRUE, cex=ptsize)
points(cv.pts, bg=color, cex=ptsize, pch=21);
}
# Overlay fitted (color1) and LOO cross-validated (color2) predictions on
# a single predplot for a PLS regression with `no` components.
# NOTE: theModel/cvModel are deliberately assigned with <<- so other parts
# of the app can reuse them.
predOvrlyPlt <- function(no,color1,color2,ptsize){
cls<-as.numeric(dataSet$cls);
datmat<-as.matrix(dataSet$norm);
theModel <<-plsr(cls~datmat,method='oscorespls', ncomp=no)
cvModel <<-plsr(cls~datmat,method='oscorespls', ncomp=no, validation="LOO")
fit.pts <- predictionFunc(theModel,ncomp=no)
cv.pts <- predictionFunc(cvModel,ncomp=no)
predplot(theModel, ncomp = no, asp = 1, line=TRUE, cex=ptsize, main=paste("Cls,", no, "comps overlay", collapse=" "))
points(fit.pts, bg=color1, cex=ptsize, pch=21);
points(cv.pts, bg=color2, cex=ptsize, pch=21);
}
# Cross-validated PLS performance table for `no` components.
# Rows: Accuracy (caret LOOCV classification), R2 and Q2 (classical PLS
# regression with LOO validation, via pls::R2).
# NOTE(review): modelcv is assigned with <<- (global) — presumably reused
# elsewhere in the app; confirm before localising it.
PLSR.Table<-function(no){
cls<-as.numeric(dataSet$cls)
datmat<-as.matrix(dataSet$norm);
plsda.cls <- train(dataSet$norm, dataSet$cls, "pls", trControl=trainControl(method="LOOCV"), tuneLength=no);
# use the classifical regression to get R2 and Q2 measure
modelcv<<-plsr(cls~datmat,method='oscorespls', ncomp=no, validation="LOO")
fit.info <- pls::R2(modelcv, estimate = "all")$val[,1,];
# combine accuracy, R2 and Q2
accu <- plsda.cls$results[,2]
all.info <- rbind(accu, fit.info[,-1]);
rownames(all.info) <- c("Accuracy", "R2", "Q2")
return(all.info);
}
# Adapted from pls::predplot.mvr: walks every requested combination of
# response, component count and prediction type ("train", "validation",
# "test"), delegates each panel to predplotXYFunc, and invisibly returns
# the measured/predicted matrix of the last panel processed.
# When `which` is missing, the best available alternative is picked:
# test data if supplied, else cross-validation results, else the fit.
predictionFunc <- function(object, ncomp = object$ncomp, which, newdata,
                     nCols, nRows, xlab = "measured", ylab = "predicted",
                     main, ..., font.main, cex.main)
{
    ## Select type(s) of prediction
    if (missing(which)) {
        ## Pick the `best' alternative.
        if (!missing(newdata)) {
            which <- "test"
        } else {
            if (!is.null(object$validation)) {
                which <- "validation"
            } else {
                which <- "train"
            }
        }
    } else {
        ## Check the supplied `which'
        allTypes <- c("train", "validation", "test")
        which <- allTypes[pmatch(which, allTypes)]
        if (length(which) == 0 || any(is.na(which)))
            stop("`which' should be a subset of ",
                 paste(allTypes, collapse = ", "))
    }
    ## Help variables
    nEst <- length(which)
    nSize <- length(ncomp)
    nResp <- dim(object$fitted.values)[2]
    ## Set plot parametres as needed:
    dims <- c(nEst, nSize, nResp)
    dims <- dims[dims > 1]
    nPlots <- prod(dims)
    if (nPlots > 1) {
        ## Set up default font.main and cex.main for individual titles:
        if (missing(font.main)) font.main <- 1
        if (missing(cex.main)) cex.main <- 1.1
        ## Show the *labs in the margin:
        mXlab <- xlab
        mYlab <- ylab
        xlab <- ylab <- ""
        if(missing(nCols)) nCols <- min(c(3, dims[1]))
        if(missing(nRows))
            nRows <- min(c(3, ceiling(prod(dims[1:2], na.rm = TRUE) / nCols)))
        opar <- par(no.readonly = TRUE)
        # restore the caller's graphics state when we leave
        on.exit(par(opar))
        par(mfrow = c(nRows, nCols),
            oma = c(1, 1, if(missing(main)) 0 else 2, 0) + 0.1,
            mar = c(3,3,3,1) + 0.1)
        if (nRows * nCols < nPlots && dev.interactive()) par(ask = TRUE)
    } else {
        ## Set up default font.main and cex.main for the main title:
        if (missing(font.main)) font.main <- par("font.main")
        if (missing(cex.main)) cex.main <- par("cex.main")
        nCols <- nRows <- 1
    }
    ## Set up measured and predicted for all model sizes, responses and
    ## estimates:
    if ("train" %in% which) {
        train.measured <- as.matrix(model.response(model.frame(object)))
        train.predicted <- object$fitted.values[,,ncomp, drop = FALSE]
    }
    if ("validation" %in% which) {
        if (is.null(object$validation)) stop("`object' has no `validation' component.")
        if(!exists("train.measured"))
            train.measured <- as.matrix(model.response(model.frame(object)))
        validation.predicted <- object$validation$pred[,,ncomp, drop = FALSE]
    }
    if ("test" %in% which) {
        if (missing(newdata)) stop("Missing `newdata'.")
        test.measured <- as.matrix(model.response(model.frame(formula(object),
                                                              data = newdata)))
        test.predicted <- predict(object, ncomp = ncomp, newdata = newdata)
    }
    ## Do the plots
    plotNo <- 0
    for (resp in 1:nResp) {
        for (size in 1:nSize) {
            for (est in 1:nEst) {
                plotNo <- plotNo + 1
                if (nPlots == 1 && !missing(main)) {
                    lmain <- main
                } else {
                    # per-panel title: "<response>, <k> comps, <type>"
                    lmain <- sprintf("%s, %d comps, %s",
                                     respnames(object)[resp],
                                     ncomp[size], which[est])
                }
                sub <- which[est]
                switch(which[est],
                       train = {
                           measured <- train.measured[,resp]
                           predicted <- train.predicted[,resp,size]
                       },
                       validation = {
                           measured <- train.measured[,resp]
                           predicted <- validation.predicted[,resp,size]
                       },
                       test = {
                           measured <- test.measured[,resp]
                           predicted <- test.predicted[,resp,size]
                       }
                       )
                xy <- predplotXYFunc(measured, predicted, main = lmain,
                                 font.main = font.main, cex.main = cex.main,
                                 xlab = xlab, ylab = ylab, ...)
                if (nPlots > 1 &&
                    (plotNo %% (nCols * nRows) == 0 || plotNo == nPlots)) {
                    ## Last plot on a page; add outer margin text and title:
                    mtext(mXlab, side = 1, outer = TRUE)
                    mtext(mYlab, side = 2, outer = TRUE)
                    if (!missing(main)) title(main = main, outer = TRUE)
                }
            }
        }
    }
    # NOTE(review): if the loops never execute (e.g. a zero dimension),
    # xy is undefined here — presumably unreachable for fitted models.
    invisible(xy)
}
# Adapted from pls::predplotXY: optionally labels points and draws the
# ideal y = x reference line, then invisibly returns the two-column
# matrix cbind(measured, predicted).
# NOTE(review): the plot() call below is commented out, so this function
# only draws onto whatever device/axes are already active (callers run
# predplot() first) — confirm this is intentional.
predplotXYFunc <- function(x, y, line = FALSE, labels, type = "p",
                       main = "Prediction plot", xlab = "measured response",
                       ylab = "predicted response", line.col = par("col"),
                       line.lty = NULL, line.lwd = NULL, ...)
{
    if (!missing(labels)) {
        ## Set up point labels
        if (length(labels) == 1) {
            labels <- switch(match.arg(labels, c("names", "numbers")),
                             names = names(y),
                             numbers = as.character(1:length(y))
                             )
        }
        ## Override plot type:
        type <- "n"
    }
    #plot(y ~ x, type = type, main = main, xlab = xlab, ylab = ylab, ...)
    if (!missing(labels)) text(x, y, labels, ...)
    if (line) abline(0, 1, col = line.col, lty = line.lty, lwd = line.lwd)
    invisible(cbind(measured = x, predicted = as.vector(y)))
}
#session$onSessionEnded(function() {
#stopApp()
#q("no")
# })
# Install any missing packages from the fixed list, then attach them all,
# returning a named logical vector (TRUE = attached) that the UI renders
# as the "pacL" table.
# NOTE(review): several entries (xcms, impute, siggenes, sva, ropls, RBGL,
# pcaMethods, preprocessCore) are Bioconductor packages, which
# install.packages() cannot fetch from CRAN — confirm they are
# pre-installed or switch to BiocManager::install().
LoadAllPackages<-function(){
packages <- c("xtable","ggplot2","shiny","rgl", "pca3d",
"ellipse", "scatterplot3d","pls", "caret","lattice",
"Cairo", "randomForest", "e1071","gplots", "som","RColorBrewer",
"genefilter", "pheatmap","preprocessCore","xcms",
"impute","siggenes","sva",
"ropls","RBGL","pcaMethods")
# install whatever is not already present, then attach everything
missing.pkgs <- setdiff(packages, installed.packages()[, "Package"])
if (length(missing.pkgs) > 0)
install.packages(missing.pkgs, dependencies = TRUE)
vapply(packages, require, logical(1), character.only = TRUE)
}
}
shinyApp(ui = ui, server = server)