---
title: "Principal Component Analysis (PCA)"
author: "Eric Bridgeford"
date: "`r Sys.Date()`"
output: rmarkdown::html_vignette
vignette: >
  %\VignetteIndexEntry{pca}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---

```{r}
require(lolR)
require(ggplot2)
require(MASS)
n=400
d=30
r=3
```

Data for this notebook will be `n=400` examples of `d=30` dimensions.

# PCA

## Stacked Cigar Simulation


We first visualize the first `2` dimensions:

```{r, fig.width=5}
testdat <- lol.sims.cigar(n, d)
X <- testdat$X
Y <- testdat$Y

data <- data.frame(x1=X[,1], x2=X[,2], y=Y)
data$y <- factor(data$y)
ggplot(data, aes(x=x1, y=x2, color=y)) +
  geom_point() +
  xlab("x1") +
  ylab("x2") +
  ggtitle("Simulated Data")
```

Projecting with PCA to `3` dimensions and visualizing the first `2`:

```{r, fig.width=5}
result <- lol.project.pca(X, r)

data <- data.frame(x1=result$Xr[,1], x2=result$Xr[,2], y=Y)
data$y <- factor(data$y)
ggplot(data, aes(x=x1, y=x2, color=y)) +
  geom_point() +
  xlab("x1") +
  ylab("x2") +
  ggtitle("Projected Data using PCA")
```

Projecting with LDA to `K-1=1` dimensions:

```{r, fig.width=5}
liney <- MASS::lda(result$Xr, Y)
result <- predict(liney, result$Xr)
lhat <- 1 - sum(result$class == Y)/length(Y)

data <- data.frame(x1=result$x[,1], y=Y)
data$y <- factor(data$y)
ggplot(data, aes(x=x1, fill=y)) +
  geom_density(adjust=1.5, alpha=0.6) +
  xlab("$x_1$") +
  ylab("Density") +
  ggtitle(sprintf("PCA-LDA, L = %.2f", lhat))
```

## Trunk Simulation

We visualize the first `2` dimensions:

```{r, fig.width=5}
testdat <- lol.sims.rtrunk(n, d)
X <- testdat$X
Y <- testdat$Y

data <- data.frame(x1=X[,1], x2=X[,2], y=Y)
data$y <- factor(data$y)
ggplot(data, aes(x=x1, y=x2, color=y)) +
  geom_point() +
  xlab("x1") +
  ylab("x2") +
  ggtitle("Simulated Data")
```

Projecting with PCA to `3` dimensions and visualizing the first `2`:

```{r, fig.width=5}
result <- lol.project.pca(X, r)

data <- data.frame(x1=result$Xr[,1], x2=result$Xr[,2], y=Y)
data$y <- factor(data$y)
ggplot(data, aes(x=x1, y=x2, color=y)) +
  geom_point() +
  xlab("x1") +
  ylab("x2") +
  ggtitle("Projected Data using PCA")
```

Projecting with LDA to `K-1=1` dimensions:

```{r, fig.width=5}
liney <- MASS::lda(result$Xr, Y)
result <- predict(liney, result$Xr)
lhat <- 1 - sum(result$class == Y)/length(Y)

data <- data.frame(x1=result$x[,1], y=Y)
data$y <- factor(data$y)
ggplot(data, aes(x=x1, fill=y)) +
  geom_density(adjust=1.5, alpha=0.6) +
  xlab("x1") +
  ylab("Density") +
  ggtitle(sprintf("PCA-LDA, L = %.2f", lhat))
```

## Rotated Trunk Simulation

We visualize the first `2` dimensions:

```{r, fig.width=5}
testdat <- lol.sims.rtrunk(n, d, rotate=TRUE)
X <- testdat$X
Y <- testdat$Y

data <- data.frame(x1=X[,1], x2=X[,2], y=Y)
data$y <- factor(data$y)
ggplot(data, aes(x=x1, y=x2, color=y)) +
  geom_point() +
  xlab("x1") +
  ylab("x2") +
  ggtitle("Simulated Data")
```

Projecting with PCA to `3` dimensions and visualizing the first `2`:

```{r, fig.width=5}
result <- lol.project.pca(X, r)

data <- data.frame(x1=result$Xr[,1], x2=result$Xr[,2], y=Y)
data$y <- factor(data$y)
ggplot(data, aes(x=x1, y=x2, color=y)) +
  geom_point() +
  xlab("x1") +
  ylab("x2") +
  ggtitle("Projected Data using PCA")
```

Projecting with LDA to `K-1=1` dimensions:

```{r, fig.width=5}
liney <- MASS::lda(result$Xr, Y)
result <- predict(liney, result$Xr)
lhat <- 1 - sum(result$class == Y)/length(Y)

data <- data.frame(x1=result$x[,1], y=Y)
data$y <- factor(data$y)
ggplot(data, aes(x=x1, fill=y)) +
  geom_density(adjust=1.5, alpha=0.6) +
  xlab("x1") +
  ylab("Density") +
  ggtitle(sprintf("PCA-LDA, L = %.2f", lhat))
```

