---
title: "L5_DFA_MIIVsem"
author: "Zachary Fisher & Kathleen M. Gates"
output:
  pdf_document: default
---

```{r setup, include=FALSE, warning=FALSE,message=FALSE}
knitr::opts_chunk$set(echo = TRUE)
library(MIIVsem)
library(lavaan)
options(width=90)
```

## Function for generating data.

Below we introduce how to make a function. This function requires the user to input the structure and values of the relations in matrix form (i.e., psi, lambda, theta, and phi). The user must also specify an argument, "time", that indicates the length of the series. 

By default, the time series is padded by 100 observations. This helps to initialize the data generation; the time points after this are retained. 

```{r, echo = TRUE}

library(mvtnorm)

# Function for generating data
generateData <- function(psi, lambda, phi, theta, time, npad = 100){
  ne        <- nrow(psi)   # Number of latent variables
  ny        <- nrow(theta) # Number of manifest variables
  timepad   <- time + npad 
  # Generate LV time series
  #  Latent variable residuals.
  zeta <- mvtnorm::rmvnorm(timepad, rep(0, ne), sigma = psi)
  #  Set up matrix for contemporaneous variables. 
  etaC <- matrix(0, nrow = ne, ncol = timepad) 
  #  Set up matrix for lagged variables. 
  etaL <- matrix(0, nrow = ne, ncol = timepad + 1)           
  etaL[,1]  <- c(0,0)
  for (i in 1:(timepad)){
    etaC[ ,i]   <- phi %*% etaL[ ,i] + zeta[i, ]
    etaL[ ,i+1] <- etaC[,i]
  }
  etaC <- etaC[,(npad+1):(timepad)]
  etaL <- etaL[,(npad+1):(timepad)]
  eta  <- t(etaC)
  # Generate observed variable time series
  #  Measurement errors.
  epsilon   <- rmvnorm(time, mean = rep(0, ny), sigma = theta)      
  #  Observed variables.
  y   <- matrix(0, nrow = time, ncol = ny)
  for (p in 1:nrow(y)){
    y[p, ] <- lambda %*% eta[p, ] + epsilon[p, ]
  }
  # Create block toeplitz data structure
  data <- embed(y,2)
  # create column names
  var.names <- c()
  for(j in 0:1){
    for (i in 1:ny){
        var.names <- c(var.names,paste0("y",i,"_L",j))
    }
  }
  colnames(data) <- var.names
  return(data)
}


```

\newpage 

## Generate Data
The data are generated to have the same pattern used in the book as well as in last lecture. However, now we are generating a lag of 2.

```{r, echo = TRUE}
# Residual variance-covariance matrix
psi <- matrix(
  c( 2.77, 2.47,  
     2.47, 8.40 ), 
  nrow = 2, ncol = 2, byrow = T
)

# Lambda matrix containing contemporaneous relations among 
# observed variables and 2 latent variables
lambda <- matrix(
  c( 1, 0,         
     2, 0,        
     1, 0, 
     0, 1, 
     0, 2, 
     0, 1 ), 
  nrow = 6, ncol = 2, byrow = TRUE
)

 # Measurement error variances. 
theta     <- diag(.5, ncol = 6, nrow = 6) 

# Lagged directed relations among variables. 
phi      <- matrix(
  c(0.5, 0.0,    
    0.4, 0.5), 
  nrow = 2, ncol = 2, byrow = TRUE
)

time <- 200

set.seed(123456)

data <- generateData(psi, lambda, phi, theta, time, npad = 100)
```

\newpage

##Process Factor Analysis Models
Below we define the model structures for the correctly and the incorrectly specified models. The incorrectly specified model removes the cross loadings from observed variables 5 and 2.

```{r, echo = TRUE}
model.correct <- '
  eta1_L0 =~ y1_L0 + l1*y2_L0 + l2*y3_L0 
  eta2_L0 =~ y4_L0 + l4*y5_L0 + l5*y6_L0
  eta1_L1 =~ y1_L1 + l1*y2_L1 + l2*y3_L1 
  eta2_L1 =~ y4_L1 + l4*y5_L1 + l5*y6_L1 
  eta1_L0 ~ eta1_L1
  eta2_L0 ~ eta2_L1
  eta2_L0 ~ eta1_L1
'

model.misspecified <- '
  eta1_L0 =~ y1_L0 + l1*y2_L0 + l2*y3_L0 + l3*y5_L0
  eta2_L0 =~ y4_L0 + l4*y5_L0 + l5*y6_L0 + l6*y2_L0
  eta1_L1 =~ y1_L1 + l1*y2_L1 + l2*y3_L1 + l3*y5_L1
  eta2_L1 =~ y4_L1 + l4*y5_L1 + l5*y6_L1 + l6*y2_L1
  eta2_L0 ~ eta2_L1
  eta2_L0 ~ eta1_L1
  eta1_L0 ~ eta2_L1
'

```

##Fit Process Factor Analysis Models using MIIVsem

###Correctly Specified Model

```{r, echo = TRUE}
fit.miiv.cor <- MIIVsem::miive(model.correct, data = data)
fit.miiv.cor
```


###Misspecified Model

Here we might expect to see problems with the $\Lambda_{21}$ and $\Lambda_{52}$ parameters at both lags.

```{r, echo = TRUE}
fit.miiv.mis <- MIIVsem::miive(model.misspecified, data = data)
fit.miiv.mis
```

\newpage
## Fit Process Factor Analysis Models in lavaan
```{r, echo = TRUE}
fit.lava.cor    <- lavaan::sem(model.correct, data = data)
fit.lava.mis    <- lavaan::sem(model.misspecified, data = data)
```

###Compare lavaan and MIIVsem Estimates

```{r, echo = TRUE, warning=FALSE, message=FALSE}
# Pull the MIIV parameter estimates. 
fit.miiv.cor.et <- MIIVsem::estimatesTable(fit.miiv.cor)
fit.miiv.mis.et <- MIIVsem::estimatesTable(fit.miiv.mis)
# Pull the lavaan paramter estimates. 
fit.lava.cor.pt <- lavaan::parameterTable(fit.lava.cor)
fit.lava.mis.pt <- lavaan::parameterTable(fit.lava.mis)
# Create a pretty table. 
keep.cols       <- c("lhs", "op", "rhs", "est")
res.miiv <- merge(fit.miiv.cor.et[,keep.cols],fit.miiv.mis.et[,keep.cols], by = c("lhs","op","rhs"))
res.lava <- merge(fit.lava.cor.pt[,keep.cols],fit.lava.mis.pt[,keep.cols], by = c("lhs","op","rhs"))
res.all  <- merge(res.miiv,res.lava, by = c("lhs","op","rhs"))
colnames(res.all) <- c("lhs","op","rhs","miiv.cor","miiv.mis","lava.cor","lava.mis")
col.order <- c("lhs","op","rhs","pop","miiv.cor","miiv.mis","lava.cor","lava.mis")
res.all  <- res.all[!grepl("L1",res.all$lhs) & res.all$op %in% c("~","=~"),]
res.all[,c(4:7)]  <- round(res.all[,c(4:7)],2)
```