--- title: "L5_DFA_MIIVsem" author: "Zachary Fisher & Kathleen M. Gates" output: pdf_document: default --- ```{r setup, include=FALSE, warning=FALSE,message=FALSE} knitr::opts_chunk$set(echo = TRUE) library(MIIVsem) library(lavaan) options(width=90) ``` ## Function for generating data. Below we introduce how to make a function. This function requires the user to input the structure and values of the relations in matrix form (i.e., psi, lambda, theta, and phi). The user must also specify an argument, "time", that indicates the length of the series. By default, the time series is padded by 100 observations. This helps to initialize the data generation; the time points after this are retained. ```{r, echo = TRUE} library(mvtnorm) # Function for generating data generateData <- function(psi, lambda, phi, theta, time, npad = 100){ ne <- nrow(psi) # Number of latent variables ny <- nrow(theta) # Number of manifest variables timepad <- time + npad # Generate LV time series # Latent variable residuals. zeta <- mvtnorm::rmvnorm(timepad, rep(0, ne), sigma = psi) # Set up matrix for contemporaneous variables. etaC <- matrix(0, nrow = ne, ncol = timepad) # Set up matrix for lagged variables. etaL <- matrix(0, nrow = ne, ncol = timepad + 1) etaL[,1] <- c(0,0) for (i in 1:(timepad)){ etaC[ ,i] <- phi %*% etaL[ ,i] + zeta[i, ] etaL[ ,i+1] <- etaC[,i] } etaC <- etaC[,(npad+1):(timepad)] etaL <- etaL[,(npad+1):(timepad)] eta <- t(etaC) # Generate observed variable time series # Measurement errors. epsilon <- rmvnorm(time, mean = rep(0, ny), sigma = theta) # Observed variables. y <- matrix(0, nrow = time, ncol = ny) for (p in 1:nrow(y)){ y[p, ] <- lambda %*% eta[p, ] + epsilon[p, ] } # Create block toeplitz data structure data <- embed(y,2) # create column names var.names <- c() for(j in 0:1){ for (i in 1:ny){ var.names <- c(var.names,paste0("y",i,"_L",j)) } } colnames(data) <- var.names return(data) } ``` \newpage ## Generate Data The data are generated to have the same pattern used in the book as well as in last lecture. However, now we are generating a lag of 2. ```{r, echo = TRUE} # Residual variance-covariance matrix psi <- matrix( c( 2.77, 2.47, 2.47, 8.40 ), nrow = 2, ncol = 2, byrow = T ) # Lambda matrix containing contemporaneous relations among # observed variables and 2 latent variables lambda <- matrix( c( 1, 0, 2, 0, 1, 0, 0, 1, 0, 2, 0, 1 ), nrow = 6, ncol = 2, byrow = TRUE ) # Measurement error variances. theta <- diag(.5, ncol = 6, nrow = 6) # Lagged directed relations among variables. phi <- matrix( c(0.5, 0.0, 0.4, 0.5), nrow = 2, ncol = 2, byrow = TRUE ) time <- 200 set.seed(123456) data <- generateData(psi, lambda, phi, theta, time, npad = 100) ``` \newpage ##Process Factor Analysis Models Below we define the model structures for the correctly and the incorrectly specified models. The incorrectly specified model removes the cross loadings from observed variables 5 and 2. ```{r, echo = TRUE} model.correct <- ' eta1_L0 =~ y1_L0 + l1*y2_L0 + l2*y3_L0 eta2_L0 =~ y4_L0 + l4*y5_L0 + l5*y6_L0 eta1_L1 =~ y1_L1 + l1*y2_L1 + l2*y3_L1 eta2_L1 =~ y4_L1 + l4*y5_L1 + l5*y6_L1 eta1_L0 ~ eta1_L1 eta2_L0 ~ eta2_L1 eta2_L0 ~ eta1_L1 ' model.misspecified <- ' eta1_L0 =~ y1_L0 + l1*y2_L0 + l2*y3_L0 + l3*y5_L0 eta2_L0 =~ y4_L0 + l4*y5_L0 + l5*y6_L0 + l6*y2_L0 eta1_L1 =~ y1_L1 + l1*y2_L1 + l2*y3_L1 + l3*y5_L1 eta2_L1 =~ y4_L1 + l4*y5_L1 + l5*y6_L1 + l6*y2_L1 eta2_L0 ~ eta2_L1 eta2_L0 ~ eta1_L1 eta1_L0 ~ eta2_L1 ' ``` ##Fit Process Factor Analysis Models using MIIVsem ###Correctly Specified Model ```{r, echo = TRUE} fit.miiv.cor <- MIIVsem::miive(model.correct, data = data) fit.miiv.cor ``` ###Misspecified Model Here we might expect to see problems with the $\Lambda_{21}$ and $\Lambda_{52}$ parameters at both lags. ```{r, echo = TRUE} fit.miiv.mis <- MIIVsem::miive(model.misspecified, data = data) fit.miiv.mis ``` \newpage ## Fit Process Factor Analysis Models in lavaan ```{r, echo = TRUE} fit.lava.cor <- lavaan::sem(model.correct, data = data) fit.lava.mis <- lavaan::sem(model.misspecified, data = data) ``` ###Compare lavaan and MIIVsem Estimates ```{r, echo = TRUE, warning=FALSE, message=FALSE} # Pull the MIIV parameter estimates. fit.miiv.cor.et <- MIIVsem::estimatesTable(fit.miiv.cor) fit.miiv.mis.et <- MIIVsem::estimatesTable(fit.miiv.mis) # Pull the lavaan paramter estimates. fit.lava.cor.pt <- lavaan::parameterTable(fit.lava.cor) fit.lava.mis.pt <- lavaan::parameterTable(fit.lava.mis) # Create a pretty table. keep.cols <- c("lhs", "op", "rhs", "est") res.miiv <- merge(fit.miiv.cor.et[,keep.cols],fit.miiv.mis.et[,keep.cols], by = c("lhs","op","rhs")) res.lava <- merge(fit.lava.cor.pt[,keep.cols],fit.lava.mis.pt[,keep.cols], by = c("lhs","op","rhs")) res.all <- merge(res.miiv,res.lava, by = c("lhs","op","rhs")) colnames(res.all) <- c("lhs","op","rhs","miiv.cor","miiv.mis","lava.cor","lava.mis") col.order <- c("lhs","op","rhs","pop","miiv.cor","miiv.mis","lava.cor","lava.mis") res.all <- res.all[!grepl("L1",res.all$lhs) & res.all$op %in% c("~","=~"),] res.all[,c(4:7)] <- round(res.all[,c(4:7)],2) ```