Psych Package In R Download
Contents
- 1 Chapter 1: Introduction to R
- 1.1 Input data using c() function
- 1.2 Input covariance matrix
- 1.3 Summary statistics
- 1.4 Simulated data
- 1.5 Z scores using the scale() function
- 1.6 Statistical tests
- 2 Chapter 2: Path Models and Analysis
- 2.1 Example: Path Analysis using lavaan
- 2.1.1 Example: Indirect Effects
- 2.2 Create output table
- 2.1 Example: Path Analysis using lavaan
- 3 Chapter 3: Basic Latent Variable Models
- 3.1 Example: Single factor model of WISC-IV data
- 3.1.1 Marker variable
- 3.1.2 Standardized latent variable
- 3.1.3 Effects coding
- 3.2 Example: Two-factor model of WISC-IV data
- 3.2.1 Structure coefficients
- 3.3 Example: Structural equation model
- 3.1 Example: Single factor model of WISC-IV data
- 4 Chapter 4: Latent Variable Models with Multiple Groups
- 4.1 Example: Multiple-group model examining invariance
- 4.1.1 Input data
- 4.1.2 Fit the model
- 4.2 Example: Behavior genetic analysis
- 4.1 Example: Multiple-group model examining invariance
- 5 Chapter 5: Models with Multiple Time Periods
- 5.1 Example: Latent curve model
- 5.1.1 Import data
- 5.1.2 Basic latent curve model specification
- 5.1.3 Latent curve models with covariates
- 5.1.4 Alternative specification of latent intercept and slope
- 5.1 Example: Latent curve model
- 6 Chapter 6: Models with Dichotomous Indicator Variables
- 6.1 Example: dichotomous indicator variables
- 6.1.1 Import data
- 6.1.2 Item response theory models
- 6.1.3 Item factor analysis models
- 6.1 Example: dichotomous indicator variables
- 7 Chapter 7: Missing Data
- 7.1 Exploring missing values
- 7.2 Handling missing values
- 7.2.1 Auxiliary variable
- 8 Chapter 8: Sample Size Planning
- 8.1 Example: Mean difference between two groups
- 8.2 Example: Latent curve model
- 8.3 Example: Latent curve model with attrition
- 9 Chapter 9: Hierarchical Latent Variable Models
- 9.1 Example: WISC-IV extended data
- 9.1.1 Import data
- 9.1.2 Correlated factors model
- 9.1.3 Higher-order model
- 9.1.4 Bi-factor model
- 9.1 Example: WISC-IV extended data
Chapter 1: Introduction to R
Input data using c() function
# create new dataset newData <- c(4,5,3,6,9)
Input covariance matrix
# load lavaan library(lavaan) # input covariances example.cor <- lower2full(c(1, 0.85, 1, 0.84, 0.61, 1, 0.68, 0.59, 0.41, 1)) # name the rows and columns rownames(example.cor) <- colnames(example.cor) <- c("Var1", "Var2", "Var3", "Var4")
Summary statistics
# load the MLBPitching2011 dataset from the BaylorEdPsych package library(BaylorEdPsych) data(MLBPitching2011) # summary statistics summary(MLBPitching2011$ERAP) library(psych) describe(MLBPitching2011$ERAP) # frequency of each value of ERAP variable table(MLBPitching2011$ERAP) # frequency table # make cut points for frequency table groupings--here I used 50 boundaries <- seq(0,550,50) # frequency table table(cut(MLBPitching2011$ERAP, boundaries)) # relative frequency table table(cut(MLBPitching2011$ERAP, boundaries)) / length(MLBPitching2011$ERAP) # Pearson correlations for losses (L) and Age cor(MLBPitching2011$Age,MLBPitching2011$L) # Spearman correlation cor(MLBPitching2011$Age,MLBPitching2011$L, method="spearman") # covariance for losses (L) and Age cov(MLBPitching2011$Age,MLBPitching2011$L)
Simulated data
# simulate 1000 observations from Normal distribution with a mean of 100, and SD of 15 X.n<-rnorm(1000,mean=100,sd=15) # simulate 1000 observations from a Poisson distribution with a mean and variance of 2 X.p<-rpois(1000,lambda=2) # calculate mean and variance of Normal data mean(X.n); var(X.n) # calculate mean and variance of Poisson data mean(X.p); var(X.p) # plot the distributions m1 <- seq(-4,4, .01)*15+100 m2 <- seq(0,15, 1) plot(m1, dnorm(m1, mean=100, sd=15), type="l", col="black", lwd=3, ylab="Density", xlab="Value", main="Normal") plot(m2, dpois(m2, lambda=2), type="b", lwd=3, xlim=c(-1,15), xlab="Value", ylab="Density", main="Poisson")
Z scores using the scale() function
# Normal variable Z.X.n <- scale(X.n) # Poisson variable Z.X.p <- scale(X.p) # calculate mean and variance of Normal Z-scores mean(Z.X.n); var(Z.X.n) # calculate mean and variance of Poisson Z-scores mean(Z.X.p); var(Z.X.p)
Statistical tests
# uses BaylorEdPsych package's MLBPitching2011 data # Z test library(TeachingDemos) z.test(na.omit(MLBPitching2011$WLP), mu=0.50, stdev=sqrt(.08)) # one sample t-test t.test(MLBPitching2011$WLP, mu=.50, alternative="two.sided", conf.level=.95) # independent samples t-test t.test(WLP~Lg, data=MLBPitching2011, na.rm=TRUE, var.equal=TRUE) # dependent samples t-test t.test(MLBPitching2011$W, MLBPitching2011$L, paired=TRUE) # homogeneity of variance tests # F-test var.test(WLP[Lg=="NL"], WLP[Lg=="AL"], na.rm=TRUE) # Bartlett's Test bartlett.test(WLP,Lg) library(car) # Levene's test leveneTest(WLP, Lg,center="mean") # Brown-Forsyth Test leveneTest(WLP,Lg,center="median")
Chapter 2: Path Models and Analysis
Example: Path Analysis using lavaan
# create a correlation matrix library(lavaan) regression.cor <- lower2full(c(1.0,0.20,1,0.24,0.30,1,0.70,0.80,0.30,1)) # name the variables in the matrix colnames(regression.cor) <- rownames(regression.cor) <- c("X1", "X2", "X3", "Y") # model syntax regression.model <-' # structural model for Y Y ~ a*X1 + b*X2 + c*X3 # label the residual variance of Y Y ~~ z*Y ' # fit the model regression.fit <- sem(regression.model, sample.cov=regression.cor, sample.nobs=1000) summary(regression.fit, rsquare=TRUE)
Example: Indirect Effects
# input data beaujean.cov <- lower2full(c(648.07, 30.05, 8.64, 140.18, 25.57, 233.21)) colnames(beaujean.cov) <- rownames(beaujean.cov) <- c("salary", "school", "iq") # specify the path model beaujean.model <- ' salary ~ a*school + c*iq iq ~ b*school # this is reversed in first printing of the book ind:= b*c ' # estimate parameters beaujean.fit <- sem(beaujean.model, sample.cov=beaujean.cov, sample.nobs=300) summary(beaujean.fit)
Create output table
xtable(parameterEstimates(regression.fit, standardized=TRUE)[,c(1:3,5:6,12)], caption="Parameter Estimates from Path Analysis Model.", label="tab:path-analysis-estimates")
Chapter 3: Basic Latent Variable Models
Example: Single factor model of WISC-IV data
Marker variable
# import data library(lavaan) # convert vector of correlations into matrix wisc4.cor <- lower2full(c(1,0.72,1,0.64,0.63,1,0.51,0.48,0.37,1,0.37,0.38,0.38,0.38,1)) # enter the SDs wisc4.sd <- c(3.01 , 3.03 , 2.99 , 2.89 , 2.98) # name the variables colnames(wisc4.cor) <- rownames(wisc4.cor) <- c("Information", "Similarities", "Word.Reasoning", "Matrix.Reasoning", "Picture.Concepts") names(wisc4.sd) <- c("Information", "Similarities", "Word.Reasoning", "Matrix.Reasoning", "Picture.Concepts") # convert correlations and SDs to covarainces wisc4.cov <- cor2cov(wisc4.cor,wisc4.sd) # specify single factor model wisc4.model<-' g =~ a*Information + b*Similarities + c*Word.Reasoning + d*Matrix.Reasoning + e*Picture.Concepts ' # fit model wisc4.fit <- cfa(model=wisc4.model, sample.cov=wisc4.cov, sample.nobs=550, std.lv=FALSE) # examine parameter estimates summary(wisc4.fit,standardized=TRUE) parameterEstimates(wisc4.fit,standardized=TRUE) # check model # model-implied covariances fitted(wisc4.fit) # transform model-implied covariances to correlations wisc4Fit.cov <- fitted(wisc4.fit)$cov wisc4Fit.cor <- cov2cor(wisc4Fit.cov) # residual correlations residuals(wisc4.fit,type="cor") # measures of model fit fitMeasures(wisc4.fit) # modification indices modificationIndices(wisc4.fit)
Standardized latent variable
# method 1 wisc4.model.Std<-' g =~ NA*Information + a*Information + b*Similarities + c*Word.Reasoning + d*Matrix.Reasoning + e*Picture.Concepts # constrain the LV variance to 1 g~~1*g ' wisc4.fit.Std <- cfa(wisc4.model.Std, sample.cov=wisc4.cov, sample.nobs=550) # method 2 wisc4.fit.Std <- cfa(wisc4.model, sample.cov=wisc4.cov, sample.nobs=550, std.lv=TRUE)
Effects coding
wisc4.model.effects<-' g =~ NA*Information + a*Information + b*Similarities + c*Word.Reasoning + d*Matrix.Reasoning + e*Picture.Concepts # constrain the loadings to sum to one a + b + c + d + e == 5 ' wisc4.fit.effects <- cfa(wisc4.model.effects, sample.cov=wisc4.cor, sample.nobs=550)
Example: Two-factor model of WISC-IV data
wisc4.model2<-' V =~ a*Information + b*Similarities + c*Word.Reasoning F =~ d*Matrix.Reasoning + e*Picture.Concepts V~~f*F ' wisc4.fit2 <- cfa(wisc4.model2, sample.cov=wisc4.cov, sample.nobs=550)
Structure coefficients
wisc4.fit2Alt <- cfa(wisc4.model2, sample.cov=wisc4.cor, sample.nobs=550, std.lv=TRUE) wisc4.est2 <- inspect(wisc4.fit2Alt, what="coef") wisc4.structure2 <- wisc4.est2$lambda %*% wisc4.est2$psi
Example: Structural equation model
wisc4SEM.model <- ' # define latent variables V =~ a*Information + b*Similarities + c*Word.Reasoning F =~ d*Matrix.Reasoning + e*Picture.Concepts # define structural relations V~k*F ' wisc4SEM.fit <- cfa(wisc4SEM.model, sample.cov=wisc4.cov, sample.nobs=550)
Chapter 4: Latent Variable Models with Multiple Groups
Example: Multiple-group model examining invariance
Input data
# input data # variable names wisc3.names <- c("Info", "Sim", "Vocab","Comp", "PicComp", "PicArr", "BlkDsgn", "ObjAsmb") # group with manic symptoms ## covariances manic.cov <- c(9.364, 7.777, 12.461, 6.422, 8.756, 10.112, 5.669, 7.445, 6.797, 8.123, 3.048, 4.922, 4.513, 4.116, 6.200, 3.505, 4.880, 4.899, 5.178, 5.114, 15.603, 3.690, 5.440, 5.220, 3.151, 3.587, 6.219, 11.223, 3.640, 4.641, 4.877, 3.568, 3.819, 5.811, 6.501, 9.797) manic.cov <- lower2full(manic.cov) ## means manic.means <- c(10.09, 12.07, 10.25, 9.96, 10.90, 11.24, 10.30, 10.44) ## label the covariances and means colnames(manic.cov) <- rownames(manic.cov) <- wisc3.names names(manic.means) <- wisc3.names # norming group ## covariances norming.cov <- c(9.610, 5.844, 8.410, 6.324, 6.264, 9.000, 4.405, 4.457, 5.046, 8.410, 4.464, 4.547, 4.512, 3.712, 10.240, 3.478, 2.967, 2.970, 2.871, 3.802, 10.890, 5.270, 4.930, 4.080, 3.254, 5.222, 3.590, 11.560, 4.297, 4.594, 4.356, 3.158, 4.963, 3.594, 6.620, 10.890) norming.cov <- lower2full(norming.cov) ## means norming.means <- c(10.10, 10.30, 9.80, 10.10, 10.10, 10.10, 9.90, 10.20) ## label the covariances and means colnames(norming.cov) <- rownames(norming.cov) <- wisc3.names names(norming.means) <- wisc3.names # combine the covariance matrices, sample sizes, and means into single list objects combined.cov <- list(manic=manic.cov, norming=norming.cov) combined.n <- list(manic=81, norming=200) combined.means <- list(manic=manic.means, norming=norming.means)
Fit the model
# specify fit indices of interest fit.indices <- c("chisq", "df", "cfi", "rmsea", "srmr", "mfi") # specify model wisc3.model <-' VC =~ Info + Sim + Vocab + Comp VS =~ PicComp + PicArr + BlkDsgn + ObjAsmb VC ~~ VS ' # fit separate models for both groups # group with manic symptoms manic.fit <- cfa(wisc3.model, sample.cov=manic.cov, sample.nobs=81, sample.mean=manic.means, meanstructure=TRUE) fitMeasures(manic.fit, fit.indices) # norming group norming.fit <- cfa(wisc3.model, sample.cov=norming.cov, sample.nobs=200, sample.mean=norming.means, , meanstructure=TRUE) fitMeasures(norming.fit, fit.indices) # configural invariance configural.fit <- cfa(wisc3.model, sample.cov=combined.cov, sample.nobs=combined.n, sample.mean=combined.means) fitMeasures(configural.fit, fit.indices) # weak invariance weak.fit <- cfa(wisc3.model, sample.cov=combined.cov, sample.nobs=combined.n, sample.mean=combined.means, group.equal=c("loadings")) fitMeasures(weak.fit, fit.indices) # strong invariance strong.fit <- cfa(wisc3.model, sample.cov=combined.cov, sample.nobs=combined.n, sample.mean=combined.means, group.equal=c("loadings", "intercepts")) fitMeasures(strong.fit, fit.indices) # show modification indices for strong invariance model in descending order strong.mi <- modindices(strong.fit) strong.mi <- strong.mi[strong.mi$op=="~1",] strong.mi[order(strong.mi$mi, decreasing=TRUE),] # strong invariance 2: intercepts for Similarities subtest is freely estimated strong.fit2 <- cfa(wisc3.model, sample.cov=combined.cov, sample.nobs=combined.n, sample.mean=combined.means, group.equal=c("loadings", "intercepts"), group.partial=c("Sim~1")) fitMeasures(strong.fit2, fit.indices) # partial strict invariance strict.fit <- cfa(wisc3.model, sample.cov=combined.cov, sample.nobs=combined.n, sample.mean=combined.means, group.equal=c("loadings", "intercepts", "residuals"), group.partial=c("Sim~1", "Sim~~Sim")) fitMeasures(strict.fit, fit.indices) # partial strict invariance 2: residual variances for Picture Completion, Comprehension, and Picture Arrangement subtests are freely estimated strict.fit2 <- cfa(wisc3.model, sample.cov=combined.cov, sample.nobs=combined.n, sample.mean=combined.means, group.equal=c("loadings", "intercepts", "residuals"), group.partial=c("Sim~1", "Sim~~Sim", "PicComp~~PicComp", "Comp~~Comp", "PicArr~~PicArr")) fitMeasures(strict.fit2, fit.indices) # latent variances factor.var.fit <- cfa(wisc3.model, sample.cov=combined.cov, sample.nobs=combined.n, sample.mean=combined.means, group.equal=c("loadings", "intercepts", "residuals", "lv.variances"), group.partial=c("Sim~1", "Sim~~Sim", "PicComp~~PicComp", "Comp~~Comp", "PicArr~~PicArr")) fitMeasures(factor.var.fit, fit.indices) # latent covariances factor.covar.fit <- cfa(wisc3.model, sample.cov=combined.cov, sample.nobs=combined.n, sample.mean=combined.means, group.equal=c("loadings", "intercepts", "residuals", "lv.variances", "lv.covariances"), group.partial=c("Sim~1", "Sim~~Sim", "PicComp~~PicComp", "Comp~~Comp", "PicArr~~PicArr")) fitMeasures(factor.covar.fit, fit.indices) # latent means factor.means.fit <- cfa(wisc3.model, sample.cov=combined.cov, sample.nobs=combined.n, sample.mean=combined.means, group.equal=c("loadings", "intercepts", "residuals", "lv.variances", "lv.covariances", "means"), group.partial=c("Sim~1", "Sim~~Sim", "PicComp~~PicComp", "Comp~~Comp", "PicArr~~PicArr")) fitMeasures(factor.means.fit, fit.indices)
Example: Behavior genetic analysis
# import data ## MZ twins MZ <- lower2full(c(.725,.589,.792)) rownames(MZ) <- colnames(MZ) <- c("P1", "P2") ## DZ twins DZ <- lower2full(c(.779,.246,.837)) rownames(DZ) <- colnames(DZ) <- c("P1", "P2") # combine the covariances and sample sizes bmi.cov <- list(MZ=MZ,DZ=DZ) bmi.n <- list(MZ=534,DZ=328) # specify ADE model bmi.ade.model<-' # build the factor model with group constraints A1=~ NA*P1 + c(a,a)*P1 + c(.5,.5)*P1 A2=~ NA*P2 + c(a,a)*P2 + c(.5,.5)*P2 D1 =~ NA*P1 + c(d,d)*P1 D2 =~ NA*P2 + c(d,d)*P2 # constrain the factor variances A1 ~~ 1*A1 A2 ~~ 1*A2 D1 ~~ 1*D1 D2 ~~ 1*D2 P1~~c(e2,e2)*P1 P2~~c(e2,e2)*P2 # constrain the factor covariances A1 ~~ c(1,.5)*A2 A1 ~~ 0*D1 + 0*D2 A2 ~~ 0*D1 + 0*D2 D1 ~~ c(1,.25)*D2 ' # fit model bmi.ade.fit <- cfa(bmi.ade.model, sample.cov=bmi.cov, sample.nobs=bmi.n) summary(bmi.ade.fit, standardized=TRUE)
Chapter 5: Models with Multiple Time Periods
Example: Latent curve model
Import data
Download LCM.zip file from UCLA web page. The nypa95_listwise.sav file is in the SPSS folder of the the chapter02 folder.
library(foreign) crime.data <- read.spss("nypa95_listwise.sav", to.data.frame=TRUE) crime.data <- na.omit(crime.data) # make state x centered poverty interaction variable crime.data$STATEPOV <- crime.data$PA * crime.data$CPV12590 # dataset 1 time <- c("JANFEB95", "MARAPR95", "MAYJUN95","JLYAUG95") crime.cov <- cov(crime.data[time]) crime.mean <- colMeans(crime.data[time]) names(crime.mean) <- colnames(crime.cov) <- rownames(crime.cov) <- c("Time1", "Time2", "Time3", "Time4") # dataset 2 crime.vars <- c("JANFEB95", "MARAPR95", "MAYJUN95","JLYAUG95", "PA", "CPV12590", "STATEPOV") crime2.cov <- cov(crime.data[crime.vars]) crime2.mean <- colMeans(crime.data[crime.vars]) names(crime2.mean) <- colnames(crime2.cov) <- rownames(crime2.cov) <- c("Time1", "Time2", "Time3", "Time4", "State", "Poverty", "StPov")
Basic latent curve model specification
library(lavaan) # mean latent intercept and constrained residual variances crime.model1 <- ' # intercept i =~ 1*Time1 + 1*Time2 + 1*Time3 + 1*Time4 i~~0*i # residual variances Time1~~r*Time1 Time2~~r*Time2 Time3~~r*Time3 Time4~~r*Time4 ' crime.fit1 <- growth(crime.model1, sample.cov=crime.cov, sample.mean=crime.mean, sample.nobs=952) # mean latent intercept that is allowed to vary, and constrained residual variances crime.model2 <- ' # intercept i =~ 1*Time1 + 1*Time2 + 1*Time3 + 1*Time4 # residual variances Time1~~r*Time1 Time2~~r*Time2 Time3~~r*Time3 Time4~~r*Time4 ' crime.fit2 <- growth(crime.model2, sample.cov=crime.cov, sample.mean=crime.mean, sample.nobs=952) # mean latent intercept that is allowed to vary, mean latent slope, and constrained residual variances crime.model3 <- ' # intercept i =~ 1*Time1 + 1*Time2 + 1*Time3 + 1*Time4 # slope s =~ 0*Time1 + 1*Time2 + 2*Time3 + 3*Time4 s~0*1 s~~0*i # residual variances Time1~~r*Time1 Time2~~r*Time2 Time3~~r*Time3 Time4~~r*Time4 ' crime.fit3 <- growth(crime.model3, sample.cov=crime.cov, sample.mean=crime.mean, sample.nobs=952) # mean latent intercept that is allowed to vary, mean latent slope that is allowed to vary, and constrained residual variances crime.model4 <- ' # intercept i =~ 1*Time1 + 1*Time2 + 1*Time3 + 1*Time4 # slope s =~ 0*Time1 + 1*Time2 + 2*Time3 + 3*Time4 # residual variances Time1~~r*Time1 Time2~~r*Time2 Time3~~r*Time3 Time4~~r*Time4 ' crime.fit4 <- growth(crime.model4, sample.cov=crime.cov, sample.mean=crime.mean, sample.nobs=952) # unconstrained model crime.model5 <- ' # intercept i =~ 1*Time1 + 1*Time2 + 1*Time3 + 1*Time4 # slope s =~ 0*Time1 + 1*Time2 + 2*Time3 + 3*Time4 ' crime.fit5 <- growth(crime.model5, sample.cov=crime.cov, sample.mean=crime.mean, sample.nobs=952)
Latent curve models with covariates
# State is a predictor variable crime.model6 <- ' # intercept i =~ 1*Time1 + 1*Time2 + 1*Time3 + 1*Time4 # slope s =~ 0*Time1 + 1*Time2 + 2*Time3 + 3*Time4 # regression i + s ~ State ' crime.fit6 <- growth(crime.model6, sample.cov=crime2.cov, sample.mean=crime2.mean, sample.nobs=952) # State and poverty are predictors crime.model7 <- ' # intercept i =~ 1*Time1 + 1*Time2 + 1*Time3 + 1*Time4 # slope s =~ 0*Time1 + 1*Time2 + 2*Time3 + 3*Time4 # regression s + i ~ State + Poverty ' crime.fit7 <- growth(crime.model7, sample.cov=crime2.cov, sample.mean=crime2.mean, sample.nobs=952) # State, poverty, and their interaction are predictors crime.model8 <- ' # intercept i =~ 1*Time1 + 1*Time2 + 1*Time3 + 1*Time4 # slope s =~ 0*Time1 + 1*Time2 + 2*Time3 + 3*Time4 # regression s + i ~ State + Poverty + StPov ' crime.fit8 <- growth(crime.model8, sample.cov=crime2.cov, sample.mean=crime2.mean, sample.nobs=952)
Alternative specification of latent intercept and slope
# intercept coded to be the last data collection period model <- ' # intercept i =~ 1*Time1 + 1*Time2 + 1*Time3 + 1*Time4 # slope s =~ -3*Time1 + -2*Time2 + -1*Time3 + 0*Time4 ' # intercept coded to be the third data collection period model <- ' # intercept i =~ 1*Time1 + 1*Time2 + 1*Time3 + 1*Time4 # slope s =~ -2*Time1 + -1*Time2 + 0*Time3 + 1*Time4 ' # intercept coded to be the middle of data collection model <- ' # intercept i =~ 1*Time1 + 1*Time2 + 1*Time3 + 1*Time4 # slope s =~ -1.5*Time1 + -0.5*Time2 + 0.5*Time3 + 1.5*Time4 ' # using two units as the time between data collection periods model <- ' # intercept i =~ 1*Time1 + 1*Time2 + 1*Time3 + 1*Time4 # slope s =~ 0*Time1 + 2*Time2 + 4*Time3 + 6*Time4 ' # model with a follow-up time period distant (four units) from the end of regular data collection model <- ' # intercept i =~ 1*Time1 + 1*Time2 + 1*Time3 + 1*Time4 # slope s =~ 0*Time1 + 1*Time2 + 2*Time3 + 6*Time4 ' # quadratic growth--second latent slope's loadings are the square of the first latent slope's loadings model <- ' # intercept i =~ 1*Time1 + 1*Time2 + 1*Time3 + 1*Time4 # slope 1 s1 =~ 0*Time1 + 1*Time2 + 2*Time3 + 3*Time4 # slope 2 s2 =~ 0*Time1 + 1*Time2 + 4*Time3 + 9*Time4 ' # piecewise linear slopes model <- ' # intercept i =~ 1*Time1 + 1*Time2 + 1*Time3 + 1*Time4 + 1*Time5 + 1*Time6 # slope 1 s1 =~ -3*Time1 + -2*Time2 + -1*Time3 + 0*Time4 + 0*Time5 + 0*Time6 # slope 2 s2 =~ 0*Time1 + 0*Time2 + 0*Time3 + 0*Time4 + 1*Time5 + 2*Time6 '
Chapter 6: Models with Dichotomous Indicator Variables
Example: dichotomous indicator variables
Import data
library(psych) data(lsat6) # tetrachoric correlations tetrachoric(lsat6)
Item response theory models
library(ltm) # logistic distribution # discrimination and difficulty parameterization lsat.IRT <- ltm(lsat6~z1, IRT.param=TRUE) coef(lsat.IRT) # slope and intercept parameterization lsat.SI <- ltm(lsat6~z1, IRT.param=FALSE) coef(lsat.SI) # item characteristic curves plot(lsat.IRT) # normal distributon lsat.NO <- irt.fa(lsat6, plot=FALSE) # IRT parameter estimates lsat.NO$irt # loadings lsat.NO$fa # thresholds lsat.NO$tau
Item factor analysis models
library(lavaan) twoP.model<-' # loadings Theta =~ l1*Q1 + l2*Q2 + l3*Q3 + l4*Q4 + l5*Q5 # thresholds Q1 | th1*t1 Q2 | th2*t1 Q3 | th3*t1 Q4 | th4*t1 Q5 | th5*t1 # convert loadings to slopes (normal) alpha1.N := (l1)/sqrt(1-l1^2) alpha2.N := (l2)/sqrt(1-l2^2) alpha3.N := (l3)/sqrt(1-l3^2) alpha4.N := (l4)/sqrt(1-l4^2) alpha5.N := (l5)/sqrt(1-l5^2) # convert thresholds to intercepts (normal) beta1.N := (-th1)/sqrt(1-l1^2) beta2.N := (-th2)/sqrt(1-l2^2) beta3.N := (-th3)/sqrt(1-l3^2) beta4.N := (-th4)/sqrt(1-l4^2) beta5.N := (-th5)/sqrt(1-l5^2) # convert intercepts to locations (normal) loc1 := -beta1.N/alpha1.N loc2 := -beta2.N/alpha2.N loc3 := -beta3.N/alpha3.N loc4 := -beta4.N/alpha4.N loc5 := -beta5.N/alpha5.N # convert loadings to slopes (logistic) alpha1.L := (l1)/sqrt(1-l1^2)*1.7 alpha2.L := (l2)/sqrt(1-l2^2)*1.7 alpha3.L := (l3)/sqrt(1-l3^2)*1.7 alpha4.L := (l4)/sqrt(1-l4^2)*1.7 alpha5.L := (l5)/sqrt(1-l5^2)*1.7 # convert thresholds to locations (logistic) loc1.L := th1/l1 loc2.L := th2/l2 loc3.L := th3/l3 loc4.L := th4/l4 loc5.L := th5/l5 # convert locations to intercepts (logistic) beta1.L := (-alpha1.L)*loc1.L beta2.L := (-alpha2.L)*loc2.L beta3.L := (-alpha3.L)*loc3.L beta4.L := (-alpha4.L)*loc4.L beta5.L := (-alpha5.L)*loc5.L ' twoP.fit <- cfa(twoP.model, data=data.frame(lsat6), std.lv=TRUE, ordered=c("Q1","Q2","Q3","Q4", "Q5")) summary(twoP.fit, standardized=TRUE)
Chapter 7: Missing Data
Data can be downloaded here
Exploring missing values
# load mcar data mcar.data <- read.table("mcar.dat", sep=",", header=TRUE) # missing data patterns library(mice) md.pattern(mcar.data) # Little's MCAR test of mean equality library(BaylorEdPsych) mcar.little <- LittleMCAR(mcar.data) mcar.little[c("chi.square", "df", "p.value")] # Examine equality of means and covariances library(MissMech) TestMCARNormality(mcar.data)
Handling missing values
# model specification complete.model <- ' read =~ a*z1 + b*z2 + c*z3 read ~ g*x1 z1~~d*z1 z2~~e*z2 z3~~f*z3 read~~h*read ' # listwise deletion mcarListwise.fit <- sem(complete.model, data=mcar.data) summary(mcarListwise.fit, rsquare=TRUE, standardized=TRUE) # pairwise deletion library(psych) pairwiseMCAR.cov <- cov(mcar.data, use="pairwise.complete.obs") pairwiseMCAR.mean <- c(mean(mcar.data$z1,na.rm=TRUE),mean(mcar.data$z2, na.rm=TRUE),mean(mcar.data$z3,na.rm=TRUE),mean(mcar.data$x1, na.rm=TRUE),mean(mcar.data$x2, na.rm=TRUE)) names(pairwiseMCAR.mean)<-colnames(pairwiseMCAR.cov) mcarPairwise.n <- min(count.pairwise(mcar.data)) mcarPairwise.fit <- sem(complete.model, sample.cov=pairwiseMCAR.cov, sample.nobs=mcarPairwise.n, sample.mean=pairwiseMCAR.mean) summary(mcarPairwise.fit, rsquare=TRUE, standardized=TRUE) # mean imputation library(Hmisc) mcarMeanI.data <- mcar.data mcarMeanI.data$z1 <- impute(mcarMeanI.data$z1, fun=mean) mcarMeanI.data$z2 <- impute(mcarMeanI.data$z2, fun=mean) mcarMeanI.data$z3 <- impute(mcarMeanI.data$z3, fun=mean) mcarMeanImputation.fit<-sem(complete.model, data=mcarMeanI.data) summary(mcarMeanImputation.fit, rsquare=TRUE) # FIML mcarFIML.fit <- sem(complete.model, data=mcar.data, missing="fiml") summary(mcarFIML.fit, rsquare=TRUE, standardized=TRUE) # multiple imputation using mice library(semTools) mcarMI.fit <- runMI(complete.model, data=mcar.data, m=20, miPackage="mice", fun="sem", seed=56587) # multiple imputation using Amelia library(Amelia) mcar.sim <- amelia(MCAR.data,m=20) mcarMI.fit <- runMI(complete.model, data=MCAR.sim$imputations, fun="sem")
Auxiliary variable
library(semTools) # FIML with auxiliary variable mcarFIMLAux2.fit <- auxiliary(complete.model, aux="x2", data=mcar.data, fun="sem") summary(mcarFIMLAux2.fit, standardized=TRUE)
Chapter 8: Sample Size Planning
*Warning: Depending on your computer, the following simulations could take multiple hours to converge
Example: Mean difference between two groups
# traditional power analysis library(pwr) pwr.t.test(d = 0.49, sig.level = 0.10, power = 0.80) t.sampSize <- pwr.t.test(d = 0.49, sig.level = 0.10, power = 0.80) # Monte Carlo power analysis # convert effect size to correlation library(compute.es) des(d=0.49, n.1=100,n.2=100) # specify data generation model act.model <- ' # regression act ~ 0.24*class # error variance act ~~ 0.9424*act ' # specify analysis model act2.model <- ' act ~ class ' # simulate and analyze data library(simsem) act.power <- sim(nRep=1000, generate=act.model, model=act2.model, n =104, lavaanfun = "sem", multicore=TRUE, seed=0) summaryParam(act.power,alpha = 0.10,detail=TRUE) # search sample sizes act.n <- sim(model=act2.model, n =rep(seq(50,120,10), 500), generate=act.model, lavaanfun = "sem", multicore=TRUE) # power curve with line added at y= 0.80 plotPower(act.n, alpha=0.1, powerParam="act~class") abline(h=.8, lwd=2, lty=2) # find sample size needed for power = .0.8 act.pwr.n <- getPower(act.n, alpha = 0.10) findPower(act.pwr.n, iv="N", power=0.8)
Example: Latent curve model
# specify data generation model lcm.pop.model <- ' # latent variable model i =~ 1*y1 + 1*y2 + 1*y3 + 1*y4 s =~ 0*y1 + 1*y2 + 2*y3 + 3*y4 # latent variable means i ~ 0.00*1 s ~ 0.20*1 # regressions, with parameter of interest labeled i ~ 0.50*x s ~ a*x + 0.20*x # mean and variance of x x ~ 0.50*1 x ~~ 0.25*x # manifest (residual) variances y1 ~~ 0.5*y1 y2 ~~ 0.5*y2 y3 ~~ 0.5*y3 y4 ~~ 0.5*y4 # latent variable residual variances/covariances i~~0.25*i s~~0.09*s i~~0.0*s ' # specify analysis model lcm.model <- ' # latent variable model i =~ 1*y1 + 1*y2 + 1*y3 + 1*y4 s =~ 0*y1 + 1*y2 + 2*y3 + 3*y4 # regressions i ~ x s ~ a*x ' # show model's fixed and default values lcm.pop.fit <- growth(lcm.pop.model, do.fit=FALSE, fixed.x=FALSE) summary(lcm.pop.fit, std=TRUE, rsquare=TRUE) fitted(lcm.pop.fit) # run simulations for multiple sample sizes lcm.n <- sim(nRep=NULL, model=lcm.model, n =rep(seq(100,200,10), 500), generate=lcm.pop.model, lavaanfun = "growth", multicore=TRUE) # find the sample size needed for power = .0.8 lcm.pwr.n <- getPower(lcm.n) findPower(lcm.pwr.n, "N", 0.8) # power analysis with n=149 lcm1.power <- sim(nRep=10000, model=lcm.model, n =149, generate=lcm.pop.model, lavaanfun = "growth", multicore=TRUE, seed=998877) summaryParam(lcm1.power,alpha = 0.05,detail=TRUE)
Example: Latent curve model with attrition
# specify missing data lcm.pcnt.missing <- ' y1 ~ p(0.12) y2 ~ p(0.18) + 1*x y3 ~ p(0.27) + 1*x y4 ~ p(0.50) + 1*x ' missing.model <- miss(logit=lcm.pcnt.missing) # plot missing data plotLogitMiss(lcm.pcnt.missing, x1lim=c(0,1)) # run simulations for multiple sample sizes missing.model <- miss(logit=lcm.pcnt.missing, m=10, package="mice") lcm.missing.n <- sim(nRep=NULL, model=lcm.model, n=rep(seq(200,300,10), 100), generate=lcm.pop.model, lavaanfun = "growth", multicore=TRUE, miss=missing.model) # find the sample size needed for power = .0.8 lcm.missing.pwr.n <- getPower(lcm.missing.n) findPower(lcm.missing.pwr.n, "N", 0.8) # power analysis with n=273 lcm1.missing <- sim(nRep=10000, model=lcm.model, n =273, generate=lcm.pop.model, lavaanfun = "growth", multicore=TRUE, miss=missing.model, seed=4335) summaryParam(lcm1.missing,alpha = 0.05,detail=TRUE)
Chapter 9: Hierarchical Latent Variable Models
Example: WISC-IV extended data
Import data
wisc4.cov <- lower2full(c(8.29,5.37,9.06,2.83,4.44,8.35,2.83,3.32,3.36,8.88,5.50,6.66,4.20,3.43,9.18,6.18,6.73,4.01,3.33,6.77,9.12,3.52,3.77,3.19,2.75,3.88,4.05,8.88,3.79,4.50,3.72,3.39,4.53,4.70,4.54,8.94,2.30,2.67,2.40,2.38,2.06,2.59,2.65,2.83,8.76,3.06,4.04,3.70,2.79,3.59,3.67,3.44,4.20,4.53,9.73)) wisc4.sd <- c(2.88,3.01,2.89,2.98,3.03,3.02,2.98,2.99,2.96,3.12) names(wisc4.sd) <- colnames(wisc4.cov) <- rownames(wisc4.cov) <-c("Comprehension", "Information", "Matrix.Reasoning", "Picture.Concepts", "Similarities", "Vocabulary", "Digit.Span", "Letter.Number", "Coding", "Symbol.Search")
wisc4.fourFactor.model <-' gc =~ Comprehension + Information + Similarities + Vocabulary gf =~ Matrix.Reasoning + Picture.Concepts gsm =~ Digit.Span + Letter.Number gs =~ Coding + Symbol.Search ' wisc4.fourFactor.fit<-cfa(model=wisc4.fourFactor.model, sample.cov=wisc4.cov, sample.nobs=550) summary(wisc4.fourFactor.fit, fit.measure=TRUE, standardized=TRUE)
Higher-order model
wisc4.higherOrder.model<-' gc =~ Comprehension + Information + Similarities + Vocabulary gf =~ Matrix.Reasoning + Picture.Concepts gsm =~ Digit.Span + Letter.Number gs =~ Coding + Symbol.Search g=~ NA*gf + gc + gsm + gs g~~ 1*g ' wisc4.higherOrder.fit <- cfa(model=wisc4.higherOrder.model, sample.cov=wisc4.cov, sample.nobs=550)
Bi-factor model
wisc4.bifactor.model<-' gc =~ Comprehension + Information + Similarities + Vocabulary gf =~ a*Matrix.Reasoning + a*Picture.Concepts gsm =~ b*Digit.Span + b*Letter.Number gs =~ c*Coding + c*Symbol.Search g =~ Information + Comprehension + Matrix.Reasoning + Picture.Concepts + Similarities + Vocabulary + Digit.Span + Letter.Number + Coding + Symbol.Search ' wisc4.bifactor.fit<-cfa(model=wisc4.bifactor.model, sample.cov=wisc4.cov, sample.nobs=550, std.lv=TRUE, orthogonal=TRUE)
Source: https://blogs.baylor.edu/rlatentvariable/sample-page/r-syntax/
Posted by: enochkrein.blogspot.com