multipleGroup {mirt} | R Documentation |
multipleGroup
performs a full-information
maximum-likelihood multiple group analysis for any combination of dichotomous and polytomous
data under the item response theory paradigm using either Cai's (2010)
Metropolis-Hastings Robbins-Monro (MHRM) algorithm or with an EM algorithm approach. This
function may be used for detecting differential item functioning (DIF), thought the
DIF
function may provide a more convenient approach. If the grouping
variable is not specified then the dentype
input can be modified to fit
mixture models to estimate any latent group components.
multipleGroup( data, model = 1, group, itemtype = NULL, invariance = "", method = "EM", dentype = "Gaussian", ... )
data |
a |
model |
string to be passed to, or a model object returned from, |
group |
a |
itemtype |
can be same type of input as is documented in |
invariance |
a character vector containing the following possible options:
Additionally, specifying specific item name bundles (from |
method |
a character object that is either |
dentype |
type of density form to use for the latent trait parameters. Current options include
all of the methods described in
|
... |
additional arguments to be passed to the estimation engine. See |
By default the estimation in multipleGroup
assumes that the models are maximally
independent, and therefore could initially be performed by sub-setting the data and running
identical models with mirt
and aggregating the results (e.g., log-likelihood).
However, constrains may be automatically imposed across groups by invoking various
invariance
keywords. Users may also supply a list of parameter equality constraints
to by constrain
argument, of define equality constraints using the
mirt.model
syntax (recommended).
function returns an object of class MultipleGroupClass
(MultipleGroupClass-class).
Phil Chalmers rphilip.chalmers@gmail.com
Chalmers, R., P. (2012). mirt: A Multidimensional Item Response Theory Package for the R Environment. Journal of Statistical Software, 48(6), 1-29. doi: 10.18637/jss.v048.i06
mirt
, DIF
, extract.group
, DRF
## Not run: #single factor set.seed(12345) a <- matrix(abs(rnorm(15,1,.3)), ncol=1) d <- matrix(rnorm(15,0,.7),ncol=1) itemtype <- rep('2PL', nrow(a)) N <- 1000 dataset1 <- simdata(a, d, N, itemtype) dataset2 <- simdata(a, d, N, itemtype, mu = .1, sigma = matrix(1.5)) dat <- rbind(dataset1, dataset2) group <- c(rep('D1', N), rep('D2', N)) # marginal information itemstats(dat) # conditional information itemstats(dat, group=group) mod_configural <- multipleGroup(dat, 1, group = group) #completely separate analyses #limited information fit statistics M2(mod_configural) mod_metric <- multipleGroup(dat, 1, group = group, invariance=c('slopes')) #equal slopes #equal intercepts, free variance and means mod_scalar2 <- multipleGroup(dat, 1, group = group, invariance=c('slopes', 'intercepts', 'free_var','free_means')) mod_scalar1 <- multipleGroup(dat, 1, group = group, #fixed means invariance=c('slopes', 'intercepts', 'free_var')) mod_fullconstrain <- multipleGroup(dat, 1, group = group, invariance=c('slopes', 'intercepts')) extract.mirt(mod_fullconstrain, 'time') #time of estimation components #optionally use Newton-Raphson for (generally) faster convergence in the M-step's mod_fullconstrain <- multipleGroup(dat, 1, group = group, optimizer = 'NR', invariance=c('slopes', 'intercepts')) extract.mirt(mod_fullconstrain, 'time') #time of estimation components summary(mod_scalar2) coef(mod_scalar2, simplify=TRUE) residuals(mod_scalar2) plot(mod_configural) plot(mod_configural, type = 'info') plot(mod_configural, type = 'trace') plot(mod_configural, type = 'trace', which.items = 1:4) itemplot(mod_configural, 2) itemplot(mod_configural, 2, type = 'RE') anova(mod_metric, mod_configural) #equal slopes only anova(mod_scalar2, mod_metric) #equal intercepts, free variance and mean anova(mod_scalar1, mod_scalar2) #fix mean anova(mod_fullconstrain, mod_scalar1) #fix variance # compared all at once (in order of most constrained to least) anova(mod_fullconstrain, mod_scalar2, mod_configural) #test whether first 6 slopes should be equal across groups values <- multipleGroup(dat, 1, group = group, pars = 'values') values constrain <- list(c(1, 63), c(5,67), c(9,71), c(13,75), c(17,79), c(21,83)) equalslopes <- multipleGroup(dat, 1, group = group, constrain = constrain) anova(equalslopes, mod_configural) #same as above, but using mirt.model syntax newmodel <- ' F = 1-15 CONSTRAINB = (1-6, a1)' equalslopes <- multipleGroup(dat, newmodel, group = group) coef(equalslopes, simplify=TRUE) ############ # vertical scaling (i.e., equating when groups answer items others do not) dat2 <- dat dat2[group == 'D1', 1:2] <- dat2[group != 'D1', 14:15] <- NA head(dat2) tail(dat2) # items with missing responses need to be constrained across groups for identification nms <- colnames(dat2) mod <- multipleGroup(dat2, 1, group, invariance = nms[c(1:2, 14:15)]) # this will throw an error without proper constraints (SEs cannot be computed either) # mod <- multipleGroup(dat2, 1, group) # model still does not have anchors, therefore need to add a few (here use items 3-5) mod_anchor <- multipleGroup(dat2, 1, group, invariance = c(nms[c(1:5, 14:15)], 'free_means', 'free_var')) coef(mod_anchor, simplify=TRUE) # check if identified by computing information matrix mod_anchor <- multipleGroup(dat2, 1, group, pars = mod2values(mod_anchor), TOL=NaN, SE=TRUE, invariance = c(nms[c(1:5, 14:15)], 'free_means', 'free_var')) mod_anchor coef(mod_anchor) coef(mod_anchor, printSE=TRUE) ############# #DIF test for each item (using all other items as anchors) itemnames <- colnames(dat) refmodel <- multipleGroup(dat, 1, group = group, SE=TRUE, invariance=c('free_means', 'free_var', itemnames)) #loop over items (in practice, run in parallel to increase speed). May be better to use ?DIF estmodels <- vector('list', ncol(dat)) for(i in 1:ncol(dat)) estmodels[[i]] <- multipleGroup(dat, 1, group = group, verbose = FALSE, invariance=c('free_means', 'free_var', itemnames[-i])) anova(refmodel, estmodels[[1]]) (anovas <- lapply(estmodels, function(x, refmodel) anova(refmodel, x), refmodel=refmodel)) #family-wise error control p <- do.call(rbind, lapply(anovas, function(x) x[2, 'p'])) p.adjust(p, method = 'BH') #same as above, except only test if slopes vary (1 df) #constrain all intercepts estmodels <- vector('list', ncol(dat)) for(i in 1:ncol(dat)) estmodels[[i]] <- multipleGroup(dat, 1, group = group, verbose = FALSE, invariance=c('free_means', 'free_var', 'intercepts', itemnames[-i])) (anovas <- lapply(estmodels, function(x, refmodel) anova(refmodel, x), refmodel=refmodel)) #quickly test with Wald test using DIF() mod_configural2 <- multipleGroup(dat, 1, group = group, SE=TRUE) DIF(mod_configural2, which.par = c('a1', 'd'), Wald=TRUE, p.adjust = 'fdr') ############# # Three group model where the latent variable parameters are constrained to # be equal in the focal groups set.seed(12345) a <- matrix(abs(rnorm(15,1,.3)), ncol=1) d <- matrix(rnorm(15,0,.7),ncol=1) itemtype <- rep('2PL', nrow(a)) N <- 1000 dataset1 <- simdata(a, d, N, itemtype) dataset2 <- simdata(a, d, N, itemtype, mu = .1, sigma = matrix(1.5)) dataset3 <- simdata(a, d, N, itemtype, mu = .1, sigma = matrix(1.5)) dat <- rbind(dataset1, dataset2, dataset3) group <- rep(c('D1', 'D2', 'D3'), each=N) # marginal information itemstats(dat) # conditional information itemstats(dat, group=group) model <- 'F1 = 1-15 FREE[D2, D3] = (GROUP, MEAN_1), (GROUP, COV_11) CONSTRAINB[D2,D3] = (GROUP, MEAN_1), (GROUP, COV_11)' mod <- multipleGroup(dat, model, group = group, invariance = colnames(dat)) coef(mod, simplify=TRUE) ############# #multiple factors a <- matrix(c(abs(rnorm(5,1,.3)), rep(0,15),abs(rnorm(5,1,.3)), rep(0,15),abs(rnorm(5,1,.3))), 15, 3) d <- matrix(rnorm(15,0,.7),ncol=1) mu <- c(-.4, -.7, .1) sigma <- matrix(c(1.21,.297,1.232,.297,.81,.252,1.232,.252,1.96),3,3) itemtype <- rep('2PL', nrow(a)) N <- 1000 dataset1 <- simdata(a, d, N, itemtype) dataset2 <- simdata(a, d, N, itemtype, mu = mu, sigma = sigma) dat <- rbind(dataset1, dataset2) group <- c(rep('D1', N), rep('D2', N)) #group models model <- ' F1 = 1-5 F2 = 6-10 F3 = 11-15' #define mirt cluster to use parallel architecture if(interactive()) mirtCluster() #EM approach (not as accurate with 3 factors, but generally good for quick model comparisons) mod_configural <- multipleGroup(dat, model, group = group) #completely separate analyses mod_metric <- multipleGroup(dat, model, group = group, invariance=c('slopes')) #equal slopes mod_fullconstrain <- multipleGroup(dat, model, group = group, #equal means, slopes, intercepts invariance=c('slopes', 'intercepts')) anova(mod_metric, mod_configural) anova(mod_fullconstrain, mod_metric) #same as above, but with MHRM (generally more accurate with 3+ factors, but slower) mod_configural <- multipleGroup(dat, model, group = group, method = 'MHRM') mod_metric <- multipleGroup(dat, model, group = group, invariance=c('slopes'), method = 'MHRM') mod_fullconstrain <- multipleGroup(dat, model, group = group, method = 'MHRM', invariance=c('slopes', 'intercepts')) anova(mod_metric, mod_configural) anova(mod_fullconstrain, mod_metric) ############ #polytomous item example set.seed(12345) a <- matrix(abs(rnorm(15,1,.3)), ncol=1) d <- matrix(rnorm(15,0,.7),ncol=1) d <- cbind(d, d-1, d-2) itemtype <- rep('graded', nrow(a)) N <- 1000 dataset1 <- simdata(a, d, N, itemtype) dataset2 <- simdata(a, d, N, itemtype, mu = .1, sigma = matrix(1.5)) dat <- rbind(dataset1, dataset2) group <- c(rep('D1', N), rep('D2', N)) model <- 'F1 = 1-15' mod_configural <- multipleGroup(dat, model, group = group) plot(mod_configural) plot(mod_configural, type = 'SE') itemplot(mod_configural, 1) itemplot(mod_configural, 1, type = 'info') plot(mod_configural, type = 'trace') # messy, score function typically better plot(mod_configural, type = 'itemscore') fs <- fscores(mod_configural, full.scores = FALSE) head(fs[["D1"]]) fscores(mod_configural, method = 'EAPsum', full.scores = FALSE) # constrain slopes within each group to be equal (but not across groups) model2 <- 'F1 = 1-15 CONSTRAIN = (1-15, a1)' mod_configural2 <- multipleGroup(dat, model2, group = group) plot(mod_configural2, type = 'SE') plot(mod_configural2, type = 'RE') itemplot(mod_configural2, 10) ############ ## empirical histogram example (normal and bimodal groups) set.seed(1234) a <- matrix(rlnorm(50, .2, .2)) d <- matrix(rnorm(50)) ThetaNormal <- matrix(rnorm(2000)) ThetaBimodal <- scale(matrix(c(rnorm(1000, -2), rnorm(1000,2)))) #bimodal Theta <- rbind(ThetaNormal, ThetaBimodal) dat <- simdata(a, d, 4000, itemtype = '2PL', Theta=Theta) group <- rep(c('G1', 'G2'), each=2000) EH <- multipleGroup(dat, 1, group=group, dentype="empiricalhist", invariance = colnames(dat)) coef(EH, simplify=TRUE) plot(EH, type = 'empiricalhist', npts = 60) #DIF test for item 1 EH1 <- multipleGroup(dat, 1, group=group, dentype="empiricalhist", invariance = colnames(dat)[-1]) anova(EH, EH1) #-------------------------------- # Mixture model (no prior group variable specified) set.seed(12345) nitems <- 20 a1 <- matrix(.75, ncol=1, nrow=nitems) a2 <- matrix(1.25, ncol=1, nrow=nitems) d1 <- matrix(rnorm(nitems,0,1),ncol=1) d2 <- matrix(rnorm(nitems,0,1),ncol=1) itemtype <- rep('2PL', nrow(a1)) N1 <- 500 N2 <- N1*2 # second class twice as large dataset1 <- simdata(a1, d1, N1, itemtype) dataset2 <- simdata(a2, d2, N2, itemtype) dat <- rbind(dataset1, dataset2) # group <- c(rep('D1', N1), rep('D2', N2)) # Mixture Rasch model (Rost, 1990) models <- 'F1 = 1-20 CONSTRAIN = (1-20, a1)' mod_mix <- multipleGroup(dat, models, dentype = 'mixture-2', GenRandomPars = TRUE) coef(mod_mix, simplify=TRUE) summary(mod_mix) plot(mod_mix) plot(mod_mix, type = 'trace') itemplot(mod_mix, 1, type = 'info') head(fscores(mod_mix)) # theta estimates head(fscores(mod_mix, method = 'classify')) # classification probability itemfit(mod_mix) # Mixture 2PL model mod_mix2 <- multipleGroup(dat, 1, dentype = 'mixture-2', GenRandomPars = TRUE) anova(mod_mix, mod_mix2) coef(mod_mix2, simplify=TRUE) itemfit(mod_mix2) # Zero-inflated 2PL IRT model model <- "F = 1-20 START [MIXTURE_1] = (GROUP, MEAN_1, -100), (GROUP, COV_11, .00001), (1-20, a1, 1.0), (1-20, d, 0.0) FIXED [MIXTURE_1] = (GROUP, MEAN_1), (GROUP, COV_11), (1-20, a1), (1-20, d)" zip <- multipleGroup(dat, model, dentype = 'mixture-2') coef(zip, simplify=TRUE) ## End(Not run)