############################################## ## Generalized Linar Models - R Tutorial ## ############################################## # Please read the tutorial's general instructions first, and prepare # for loading an external datafile in case you will use one. # The content of this file can be pasted directly into the R console. # It should blurp lost of errors only at the point where you # are supposed to have an external dataset available. # However, it is much handier to use the "display file" command # within the R menu to look at this file, and paste command by command # to the console, using control-V. Take your time to experiment a bit # with the listed commands.# You will need the "nlme" and "MASS" # libraries from CRAN. # You will also need the "gnlm" and "rmutil" libraries from # Jim Lindsey's homepage. #! We load them here. library(nlme) library(MASS) library(gnlm) #--------------------------------------------- # .Rglim: Generalized Linear Models in R. #--------------------------------------------- # Content: # # I. Linear Models # II. Generalized Linear Models # a. Binomial # b. Poisson # III. Mixed Linear Models # IV. Generalized Mixed Models # -------------------- # | I. Linear Models | # -------------------- # In R, the function lm() is used to fit linear models. # It can be used to carry out multiple regression, # analysis of variance and analysis of covariance. # Let's produce few examples. # The first one is an ANOVA example from help(lm), taken from # Annette Dobson (1990) "An Introduction to Generalized Linear Models". # It uses plant weight data from a control and a treatment group. ctl <- c(4.17,5.58,5.18,6.11,4.50,4.61,5.17,4.53,5.33,5.14) trt <- c(4.81,4.17,4.41,3.59,5.87,3.83,6.03,4.89,4.32,4.69) group <- gl(2,10,20, labels=c("Ctl","Trt")) # make group levels weight <- c(ctl, trt) # concatenate the response vector hist(weight) # check gl() to see how you can easily make group levels. help(gl) # We fit the model lm1<-lm(weight ~ group) summary(lm1) # anova() produces an F-test for the group effect anova(lm1) # For ANOVA designs, you can also get your fit plus anova table # in one go using aov(): aov(weight ~ group) # We can investigate the residuals of a linear model # using a normal probability plot. # If the error is normally distributed, then we # should obtain a straight line. qqnorm(residuals(lm1)) qqline(residuals(lm1)) # add the quantile line for easy comparison # It is handier to combine qqnorm() and qqline() into a single function: qqplot<-function(reslist){qqnorm(reslist);qqline(reslist)} qqplot(residuals(lm1)) # The dataset is small, so there is not really a clear departure from # normality. # If we simulate residuals from the error distribution given # by this model, # we see that the normal probability plot is quite wiggly and # depending on the exact sample as well. x<-rnorm(20,0,0.6964) # can you see where I got .6964? qqnorm(x) summary(lm1)$sigma # yes, I got it from here... # Please take a look at rnorm and qqnorm, # we will use similar functions quite regularly. help(rnorm) help(qqnorm) # finally, plot() used on an lm object, # produces a number of diagnostic plots. ############################################## ## Generalized Linar Models - R Tutorial ## ############################################### Please read the tutorial's general instructions first, and prepare # for loading an external datafile in case you will use one. # The content of this file can be pasted directly into the R console. # It should blurp lost of errors only at the point where you # are supposed to have an external dataset available. # However, it is much handier to use the "display file" command # within the R menu to look at this file, and paste command by command # to the console, using control-V. Take your time to experiment a bit # with the listed commands.# You will need the "nlme" and "MASS" libraries from CRAN. # You will also need the "gnlm" and "rmutil" libraries from # Jim Lindsey's homepage. #! We load them here. library(nlme) library(MASS) library(gnlm) #--------------------------------------------- # .Rglim: Generalized Linear Models in R. #--------------------------------------------- # Content: # # I. Linear Models # II. Generalized Linear Models # a. Binomial # b. Poisson # III. Mixed Linear Models # IV. Generalized Mixed Models # -------------------- # | I. Linear Models | # -------------------- # In R, the function lm() is used to fit linear models. # It can be used to carry out multiple regression, # analysis of variance and analysis of covariance. # Let's produce few examples. # The first one is an ANOVA example from help(lm), taken from # Annette Dobson (1990) "An Introduction to Generalized Linear Models". # It uses plant weight data from a control and a treatment group. ctl <- c(4.17,5.58,5.18,6.11,4.50,4.61,5.17,4.53,5.33,5.14) trt <- c(4.81,4.17,4.41,3.59,5.87,3.83,6.03,4.89,4.32,4.69) group <- gl(2,10,20, labels=c("Ctl","Trt")) # make group levels weight <- c(ctl, trt) # concatenate the response vectorhist(weight) # check gl() to see how you can easily make group levels. help(gl) # We fit the model lm1<-lm(weight ~ group) summary(lm1) # anova() produces an F-test for the group effect anova(lm1) # For ANOVA designs, you can also get your fit plus anova table # in one go using aov(): aov(weight ~ group) # We can investigate the residuals of a linear model # using a normal probability plot. # If the error is normally distributed, then we # should obtain a straight line. qqnorm(residuals(lm1)) qqline(residuals(lm1)) # add the quantile line for easy comparison # It is handier to combine qqnorm() and qqline() into a single function: qqplot<-function(reslist){qqnorm(reslist);qqline(reslist)} qqplot(residuals(lm1)) # The dataset is small, so there is not really a clear departure from # normality. # If we simulate residuals from the error distribution given # by this model, # we see that the normal probability plot is quite wiggly and # depending on the exact sample as well. x<-rnorm(20,0,0.6964) # can you see where I got .6964? qqnorm(x) summary(lm1)$sigma # yes, I got it from here... # Please take a look at rnorm and qqnorm, # we will use similar functions quite regularly. help(rnorm) help(qqnorm) # finally, plot() used on an lm object, # produces a number of diagnostic plots. plot(lm1) #! If you would want type III sums of squares for some reason, # you can install the package "car" # and use the Anova function in there for that. # The function TukeyHSD(), on the other hand, can be used for # post-hoc comparison. # We now present an ANCOVA, # which combines factors and continuous variables. # It is on Whiteside's data on house insulation. data(whiteside) summary(whiteside) # First a graph: xyplot(Gas~Temp|Insul,data=whiteside) # Gas~ Temp|Insul is called "the formula" of the plot. # Here it says that Gas depends on Temp per # level of Insul (insulation), so we plot temperature on the x # axis and gas on the y. lm2<-lm(Gas~Insul/Temp-1,data=whiteside) summary(lm2) # the "/" means that we will fit "Temp" nested within # insulation level. # The minus one in the formula has as effect # that no intercept will be fitted. # If you don't write -1, an intercept will be fitted, # if you write +1 an intercept will be fitted as well. # Maybe there is a quadratic effect of temperature on gas use? # Let's fit it. The "I" means that the term in the formula is # evaluated arithmetically. lm3<-lm(Gas~Insul/(Temp+I(Temp^2))-1,data=whiteside) summary(lm3) # We can compare both models using anova(lm3,lm2) # Is there a significant improvement with the quadratic term? # Try to carry out model simplification and model checking. # A useful function in this respect is stepAIC(). # Try to use it. #! We will go onto the issue of model matrices and contrasts # in the next section. # -------------------------------- # | I. Generalized Linear Models | # -------------------------------- # Generalized linear models can be made for many probability # distributions of the error terms, such as # binomial, poisson, and so on. # The idea is that the location parameter of those distributions # can be made to vary with a linear model, that is usually # coupled to the location through a link function # (which is like a transformation). # Lindsey (1995) introduces many types of generalized # linear models, and McCullagh and Nelder (1989) is a standard reference. # We will fit some examples from Lindsey (1995), # and some from Venables and Ripley's (2002) book. # Generalized linear models all use likelihoods for estimation # and model comparison. # Example Lindsey (1995) p. 15, 36. # We will first run an analysis for binomially distributed data. #! For such data, each observation or data matrix row, #! has to contain two numbers, a number of successes and failures, #! or positive and negative responses. # We construct a matrix of frequencies; # rows correspond to sexes and columns to # whether individuals had PhD plans or not. y <- matrix(c(5,6,12,5),ncol=2) y sex <- gl(2,1) #sex contains the sex groups sex # So there are two sex groups, with a single observation at # each group level. # The first model we fit to these data, calculates # an overall mean: summary(m1<-glm(y~1,family=binomial)) # If you type help(binomial) # you will read that the binomial model links # the data to the linear model through a logit link (while # many other links are also possible). # This means that the mean probability p relates to a linear # model x via log(p/(1-p)) = x. To translate fitted values x # back to p, we have to carry out the inverse transformation. # The following little function just does that: invlogit<-function(x)(exp(x)/(1+exp(x))) # For model m1 that gives: invlogit(m1$coef) # which is the population average probability of having PhD plans. # Let's add a sex effect on this probability: summary(m2<-glm(y~sex,family=binomial, options(contrasts = c("contr.treatment", "contr.poly")))) # In this call, you see the "contrast" statement appearing. # It is used to control the way group or factor effects are calculated. # The first statement (("contr.treatment" here) is the contrast option # for unordered factors, # the second one for ordered factors ("contr.poly" is default). # You can see the effects of changing the options by running summary(m3<-glm(y~sex,family=binomial, options(contrasts = c("contr.sum", "contr.poly")))) # Function model.matrix() is really hady to figure out which predictor # variables contribute to which parameter: model.matrix(m2) model.matrix(m3) # In these model matrices, you can see which parameters contribute # to the fitted value for each observation (per row). # For model m2, if you want to calculate the linear model fits for # observations one and two, you have to type m2$coef[1] # fitted value for row/observation one is the first parameter m2$coef[1] + m2$coef[2] # for model 3, we read from model.matrix that we need to # calculate m3$coef[1] + m3$coef[2] m3$coef[1] - m3$coef[2] # We can also use matrix multiplication "%*%" to get the fitted values: model.matrix(m3)[1,]%*%m3$coef model.matrix(m3)[2,]%*%m3$coef # Try using invlogit() on these values to transform them# back to the original scale. # Running fitted() on the models gives the same result: fitted(m2) fitted(m3) # or, for the second model invlogit(c(model.matrix(m3)[1,]%*%m3$coef, model.matrix(m3)[2,]%*%m3$coef)) # We can also use function bnlr() from the gnlm library to # fit binomial models: m4<-bnlr(y, link="logit",mu=~sex,pmu=c(1,1)) m4$aic 2*m4$aic m3$aic # Often, in generalized linear models, model selection is done using # Akaike information criteria. # As you see, the AIC reported here is 1/2 the AIC you # get returned from glm. This is the choice of Jim Lindsey, # who programmed bnlr(). # AIC's and deviances can differ in exact value between packages # and functions, # but their relative ranking among models should remain identical. # We now set the contrast options back to their default # before proceeding. options(contrasts = c("contr.treatment", "contr.poly")) # Let's look at Poisson distributed data. # This is a distribution often used for counts. # The example is Lindsey (1995), exercise 2.8 (p.19 and 65), on # migration in Britain between 1966 and 1971. # The frequencies of migrated individuals are: freq <- c(118,12,7,23,14,2127,86,130,8,69,2548,107,12,110,88,7712) # The groups (4 areas of departure or arrival) are: r66 <- gl(4,4,16) r71 <- gl(4,1,16) r66 r71 mp1 <- glm(freq~r66*r71,family=poisson) mp2 <- glm(freq~r66+r71,family=poisson) mp3 <- glm(freq~r66,family=poisson) # These models can be called and compared using mp1$aic mp2$aic mp3$aic # Which model do you prefer? # You could also use stepAIC() for this. # We now present a slightly more advanced example from the MASS library. # The example is also discussed in Venables & Ripley (2002), p. 193. # It is data on the number of dead budworms with dose of toxine and sex # as explanatory variables. dose <- rep(0:5, 2) # toxine levels numdead <- c(1, 4, 9, 13, 18, 20, 0, 2, 6, 10, 12, 16) sex <- factor(rep(c("M", "F"), c(6, 6))) y <- cbind(numdead, numalive = 20 - numdead) budworm1 <- glm(y ~ sex + dose - 1, family = binomial) # the dose.p() function from MASS, can be used to calculate # doses corresponding to a given probability of death dose.p(budworm1, cf = c(1,3), p = 1:3/4) # "cf" selects the appropriate coefficients from the linear model. # Here coefficient one corresponds to the intercept, # the third coefficient is the dose effect. # See also help(dose.p). budworm2<-update(budworm1, family = binomial(link=probit)) # The update function used here is really practical, # you don't need to retype an entire model, # just the part that you want to modify. # Here we change the link to probit, which corresponds to a # threshold trait model with an underlying liability, # and the doses at 25%, 50% and 75% lethality also change: dose.p(budworm2,cf = c(1,3), p = 1:3/4) # A lot more functions are available for generalized linear models, # the gnlr library, for instance, contains many, # with extremely flexible fitting options. # Library MASS contains glm.nb(), # for negative binomially distributed data. # -------------------------- # | I. Mixed Linear Models | # -------------------------- # Mixed models have risen enormously in popularity in recent years. # The idea is that you have sampled a number of subjects from a # population, and that individual subjects contribute a random # effect to a response. # It is maybe easy to understand what this means when thinking of # quantitative genetics. There, individual phenotypes have a # random contribution from the breeding or genotypic value. # The assumption in mixed models usually is that random effects are from # a normal distribution. # We repeat an example from the nlme library, and Pinheiro and Bates' # (2000) book. data(Orthodont) # Orthodontic growth data. summary(Orthodont) # Pinheiro and Bates (2000) have turned Orthodont into a # "groupedData" object, to make fitting of mixed models # easier from their point of view. # Such an object contains a formula describing the nested # structure of the data. formula.groupedData(Orthodont) Orthodont[1,] # Fitting we do: mm1 <- lme(distance ~ age, data = Orthodont,random=~1|Subject) mm2 <- lme(distance ~ age + Sex, data = Orthodont, random = ~ 1|Subject) summary(mm1) summary(mm2) # Inspect this information carefully. # For the random effects, only their standard deviation gets estimated. # We compare the model fit of mm1 and mm2: anova(mm1,mm2) # The two models we fitted differed in the number of fixed effects. # Lme() warns us here! In the case of differences in fixed effects, # we have to fit the model using maximum likelihood (ML) # estimation and not restricted maximum likelihood (REML). mm3 <- update(mm1,method="ML") mm4 <- update(mm2,method="ML") anova(mm3,mm4) # So the more complicated model, with two fixed effects is preferred. # The random effects can be predicted using ranef() # and inspected as follows: hist(ranef(mm2)[,1]) qqplot(ranef(mm2)[,1]) # The normality of random effects is not very convincing here. # It is also possible to model dependent observations, # nested and crossed random effects and so on. Hugely flexible! # However, you should keep in mind that the nlme library # was optimized to fit models with nested random effects. # Crossed random effects can be tricky to specify. # Some quick examples: mm5 <- lme(distance ~ age + Sex, data = Orthodont, random = ~ 1+age|Subject)intervals(mm5) # a random regression model, where the age effect # varies randomly between Subjects. mm6 <- lme(distance ~ age + Sex, data = Orthodont, random = ~ 1|Subject, weights=varIdent(form=~1|Sex)) intervals(mm6) # Different variances per sex. # Possible extensions of this approach are non-linear mixed effect # models (function nlme()) and generalized mixed effect models # (see below). # --------------------------------------- # | IV. Generalized Mixed Linear Models | # --------------------------------------- # Should it still come as a surprise that generalized mixed models # can be fitted too? This can be done most easily using glmmPQL() # from the MASS library. The syntax for glmmPQL() is nearly identical # to lme(). # It calls functions from the nlme library during evaluation. # An example: data(bacteria) gmm<-glmmPQL(y ~ trt + I(week > 2), random = ~ 1 | ID,family = binomial, data =bacteria) summary(gmm) qqplot(ranef(gmm)[,1]) #---------------------------------------------------------------------# Most of the material in this short tutorial comes from # Modern Applied Statistics with S. # by W. N. Venables and B. D. Ripley (2002) # Introductory Statistics. A Modelling Approach. # J. K. Lindsey (1995) # Mixed Effects Models in S and S-Plus. # J. Pinheiro, D.M. Bates (2000) # Generalized Linear Models. # P. McCullagh, J.A. Nelder (1989) # Tom Van Dooren, version 30/01/2003 data(whiteside) summary(whiteside) # First a graph: xyplot(Gas~Temp|Insul,data=whiteside) # Gas~ Temp|Insul is called "the formula" of the plot. # Here it says that Gas depends on Temp per # level of Insul (insulation), so we plot temperature on the x # axis and gas on the y. lm2<-lm(Gas~Insul/Temp-1,data=whiteside) summary(lm2) # the "/" means that we will fit "Temp" nested within # insulation level. # The minus one in the formula has as effect # that no intercept will be fitted. # If you don't write -1, an intercept will be fitted, # if you write +1 an intercept will be fitted as well. # Maybe there is a quadratic effect of temperature on gas use? # Let's fit it. The "I" means that the term in the formula is # evaluated arithmetically. lm3<-lm(Gas~Insul/(Temp+I(Temp^2))-1,data=whiteside) summary(lm3) # We can compare both models using anova(lm3,lm2) # Is there a significant improvement with the quadratic term? # Try to carry out model simplification and model checking. # A useful function in this respect is stepAIC(). # Try to use it. #! We will go onto the issue of model matrices and contrasts # in the next section. # -------------------------------- # | I. Generalized Linear Models | # -------------------------------- # Generalized linear models can be made for many probability # distributions of the error terms, such as # binomial, poisson, and so on. # The idea is that the location parameter of those distributions # can be made to vary with a linear model, that is usually # coupled to the location through a link function # (which is like a transformation). # Lindsey (1995) introduces many types of generalized # linear models, and McCullagh and Nelder (1989) is a standard reference. # We will fit some examples from Lindsey (1995), # and some from Venables and Ripley's (2002) book. # Generalized linear models all use likelihoods for estimation # and model comparison. # Example Lindsey (1995) p. 15, 36. # We will first run an analysis for binomially distributed data. #! For such data, each observation or data matrix row, #! has to contain two numbers, a number of successes and failures, #! or positive and negative responses. # We construct a matrix of frequencies; # rows correspond to sexes and columns to # whether individuals had PhD plans or not. y <- matrix(c(5,6,12,5),ncol=2) y sex <- gl(2,1) #sex contains the sex groups sex # So there are two sex groups, with a single observation at # each group level. # The first model we fit to these data, calculates # an overall mean: summary(m1<-glm(y~1,family=binomial)) # If you type help(binomial) # you will read that the binomial model links # the data to the linear model through a logit link (while # many other links are also possible). # This means that the mean probability p relates to a linear # model x via log(p/(1-p)) = x. To translate fitted values x # back to p, we have to carry out the inverse transformation. # The following little function just does that: invlogit<-function(x)(exp(x)/(1+exp(x))) # For model m1 that gives: invlogit(m1$coef) # which is the population average probability of having PhD plans. # Let's add a sex effect on this probability: summary(m2<-glm(y~sex,family=binomial, options(contrasts = c("contr.treatment", "contr.poly")))) # In this call, you see the "contrast" statement appearing. # It is used to control the way group or factor effects are calculated. # The first statement (("contr.treatment" here) is the contrast option # for unordered factors, # the second one for ordered factors ("contr.poly" is default). # You can see the effects of changing the options by running summary(m3<-glm(y~sex,family=binomial, options(contrasts = c("contr.sum", "contr.poly")))) # Function model.matrix() is really hady to figure out which predictor # variables contribute to which parameter: model.matrix(m2) model.matrix(m3) # In these model matrices, you can see which parameters contribute # to the fitted value for each observation (per row). # For model m2, if you want to calculate the linear model fits for # observations one and two, you have to type m2$coef[1] # fitted value for row/observation one is the first parameter m2$coef[1] + m2$coef[2] # for model 3, we read from model.matrix that we need to # calculate m3$coef[1] + m3$coef[2] m3$coef[1] - m3$coef[2] # We can also use matrix multiplication "%*%" to get the fitted values: model.matrix(m3)[1,]%*%m3$coef model.matrix(m3)[2,]%*%m3$coef # Try using invlogit() on these values to transform them # back to the original scale. # Running fitted() on the models gives the same result: fitted(m2) fitted(m3) # or, for the second model, invlogit(c(model.matrix(m3)[1,]%*%m3$coef, model.matrix(m3)[2,]%*%m3$coef)) # We can also use function bnlr() from the gnlm library to # fit binomial models: m4<-bnlr(y, link="logit",mu=~sex,pmu=c(1,1)) m4$aic 2*m4$aic m3$aic # Often, in generalized linear models, model selection is done using # Akaike information criteria. # As you see, the AIC reported here is 1/2 the AIC you # get returned from glm. This is the choice of Jim Lindsey, # who programmed bnlr(). # AIC's and deviances can differ in exact value between packages # and functions, # but their relative ranking among models should remain identical. # We now set the contrast options back to their default # before proceeding. options(contrasts = c("contr.treatment", "contr.poly")) # Let's look at Poisson distributed data. # This is a distribution often used for counts. # The example is Lindsey (1995), exercise 2.8 (p.19 and 65), on # migration in Britain between 1966 and 1971. # The frequencies of migrated individuals are: freq <- c(118,12,7,23,14,2127,86,130,8,69,2548,107,12,110,88,7712) # The groups (4 areas of departure or arrival) are: r66 <- gl(4,4,16) r71 <- gl(4,1,16) r66 r71 mp1 <- glm(freq~r66*r71,family=poisson) mp2 <- glm(freq~r66+r71,family=poisson) mp3 <- glm(freq~r66,family=poisson) # These models can be called and compared using mp1$aic mp2$aic mp3$aic # Which model do you prefer? # You could also use stepAIC() for this. # We now present a slightly more advanced example from the MASS library. # The example is also discussed in Venables & Ripley (2002), p. 193. # It is data on the number of dead budworms with dose of toxine and sex # as explanatory variables. dose <- rep(0:5, 2) # toxine levels numdead <- c(1, 4, 9, 13, 18, 20, 0, 2, 6, 10, 12, 16) sex <- factor(rep(c("M", "F"), c(6, 6))) y <- cbind(numdead, numalive = 20 - numdead) budworm1 <- glm(y ~ sex + dose - 1, family = binomial) # the dose.p() function from MASS, can be used to calculate # doses corresponding to a given probability of death dose.p(budworm1, cf = c(1,3), p = 1:3/4) # "cf" selects the appropriate coefficients from the linear model. # Here coefficient one corresponds to the intercept, # the third coefficient is the dose effect. # See also help(dose.p). budworm2<-update(budworm1, family = binomial(link=probit)) # The update function used here is really practical, # you don't need to retype an entire model, # just the part that you want to modify. # Here we change the link to probit, which corresponds to a # threshold trait model with an underlying liability, # and the doses at 25%, 50% and 75% lethality also change: dose.p(budworm2,cf = c(1,3), p = 1:3/4) # A lot more functions are available for generalized linear models, # the gnlr library, for instance, contains many, # with extremely flexible fitting options. # Library MASS contains glm.nb(), # for negative binomially distributed data. # -------------------------- # | I. Mixed Linear Models | # -------------------------- # Mixed models have risen enormously in popularity in recent years. # The idea is that you have sampled a number of subjects from a # population, and that individual subjects contribute a random # effect to a response. # It is maybe easy to understand what this means when thinking of # quantitative genetics. There, individual phenotypes have a # random contribution from the breeding or genotypic value. # The assumption in mixed models usually is that random effects are from # a normal distribution. # We repeat an example from the nlme library, and Pinheiro and Bates' # (2000) book. data(Orthodont) # Orthodontic growth data. summary(Orthodont) # Pinheiro and Bates (2000) have turned Orthodont into a # "groupedData" object, to make fitting of mixed models # easier from their point of view. # Such an object contains a formula describing the nested # structure of the data. formula.groupedData(Orthodont) Orthodont[1,] # Fitting we do: mm1 <- lme(distance ~ age, data = Orthodont,random=~1|Subject) mm2 <- lme(distance ~ age + Sex, data = Orthodont, random = ~ 1|Subject) summary(mm1) summary(mm2) # Inspect this information carefully. # For the random effects, only their standard deviation gets estimated. # We compare the model fit of mm1 and mm2: anova(mm1,mm2) # The two models we fitted differed in the number of fixed effects. # Lme() warns us here! In the case of differences in fixed effects, # we have to fit the model using maximum likelihood (ML) # estimation and not restricted maximum likelihood (REML). mm3 <- update(mm1,method="ML") mm4 <- update(mm2,method="ML") anova(mm3,mm4) # So the more complicated model, with two fixed effects is preferred. # The random effects can be predicted using ranef() # and inspected as follows: hist(ranef(mm2)[,1]) qqplot(ranef(mm2)[,1]) # The normality of random effects is not very convincing here. # It is also possible to model dependent observations, # nested and crossed random effects and so on. Hugely flexible! # However, you should keep in mind that the nlme library # was optimized to fit models with nested random effects. # Crossed random effects can be tricky to specify. # Some quick examples: mm5 <- lme(distance ~ age + Sex, data = Orthodont, random = ~ 1+age|Subject)intervals(mm5) # a random regression model, where the age effect # varies randomly between Subjects. mm6 <- lme(distance ~ age + Sex, data = Orthodont, random = ~ 1|Subject, weights=varIdent(form=~1|Sex)) intervals(mm6) # Different variances per sex. # Possible extensions of this approach are non-linear mixed effect # models (function nlme()) and generalized mixed effect models # (see below). # --------------------------------------- # | IV. Generalized Mixed Linear Models | # --------------------------------------- # Should it still come as a surprise that generalized mixed models # can be fitted too? This can be done most easily using glmmPQL() # from the MASS library. The syntax for glmmPQL() is nearly identical # to lme(). # It calls functions from the nlme library during evaluation. # An example: data(bacteria) gmm<-glmmPQL(y ~ trt + I(week > 2), random = ~ 1 | ID,family = binomial, data =bacteria) summary(gmm) qqplot(ranef(gmm)[,1]) #---------------------------------------------------------------------# Most of the material in this short tutorial comes from # Modern Applied Statistics with S. # by W. N. Venables and B. D. Ripley (2002) # Introductory Statistics. A Modelling Approach. # J. K. Lindsey (1995) # Mixed Effects Models in S and S-Plus. # J. Pinheiro, D.M. Bates (2000) # Generalized Linear Models. # P. McCullagh, J.A. Nelder (1989) # Tom Van Dooren, version 30/01/2003