• Load R Packages
  • Ridge Regression
  • LASSO
  • Elastic Net
  • Penalized Generalized Linear Model
    • Introduction to glmnet package
    • Penalized Logistic Regression
    • Group LASSO Logistic Regression

This notebook illustrates how to implement regularization methods using R.

Load R Packages

# install packages from CRAN
p_needed <- c('caret', 'elasticnet', 'glmnet', 'devtools',
              'MASS', 'grplasso')

packages <- rownames(installed.packages())
p_to_install <- p_needed[!(p_needed %in% packages)]
if (length(p_to_install) > 0) {
    install.packages(p_to_install)
}

lapply(p_needed, require, character.only = TRUE)

# install packages from GitHub
p_needed_gh <- c('NetlifyDS')

if (! p_needed_gh %in% packages) {
    devtools::install_github("netlify/NetlifyDS")
}

library(NetlifyDS)

Ridge Regression

dat <- read.csv("http://bit.ly/2P5gTw4")
# data cleaning: delete wrong observations
# expense can't be negative
dat <- subset(dat, store_exp > 0 & online_exp > 0)

# get predictors
trainx <- dat[ , grep("Q", names(dat))]
# get response
trainy <- dat$store_exp + dat$online_exp

In ridge regression, there is a tuning parameter λ that needs to be set to get the right level of regularization. The value of λ can be determined by looking at performance on the cross-validation data.
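
For reference, a sketch of what ridge regression minimizes: the residual sum of squares plus an L2 penalty on the coefficients,

$$\hat{\beta}^{ridge} = \arg\min_{\beta}\; \sum_{i=1}^{n}\Big(y_i - \beta_0 - \sum_{j=1}^{p} x_{ij}\beta_j\Big)^2 + \lambda \sum_{j=1}^{p} \beta_j^2$$

so larger values of λ shrink the coefficients more strongly toward (but not exactly to) zero.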

# set cross validation
ctrl <- trainControl(method = "cv", number = 10)
# set the parameter range
ridgeGrid <- data.frame(.lambda = seq(0, .1, length = 20))
set.seed(100)

ridgeRegTune <- train(trainx, trainy,
                      method = "ridge",
                      tuneGrid = ridgeGrid,
                      trControl = ctrl,
                      ## center and scale predictors
                      preProc = c("center", "scale"))
ridgeRegTune
## Ridge Regression 
## 
## 999 samples
##  10 predictor
## 
## Pre-processing: centered (10), scaled (10) 
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 899, 899, 899, 899, 899, 900, ... 
## Resampling results across tuning parameters:
## 
##   lambda       RMSE      Rsquared   MAE     
##   0.000000000  1620.980  0.8271792  756.8867
##   0.005263158  1620.079  0.8273987  757.8206
##   0.010526316  1619.773  0.8275476  758.9396
##   0.015789474  1619.901  0.8276512  760.1890
##   0.021052632  1620.366  0.8277243  761.5209
##   0.026315789  1621.107  0.8277760  763.1556
##   0.031578947  1622.083  0.8278122  764.8858
##   0.036842105  1623.267  0.8278370  766.7312
##   0.042105263  1624.639  0.8278529  768.6860
##   0.047368421  1626.185  0.8278620  770.7971
##   0.052631579  1627.892  0.8278657  772.9889
##   0.057894737  1629.753  0.8278649  775.2529
##   0.063157895  1631.760  0.8278605  777.6964
##   0.068421053  1633.908  0.8278530  780.2181
##   0.073684211  1636.190  0.8278430  782.8435
##   0.078947368  1638.604  0.8278307  785.5305
##   0.084210526  1641.144  0.8278165  788.2773
##   0.089473684  1643.808  0.8278006  791.2583
##   0.094736842  1646.592  0.8277832  794.3876
##   0.100000000  1649.494  0.8277644  797.5885
## 
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was lambda = 0.01052632.
plot(ridgeRegTune)

Once we have determined the value of the tuning parameter λ, we can fit the ridge regression model in several ways, for example with the enet() function.

ridgefit = enet(x = as.matrix(trainx), y = trainy, lambda = 0.01,
                # center and scale predictors
                normalize = TRUE)

Once the model is fit, we can use the predict() function to make predictions on a new dataset.

ridgePred <- predict(ridgefit, newx = as.matrix(trainx),
                     s = 1, mode = "fraction", type = "fit")
names(ridgePred)
## [1] "s"        "fraction" "mode"     "fit"
head(ridgePred$fit)
##         1         2         3         4         5         6 
## 1290.4697  224.1595  591.4406 1220.6384  853.3572  908.2040
ridgeCoef <- predict(ridgefit, newx = as.matrix(trainx),
                     s = 1, mode = "fraction", type = "coefficients")
ridgeCoef$coefficients
##          Q1          Q2          Q3          Q4          Q5          Q6 
## -316.763403  684.044587  649.176319  246.932116  576.078371  692.149705 
##          Q7          Q8          Q9         Q10 
##    6.181062 -116.500992  382.265663 -314.739623
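
The calls above predict back on the training data; a minimal sketch of scoring a genuinely new observation (with hypothetical answers to Q1 through Q10, made up for illustration):

# one hypothetical respondent; the answer values are random placeholders
newq <- matrix(sample(1:5, 10, replace = TRUE), nrow = 1,
               dimnames = list(NULL, names(trainx)))
# predicted total expense for this hypothetical respondent
predict(ridgefit, newx = newq, s = 1,
        mode = "fraction", type = "fit")$fit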

LASSO

The LASSO shrinks some coefficient estimates exactly to zero, so it can also be used as a variable selection tool. In LASSO, there is also a tuning parameter, fraction, which can be determined from performance on the cross-validation data.
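
For reference, the LASSO objective replaces the L2 penalty of ridge regression with an L1 penalty,

$$\hat{\beta}^{lasso} = \arg\min_{\beta}\; \sum_{i=1}^{n}\Big(y_i - \beta_0 - \sum_{j=1}^{p} x_{ij}\beta_j\Big)^2 + \lambda \sum_{j=1}^{p} |\beta_j|$$

Roughly speaking, the fraction tuned below is the L1 norm of the penalized coefficient vector expressed as a fraction of the L1 norm of the unpenalized fit, so fraction = 1 corresponds to no shrinkage.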

ctrl <- trainControl(method = "cv", number = 10)
lassoGrid <- data.frame(fraction = seq(.8, 1, length = 20))
set.seed(100)
lassoTune <- train(trainx, trainy,
                   ## set the method to be lasso
                   method = "lars",
                   tuneGrid = lassoGrid,
                   trControl = ctrl,
                   ## standardize the predictors
                   preProc = c("center", "scale"))
lassoTune
## Least Angle Regression 
## 
## 999 samples
##  10 predictor
## 
## Pre-processing: centered (10), scaled (10) 
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 899, 899, 899, 899, 899, 900, ... 
## Resampling results across tuning parameters:
## 
##   fraction   RMSE      Rsquared   MAE     
##   0.8000000  1633.994  0.8250185  788.8895
##   0.8105263  1631.956  0.8253421  785.7150
##   0.8210526  1630.196  0.8256225  782.6543
##   0.8315789  1628.441  0.8258906  779.6467
##   0.8421053  1626.539  0.8261791  776.6017
##   0.8526316  1624.745  0.8264526  773.6821
##   0.8631579  1623.107  0.8267066  771.0365
##   0.8736842  1621.643  0.8269375  768.4821
##   0.8842105  1620.390  0.8271378  766.0623
##   0.8947368  1619.349  0.8273079  763.8790
##   0.9052632  1618.520  0.8274481  761.9152
##   0.9157895  1617.903  0.8275585  760.2604
##   0.9263158  1617.516  0.8276338  758.9376
##   0.9368421  1617.382  0.8276544  757.8210
##   0.9473684  1617.452  0.8276476  757.0097
##   0.9578947  1617.802  0.8276001  756.4950
##   0.9684211  1618.367  0.8275239  756.1559
##   0.9789474  1619.138  0.8274210  756.0236
##   0.9894737  1620.048  0.8273066  756.3794
##   1.0000000  1620.980  0.8271792  756.8867
## 
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was fraction = 0.9368421.
plot(lassoTune)

Once the tuning parameter fraction is set, we can use different functions to fit the LASSO model, such as lars() in lars, enet() in elasticnet, and glmnet() in glmnet (a lars() sketch follows the enet() example below).

lassoModel <- enet(x = as.matrix(trainx), y = trainy,
                  lambda = 0, normalize = TRUE)
lassoFit <- predict(lassoModel, newx = as.matrix(trainx),
                    s = 0.957, mode = "fraction", type = "fit")
head(lassoFit$fit)
##         1         2         3         4         5         6 
## 1357.2815  300.5298  690.2390 1228.1558  838.4467 1010.0713
lassoCoef <- predict(lassoModel, 
                     newx = as.matrix(trainx),
                     s = 0.95, mode = "fraction", 
                     type = "coefficients")
lassoCoef$coefficients
##        Q1        Q2        Q3        Q4        Q5        Q6        Q7        Q8 
## -326.7759  720.2801  722.7518  180.7107  542.1603  603.0865    0.0000  -75.9610 
##        Q9       Q10 
##  342.6375 -281.7818
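
As a cross-check, a minimal sketch of the same fit with lars() (this assumes the lars package is installed; caret's "lars" method used above already depends on it):

library(lars)
lassoLars <- lars(as.matrix(trainx), trainy, type = "lasso")
# predictions at the same fraction of the L1 norm as above
larsFit <- predict(lassoLars, newx = as.matrix(trainx),
                   s = 0.957, mode = "fraction", type = "fit")
head(larsFit$fit)
# coefficient estimates at fraction = 0.95
larsCoef <- predict(lassoLars, s = 0.95,
                    mode = "fraction", type = "coefficients")
larsCoef$coefficients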

Elastic Net

Elastic net is a generalization of LASSO and ridge regression: it combines the L1 and L2 penalties of the two methods. For elastic net, there are two tuning parameters, lambda and fraction.
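
For reference, a sketch of the elastic net objective in the parameterization used by the elasticnet package (which backs caret's "enet" method):

$$\hat{\beta}^{enet} = \arg\min_{\beta}\; \sum_{i=1}^{n}\Big(y_i - \beta_0 - \sum_{j=1}^{p} x_{ij}\beta_j\Big)^2 + \lambda_2 \sum_{j=1}^{p} \beta_j^2 + \lambda_1 \sum_{j=1}^{p} |\beta_j|$$

Here the lambda that caret tunes is the L2 (ridge) penalty, while fraction indexes the L1 part of the solution path in the same way as in the LASSO section above.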

enetGrid <- expand.grid(.lambda = seq(0,0.2,length=20),
                        .fraction = seq(.8, 1, length = 20))
set.seed(100)
enetTune <- train(trainx, trainy,
                  method = "enet",
                  tuneGrid = enetGrid,
                  trControl = ctrl,
                  preProc = c("center", "scale"))
enetTune
## Elasticnet 
## 
## 999 samples
##  10 predictor
## 
## Pre-processing: centered (10), scaled (10) 
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 899, 899, 899, 899, 899, 900, ... 
## Resampling results across tuning parameters:
## 
##   lambda      fraction   RMSE      Rsquared   MAE     
##   0.00000000  0.8000000  1633.994  0.8250185  788.8895
##   0.00000000  0.8105263  1631.956  0.8253421  785.7150
##   0.00000000  0.8210526  1630.196  0.8256225  782.6543
##   0.00000000  0.8315789  1628.441  0.8258906  779.6467
##   0.00000000  0.8421053  1626.539  0.8261791  776.6017
##   0.00000000  0.8526316  1624.745  0.8264526  773.6821
##   0.00000000  0.8631579  1623.107  0.8267066  771.0365
##   0.00000000  0.8736842  1621.643  0.8269375  768.4821
##   0.00000000  0.8842105  1620.390  0.8271378  766.0623
##   0.00000000  0.8947368  1619.349  0.8273079  763.8790
##   0.00000000  0.9052632  1618.520  0.8274481  761.9152
##   0.00000000  0.9157895  1617.903  0.8275585  760.2604
##   0.00000000  0.9263158  1617.516  0.8276338  758.9376
##   0.00000000  0.9368421  1617.382  0.8276544  757.8210
##   0.00000000  0.9473684  1617.452  0.8276476  757.0097
##   0.00000000  0.9578947  1617.802  0.8276001  756.4950
##   0.00000000  0.9684211  1618.367  0.8275239  756.1559
##   0.00000000  0.9789474  1619.138  0.8274210  756.0236
##   0.00000000  0.9894737  1620.048  0.8273066  756.3794
##   0.00000000  1.0000000  1620.980  0.8271792  756.8867
##   0.01052632  0.8000000  1635.826  0.8246536  792.3834
##   0.01052632  0.8105263  1633.554  0.8250088  789.1700
##   0.01052632  0.8210526  1631.502  0.8253351  785.9805
##   0.01052632  0.8315789  1629.488  0.8256497  782.7859
##   0.01052632  0.8421053  1627.400  0.8259707  779.6851
##   0.01052632  0.8526316  1625.519  0.8262635  776.6770
##   0.01052632  0.8631579  1623.789  0.8265383  773.9974
##   0.01052632  0.8736842  1622.231  0.8267914  771.4169
##   0.01052632  0.8842105  1620.827  0.8270245  768.9600
##   0.01052632  0.8947368  1619.623  0.8272307  766.6214
##   0.01052632  0.9052632  1618.627  0.8274092  764.5374
##   0.01052632  0.9157895  1617.839  0.8275603  762.6204
##   0.01052632  0.9263158  1617.259  0.8276842  761.0392
##   0.01052632  0.9368421  1616.888  0.8277814  759.8925
##   0.01052632  0.9473684  1616.838  0.8278132  759.0871
##   0.01052632  0.9578947  1617.006  0.8278170  758.5339
##   0.01052632  0.9684211  1617.445  0.8277844  758.1862
##   0.01052632  0.9789474  1618.087  0.8277273  758.1863
##   0.01052632  0.9894737  1618.885  0.8276494  758.4365
##   0.01052632  1.0000000  1619.773  0.8275476  758.9396
##   0.02105263  0.8000000  1636.749  0.8244436  794.3219
##   0.02105263  0.8105263  1634.399  0.8248178  791.0998
##   0.02105263  0.8210526  1632.324  0.8251543  787.9501
##   0.02105263  0.8315789  1630.101  0.8254999  784.7358
##   0.02105263  0.8421053  1627.975  0.8258362  781.5357
##   0.02105263  0.8526316  1626.056  0.8261444  778.6445
##   0.02105263  0.8631579  1624.264  0.8264405  775.8744
##   0.02105263  0.8736842  1622.668  0.8267113  773.2656
##   0.02105263  0.8842105  1621.279  0.8269554  770.8272
##   0.02105263  0.8947368  1620.099  0.8271730  768.6565
##   0.02105263  0.9052632  1619.127  0.8273644  766.5949
##   0.02105263  0.9157895  1618.365  0.8275300  764.9173
##   0.02105263  0.9263158  1617.813  0.8276701  763.5334
##   0.02105263  0.9368421  1617.470  0.8277849  762.2718
##   0.02105263  0.9473684  1617.360  0.8278664  761.4052
##   0.02105263  0.9578947  1617.537  0.8278957  760.8778
##   0.02105263  0.9684211  1617.974  0.8278925  760.6887
##   0.02105263  0.9789474  1618.638  0.8278626  760.7326
##   0.02105263  0.9894737  1619.458  0.8278079  761.0056
##   0.02105263  1.0000000  1620.366  0.8277243  761.5209
##   0.03157895  0.8000000  1637.268  0.8243143  795.3702
##   0.03157895  0.8105263  1634.805  0.8247123  792.1327
##   0.03157895  0.8210526  1632.613  0.8250724  789.0308
##   0.03157895  0.8315789  1630.397  0.8254248  785.9244
##   0.03157895  0.8421053  1628.345  0.8257606  782.8069
##   0.03157895  0.8526316  1626.479  0.8260740  779.9636
##   0.03157895  0.8631579  1624.758  0.8263731  777.2614
##   0.03157895  0.8736842  1623.248  0.8266463  774.7894
##   0.03157895  0.8842105  1621.949  0.8268939  772.5342
##   0.03157895  0.8947368  1620.862  0.8271163  770.4573
##   0.03157895  0.9052632  1619.988  0.8273137  768.7255
##   0.03157895  0.9157895  1619.327  0.8274865  767.1724
##   0.03157895  0.9263158  1618.879  0.8276351  765.8840
##   0.03157895  0.9368421  1618.644  0.8277598  764.7367
##   0.03157895  0.9473684  1618.623  0.8278610  764.0250
##   0.03157895  0.9578947  1618.878  0.8279155  763.6802
##   0.03157895  0.9684211  1619.400  0.8279324  763.5917
##   0.03157895  0.9789474  1620.176  0.8279200  763.7311
##   0.03157895  0.9894737  1621.106  0.8278819  764.1907
##   0.03157895  1.0000000  1622.083  0.8278122  764.8858
##   0.04210526  0.8000000  1637.336  0.8242673  795.7676
##   0.04210526  0.8105263  1634.988  0.8246580  792.6866
##   0.04210526  0.8210526  1632.865  0.8250148  789.7883
##   0.04210526  0.8315789  1630.709  0.8253703  786.7658
##   0.04210526  0.8421053  1628.769  0.8257030  783.8272
##   0.04210526  0.8526316  1626.981  0.8260211  781.0815
##   0.04210526  0.8631579  1625.383  0.8263184  778.5497
##   0.04210526  0.8736842  1624.002  0.8265908  776.3261
##   0.04210526  0.8842105  1622.837  0.8268386  774.2760
##   0.04210526  0.8947368  1621.890  0.8270623  772.4349
##   0.04210526  0.9052632  1621.161  0.8272620  770.8630
##   0.04210526  0.9157895  1620.651  0.8274383  769.4260
##   0.04210526  0.9263158  1620.360  0.8275915  768.1913
##   0.04210526  0.9368421  1620.287  0.8277218  767.3754
##   0.04210526  0.9473684  1620.432  0.8278297  766.9569
##   0.04210526  0.9578947  1620.819  0.8279065  766.9184
##   0.04210526  0.9684211  1621.492  0.8279369  767.0220
##   0.04210526  0.9789474  1622.441  0.8279368  767.3846
##   0.04210526  0.9894737  1623.547  0.8279114  767.9179
##   0.04210526  1.0000000  1624.639  0.8278529  768.6860
##   0.05263158  0.8000000  1637.408  0.8242360  796.1282
##   0.05263158  0.8105263  1635.203  0.8246179  793.2558
##   0.05263158  0.8210526  1633.116  0.8249752  790.4644
##   0.05263158  0.8315789  1631.093  0.8253287  787.5629
##   0.05263158  0.8421053  1629.276  0.8256597  784.7421
##   0.05263158  0.8526316  1627.611  0.8259779  782.1662
##   0.05263158  0.8631579  1626.167  0.8262718  779.9035
##   0.05263158  0.8736842  1624.947  0.8265417  777.8830
##   0.05263158  0.8842105  1623.951  0.8267879  776.1158
##   0.05263158  0.8947368  1623.179  0.8270109  774.4921
##   0.05263158  0.9052632  1622.632  0.8272109  773.0840
##   0.05263158  0.9157895  1622.310  0.8273884  771.8317
##   0.05263158  0.9263158  1622.214  0.8275437  770.8828
##   0.05263158  0.9368421  1622.342  0.8276773  770.3604
##   0.05263158  0.9473684  1622.695  0.8277893  770.2500
##   0.05263158  0.9578947  1623.272  0.8278802  770.4072
##   0.05263158  0.9684211  1624.145  0.8279207  770.7594
##   0.05263158  0.9789474  1625.308  0.8279300  771.3430
##   0.05263158  0.9894737  1626.645  0.8279158  772.0595
##   0.05263158  1.0000000  1627.892  0.8278657  772.9889
##   0.06315789  0.8000000  1637.532  0.8242144  796.5193
##   0.06315789  0.8105263  1635.488  0.8245865  793.7879
##   0.06315789  0.8210526  1633.451  0.8249476  791.0514
##   0.06315789  0.8315789  1631.584  0.8252951  788.2870
##   0.06315789  0.8421053  1629.890  0.8256275  785.6468
##   0.06315789  0.8526316  1628.396  0.8259407  783.4649
##   0.06315789  0.8631579  1627.132  0.8262302  781.4300
##   0.06315789  0.8736842  1626.100  0.8264966  779.6719
##   0.06315789  0.8842105  1625.299  0.8267401  778.1326
##   0.06315789  0.8947368  1624.731  0.8269612  776.7504
##   0.06315789  0.9052632  1624.395  0.8271603  775.4892
##   0.06315789  0.9157895  1624.291  0.8273377  774.5648
##   0.06315789  0.9263158  1624.420  0.8274937  773.9205
##   0.06315789  0.9368421  1624.781  0.8276289  773.5923
##   0.06315789  0.9473684  1625.374  0.8277434  773.6843
##   0.06315789  0.9578947  1626.197  0.8278377  774.1672
##   0.06315789  0.9684211  1627.300  0.8278907  774.6892
##   0.06315789  0.9789474  1628.705  0.8279081  775.4247
##   0.06315789  0.9894737  1630.314  0.8279032  776.5186
##   0.06315789  1.0000000  1631.760  0.8278605  777.6964
##   0.07368421  0.8000000  1637.738  0.8241989  796.9050
##   0.07368421  0.8105263  1635.823  0.8245637  794.3508
##   0.07368421  0.8210526  1633.895  0.8249260  791.6353
##   0.07368421  0.8315789  1632.191  0.8252692  789.1041
##   0.07368421  0.8421053  1630.653  0.8256000  786.8485
##   0.07368421  0.8526316  1629.353  0.8259075  784.8898
##   0.07368421  0.8631579  1628.292  0.8261922  783.1740
##   0.07368421  0.8736842  1627.470  0.8264545  781.6704
##   0.07368421  0.8842105  1626.890  0.8266946  780.3200
##   0.07368421  0.8947368  1626.549  0.8269131  779.1108
##   0.07368421  0.9052632  1626.450  0.8271104  778.2312
##   0.07368421  0.9157895  1626.591  0.8272867  777.6039
##   0.07368421  0.9263158  1626.972  0.8274425  777.2800
##   0.07368421  0.9368421  1627.592  0.8275782  777.1814
##   0.07368421  0.9473684  1628.451  0.8276941  777.4615
##   0.07368421  0.9578947  1629.548  0.8277906  778.0005
##   0.07368421  0.9684211  1630.916  0.8278526  778.8693
##   0.07368421  0.9789474  1632.595  0.8278761  780.0075
##   0.07368421  0.9894737  1634.513  0.8278789  781.4383
##   0.07368421  1.0000000  1636.190  0.8278430  782.8435
##   0.08421053  0.8000000  1638.058  0.8241860  797.3405
##   0.08421053  0.8105263  1636.219  0.8245492  794.8565
##   0.08421053  0.8210526  1634.473  0.8249074  792.3585
##   0.08421053  0.8315789  1632.921  0.8252503  790.1821
##   0.08421053  0.8421053  1631.587  0.8255746  788.1906
##   0.08421053  0.8526316  1630.499  0.8258763  786.5015
##   0.08421053  0.8631579  1629.661  0.8261559  785.0487
##   0.08421053  0.8736842  1629.071  0.8264137  783.7361
##   0.08421053  0.8842105  1628.730  0.8266501  782.6377
##   0.08421053  0.8947368  1628.639  0.8268656  781.8686
##   0.08421053  0.9052632  1628.797  0.8270605  781.3347
##   0.08421053  0.9157895  1629.204  0.8272352  781.0349
##   0.08421053  0.9263158  1629.860  0.8273902  780.9363
##   0.08421053  0.9368421  1630.762  0.8275257  781.1351
##   0.08421053  0.9473684  1631.911  0.8276423  781.5617
##   0.08421053  0.9578947  1633.305  0.8277401  782.4285
##   0.08421053  0.9684211  1634.966  0.8278086  783.5899
##   0.08421053  0.9789474  1636.945  0.8278368  785.0542
##   0.08421053  0.9894737  1639.182  0.8278450  786.6634
##   0.08421053  1.0000000  1641.144  0.8278165  788.2773
##   0.09473684  0.8000000  1638.510  0.8241734  797.9016
##   0.09473684  0.8105263  1636.746  0.8245397  795.4687
##   0.09473684  0.8210526  1635.189  0.8248924  793.3868
##   0.09473684  0.8315789  1633.818  0.8252326  791.4605
##   0.09473684  0.8421053  1632.704  0.8255504  789.6859
##   0.09473684  0.8526316  1631.847  0.8258462  788.3153
##   0.09473684  0.8631579  1631.248  0.8261205  787.0551
##   0.09473684  0.8736842  1630.908  0.8263736  786.0017
##   0.09473684  0.8842105  1630.826  0.8266061  785.2973
##   0.09473684  0.8947368  1631.003  0.8268182  784.8744
##   0.09473684  0.9052632  1631.438  0.8270105  784.7020
##   0.09473684  0.9157895  1632.130  0.8271833  784.6224
##   0.09473684  0.9263158  1633.079  0.8273369  784.8459
##   0.09473684  0.9368421  1634.284  0.8274718  785.3350
##   0.09473684  0.9473684  1635.742  0.8275884  786.2146
##   0.09473684  0.9578947  1637.453  0.8276870  787.4105
##   0.09473684  0.9684211  1639.430  0.8277601  788.7539
##   0.09473684  0.9789474  1641.732  0.8277919  790.4008
##   0.09473684  0.9894737  1644.299  0.8278041  792.4600
##   0.09473684  1.0000000  1646.592  0.8277832  794.3876
##   0.10526316  0.8000000  1639.012  0.8241683  798.4797
##   0.10526316  0.8105263  1637.414  0.8245335  796.3703
##   0.10526316  0.8210526  1636.035  0.8248844  794.5291
##   0.10526316  0.8315789  1634.891  0.8252174  792.7750
##   0.10526316  0.8421053  1634.014  0.8255286  791.3708
##   0.10526316  0.8526316  1633.405  0.8258183  790.2048
##   0.10526316  0.8631579  1633.064  0.8260872  789.2252
##   0.10526316  0.8736842  1632.992  0.8263355  788.5132
##   0.10526316  0.8842105  1633.188  0.8265637  788.1901
##   0.10526316  0.8947368  1633.651  0.8267723  788.1484
##   0.10526316  0.9052632  1634.382  0.8269615  788.1860
##   0.10526316  0.9157895  1635.379  0.8271319  788.4404
##   0.10526316  0.9263158  1636.640  0.8272838  789.0227
##   0.10526316  0.9368421  1638.165  0.8274177  789.9979
##   0.10526316  0.9473684  1639.953  0.8275338  791.1721
##   0.10526316  0.9578947  1642.000  0.8276326  792.5859
##   0.10526316  0.9684211  1644.315  0.8277088  794.3763
##   0.10526316  0.9789474  1646.960  0.8277433  796.4570
##   0.10526316  0.9894737  1649.878  0.8277586  798.7159
##   0.10526316  1.0000000  1652.510  0.8277443  800.8477
##   0.11578947  0.8000000  1639.626  0.8241679  799.2381
##   0.11578947  0.8105263  1638.241  0.8245282  797.4189
##   0.11578947  0.8210526  1637.062  0.8248759  795.7098
##   0.11578947  0.8315789  1636.160  0.8252018  794.3098
##   0.11578947  0.8421053  1635.536  0.8255064  793.1432
##   0.11578947  0.8526316  1635.190  0.8257902  792.2822
##   0.11578947  0.8631579  1635.122  0.8260536  791.6245
##   0.11578947  0.8736842  1635.333  0.8262971  791.3161
##   0.11578947  0.8842105  1635.821  0.8265211  791.3443
##   0.11578947  0.8947368  1636.586  0.8267260  791.5316
##   0.11578947  0.9052632  1637.628  0.8269121  791.9195
##   0.11578947  0.9157895  1638.945  0.8270800  792.5698
##   0.11578947  0.9263158  1640.534  0.8272300  793.6021
##   0.11578947  0.9368421  1642.396  0.8273626  794.8365
##   0.11578947  0.9473684  1644.527  0.8274780  796.2876
##   0.11578947  0.9578947  1646.926  0.8275766  798.2356
##   0.11578947  0.9684211  1649.597  0.8276550  800.4001
##   0.11578947  0.9789474  1652.600  0.8276914  802.6660
##   0.11578947  0.9894737  1655.884  0.8277093  805.2068
##   0.11578947  1.0000000  1658.877  0.8277008  807.5841
##   0.12631579  0.8000000  1640.385  0.8241684  800.1978
##   0.12631579  0.8105263  1639.195  0.8245262  798.4837
##   0.12631579  0.8210526  1638.260  0.8248664  797.0622
##   0.12631579  0.8315789  1637.613  0.8251853  795.9646
##   0.12631579  0.8421053  1637.255  0.8254836  795.1034
##   0.12631579  0.8526316  1637.186  0.8257615  794.5419
##   0.12631579  0.8631579  1637.405  0.8260196  794.2696
##   0.12631579  0.8736842  1637.913  0.8262583  794.3320
##   0.12631579  0.8842105  1638.708  0.8264781  794.6365
##   0.12631579  0.8947368  1639.789  0.8266792  795.1870
##   0.12631579  0.9052632  1641.156  0.8268623  795.9471
##   0.12631579  0.9157895  1642.806  0.8270275  797.0122
##   0.12631579  0.9263158  1644.738  0.8271755  798.3034
##   0.12631579  0.9368421  1646.949  0.8273065  799.8984
##   0.12631579  0.9473684  1649.438  0.8274210  801.8492
##   0.12631579  0.9578947  1652.202  0.8275193  804.0912
##   0.12631579  0.9684211  1655.243  0.8275989  806.5709
##   0.12631579  0.9789474  1658.620  0.8276369  809.0791
##   0.12631579  0.9894737  1662.283  0.8276569  811.8636
##   0.12631579  1.0000000  1665.671  0.8276534  814.5830
##   0.13684211  0.8000000  1641.289  0.8241685  801.2337
##   0.13684211  0.8105263  1640.312  0.8245226  799.6983
##   0.13684211  0.8210526  1639.634  0.8248556  798.6307
##   0.13684211  0.8315789  1639.256  0.8251678  797.8107
##   0.13684211  0.8421053  1639.176  0.8254598  797.2210
##   0.13684211  0.8526316  1639.397  0.8257320  797.0378
##   0.13684211  0.8631579  1639.916  0.8259849  797.1659
##   0.13684211  0.8736842  1640.733  0.8262189  797.5355
##   0.13684211  0.8842105  1641.848  0.8264344  798.1680
##   0.13684211  0.8947368  1643.258  0.8266319  799.0627
##   0.13684211  0.9052632  1644.962  0.8268117  800.2041
##   0.13684211  0.9157895  1646.959  0.8269744  801.5514
##   0.13684211  0.9263158  1649.245  0.8271202  803.3136
##   0.13684211  0.9368421  1651.819  0.8272496  805.3203
##   0.13684211  0.9473684  1654.678  0.8273631  807.5619
##   0.13684211  0.9578947  1657.819  0.8274608  810.1632
##   0.13684211  0.9684211  1661.242  0.8275410  812.9046
##   0.13684211  0.9789474  1665.006  0.8275801  815.7220
##   0.13684211  0.9894737  1669.059  0.8276019  818.8352
##   0.13684211  1.0000000  1672.876  0.8276026  821.9239
##   0.14736842  0.8000000  1642.335  0.8241695  802.3745
##   0.14736842  0.8105263  1641.615  0.8245162  801.2545
##   0.14736842  0.8210526  1641.206  0.8248422  800.4291
##   0.14736842  0.8315789  1641.107  0.8251479  799.8450
##   0.14736842  0.8421053  1641.318  0.8254338  799.6691
##   0.14736842  0.8526316  1641.840  0.8257005  799.8625
##   0.14736842  0.8631579  1642.670  0.8259482  800.2687
##   0.14736842  0.8736842  1643.809  0.8261776  800.9595
##   0.14736842  0.8842105  1645.254  0.8263890  801.9502
##   0.14736842  0.8947368  1647.004  0.8265829  803.1593
##   0.14736842  0.9052632  1649.057  0.8267596  804.6551
##   0.14736842  0.9157895  1651.410  0.8269197  806.4847
##   0.14736842  0.9263158  1654.062  0.8270634  808.5611
##   0.14736842  0.9368421  1657.009  0.8271912  810.8726
##   0.14736842  0.9473684  1660.248  0.8273035  813.4750
##   0.14736842  0.9578947  1663.776  0.8274006  816.3706
##   0.14736842  0.9684211  1667.592  0.8274809  819.4174
##   0.14736842  0.9789474  1671.752  0.8275208  822.6373
##   0.14736842  0.9894737  1676.206  0.8275442  826.0982
##   0.14736842  1.0000000  1680.474  0.8275487  829.6293
##   0.15789474  0.8000000  1643.538  0.8241678  803.8305
##   0.15789474  0.8105263  1643.090  0.8245071  802.9769
##   0.15789474  0.8210526  1642.963  0.8248262  802.4061
##   0.15789474  0.8315789  1643.159  0.8251254  802.2364
##   0.15789474  0.8421053  1643.675  0.8254054  802.4176
##   0.15789474  0.8526316  1644.512  0.8256665  802.9057
##   0.15789474  0.8631579  1645.668  0.8259092  803.6094
##   0.15789474  0.8736842  1647.142  0.8261339  804.6866
##   0.15789474  0.8842105  1648.933  0.8263412  805.9801
##   0.15789474  0.8947368  1651.037  0.8265314  807.5572
##   0.15789474  0.9052632  1653.452  0.8267049  809.5161
##   0.15789474  0.9157895  1656.177  0.8268622  811.6370
##   0.15789474  0.9263158  1659.207  0.8270036  814.0269
##   0.15789474  0.9368421  1662.541  0.8271296  816.7211
##   0.15789474  0.9473684  1666.173  0.8272405  819.6860
##   0.15789474  0.9578947  1670.102  0.8273368  822.8679
##   0.15789474  0.9684211  1674.324  0.8274167  826.2949
##   0.15789474  0.9789474  1678.894  0.8274570  829.9155
##   0.15789474  0.9894737  1683.760  0.8274816  833.8697
##   0.15789474  1.0000000  1688.449  0.8274919  837.7241
##   0.16842105  0.8000000  1644.930  0.8241627  805.5032
##   0.16842105  0.8105263  1644.763  0.8244949  804.9234
##   0.16842105  0.8210526  1644.930  0.8248073  804.7077
##   0.16842105  0.8315789  1645.430  0.8251003  804.8776
##   0.16842105  0.8421053  1646.262  0.8253745  805.3941
##   0.16842105  0.8526316  1647.425  0.8256302  806.1231
##   0.16842105  0.8631579  1648.917  0.8258680  807.2437
##   0.16842105  0.8736842  1650.736  0.8260883  808.6241
##   0.16842105  0.8842105  1652.881  0.8262916  810.2746
##   0.16842105  0.8947368  1655.348  0.8264782  812.3418
##   0.16842105  0.9052632  1658.136  0.8266486  814.5910
##   0.16842105  0.9157895  1661.240  0.8268032  817.0408
##   0.16842105  0.9263158  1664.658  0.8269424  819.8316
##   0.16842105  0.9368421  1668.386  0.8270666  822.8908
##   0.16842105  0.9473684  1672.420  0.8271762  826.1590
##   0.16842105  0.9578947  1676.757  0.8272715  829.6881
##   0.16842105  0.9684211  1681.393  0.8273508  833.6413
##   0.16842105  0.9789474  1686.380  0.8273913  837.7712
##   0.16842105  0.9894737  1691.666  0.8274169  841.9847
##   0.16842105  1.0000000  1696.785  0.8274326  846.1587
##   0.17894737  0.8000000  1646.523  0.8241551  807.3583
##   0.17894737  0.8105263  1646.650  0.8244804  807.0957
##   0.17894737  0.8210526  1647.122  0.8247863  807.2335
##   0.17894737  0.8315789  1647.937  0.8250733  807.7285
##   0.17894737  0.8421053  1649.095  0.8253418  808.5181
##   0.17894737  0.8526316  1650.594  0.8255924  809.6572
##   0.17894737  0.8631579  1652.432  0.8258254  811.0895
##   0.17894737  0.8736842  1654.607  0.8260414  812.8021
##   0.17894737  0.8842105  1657.116  0.8262407  814.9490
##   0.17894737  0.8947368  1659.957  0.8264238  817.3163
##   0.17894737  0.9052632  1663.126  0.8265912  819.8924
##   0.17894737  0.9157895  1666.619  0.8267431  822.7604
##   0.17894737  0.9263158  1670.434  0.8268801  825.9592
##   0.17894737  0.9368421  1674.565  0.8270025  829.3290
##   0.17894737  0.9473684  1679.009  0.8271107  832.9486
##   0.17894737  0.9578947  1683.761  0.8272051  837.0569
##   0.17894737  0.9684211  1688.819  0.8272834  841.3935
##   0.17894737  0.9789474  1694.231  0.8273239  845.7289
##   0.17894737  0.9894737  1699.944  0.8273504  850.2820
##   0.17894737  1.0000000  1705.467  0.8273709  854.8477
##   0.18947368  0.8000000  1648.325  0.8241448  809.3866
##   0.18947368  0.8105263  1648.756  0.8244635  809.5517
##   0.18947368  0.8210526  1649.543  0.8247632  809.9924
##   0.18947368  0.8315789  1650.684  0.8250443  810.7452
##   0.18947368  0.8421053  1652.178  0.8253074  811.8904
##   0.18947368  0.8526316  1654.023  0.8255529  813.4040
##   0.18947368  0.8631579  1656.217  0.8257813  815.1599
##   0.18947368  0.8736842  1658.757  0.8259931  817.3588
##   0.18947368  0.8842105  1661.640  0.8261886  819.8090
##   0.18947368  0.8947368  1664.863  0.8263683  822.5265
##   0.18947368  0.9052632  1668.422  0.8265326  825.5497
##   0.18947368  0.9157895  1672.312  0.8266819  828.8014
##   0.18947368  0.9263158  1676.531  0.8268167  832.2915
##   0.18947368  0.9368421  1681.073  0.8269373  836.0084
##   0.18947368  0.9473684  1685.934  0.8270441  840.2082
##   0.18947368  0.9578947  1691.109  0.8271374  844.6523
##   0.18947368  0.9684211  1696.595  0.8272146  849.1594
##   0.18947368  0.9789474  1702.438  0.8272550  853.8308
##   0.18947368  0.9894737  1708.584  0.8272822  858.7855
##   0.18947368  1.0000000  1714.480  0.8273070  863.7013
##   0.20000000  0.8000000  1650.340  0.8241321  811.7262
##   0.20000000  0.8105263  1651.085  0.8244444  812.1765
##   0.20000000  0.8210526  1652.197  0.8247380  812.9125
##   0.20000000  0.8315789  1653.675  0.8250135  813.9728
##   0.20000000  0.8421053  1655.515  0.8252713  815.5258
##   0.20000000  0.8526316  1657.716  0.8255119  817.3595
##   0.20000000  0.8631579  1660.275  0.8257358  819.6078
##   0.20000000  0.8736842  1663.189  0.8259434  822.1026
##   0.20000000  0.8842105  1666.455  0.8261352  824.9574
##   0.20000000  0.8947368  1670.068  0.8263116  828.0905
##   0.20000000  0.9052632  1674.024  0.8264729  831.4404
##   0.20000000  0.9157895  1678.320  0.8266197  835.0089
##   0.20000000  0.9263158  1682.950  0.8267523  838.8427
##   0.20000000  0.9368421  1687.910  0.8268711  843.0953
##   0.20000000  0.9473684  1693.195  0.8269765  847.6169
##   0.20000000  0.9578947  1698.799  0.8270688  852.2574
##   0.20000000  0.9684211  1704.719  0.8271446  857.0843
##   0.20000000  0.9789474  1710.999  0.8271847  862.1782
##   0.20000000  0.9894737  1717.582  0.8272125  867.6137
##   0.20000000  1.0000000  1723.810  0.8272409  872.8446
## 
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were fraction = 0.9473684 and lambda
##  = 0.01052632.

Penalized Generalized Linear Model

Adding penalties is a general technique that can be applied to many methods other than linear regression. In this section, we will introduce the penalized generalized linear model using glmnet().

Introduction to glmnet package

The default family option in glmnet() is gaussian, which corresponds to the linear regression we have discussed so far in this chapter, but the parameterization is a little different in the generalized linear model framework (we have α and λ). Let's start from our previous example, using the same training data but fitting the model with glmnet():

dat <- read.csv("http://bit.ly/2P5gTw4")
# data cleaning: delete wrong observations with expense < 0
dat <- subset(dat, store_exp > 0 & online_exp > 0)
# get predictors
trainx <- dat[, grep("Q", names(dat))]
# get response
trainy <- dat$store_exp + dat$online_exp

glmfit = glmnet(as.matrix(trainx), trainy)
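
For family = "gaussian", a sketch of the objective that glmnet() minimizes, where α balances the ridge and LASSO penalties (α = 1, pure LASSO, by default) and λ controls the overall strength:

$$\min_{\beta_0,\,\beta}\; \frac{1}{2n}\sum_{i=1}^{n}\big(y_i - \beta_0 - x_i^{\top}\beta\big)^2 + \lambda\Big[\frac{1-\alpha}{2}\|\beta\|_2^2 + \alpha\|\beta\|_1\Big]$$

glmnet() fits the model over a whole decreasing sequence of λ values at once, which is what the solution path printed below traces.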

We can extract useful information from the fitted model using the plot() and print() functions.

plot(glmfit, label = T)

print(glmfit)
## 
## Call:  glmnet(x = as.matrix(trainx), y = trainy) 
## 
##    Df  %Dev  Lambda
## 1   0  0.00 3042.00
## 2   2 10.38 2771.00
## 3   2 19.19 2525.00
## 4   2 26.50 2301.00
## 5   3 32.64 2096.00
## 6   3 38.94 1910.00
## 7   3 44.17 1741.00
## 8   3 48.52 1586.00
## 9   3 52.12 1445.00
## 10  3 55.12 1317.00
## 11  3 57.60 1200.00
## 12  3 59.67 1093.00
## 13  3 61.38  996.00
## 14  3 62.80  907.50
## 15  3 63.98  826.90
## 16  3 64.96  753.40
## 17  3 65.78  686.50
## 18  3 66.45  625.50
## 19  3 67.01  569.90
## 20  3 67.48  519.30
## 21  3 67.87  473.20
## 22  3 68.19  431.10
## 23  4 68.50  392.80
## 24  4 68.78  357.90
## 25  6 69.02  326.10
## 26  6 69.21  297.20
## 27  6 69.38  270.80
## 28  6 69.51  246.70
## 29  6 69.63  224.80
## 30  6 69.72  204.80
## 31  6 69.80  186.60
## 32  6 69.86  170.10
## 33  6 69.92  154.90
## 34  6 69.96  141.20
## 35  6 70.00  128.60
## 36  7 70.06  117.20
## 37  7 70.19  106.80
## 38  7 70.30   97.31
## 39  7 70.39   88.67
## 40  7 70.47   80.79
## 41  7 70.53   73.61
## 42  8 70.58   67.07
## 43  9 70.64   61.11
## 44  9 70.70   55.68
## 45  9 70.74   50.74
## 46  9 70.78   46.23
## 47  9 70.81   42.12
## 48  9 70.84   38.38
## 49  9 70.86   34.97
## 50  9 70.88   31.86
## 51  9 70.89   29.03
## 52  9 70.90   26.45
## 53  9 70.91   24.10
## 54  9 70.92   21.96
## 55  9 70.93   20.01
## 56  9 70.93   18.23
## 57  9 70.94   16.61
## 58  9 70.94   15.14
## 59  9 70.95   13.79
## 60  9 70.95   12.57
## 61  9 70.95   11.45
## 62  9 70.95   10.43
## 63  9 70.96    9.51
## 64  9 70.96    8.66
## 65  9 70.96    7.89
## 66  9 70.96    7.19
## 67  9 70.96    6.55
## 68  9 70.96    5.97

We can also set the value of λ in the LASSO penalty through the s argument of the coef() function. For example, the following code sets λ to 1200, which leaves three coefficients with non-zero estimates (Q1, Q2, and Q3).

coef(glmfit, s = 1200)
## 11 x 1 sparse Matrix of class "dgCMatrix"
##                    s1
## (Intercept) 2255.2221
## Q1          -390.9214
## Q2           653.6437
## Q3           624.4068
## Q4             .     
## Q5             .     
## Q6             .     
## Q7             .     
## Q8             .     
## Q9             .     
## Q10            .

You can apply models with different values of the tuning parameter to new data using predict():

newdat = matrix(sample(1:9, 30, replace = T), nrow = 3)
predict(glmfit, newdat, s = c(1741, 2000))
##            s1       s2
## [1,] 4008.107 3795.615
## [2,] 3051.974 3084.707
## [3,] 7099.473 7025.456

The tuning parameter λ can be determined from cross-validation performance, for example the value that minimizes the cross-validated error, or the largest value whose error is within one standard error of that minimum.

cvfit = cv.glmnet(as.matrix(trainx), trainy)
plot(cvfit)

# lambda with minimum mean cross-validated error
cvfit$lambda.min
## [1] 7.893144
# lambda with one standard error of the minimum
cvfit$lambda.1se
## [1] 1199.688
# coefficient estimates for model with the error
# that is within one standard error of the minimum
coef(cvfit, s = "lambda.1se")
## 11 x 1 sparse Matrix of class "dgCMatrix"
##                    s1
## (Intercept) 2255.3136
## Q1          -391.0562
## Q2           653.7079
## Q3           624.5119
## Q4             .     
## Q5             .     
## Q6             .     
## Q7             .     
## Q8             .     
## Q9             .     
## Q10            .

Penalized Logistic Regression

In this section, we will go over penalized logistic regression using glmnet() with family = "binomial". As before, the default penalty is LASSO, and the tuning parameter can be determined by performance on the cross-validation data: the minimum error or the one-standard-error rule.

dat <- read.csv("http://bit.ly/2KXb1Qi")
trainx = dplyr::select(dat, -y)
trainy = dat$y
fit <- glmnet(as.matrix(trainx), trainy, family = "binomial")
levels(as.factor(trainy))
## [1] "0" "1"
newdat = as.matrix(trainx[1:3, ])
predict(fit, newdat, type = "link", s = c(2.833e-02, 3.110e-02))
##           s1         s2
## 1  0.1943472  0.1442796
## 2 -0.9913159 -1.0076600
## 3 -0.5840566 -0.5496074
cvfit = cv.glmnet(as.matrix(trainx), trainy,
                 family = "binomial", type.measure = "class")
plot(cvfit)

cvfit$lambda.min
## [1] 0.002522398
cvfit$lambda.1se
## [1] 0.005309407
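
A minimal sketch of using the tuned model to score the same rows (this reuses the cvfit and newdat objects above): with family = "binomial", type = "response" returns predicted probabilities and type = "class" returns predicted labels.

# predicted probability of y = 1 at the one-standard-error lambda
predict(cvfit, newdat, s = "lambda.1se", type = "response")
# predicted class label at the same lambda
predict(cvfit, newdat, s = "lambda.1se", type = "class")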

Group LASSO Logistic Regression

data("sim1_da1")
# the last column of sim1_da1 is the response variable y
# trainx is the explanatory variable matrix
trainx = dplyr::select(sim1_da1, -y)
# save the response variable as trainy
trainy = sim1_da1$y
# get the group indicator
index <- gsub("\\..*", "", names(trainx))
index[1:50]
##  [1] "Q1"  "Q1"  "Q2"  "Q2"  "Q3"  "Q3"  "Q4"  "Q4"  "Q5"  "Q5"  "Q6"  "Q6" 
## [13] "Q7"  "Q7"  "Q8"  "Q8"  "Q9"  "Q9"  "Q10" "Q10" "Q11" "Q11" "Q12" "Q12"
## [25] "Q13" "Q13" "Q14" "Q14" "Q15" "Q15" "Q16" "Q16" "Q17" "Q17" "Q18" "Q18"
## [37] "Q19" "Q19" "Q20" "Q20" "Q21" "Q21" "Q22" "Q22" "Q23" "Q23" "Q24" "Q24"
## [49] "Q25" "Q25"
# Tune over 100 values
nlam <- 100
# set the type of prediction
# - `link`: return predictions on the link (linear predictor) scale
# - `response`: return predicted probabilities
# number of cross-validation folds
kfold <- 10
cv_fit <- cv_glasso(trainx, trainy,
                    nlam = nlam, kfold = kfold, type = "link")
str(cv_fit, width = 60, strict.width = "cut",  list.len = 9)
## List of 11
##  $ lambda            : num [1:100] 52.5 50.4 48.4 46.4 44...
##  $ pred              :'data.frame':  800 obs. of  100 varia..
##   ..$ X52.4935652246023 : num [1:800] -0.0556 -0.0556 -0.0..
##   ..$ X50.3938226156183 : num [1:800] -0.0556 -0.0556 -0.0..
##   ..$ X48.3780697109935 : num [1:800] -0.0559 -0.0559 -0.0..
##   ..$ X46.4429469225538 : num [1:800] -0.0589 -0.0589 -0.0..
##   ..$ X44.5852290456516 : num [1:800] -0.0617 -0.0617 -0.0..
##   ..$ X42.8018198838256 : num [1:800] -0.06438 -0.06438 -0..
##   ..$ X41.0897470884725 : num [1:800] -0.0729 -0.0417 -0.1..
##   ..$ X39.4461572049336 : num [1:800] -0.08801 -0.00791 -0..
##   ..$ X37.8683109167363 : num [1:800] -0.1092 0.0212 -0.08..
##   .. [list output truncated]
##  $ auc               : num [1:100] 0.573 0.568 0.557 0.518..
##  $ log_likelihood    : num [1:100] -556 -556 -556 -555 -55..
##  $ maxrho            : num [1:100] -0.0492 -0.0306 0.0302 ..
##  $ lambda.max.auc    : Named num [1:2] 0.922 0.949
##   ..- attr(*, "names")= chr [1:2] "lambda" "auc"
##  $ lambda.1se.auc    : Named num [1:2] 16.738 0.828
##   ..- attr(*, "names")= chr [1:2] "" "se.auc"
##  $ lambda.max.loglike: Named num [1:2] 1.57 -233.49
##   ..- attr(*, "names")= chr [1:2] "lambda" "loglike"
##  $ lambda.1se.loglike: Named num [1:2] 9.07 -349.13
##   ..- attr(*, "names")= chr [1:2] "lambda" "se.loglike"
##   [list output truncated]
##  - attr(*, "class")= chr "cv_glasso"
plot(cv_fit)

fitgl <- fitglasso(trainx, trainy,
                   lambda = 0.922, na_action = na.pass)
## Lambda: 0.922  nr.var: 229
head(coef(fitgl))
##                 0.922
## Intercept -53.1803922
## Q1.A        1.7566722
## Q1.B        1.7190500
## Q2.A        2.1699192
## Q2.B        0.6939251
## Q3.A        2.1020142
prey <- predict_glasso(fitgl, trainx)