This notebook illustrates how to implement regularization methods using R.
# install packages from CRAN
p_needed <- c('caret', 'elasticnet', 'glmnet', 'devtools',
'MASS', 'grplasso')
packages <- rownames(installed.packages())
p_to_install <- p_needed[!(p_needed %in% packages)]
if (length(p_to_install) > 0) {
install.packages(p_to_install)
}
lapply(p_needed, require, character.only = TRUE)
# install packages from GitHub
p_needed_gh <- c('NetlifyDS')
if (! p_needed_gh %in% packages) {
devtools::install_github("netlify/NetlifyDS")
}
library(NetlifyDS)
dat <- read.csv("http://bit.ly/2P5gTw4")
# data cleaning: delete wrong observations
# expense can't be negative
dat <- subset(dat, store_exp > 0 & online_exp > 0)
# get predictors
trainx <- dat[ , grep("Q", names(dat))]
# get response
trainy <- dat$store_exp + dat$online_exp
In ridge regression, there is a tuning parameter λ that needs to be set for the right level of regularization. The value of λ can be determined by looking at performance on the cross-validation data.
# set cross validation
ctrl <- trainControl(method = "cv", number = 10)
# set the parameter range
ridgeGrid <- data.frame(.lambda = seq(0, .1, length = 20))
set.seed(100)
ridgeRegTune <- train(trainx, trainy,
method = "ridge",
tuneGrid = ridgeGrid,
trControl = ctrl,
## center and scale predictors
preProc = c("center", "scale"))
ridgeRegTune
## Ridge Regression
##
## 999 samples
## 10 predictor
##
## Pre-processing: centered (10), scaled (10)
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 899, 899, 899, 899, 899, 900, ...
## Resampling results across tuning parameters:
##
## lambda RMSE Rsquared MAE
## 0.000000000 1620.980 0.8271792 756.8867
## 0.005263158 1620.079 0.8273987 757.8206
## 0.010526316 1619.773 0.8275476 758.9396
## 0.015789474 1619.901 0.8276512 760.1890
## 0.021052632 1620.366 0.8277243 761.5209
## 0.026315789 1621.107 0.8277760 763.1556
## 0.031578947 1622.083 0.8278122 764.8858
## 0.036842105 1623.267 0.8278370 766.7312
## 0.042105263 1624.639 0.8278529 768.6860
## 0.047368421 1626.185 0.8278620 770.7971
## 0.052631579 1627.892 0.8278657 772.9889
## 0.057894737 1629.753 0.8278649 775.2529
## 0.063157895 1631.760 0.8278605 777.6964
## 0.068421053 1633.908 0.8278530 780.2181
## 0.073684211 1636.190 0.8278430 782.8435
## 0.078947368 1638.604 0.8278307 785.5305
## 0.084210526 1641.144 0.8278165 788.2773
## 0.089473684 1643.808 0.8278006 791.2583
## 0.094736842 1646.592 0.8277832 794.3876
## 0.100000000 1649.494 0.8277644 797.5885
##
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was lambda = 0.01052632.
plot(ridgeRegTune)
Once we have determined the value of the tuning parameter lambda, we can fit the ridge regression model in different ways, for example, using the enet() function.
ridgefit = enet(x = as.matrix(trainx), y = trainy, lambda = 0.01,
# center and scale predictors
normalize = TRUE)
Once the model is fit, we can use the predict() function to predict on a new dataset.
ridgePred <- predict(ridgefit, newx = as.matrix(trainx),
s = 1, mode = "fraction", type = "fit")
names(ridgePred)
## [1] "s" "fraction" "mode" "fit"
head(ridgePred$fit)
## 1 2 3 4 5 6
## 1290.4697 224.1595 591.4406 1220.6384 853.3572 908.2040
ridgeCoef<-predict(ridgefit,newx = as.matrix(trainx),
s=1, mode="fraction", type="coefficients")
ridgeCoef$coefficients
## Q1 Q2 Q3 Q4 Q5 Q6
## -316.763403 684.044587 649.176319 246.932116 576.078371 692.149705
## Q7 Q8 Q9 Q10
## 6.181062 -116.500992 382.265663 -314.739623
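Ridge regression can also be fit with glmnet() by setting alpha = 0 (the pure L2 penalty). The sketch below is for comparison only; note that glmnet tunes its own lambda sequence, so its values are not interchangeable with the enet() lambda above.
# ridge with glmnet: alpha = 0 gives the pure L2 penalty;
# predictors are standardized internally by default
cv_ridge <- cv.glmnet(as.matrix(trainx), trainy, alpha = 0)
# coefficients and fitted values at the lambda with minimum CV error
coef(cv_ridge, s = "lambda.min")
head(predict(cv_ridge, newx = as.matrix(trainx), s = "lambda.min"))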
The LASSO method shrinks some coefficients exactly to zero, so it can also serve as a variable selection tool. In LASSO, the tuning parameter is called fraction, and its value can likewise be determined by performance on the cross-validation data.
ctrl <- trainControl(method = "cv", number = 10)
lassoGrid <- data.frame(fraction = seq(.8, 1, length = 20))
set.seed(100)
lassoTune <- train(trainx, trainy,
## set the method to be lasso
method = "lars",
tuneGrid = lassoGrid,
trControl = ctrl,
## standardize the predictors
preProc = c("center", "scale"))
lassoTune
## Least Angle Regression
##
## 999 samples
## 10 predictor
##
## Pre-processing: centered (10), scaled (10)
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 899, 899, 899, 899, 899, 900, ...
## Resampling results across tuning parameters:
##
## fraction RMSE Rsquared MAE
## 0.8000000 1633.994 0.8250185 788.8895
## 0.8105263 1631.956 0.8253421 785.7150
## 0.8210526 1630.196 0.8256225 782.6543
## 0.8315789 1628.441 0.8258906 779.6467
## 0.8421053 1626.539 0.8261791 776.6017
## 0.8526316 1624.745 0.8264526 773.6821
## 0.8631579 1623.107 0.8267066 771.0365
## 0.8736842 1621.643 0.8269375 768.4821
## 0.8842105 1620.390 0.8271378 766.0623
## 0.8947368 1619.349 0.8273079 763.8790
## 0.9052632 1618.520 0.8274481 761.9152
## 0.9157895 1617.903 0.8275585 760.2604
## 0.9263158 1617.516 0.8276338 758.9376
## 0.9368421 1617.382 0.8276544 757.8210
## 0.9473684 1617.452 0.8276476 757.0097
## 0.9578947 1617.802 0.8276001 756.4950
## 0.9684211 1618.367 0.8275239 756.1559
## 0.9789474 1619.138 0.8274210 756.0236
## 0.9894737 1620.048 0.8273066 756.3794
## 1.0000000 1620.980 0.8271792 756.8867
##
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was fraction = 0.9368421.
plot(lassoTune)
Once the tuning parameter fraction is set, we can use different functions to fit the LASSO model, such as lars() in the lars package, enet() in elasticnet, and glmnet() in glmnet.
lassoModel <- enet(x = as.matrix(trainx), y = trainy,
lambda = 0, normalize = TRUE)
lassoFit <- predict(lassoModel, newx = as.matrix(trainx),
s = 0.957, mode = "fraction", type = "fit")
head(lassoFit$fit)
## 1 2 3 4 5 6
## 1357.2815 300.5298 690.2390 1228.1558 838.4467 1010.0713
lassoCoef <- predict(lassoModel,
newx = as.matrix(trainx),
s = 0.95, mode = "fraction",
type = "coefficients")
lassoCoef$coefficients
## Q1 Q2 Q3 Q4 Q5 Q6 Q7 Q8
## -326.7759 720.2801 722.7518 180.7107 542.1603 603.0865 0.0000 -75.9610
## Q9 Q10
## 342.6375 -281.7818
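The same LASSO model can be sketched with glmnet(), whose default alpha = 1 corresponds to the pure L1 penalty. This is an illustrative alternative rather than the model tuned above, since glmnet tunes its own lambda instead of the fraction parameter.
# LASSO with glmnet: the default alpha = 1 is the pure L1 penalty
cv_lasso <- cv.glmnet(as.matrix(trainx), trainy, alpha = 1)
# coefficients at the largest lambda within one standard error of the
# minimum CV error; some coefficients are shrunk exactly to zero
coef(cv_lasso, s = "lambda.1se")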
Elastic net is a generalization of LASSO and ridge regression: it combines the L1 and L2 penalties. Accordingly, it has two tuning parameters, lambda and fraction.
enetGrid <- expand.grid(.lambda = seq(0,0.2,length=20),
.fraction = seq(.8, 1, length = 20))
set.seed(100)
enetTune <- train(trainx, trainy,
method = "enet",
tuneGrid = enetGrid,
trControl = ctrl,
preProc = c("center", "scale"))
enetTune
## Elasticnet
##
## 999 samples
## 10 predictor
##
## Pre-processing: centered (10), scaled (10)
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 899, 899, 899, 899, 899, 900, ...
## Resampling results across tuning parameters:
##
## lambda fraction RMSE Rsquared MAE
## 0.00000000 0.8000000 1633.994 0.8250185 788.8895
## 0.00000000 0.8105263 1631.956 0.8253421 785.7150
## 0.00000000 0.8210526 1630.196 0.8256225 782.6543
## 0.00000000 0.8315789 1628.441 0.8258906 779.6467
## 0.00000000 0.8421053 1626.539 0.8261791 776.6017
## 0.00000000 0.8526316 1624.745 0.8264526 773.6821
## 0.00000000 0.8631579 1623.107 0.8267066 771.0365
## 0.00000000 0.8736842 1621.643 0.8269375 768.4821
## 0.00000000 0.8842105 1620.390 0.8271378 766.0623
## 0.00000000 0.8947368 1619.349 0.8273079 763.8790
## 0.00000000 0.9052632 1618.520 0.8274481 761.9152
## 0.00000000 0.9157895 1617.903 0.8275585 760.2604
## 0.00000000 0.9263158 1617.516 0.8276338 758.9376
## 0.00000000 0.9368421 1617.382 0.8276544 757.8210
## 0.00000000 0.9473684 1617.452 0.8276476 757.0097
## 0.00000000 0.9578947 1617.802 0.8276001 756.4950
## 0.00000000 0.9684211 1618.367 0.8275239 756.1559
## 0.00000000 0.9789474 1619.138 0.8274210 756.0236
## 0.00000000 0.9894737 1620.048 0.8273066 756.3794
## 0.00000000 1.0000000 1620.980 0.8271792 756.8867
## 0.01052632 0.8000000 1635.826 0.8246536 792.3834
## 0.01052632 0.8105263 1633.554 0.8250088 789.1700
## 0.01052632 0.8210526 1631.502 0.8253351 785.9805
## 0.01052632 0.8315789 1629.488 0.8256497 782.7859
## 0.01052632 0.8421053 1627.400 0.8259707 779.6851
## 0.01052632 0.8526316 1625.519 0.8262635 776.6770
## 0.01052632 0.8631579 1623.789 0.8265383 773.9974
## 0.01052632 0.8736842 1622.231 0.8267914 771.4169
## 0.01052632 0.8842105 1620.827 0.8270245 768.9600
## 0.01052632 0.8947368 1619.623 0.8272307 766.6214
## 0.01052632 0.9052632 1618.627 0.8274092 764.5374
## 0.01052632 0.9157895 1617.839 0.8275603 762.6204
## 0.01052632 0.9263158 1617.259 0.8276842 761.0392
## 0.01052632 0.9368421 1616.888 0.8277814 759.8925
## 0.01052632 0.9473684 1616.838 0.8278132 759.0871
## 0.01052632 0.9578947 1617.006 0.8278170 758.5339
## 0.01052632 0.9684211 1617.445 0.8277844 758.1862
## 0.01052632 0.9789474 1618.087 0.8277273 758.1863
## 0.01052632 0.9894737 1618.885 0.8276494 758.4365
## 0.01052632 1.0000000 1619.773 0.8275476 758.9396
## 0.02105263 0.8000000 1636.749 0.8244436 794.3219
## 0.02105263 0.8105263 1634.399 0.8248178 791.0998
## 0.02105263 0.8210526 1632.324 0.8251543 787.9501
## 0.02105263 0.8315789 1630.101 0.8254999 784.7358
## 0.02105263 0.8421053 1627.975 0.8258362 781.5357
## 0.02105263 0.8526316 1626.056 0.8261444 778.6445
## 0.02105263 0.8631579 1624.264 0.8264405 775.8744
## 0.02105263 0.8736842 1622.668 0.8267113 773.2656
## 0.02105263 0.8842105 1621.279 0.8269554 770.8272
## 0.02105263 0.8947368 1620.099 0.8271730 768.6565
## 0.02105263 0.9052632 1619.127 0.8273644 766.5949
## 0.02105263 0.9157895 1618.365 0.8275300 764.9173
## 0.02105263 0.9263158 1617.813 0.8276701 763.5334
## 0.02105263 0.9368421 1617.470 0.8277849 762.2718
## 0.02105263 0.9473684 1617.360 0.8278664 761.4052
## 0.02105263 0.9578947 1617.537 0.8278957 760.8778
## 0.02105263 0.9684211 1617.974 0.8278925 760.6887
## 0.02105263 0.9789474 1618.638 0.8278626 760.7326
## 0.02105263 0.9894737 1619.458 0.8278079 761.0056
## 0.02105263 1.0000000 1620.366 0.8277243 761.5209
## 0.03157895 0.8000000 1637.268 0.8243143 795.3702
## 0.03157895 0.8105263 1634.805 0.8247123 792.1327
## 0.03157895 0.8210526 1632.613 0.8250724 789.0308
## 0.03157895 0.8315789 1630.397 0.8254248 785.9244
## 0.03157895 0.8421053 1628.345 0.8257606 782.8069
## 0.03157895 0.8526316 1626.479 0.8260740 779.9636
## 0.03157895 0.8631579 1624.758 0.8263731 777.2614
## 0.03157895 0.8736842 1623.248 0.8266463 774.7894
## 0.03157895 0.8842105 1621.949 0.8268939 772.5342
## 0.03157895 0.8947368 1620.862 0.8271163 770.4573
## 0.03157895 0.9052632 1619.988 0.8273137 768.7255
## 0.03157895 0.9157895 1619.327 0.8274865 767.1724
## 0.03157895 0.9263158 1618.879 0.8276351 765.8840
## 0.03157895 0.9368421 1618.644 0.8277598 764.7367
## 0.03157895 0.9473684 1618.623 0.8278610 764.0250
## 0.03157895 0.9578947 1618.878 0.8279155 763.6802
## 0.03157895 0.9684211 1619.400 0.8279324 763.5917
## 0.03157895 0.9789474 1620.176 0.8279200 763.7311
## 0.03157895 0.9894737 1621.106 0.8278819 764.1907
## 0.03157895 1.0000000 1622.083 0.8278122 764.8858
## 0.04210526 0.8000000 1637.336 0.8242673 795.7676
## 0.04210526 0.8105263 1634.988 0.8246580 792.6866
## 0.04210526 0.8210526 1632.865 0.8250148 789.7883
## 0.04210526 0.8315789 1630.709 0.8253703 786.7658
## 0.04210526 0.8421053 1628.769 0.8257030 783.8272
## 0.04210526 0.8526316 1626.981 0.8260211 781.0815
## 0.04210526 0.8631579 1625.383 0.8263184 778.5497
## 0.04210526 0.8736842 1624.002 0.8265908 776.3261
## 0.04210526 0.8842105 1622.837 0.8268386 774.2760
## 0.04210526 0.8947368 1621.890 0.8270623 772.4349
## 0.04210526 0.9052632 1621.161 0.8272620 770.8630
## 0.04210526 0.9157895 1620.651 0.8274383 769.4260
## 0.04210526 0.9263158 1620.360 0.8275915 768.1913
## 0.04210526 0.9368421 1620.287 0.8277218 767.3754
## 0.04210526 0.9473684 1620.432 0.8278297 766.9569
## 0.04210526 0.9578947 1620.819 0.8279065 766.9184
## 0.04210526 0.9684211 1621.492 0.8279369 767.0220
## 0.04210526 0.9789474 1622.441 0.8279368 767.3846
## 0.04210526 0.9894737 1623.547 0.8279114 767.9179
## 0.04210526 1.0000000 1624.639 0.8278529 768.6860
## 0.05263158 0.8000000 1637.408 0.8242360 796.1282
## 0.05263158 0.8105263 1635.203 0.8246179 793.2558
## 0.05263158 0.8210526 1633.116 0.8249752 790.4644
## 0.05263158 0.8315789 1631.093 0.8253287 787.5629
## 0.05263158 0.8421053 1629.276 0.8256597 784.7421
## 0.05263158 0.8526316 1627.611 0.8259779 782.1662
## 0.05263158 0.8631579 1626.167 0.8262718 779.9035
## 0.05263158 0.8736842 1624.947 0.8265417 777.8830
## 0.05263158 0.8842105 1623.951 0.8267879 776.1158
## 0.05263158 0.8947368 1623.179 0.8270109 774.4921
## 0.05263158 0.9052632 1622.632 0.8272109 773.0840
## 0.05263158 0.9157895 1622.310 0.8273884 771.8317
## 0.05263158 0.9263158 1622.214 0.8275437 770.8828
## 0.05263158 0.9368421 1622.342 0.8276773 770.3604
## 0.05263158 0.9473684 1622.695 0.8277893 770.2500
## 0.05263158 0.9578947 1623.272 0.8278802 770.4072
## 0.05263158 0.9684211 1624.145 0.8279207 770.7594
## 0.05263158 0.9789474 1625.308 0.8279300 771.3430
## 0.05263158 0.9894737 1626.645 0.8279158 772.0595
## 0.05263158 1.0000000 1627.892 0.8278657 772.9889
## 0.06315789 0.8000000 1637.532 0.8242144 796.5193
## 0.06315789 0.8105263 1635.488 0.8245865 793.7879
## 0.06315789 0.8210526 1633.451 0.8249476 791.0514
## 0.06315789 0.8315789 1631.584 0.8252951 788.2870
## 0.06315789 0.8421053 1629.890 0.8256275 785.6468
## 0.06315789 0.8526316 1628.396 0.8259407 783.4649
## 0.06315789 0.8631579 1627.132 0.8262302 781.4300
## 0.06315789 0.8736842 1626.100 0.8264966 779.6719
## 0.06315789 0.8842105 1625.299 0.8267401 778.1326
## 0.06315789 0.8947368 1624.731 0.8269612 776.7504
## 0.06315789 0.9052632 1624.395 0.8271603 775.4892
## 0.06315789 0.9157895 1624.291 0.8273377 774.5648
## 0.06315789 0.9263158 1624.420 0.8274937 773.9205
## 0.06315789 0.9368421 1624.781 0.8276289 773.5923
## 0.06315789 0.9473684 1625.374 0.8277434 773.6843
## 0.06315789 0.9578947 1626.197 0.8278377 774.1672
## 0.06315789 0.9684211 1627.300 0.8278907 774.6892
## 0.06315789 0.9789474 1628.705 0.8279081 775.4247
## 0.06315789 0.9894737 1630.314 0.8279032 776.5186
## 0.06315789 1.0000000 1631.760 0.8278605 777.6964
## 0.07368421 0.8000000 1637.738 0.8241989 796.9050
## 0.07368421 0.8105263 1635.823 0.8245637 794.3508
## 0.07368421 0.8210526 1633.895 0.8249260 791.6353
## 0.07368421 0.8315789 1632.191 0.8252692 789.1041
## 0.07368421 0.8421053 1630.653 0.8256000 786.8485
## 0.07368421 0.8526316 1629.353 0.8259075 784.8898
## 0.07368421 0.8631579 1628.292 0.8261922 783.1740
## 0.07368421 0.8736842 1627.470 0.8264545 781.6704
## 0.07368421 0.8842105 1626.890 0.8266946 780.3200
## 0.07368421 0.8947368 1626.549 0.8269131 779.1108
## 0.07368421 0.9052632 1626.450 0.8271104 778.2312
## 0.07368421 0.9157895 1626.591 0.8272867 777.6039
## 0.07368421 0.9263158 1626.972 0.8274425 777.2800
## 0.07368421 0.9368421 1627.592 0.8275782 777.1814
## 0.07368421 0.9473684 1628.451 0.8276941 777.4615
## 0.07368421 0.9578947 1629.548 0.8277906 778.0005
## 0.07368421 0.9684211 1630.916 0.8278526 778.8693
## 0.07368421 0.9789474 1632.595 0.8278761 780.0075
## 0.07368421 0.9894737 1634.513 0.8278789 781.4383
## 0.07368421 1.0000000 1636.190 0.8278430 782.8435
## 0.08421053 0.8000000 1638.058 0.8241860 797.3405
## 0.08421053 0.8105263 1636.219 0.8245492 794.8565
## 0.08421053 0.8210526 1634.473 0.8249074 792.3585
## 0.08421053 0.8315789 1632.921 0.8252503 790.1821
## 0.08421053 0.8421053 1631.587 0.8255746 788.1906
## 0.08421053 0.8526316 1630.499 0.8258763 786.5015
## 0.08421053 0.8631579 1629.661 0.8261559 785.0487
## 0.08421053 0.8736842 1629.071 0.8264137 783.7361
## 0.08421053 0.8842105 1628.730 0.8266501 782.6377
## 0.08421053 0.8947368 1628.639 0.8268656 781.8686
## 0.08421053 0.9052632 1628.797 0.8270605 781.3347
## 0.08421053 0.9157895 1629.204 0.8272352 781.0349
## 0.08421053 0.9263158 1629.860 0.8273902 780.9363
## 0.08421053 0.9368421 1630.762 0.8275257 781.1351
## 0.08421053 0.9473684 1631.911 0.8276423 781.5617
## 0.08421053 0.9578947 1633.305 0.8277401 782.4285
## 0.08421053 0.9684211 1634.966 0.8278086 783.5899
## 0.08421053 0.9789474 1636.945 0.8278368 785.0542
## 0.08421053 0.9894737 1639.182 0.8278450 786.6634
## 0.08421053 1.0000000 1641.144 0.8278165 788.2773
## 0.09473684 0.8000000 1638.510 0.8241734 797.9016
## 0.09473684 0.8105263 1636.746 0.8245397 795.4687
## 0.09473684 0.8210526 1635.189 0.8248924 793.3868
## 0.09473684 0.8315789 1633.818 0.8252326 791.4605
## 0.09473684 0.8421053 1632.704 0.8255504 789.6859
## 0.09473684 0.8526316 1631.847 0.8258462 788.3153
## 0.09473684 0.8631579 1631.248 0.8261205 787.0551
## 0.09473684 0.8736842 1630.908 0.8263736 786.0017
## 0.09473684 0.8842105 1630.826 0.8266061 785.2973
## 0.09473684 0.8947368 1631.003 0.8268182 784.8744
## 0.09473684 0.9052632 1631.438 0.8270105 784.7020
## 0.09473684 0.9157895 1632.130 0.8271833 784.6224
## 0.09473684 0.9263158 1633.079 0.8273369 784.8459
## 0.09473684 0.9368421 1634.284 0.8274718 785.3350
## 0.09473684 0.9473684 1635.742 0.8275884 786.2146
## 0.09473684 0.9578947 1637.453 0.8276870 787.4105
## 0.09473684 0.9684211 1639.430 0.8277601 788.7539
## 0.09473684 0.9789474 1641.732 0.8277919 790.4008
## 0.09473684 0.9894737 1644.299 0.8278041 792.4600
## 0.09473684 1.0000000 1646.592 0.8277832 794.3876
## 0.10526316 0.8000000 1639.012 0.8241683 798.4797
## 0.10526316 0.8105263 1637.414 0.8245335 796.3703
## 0.10526316 0.8210526 1636.035 0.8248844 794.5291
## 0.10526316 0.8315789 1634.891 0.8252174 792.7750
## 0.10526316 0.8421053 1634.014 0.8255286 791.3708
## 0.10526316 0.8526316 1633.405 0.8258183 790.2048
## 0.10526316 0.8631579 1633.064 0.8260872 789.2252
## 0.10526316 0.8736842 1632.992 0.8263355 788.5132
## 0.10526316 0.8842105 1633.188 0.8265637 788.1901
## 0.10526316 0.8947368 1633.651 0.8267723 788.1484
## 0.10526316 0.9052632 1634.382 0.8269615 788.1860
## 0.10526316 0.9157895 1635.379 0.8271319 788.4404
## 0.10526316 0.9263158 1636.640 0.8272838 789.0227
## 0.10526316 0.9368421 1638.165 0.8274177 789.9979
## 0.10526316 0.9473684 1639.953 0.8275338 791.1721
## 0.10526316 0.9578947 1642.000 0.8276326 792.5859
## 0.10526316 0.9684211 1644.315 0.8277088 794.3763
## 0.10526316 0.9789474 1646.960 0.8277433 796.4570
## 0.10526316 0.9894737 1649.878 0.8277586 798.7159
## 0.10526316 1.0000000 1652.510 0.8277443 800.8477
## 0.11578947 0.8000000 1639.626 0.8241679 799.2381
## 0.11578947 0.8105263 1638.241 0.8245282 797.4189
## 0.11578947 0.8210526 1637.062 0.8248759 795.7098
## 0.11578947 0.8315789 1636.160 0.8252018 794.3098
## 0.11578947 0.8421053 1635.536 0.8255064 793.1432
## 0.11578947 0.8526316 1635.190 0.8257902 792.2822
## 0.11578947 0.8631579 1635.122 0.8260536 791.6245
## 0.11578947 0.8736842 1635.333 0.8262971 791.3161
## 0.11578947 0.8842105 1635.821 0.8265211 791.3443
## 0.11578947 0.8947368 1636.586 0.8267260 791.5316
## 0.11578947 0.9052632 1637.628 0.8269121 791.9195
## 0.11578947 0.9157895 1638.945 0.8270800 792.5698
## 0.11578947 0.9263158 1640.534 0.8272300 793.6021
## 0.11578947 0.9368421 1642.396 0.8273626 794.8365
## 0.11578947 0.9473684 1644.527 0.8274780 796.2876
## 0.11578947 0.9578947 1646.926 0.8275766 798.2356
## 0.11578947 0.9684211 1649.597 0.8276550 800.4001
## 0.11578947 0.9789474 1652.600 0.8276914 802.6660
## 0.11578947 0.9894737 1655.884 0.8277093 805.2068
## 0.11578947 1.0000000 1658.877 0.8277008 807.5841
## 0.12631579 0.8000000 1640.385 0.8241684 800.1978
## 0.12631579 0.8105263 1639.195 0.8245262 798.4837
## 0.12631579 0.8210526 1638.260 0.8248664 797.0622
## 0.12631579 0.8315789 1637.613 0.8251853 795.9646
## 0.12631579 0.8421053 1637.255 0.8254836 795.1034
## 0.12631579 0.8526316 1637.186 0.8257615 794.5419
## 0.12631579 0.8631579 1637.405 0.8260196 794.2696
## 0.12631579 0.8736842 1637.913 0.8262583 794.3320
## 0.12631579 0.8842105 1638.708 0.8264781 794.6365
## 0.12631579 0.8947368 1639.789 0.8266792 795.1870
## 0.12631579 0.9052632 1641.156 0.8268623 795.9471
## 0.12631579 0.9157895 1642.806 0.8270275 797.0122
## 0.12631579 0.9263158 1644.738 0.8271755 798.3034
## 0.12631579 0.9368421 1646.949 0.8273065 799.8984
## 0.12631579 0.9473684 1649.438 0.8274210 801.8492
## 0.12631579 0.9578947 1652.202 0.8275193 804.0912
## 0.12631579 0.9684211 1655.243 0.8275989 806.5709
## 0.12631579 0.9789474 1658.620 0.8276369 809.0791
## 0.12631579 0.9894737 1662.283 0.8276569 811.8636
## 0.12631579 1.0000000 1665.671 0.8276534 814.5830
## 0.13684211 0.8000000 1641.289 0.8241685 801.2337
## 0.13684211 0.8105263 1640.312 0.8245226 799.6983
## 0.13684211 0.8210526 1639.634 0.8248556 798.6307
## 0.13684211 0.8315789 1639.256 0.8251678 797.8107
## 0.13684211 0.8421053 1639.176 0.8254598 797.2210
## 0.13684211 0.8526316 1639.397 0.8257320 797.0378
## 0.13684211 0.8631579 1639.916 0.8259849 797.1659
## 0.13684211 0.8736842 1640.733 0.8262189 797.5355
## 0.13684211 0.8842105 1641.848 0.8264344 798.1680
## 0.13684211 0.8947368 1643.258 0.8266319 799.0627
## 0.13684211 0.9052632 1644.962 0.8268117 800.2041
## 0.13684211 0.9157895 1646.959 0.8269744 801.5514
## 0.13684211 0.9263158 1649.245 0.8271202 803.3136
## 0.13684211 0.9368421 1651.819 0.8272496 805.3203
## 0.13684211 0.9473684 1654.678 0.8273631 807.5619
## 0.13684211 0.9578947 1657.819 0.8274608 810.1632
## 0.13684211 0.9684211 1661.242 0.8275410 812.9046
## 0.13684211 0.9789474 1665.006 0.8275801 815.7220
## 0.13684211 0.9894737 1669.059 0.8276019 818.8352
## 0.13684211 1.0000000 1672.876 0.8276026 821.9239
## 0.14736842 0.8000000 1642.335 0.8241695 802.3745
## 0.14736842 0.8105263 1641.615 0.8245162 801.2545
## 0.14736842 0.8210526 1641.206 0.8248422 800.4291
## 0.14736842 0.8315789 1641.107 0.8251479 799.8450
## 0.14736842 0.8421053 1641.318 0.8254338 799.6691
## 0.14736842 0.8526316 1641.840 0.8257005 799.8625
## 0.14736842 0.8631579 1642.670 0.8259482 800.2687
## 0.14736842 0.8736842 1643.809 0.8261776 800.9595
## 0.14736842 0.8842105 1645.254 0.8263890 801.9502
## 0.14736842 0.8947368 1647.004 0.8265829 803.1593
## 0.14736842 0.9052632 1649.057 0.8267596 804.6551
## 0.14736842 0.9157895 1651.410 0.8269197 806.4847
## 0.14736842 0.9263158 1654.062 0.8270634 808.5611
## 0.14736842 0.9368421 1657.009 0.8271912 810.8726
## 0.14736842 0.9473684 1660.248 0.8273035 813.4750
## 0.14736842 0.9578947 1663.776 0.8274006 816.3706
## 0.14736842 0.9684211 1667.592 0.8274809 819.4174
## 0.14736842 0.9789474 1671.752 0.8275208 822.6373
## 0.14736842 0.9894737 1676.206 0.8275442 826.0982
## 0.14736842 1.0000000 1680.474 0.8275487 829.6293
## 0.15789474 0.8000000 1643.538 0.8241678 803.8305
## 0.15789474 0.8105263 1643.090 0.8245071 802.9769
## 0.15789474 0.8210526 1642.963 0.8248262 802.4061
## 0.15789474 0.8315789 1643.159 0.8251254 802.2364
## 0.15789474 0.8421053 1643.675 0.8254054 802.4176
## 0.15789474 0.8526316 1644.512 0.8256665 802.9057
## 0.15789474 0.8631579 1645.668 0.8259092 803.6094
## 0.15789474 0.8736842 1647.142 0.8261339 804.6866
## 0.15789474 0.8842105 1648.933 0.8263412 805.9801
## 0.15789474 0.8947368 1651.037 0.8265314 807.5572
## 0.15789474 0.9052632 1653.452 0.8267049 809.5161
## 0.15789474 0.9157895 1656.177 0.8268622 811.6370
## 0.15789474 0.9263158 1659.207 0.8270036 814.0269
## 0.15789474 0.9368421 1662.541 0.8271296 816.7211
## 0.15789474 0.9473684 1666.173 0.8272405 819.6860
## 0.15789474 0.9578947 1670.102 0.8273368 822.8679
## 0.15789474 0.9684211 1674.324 0.8274167 826.2949
## 0.15789474 0.9789474 1678.894 0.8274570 829.9155
## 0.15789474 0.9894737 1683.760 0.8274816 833.8697
## 0.15789474 1.0000000 1688.449 0.8274919 837.7241
## 0.16842105 0.8000000 1644.930 0.8241627 805.5032
## 0.16842105 0.8105263 1644.763 0.8244949 804.9234
## 0.16842105 0.8210526 1644.930 0.8248073 804.7077
## 0.16842105 0.8315789 1645.430 0.8251003 804.8776
## 0.16842105 0.8421053 1646.262 0.8253745 805.3941
## 0.16842105 0.8526316 1647.425 0.8256302 806.1231
## 0.16842105 0.8631579 1648.917 0.8258680 807.2437
## 0.16842105 0.8736842 1650.736 0.8260883 808.6241
## 0.16842105 0.8842105 1652.881 0.8262916 810.2746
## 0.16842105 0.8947368 1655.348 0.8264782 812.3418
## 0.16842105 0.9052632 1658.136 0.8266486 814.5910
## 0.16842105 0.9157895 1661.240 0.8268032 817.0408
## 0.16842105 0.9263158 1664.658 0.8269424 819.8316
## 0.16842105 0.9368421 1668.386 0.8270666 822.8908
## 0.16842105 0.9473684 1672.420 0.8271762 826.1590
## 0.16842105 0.9578947 1676.757 0.8272715 829.6881
## 0.16842105 0.9684211 1681.393 0.8273508 833.6413
## 0.16842105 0.9789474 1686.380 0.8273913 837.7712
## 0.16842105 0.9894737 1691.666 0.8274169 841.9847
## 0.16842105 1.0000000 1696.785 0.8274326 846.1587
## 0.17894737 0.8000000 1646.523 0.8241551 807.3583
## 0.17894737 0.8105263 1646.650 0.8244804 807.0957
## 0.17894737 0.8210526 1647.122 0.8247863 807.2335
## 0.17894737 0.8315789 1647.937 0.8250733 807.7285
## 0.17894737 0.8421053 1649.095 0.8253418 808.5181
## 0.17894737 0.8526316 1650.594 0.8255924 809.6572
## 0.17894737 0.8631579 1652.432 0.8258254 811.0895
## 0.17894737 0.8736842 1654.607 0.8260414 812.8021
## 0.17894737 0.8842105 1657.116 0.8262407 814.9490
## 0.17894737 0.8947368 1659.957 0.8264238 817.3163
## 0.17894737 0.9052632 1663.126 0.8265912 819.8924
## 0.17894737 0.9157895 1666.619 0.8267431 822.7604
## 0.17894737 0.9263158 1670.434 0.8268801 825.9592
## 0.17894737 0.9368421 1674.565 0.8270025 829.3290
## 0.17894737 0.9473684 1679.009 0.8271107 832.9486
## 0.17894737 0.9578947 1683.761 0.8272051 837.0569
## 0.17894737 0.9684211 1688.819 0.8272834 841.3935
## 0.17894737 0.9789474 1694.231 0.8273239 845.7289
## 0.17894737 0.9894737 1699.944 0.8273504 850.2820
## 0.17894737 1.0000000 1705.467 0.8273709 854.8477
## 0.18947368 0.8000000 1648.325 0.8241448 809.3866
## 0.18947368 0.8105263 1648.756 0.8244635 809.5517
## 0.18947368 0.8210526 1649.543 0.8247632 809.9924
## 0.18947368 0.8315789 1650.684 0.8250443 810.7452
## 0.18947368 0.8421053 1652.178 0.8253074 811.8904
## 0.18947368 0.8526316 1654.023 0.8255529 813.4040
## 0.18947368 0.8631579 1656.217 0.8257813 815.1599
## 0.18947368 0.8736842 1658.757 0.8259931 817.3588
## 0.18947368 0.8842105 1661.640 0.8261886 819.8090
## 0.18947368 0.8947368 1664.863 0.8263683 822.5265
## 0.18947368 0.9052632 1668.422 0.8265326 825.5497
## 0.18947368 0.9157895 1672.312 0.8266819 828.8014
## 0.18947368 0.9263158 1676.531 0.8268167 832.2915
## 0.18947368 0.9368421 1681.073 0.8269373 836.0084
## 0.18947368 0.9473684 1685.934 0.8270441 840.2082
## 0.18947368 0.9578947 1691.109 0.8271374 844.6523
## 0.18947368 0.9684211 1696.595 0.8272146 849.1594
## 0.18947368 0.9789474 1702.438 0.8272550 853.8308
## 0.18947368 0.9894737 1708.584 0.8272822 858.7855
## 0.18947368 1.0000000 1714.480 0.8273070 863.7013
## 0.20000000 0.8000000 1650.340 0.8241321 811.7262
## 0.20000000 0.8105263 1651.085 0.8244444 812.1765
## 0.20000000 0.8210526 1652.197 0.8247380 812.9125
## 0.20000000 0.8315789 1653.675 0.8250135 813.9728
## 0.20000000 0.8421053 1655.515 0.8252713 815.5258
## 0.20000000 0.8526316 1657.716 0.8255119 817.3595
## 0.20000000 0.8631579 1660.275 0.8257358 819.6078
## 0.20000000 0.8736842 1663.189 0.8259434 822.1026
## 0.20000000 0.8842105 1666.455 0.8261352 824.9574
## 0.20000000 0.8947368 1670.068 0.8263116 828.0905
## 0.20000000 0.9052632 1674.024 0.8264729 831.4404
## 0.20000000 0.9157895 1678.320 0.8266197 835.0089
## 0.20000000 0.9263158 1682.950 0.8267523 838.8427
## 0.20000000 0.9368421 1687.910 0.8268711 843.0953
## 0.20000000 0.9473684 1693.195 0.8269765 847.6169
## 0.20000000 0.9578947 1698.799 0.8270688 852.2574
## 0.20000000 0.9684211 1704.719 0.8271446 857.0843
## 0.20000000 0.9789474 1710.999 0.8271847 862.1782
## 0.20000000 0.9894737 1717.582 0.8272125 867.6137
## 0.20000000 1.0000000 1723.810 0.8272409 872.8446
##
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were fraction = 0.9473684 and lambda
## = 0.01052632.
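One way to use the tuned elastic net is to refit with the selected lambda and then predict at the selected fraction, following the same enet()/predict() pattern shown earlier (a sketch using the values reported above):
# refit the elastic net with the cross-validated ridge penalty
enetModel <- enet(x = as.matrix(trainx), y = trainy,
                  lambda = 0.01052632, normalize = TRUE)
# fitted values at the cross-validated fraction along the L1 path
enetPred <- predict(enetModel, newx = as.matrix(trainx),
                    s = 0.9473684, mode = "fraction", type = "fit")
head(enetPred$fit)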
Adding penalties is a general technique that can be applied to many methods other than linear regression. In this section, we will introduce the penalized generalized linear model using glmnet().
glmnet package
The default family option in the function glmnet() is gaussian. It is the linear regression we have discussed so far in this chapter, but the parameterization is a little different in the generalized linear model framework (we have α and λ). Let's start from our previous example, using the same training data but glmnet() to fit the model:
dat <- read.csv("http://bit.ly/2P5gTw4")
# data cleaning: delete wrong observations with expense < 0
dat <- subset(dat, store_exp > 0 & online_exp > 0)
# get predictors
trainx <- dat[, grep("Q", names(dat))]
# get response
trainy <- dat$store_exp + dat$online_exp
glmfit = glmnet(as.matrix(trainx), trainy)
We can extract useful information from the fitted model using the plot() and print() functions.
plot(glmfit, label = T)
print(glmfit)
##
## Call: glmnet(x = as.matrix(trainx), y = trainy)
##
## Df %Dev Lambda
## 1 0 0.00 3042.00
## 2 2 10.38 2771.00
## 3 2 19.19 2525.00
## 4 2 26.50 2301.00
## 5 3 32.64 2096.00
## 6 3 38.94 1910.00
## 7 3 44.17 1741.00
## 8 3 48.52 1586.00
## 9 3 52.12 1445.00
## 10 3 55.12 1317.00
## 11 3 57.60 1200.00
## 12 3 59.67 1093.00
## 13 3 61.38 996.00
## 14 3 62.80 907.50
## 15 3 63.98 826.90
## 16 3 64.96 753.40
## 17 3 65.78 686.50
## 18 3 66.45 625.50
## 19 3 67.01 569.90
## 20 3 67.48 519.30
## 21 3 67.87 473.20
## 22 3 68.19 431.10
## 23 4 68.50 392.80
## 24 4 68.78 357.90
## 25 6 69.02 326.10
## 26 6 69.21 297.20
## 27 6 69.38 270.80
## 28 6 69.51 246.70
## 29 6 69.63 224.80
## 30 6 69.72 204.80
## 31 6 69.80 186.60
## 32 6 69.86 170.10
## 33 6 69.92 154.90
## 34 6 69.96 141.20
## 35 6 70.00 128.60
## 36 7 70.06 117.20
## 37 7 70.19 106.80
## 38 7 70.30 97.31
## 39 7 70.39 88.67
## 40 7 70.47 80.79
## 41 7 70.53 73.61
## 42 8 70.58 67.07
## 43 9 70.64 61.11
## 44 9 70.70 55.68
## 45 9 70.74 50.74
## 46 9 70.78 46.23
## 47 9 70.81 42.12
## 48 9 70.84 38.38
## 49 9 70.86 34.97
## 50 9 70.88 31.86
## 51 9 70.89 29.03
## 52 9 70.90 26.45
## 53 9 70.91 24.10
## 54 9 70.92 21.96
## 55 9 70.93 20.01
## 56 9 70.93 18.23
## 57 9 70.94 16.61
## 58 9 70.94 15.14
## 59 9 70.95 13.79
## 60 9 70.95 12.57
## 61 9 70.95 11.45
## 62 9 70.95 10.43
## 63 9 70.96 9.51
## 64 9 70.96 8.66
## 65 9 70.96 7.89
## 66 9 70.96 7.19
## 67 9 70.96 6.55
## 68 9 70.96 5.97
We can also set the value of λ in the LASSO penalty through the s argument of the coef() function. For example, the following code sets λ to 1200, which leaves three coefficients with non-zero estimates (Q1, Q2, and Q3).
coef(glmfit, s = 1200)
## 11 x 1 sparse Matrix of class "dgCMatrix"
## s1
## (Intercept) 2255.2221
## Q1 -390.9214
## Q2 653.6437
## Q3 624.4068
## Q4 .
## Q5 .
## Q6 .
## Q7 .
## Q8 .
## Q9 .
## Q10 .
You can apply models with different values of the tuning parameter to new data using predict():
newdat = matrix(sample(1:9, 30, replace = T), nrow = 3)
predict(glmfit, newdat, s = c(1741, 2000))
## s1 s2
## [1,] 4008.107 3795.615
## [2,] 3051.974 3084.707
## [3,] 7099.473 7025.456
The tuning parameter λ can be determined by cross-validation performance, for example the value of λ that gives the minimum cross-validated error, or the largest value whose error is within one standard error of that minimum.
cvfit = cv.glmnet(as.matrix(trainx), trainy)
plot(cvfit)
# lambda with minimum mean cross-validated error
cvfit$lambda.min
## [1] 7.893144
# lambda with one standard error of the minimum
cvfit$lambda.1se
## [1] 1199.688
# coefficient estimates for model with the error
# that is within one standard error of the minimum
coef(cvfit, s = "lambda.1se")
## 11 x 1 sparse Matrix of class "dgCMatrix"
## s1
## (Intercept) 2255.3136
## Q1 -391.0562
## Q2 653.7079
## Q3 624.5119
## Q4 .
## Q5 .
## Q6 .
## Q7 .
## Q8 .
## Q9 .
## Q10 .
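Predictions can also be made directly from the cross-validated object; a minimal sketch, reusing the newdat matrix created earlier:
# predict from the cv.glmnet object at the lambda minimizing CV error
predict(cvfit, newx = newdat, s = "lambda.min")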
In this section, we will go over penalized logistic regression using glmnet() with family = "binomial". As before, the default penalty is the LASSO penalty, and the tuning parameter can be determined by performance on the cross-validation data: the minimum error or one standard error of the minimum.
dat <- read.csv("http://bit.ly/2KXb1Qi")
trainx = dplyr::select(dat, -y)
trainy = dat$y
fit <- glmnet(as.matrix(trainx), trainy, family = "binomial")
levels(as.factor(trainy))
## [1] "0" "1"
newdat = as.matrix(trainx[1:3, ])
predict(fit, newdat, type = "link", s = c(2.833e-02, 3.110e-02))
## s1 s2
## 1 0.1943472 0.1442796
## 2 -0.9913159 -1.0076600
## 3 -0.5840566 -0.5496074
cvfit = cv.glmnet(as.matrix(trainx), trainy,
family = "binomial", type.measure = "class")
plot(cvfit)
cvfit$lambda.min
## [1] 0.002522398
cvfit$lambda.1se
## [1] 0.005309407
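As in the linear case, coefficients and predictions can be extracted at either cross-validated value of lambda; a brief sketch using lambda.1se:
# coefficients at the largest lambda within one standard error of the
# minimum misclassification error
coef(cvfit, s = "lambda.1se")
# predicted class labels for the first three observations
predict(cvfit, newx = as.matrix(trainx[1:3, ]),
        s = "lambda.1se", type = "class")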
Finally, let's fit a group LASSO logistic regression using the sim1_da1 dataset from the NetlifyDS package.
data("sim1_da1")
# the last column of sim1_da1 is the response variable y
# trainx is the explanatory variable matrix
trainx = dplyr::select(sim1_da1, -y)
# save the response variable as trainy
trainy = sim1_da1$y
# get the group indicator
index <- gsub("\\..*", "", names(trainx))
index[1:50]
## [1] "Q1" "Q1" "Q2" "Q2" "Q3" "Q3" "Q4" "Q4" "Q5" "Q5" "Q6" "Q6"
## [13] "Q7" "Q7" "Q8" "Q8" "Q9" "Q9" "Q10" "Q10" "Q11" "Q11" "Q12" "Q12"
## [25] "Q13" "Q13" "Q14" "Q14" "Q15" "Q15" "Q16" "Q16" "Q17" "Q17" "Q18" "Q18"
## [37] "Q19" "Q19" "Q20" "Q20" "Q21" "Q21" "Q22" "Q22" "Q23" "Q23" "Q24" "Q24"
## [49] "Q25" "Q25"
# Tune over 100 values
nlam <- 100
# set the type of prediction
# - `link`: return the predicted link function
# - `response`: return the predicted probability
# number of cross-validation folds
kfold <- 10
cv_fit <- cv_glasso(trainx, trainy,
nlam = nlam, kfold = kfold, type = "link")
str(cv_fit, width = 60, strict.width = "cut", list.len = 9)
## List of 11
## $ lambda : num [1:100] 52.5 50.4 48.4 46.4 44...
## $ pred :'data.frame': 800 obs. of 100 varia..
## ..$ X52.4935652246023 : num [1:800] -0.0556 -0.0556 -0.0..
## ..$ X50.3938226156183 : num [1:800] -0.0556 -0.0556 -0.0..
## ..$ X48.3780697109935 : num [1:800] -0.0559 -0.0559 -0.0..
## ..$ X46.4429469225538 : num [1:800] -0.0589 -0.0589 -0.0..
## ..$ X44.5852290456516 : num [1:800] -0.0617 -0.0617 -0.0..
## ..$ X42.8018198838256 : num [1:800] -0.06438 -0.06438 -0..
## ..$ X41.0897470884725 : num [1:800] -0.0729 -0.0417 -0.1..
## ..$ X39.4461572049336 : num [1:800] -0.08801 -0.00791 -0..
## ..$ X37.8683109167363 : num [1:800] -0.1092 0.0212 -0.08..
## .. [list output truncated]
## $ auc : num [1:100] 0.573 0.568 0.557 0.518..
## $ log_likelihood : num [1:100] -556 -556 -556 -555 -55..
## $ maxrho : num [1:100] -0.0492 -0.0306 0.0302 ..
## $ lambda.max.auc : Named num [1:2] 0.922 0.949
## ..- attr(*, "names")= chr [1:2] "lambda" "auc"
## $ lambda.1se.auc : Named num [1:2] 16.738 0.828
## ..- attr(*, "names")= chr [1:2] "" "se.auc"
## $ lambda.max.loglike: Named num [1:2] 1.57 -233.49
## ..- attr(*, "names")= chr [1:2] "lambda" "loglike"
## $ lambda.1se.loglike: Named num [1:2] 9.07 -349.13
## ..- attr(*, "names")= chr [1:2] "lambda" "se.loglike"
## [list output truncated]
## - attr(*, "class")= chr "cv_glasso"
plot(cv_fit)
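The lambda = 0.922 used below is the value that maximizes the cross-validated AUC; it can be read off the cv_glasso object directly:
# lambda maximizing cross-validated AUC (first element) and the AUC itself
cv_fit$lambda.max.auc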
fitgl <- fitglasso(trainx, trainy,
lambda = 0.922, na_action = na.pass)
## Lambda: 0.922 nr.var: 229
head(coef(fitgl))
## 0.922
## Intercept -53.1803922
## Q1.A 1.7566722
## Q1.B 1.7190500
## Q2.A 2.1699192
## Q2.B 0.6939251
## Q3.A 2.1020142
prey <- predict_glasso(fitgl, trainx)
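If the returned predictions are on the link (log-odds) scale, as the type = "link" setting above suggests, they can be mapped to probabilities with the logistic function. This assumes predict_glasso() returns link values, which is not verified here:
# assuming prey holds log-odds, convert to predicted probabilities
prob <- plogis(prey)
head(prob)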