Model selection demo

Lasso and Adaptive lasso

In the lecture we talked about ridge and lasso regression. Both of these methods address the high estimation variance that results from either high-dimensional settings or high correlations between features. Estimation variance directly impacts predictive performance, so suppressing estimation variance can be essential.

However, both ridge and lasso lead to biased estimates of the model coefficients. Increased bias can lead to a deterioration of predictive performance. Thus, we need to find a balance between increasing bias and decreasing estimation variance. This is usually done by selecting our penalty/regularization tuning parameter via cross-validation.

An alternative is to de-bias the estimate.

Let's consider the form for the ridge and lasso estimates (for the simple case of uncorrelated features):

$$\hat{\beta}_{ridge} = \frac{\hat{\beta}}{1+\lambda}$$

$$\hat{\beta}_{lasso} = \left(\mid\hat{\beta}\mid - \lambda\right)^{+} \operatorname{sgn}(\hat{\beta})$$
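These two shrinkage rules are easy to verify numerically. The sketch below (a minimal NumPy implementation, assuming an orthonormal design so the closed forms apply) shows that ridge shrinks every coefficient proportionally, while lasso subtracts a constant $\lambda$ and sets small coefficients exactly to zero:

```python
import numpy as np

def ridge_shrink(beta_hat, lam):
    """Ridge estimate under an orthonormal design: proportional shrinkage."""
    return beta_hat / (1.0 + lam)

def lasso_shrink(beta_hat, lam):
    """Lasso estimate under an orthonormal design: soft-thresholding."""
    return np.sign(beta_hat) * np.maximum(np.abs(beta_hat) - lam, 0.0)

beta_hat = np.array([-3.0, -0.5, 0.2, 2.0])
lam = 1.0

print(ridge_shrink(beta_hat, lam))  # [-1.5  -0.25  0.1   1.  ]
print(lasso_shrink(beta_hat, lam))  # [-2. -0.  0.  1.]
```

Note how the large coefficient $-3$ keeps a bias of exactly $\lambda = 1$ under lasso, while ridge's bias ($1.5$ here) grows with the size of the coefficient.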

The bias of the ridge estimates is larger for larger $\beta$, whereas the bias for lasso is a constant $\lambda$ (for the coefficients it retains). Still, if we could keep the selection property of lasso and remove the bias, that would be better still!

If we knew which coefficients were truly non-zero, we shouldn't penalize these but only those that we want to "select out" of the model. Of course, we don't know this, but by running e.g. ridge first we do get a sense of which coefficients are large and which are near 0.

This is the motivation for adaptive lasso.

Let's assume we have a (near) unbiased estimate, $\hat{\beta}_u$, which we obtain from either OLS or ridge regression with a small degree of regularization. We then use these estimates to adjust the penalty for each coefficient $j$ as $$\lambda_j = \frac{\lambda}{\mid \hat{\beta}_{u,j} \mid^{\gamma}},$$ where $\gamma > 0$ leads to slightly different debiasing and selection results (see the paper I posted with the lecture).
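One convenient way to fit the adaptive lasso is to absorb the coefficient-specific penalties into the design matrix: rescaling column $j$ by $\mid\hat{\beta}_{u,j}\mid^{\gamma}$ and running an ordinary lasso is equivalent to penalizing coefficient $j$ by $\lambda / \mid\hat{\beta}_{u,j}\mid^{\gamma}$. A sketch using scikit-learn (an assumption — the demo's own code may use different tooling; the data-generating settings here are illustrative):

```python
import numpy as np
from sklearn.linear_model import Ridge, LassoCV

rng = np.random.default_rng(0)
n, p = 200, 10
beta_true = np.array([3.0, 1.5, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0])
X = rng.standard_normal((n, p))
y = X @ beta_true + rng.standard_normal(n)

# Step 1: (near) unbiased pilot estimate via lightly regularized ridge.
beta_u = Ridge(alpha=0.01).fit(X, y).coef_

# Step 2: rescale each column by |beta_u_j|^gamma; a plain lasso on the
# rescaled design then penalizes coefficient j by lambda / |beta_u_j|^gamma.
gamma = 1.0
w = np.abs(beta_u) ** gamma
lasso = LassoCV(cv=5).fit(X * w, y)

# Step 3: map the coefficients back to the original scale.
beta_adaptive = lasso.coef_ * w
print(np.round(beta_adaptive, 2))
```

Coefficients with large pilot estimates are barely shrunk, while those with pilot estimates near 0 receive a huge penalty and are typically set exactly to 0.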

This means that those ridge coefficient estimates that are large are penalized less in the adaptive lasso step and those with small ridge coefficients are penalized more and may be set to 0 (sparse model).

Note that the ridge estimates are not sparse. All lasso variants tended to correctly remove the zero coefficients for the model with these settings (try other settings at home). The bias reduction is usually more noticeable for the larger coefficients. The cross-validation may affect "who's the winner" across different runs.

The solution paths for lasso and adaptive lasso showcase the bias reduction for the large coefficients.

Now try with different correlations, sample sizes etc.

Below, I make several runs and look at the bias for the different estimation methods as well as predictive performance and selection performance.

For this run, adaptive lasso resulted in a better model than plain lasso, both in terms of selection performance and predictive performance. Still, ridge is best in terms of prediction. Remember from class: interpretability and prediction performance are not necessarily the same goal.

Next, I add some correlation to the feature groups and increase the sample size.

Of course, with this much data both lasso and adaptive lasso do a great job selecting the true coefficients. The predictive performance of all methods is good (check a scatter plot of predicted vs. true values to verify).

Let's now use the same simulation set up and compare lasso and elastic net. Remember from class that elastic net incorporates both a ridge and a lasso penalty - where the ridge penalty tends to allow for correlated features to enter the model together.
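A small sketch of this comparison (again assuming scikit-learn; the group structure — three groups of four features with groups 1 and 3 internally correlated, one true predictor per correlated group — mirrors the demo's description but the exact parameters are my assumptions):

```python
import numpy as np
from sklearn.linear_model import ElasticNetCV, LassoCV

rng = np.random.default_rng(2)
n, p, rho = 150, 12, 0.9

def corr_block(rho, size):
    """Equicorrelated covariance block with unit variances."""
    return rho * np.ones((size, size)) + (1 - rho) * np.eye(size)

# Groups 1 (features 0-3) and 3 (features 8-11) are internally correlated.
Sigma = np.eye(p)
Sigma[0:4, 0:4] = corr_block(rho, 4)
Sigma[8:12, 8:12] = corr_block(rho, 4)
X = rng.multivariate_normal(np.zeros(p), Sigma, size=n)

beta_true = np.zeros(p)
beta_true[[0, 8]] = 2.0  # one true predictor in each correlated group
y = X @ beta_true + rng.standard_normal(n)

lasso = LassoCV(cv=5).fit(X, y)
enet = ElasticNetCV(l1_ratio=0.5, cv=5).fit(X, y)  # mix of ridge and lasso

print("lasso selects:", np.flatnonzero(np.abs(lasso.coef_) > 1e-8))
print("enet selects: ", np.flatnonzero(np.abs(enet.coef_) > 1e-8))
```

Typically the elastic net pulls in several members of each correlated group, while the lasso tends to pick a single representative per group.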

Here, groups 1 and 3 contained correlated features. Notice how ridge and elastic net tend to select all the features in a group as part of the model in a coordinated fashion.

Notice that elastic net will have a larger FPR because features may join the model based on their correlation with true predictors. Depending on the sample size, correlation structure, etc., you may get a better TPR with elastic net.

Notice how the lasso estimates are all over the place!

Try with different sample sizes, group correlations etc.

The black and green curves are the 1st and 3rd group of features, respectively. Notice how the elastic net (right panel) produced solution paths that tend to select many of the green and black features. The wider lines are the true features. Notice that while elastic net picks many features, the true ones tend to be included early on.

The lasso (left panel) really struggles here. Notice that it "randomly" selects one of the group features (not the true one) early on while the true ones are not even part of the model until almost no penalty is applied.
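Solution-path figures like these can be reproduced with scikit-learn's path utilities (an assumption about tooling; the design below is a small equicorrelated stand-in for the demo's group structure):

```python
import numpy as np
from sklearn.linear_model import lasso_path, enet_path

rng = np.random.default_rng(3)
n, p, rho = 100, 8, 0.9
Sigma = rho * np.ones((p, p)) + (1 - rho) * np.eye(p)  # one correlated group
X = rng.multivariate_normal(np.zeros(p), Sigma, size=n)
beta_true = np.zeros(p)
beta_true[0] = 2.0  # single true predictor inside the group
y = X @ beta_true + rng.standard_normal(n)

# Each coefs array has shape (n_features, n_alphas), with alphas descending.
alphas_l, coefs_l, _ = lasso_path(X, y, n_alphas=50)
alphas_e, coefs_e, _ = enet_path(X, y, l1_ratio=0.5, n_alphas=50)

# Plotting coefs against -log(alpha) (e.g. with matplotlib) reproduces the
# two-panel solution-path figure from the demo.
print(coefs_l.shape, coefs_e.shape)  # (8, 50) (8, 50)
```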

Try with less correlation, bigger/smaller groups etc and see what happens.