High-dimensional inference

Embedded regularization worked well for us for selecting features in a (semi-)automated fashion. We still needed to be careful when it came to selecting the tuning parameter(s), and we have seen that selection can be quite unstable in the presence of noise (weak signal) and correlated features.

In addition, because selection is baked into the procedure, it is not straightforward to obtain p-values for the estimated parameters.

In this lecture we explore some re-sampling-based methods as well as de-biasing approaches.

Let's work with the heart disease data to make the demonstration easy to follow. Note, however, that this data set is not very high-dimensional per se. We will use an extended version of it with extra noise features below.

Let's use the lasso to select features for this data set and select $\lambda$ via cross-validation.
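A minimal sketch of that step, assuming the data frame is called `heart` with binary response `chd` (these names and the glmnet-based workflow are my assumptions, not necessarily the exact code from class):

```r
library(glmnet)

## Assumed setup: 'heart' is the heart disease data frame with binary
## response 'chd' and the remaining columns as predictors.
x <- model.matrix(chd ~ ., data = heart)[, -1]  # drop the intercept column
y <- heart$chd

set.seed(1)
cvfit <- cv.glmnet(x, y, family = "binomial", alpha = 1)  # lasso
plot(cvfit)

## Features with nonzero coefficients at the CV-optimal lambda
sel <- which(coef(cvfit, s = "lambda.min")[-1] != 0)
colnames(x)[sel]
```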

We can now refit the selected model to the data, i.e., an unpenalized fit of the features with nonzero coefficients at the optimal $\lambda$ from the cross-validation.
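Continuing the sketch above, the refit could look like this (how the refit is done, here via an ordinary logistic regression, is again my assumption):

```r
## Refit an unpenalized logistic regression using only the selected features
## (names follow the sketch above).
sel_names <- colnames(x)[sel]
refit <- glm(reformulate(sel_names, response = "chd"),
             data = data.frame(chd = y, x),
             family = binomial)
summary(refit)  # Wald p-values for the selected features
```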

Is this OK? We are now using the data for two purposes: selection and validation (inference). We can get overly optimistic p-values for the selected features (somewhat compensated for by the less complex model fit, but the p-values are not valid).

Simple splitting strategy

What if we split the data in two and use one half for training and selection (via cross-validation) and the other half for validating (drawing inference from) the selected model?

There are two main advantages to this: the inference is based on data that was not used for selection, so the p-values are valid, and the refitted model contains only the selected features, so classical low-dimensional theory applies. A sketch of the procedure is given below.
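Something along these lines, using the same assumed `x` and `y` as above:

```r
## A minimal sketch of the single-split idea.
n <- nrow(x)
set.seed(2)
idx_train <- sample(n, floor(n / 2))

## 1) Select on the first half via cross-validated lasso
cv_tr <- cv.glmnet(x[idx_train, ], y[idx_train], family = "binomial")
sel   <- which(coef(cv_tr, s = "lambda.min")[-1] != 0)

## 2) Inference on the second half with an unpenalized refit
dat_te <- data.frame(chd = y[-idx_train], x[-idx_train, sel, drop = FALSE])
fit_te <- glm(chd ~ ., data = dat_te, family = binomial)
summary(fit_te)$coefficients[, "Pr(>|z|)"]
```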

However, there is also a drawback. If you run the above code cell a couple of times (remember to remove the set.seed), you may see that the results differ, both in terms of the p-values and even in terms of which features appear among the selected ones that we get p-values for!

What should we do?

Multi-splitting strategy

Re-sampling to the rescue!

Let's just run the simple splitting strategy several times, each time recording the p-values on the test set.

Note that in each run I correct the p-values from the test set by the number of features selected on the training set.
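A sketch of that loop, building on the assumed objects from the splitting sketch above (the number of runs is arbitrary):

```r
## Multi-splitting: repeat the single split B times and store
## Bonferroni-corrected test-set p-values (1 for non-selected features).
B <- 50
pvals <- matrix(1, nrow = B, ncol = ncol(x),
                dimnames = list(NULL, colnames(x)))

for (b in 1:B) {
  idx   <- sample(n, floor(n / 2))
  cv_tr <- cv.glmnet(x[idx, ], y[idx], family = "binomial")
  sel   <- which(coef(cv_tr, s = "lambda.min")[-1] != 0)
  if (length(sel) == 0) next
  dat_te <- data.frame(chd = y[-idx], x[-idx, sel, drop = FALSE])
  fit_te <- glm(chd ~ ., data = dat_te, family = binomial)
  p_raw  <- summary(fit_te)$coefficients[-1, "Pr(>|z|)"]
  ## correct by the number of selected features (Bonferroni within each run)
  pvals[b, sel] <- pmin(p_raw * length(sel), 1)
}
```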

Let's investigate what the p-values look like across the multiple runs.

As you can see, for some features the p-values are essentially always 1 - this is because they are rarely or never selected. For other features you see p-values away from 1, mixed in with a subset of runs (where the feature was not selected) in which the p-value is 1.

This is tricky to aggregate.

One possibility is to use a summary statistic like the median (or some other quantile) of the p-values from the multiple runs. We have to be a bit careful since the data splits partially overlap between runs, so the p-values are actually dependent. How to aggregate properly is discussed in the paper I posted.

Here I illustrate with the median aggregate.
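With the `pvals` matrix from the sketch above, the median aggregation could look like this (the factor 2 reflects the quantile-based aggregation rule, of which the median is the $\gamma = 0.5$ case):

```r
## Median aggregation of the multi-split p-values, capped at 1.
p_med <- apply(pvals, 2, function(p) min(1, 2 * median(p)))
sort(p_med)
```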

Let's compare with the package 'hdi' in R (I posted a link to the Python package in the Canvas module).
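Roughly, the call could look like the sketch below; the binomial-specific arguments (`glm.pval`, `args.model.selector`, `args.classical.fit`) are my reading of the hdi documentation, so check `?multi.split` before relying on them:

```r
library(hdi)

## Multi-sample splitting via hdi; the family must be passed through to both
## the selector (cross-validated lasso) and the classical test-set fit.
fit_ms <- multi.split(x, y, B = 50, ci = FALSE,
                      args.model.selector = list(family = "binomial"),
                      classical.fit = glm.pval,
                      args.classical.fit = list(family = "binomial"))
fit_ms$pval.corr
```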

The results are quite similar - some variability due to the sampling procedure is expected.

The 'hdi' package has a built-in tool for selecting the best quantile for p-value estimation for each feature. Try different numbers of runs $B$, different splitting fractions between training and test, etc., at home.

Stability selection

In class I talked about stability selection. This method, and variants thereof, is quite popular for, e.g., large-scale network modeling using lasso-type methods. We will again use re-sampling to improve on the selection performance of the lasso.

This is not a p-value estimation method but rather an alternative to tuning-parameter selection, with an eye on testing (controlling false positives).

I start by running lasso on the original data to illustrate the selection paths and the cross-validation errors.
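For example (same assumed `x` and `y` as before):

```r
## Lasso coefficient paths and cross-validation curve on the original data.
fit_path <- glmnet(x, y, family = "binomial", alpha = 1)
plot(fit_path, xvar = "lambda", label = TRUE)

cvfit <- cv.glmnet(x, y, family = "binomial", alpha = 1)
plot(cvfit)  # note the wide error band and the flat region around the minimum
```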

It's not easy to see from the selection paths which $\lambda$ to select, and the cross-validation error band is quite wide with a rather flat curve around the minimum - it's difficult to pick the best amount of regularization here.

Does it matter? Isn't this one step removed from what we're after - the tuning parameter is a tool, but it's the selection we're after.

Let's try multiple splitting and check the proportion of times a feature is included in the model.
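A sketch of how such stability paths can be computed by subsampling (half-samples here; the grid and the number of runs are arbitrary choices):

```r
## Subsample half the data many times, run the lasso over a fixed lambda
## grid, and record how often each feature gets a nonzero coefficient.
lambda_grid <- fit_path$lambda
B <- 100
sel_freq <- matrix(0, nrow = ncol(x), ncol = length(lambda_grid),
                   dimnames = list(colnames(x), NULL))

for (b in 1:B) {
  idx   <- sample(n, floor(n / 2))
  fit_b <- glmnet(x[idx, ], y[idx], family = "binomial", lambda = lambda_grid)
  sel_freq <- sel_freq + (as.matrix(coef(fit_b)[-1, ]) != 0)
}
sel_prob <- sel_freq / B

## Stability paths: selection proportion vs lambda, with a 0.8 threshold
matplot(log(lambda_grid), t(sel_prob), type = "l", lty = 1,
        xlab = "log(lambda)", ylab = "selection proportion")
abline(h = 0.8, lty = 2)
```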

The new path plot is called the stability path. On the y-axis are the selection probabilities (proportions), and we can now consider keeping only the features whose stability values exceed a threshold (here 0.8, drawn in the plot as a dashed line).

In the stability selection paper (posted on Canvas) the authors prove that we can control the expected number of falsely selected features by controlling the threshold value and the range of the tuning parameter $\lambda$. They provide an explicit form for the relationship between the number of false positives, the stability threshold, and the $\lambda$ range.
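For reference, their bound (under an exchangeability assumption, in slightly simplified notation) reads

$$\mathbb{E}[V] \;\le\; \frac{1}{2\pi_{\mathrm{thr}} - 1}\,\frac{q_\Lambda^2}{p},$$

where $V$ is the number of falsely selected features, $\pi_{\mathrm{thr}} \in (\tfrac12, 1]$ is the stability threshold, $q_\Lambda$ is the average number of features selected over the $\lambda$ range $\Lambda$, and $p$ is the total number of features.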

In practice you might say that you believe the true model contains at most $\sim \sqrt{p}$ features. You can then choose a constraint on the number of false positives, and this gives you the range of $\lambda$ to investigate - or you can start with the range and work out the threshold. This is all implemented in the 'stability' function in the 'hdi' package.
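As a quick numerical illustration of the arithmetic (toy numbers, not the heart data):

```r
## Rearranging the bound: the largest average model size q compatible with a
## target E[V] at a given threshold.
p_tot <- 100; EV <- 2.5; thr <- 0.8
q_max <- sqrt(EV * (2 * thr - 1) * p_tot)
q_max  # ~12: restrict the lambda range so that ~12 features enter on average
```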

The method is geared toward high-dimensional settings so let's create a more challenging case from the heart disease data by adding some noise features.
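Something like this, where the 100 noise columns are an arbitrary choice on my part:

```r
## Expand the design matrix with pure-noise predictors to mimic a
## higher-dimensional setting (keeping n > p for later use).
set.seed(3)
noise <- matrix(rnorm(nrow(x) * 100), ncol = 100,
                dimnames = list(NULL, paste0("noise", 1:100)))
x_big <- cbind(x, noise)
dim(x_big)
```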

The stability paths identify a few of the features as quite stable across a range of $\lambda$ values and thresholds. Some of the original features do not reach the threshold, but this is expected from the original fit (check the p-values in the top cell of the notebook).

Let's play with some constraints on the false positives.
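A hedged sketch of the corresponding hdi call (the way I pass the binomial family through `args.model.selector` is an assumption; see `?stability`):

```r
## Stability selection with a bound on the expected number of false
## positives (EV) and a selection threshold.
library(hdi)
fit_stab <- stability(x_big, y, EV = 25, threshold = 0.75,
                      args.model.selector = list(family = "binomial"))
fit_stab
```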

To control the expected number of false positives at around 25 we can use a threshold around 0.75.

Here you see that stability selection adjusts the range of $\lambda$ (q = average model size) to control EV. I used a fixed range in my code.

Let's run a recap of multiple testing on the expanded data set.

In class we also talked about various de-sparsifying/de-biasing approaches for obtaining p-values for the features in a high-dimensional setting. Here are some runs with the methods that were presented in the lectures.
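For instance, the de-sparsified lasso and the ridge projection method in hdi could be called roughly like this (the `family` argument for the binary response is my assumption; the default multiplicity correction is of FWER type):

```r
## De-sparsified (de-biased) lasso and ridge projection p-values via hdi.
fit_lp <- lasso.proj(x_big, y, family = "binomial")
fit_rp <- ridge.proj(x_big, y, family = "binomial")

## Features significant after the default multiplicity correction
which(fit_lp$pval.corr < 0.05)
which(fit_rp$pval.corr < 0.05)
```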

These procedures select very few features for this data set, on par with stability selection controlling the false positives at around 1-5. This is because the default uses FWER-controlling procedures. You can change this to, e.g., BH if you want.

In the 'hdi' package we can also test groups of features. For the expanded heart disease data we get the following result. You see that the groups are refined top-to-bottom, and in the end the selected results agree quite well with the above findings (famhist and age are the strongest selected predictors). Age is correlated with many of the other features, so they are grouped with it in the tests; famhist is not grouped with any of the original predictors.
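The groups come from a hierarchical clustering of the features; just to see which variables end up grouped with age, here is that clustering step on its own (the $1 - |\text{correlation}|$ distance is a common choice and, as far as I recall, the hdi default):

```r
## Hierarchical clustering of the features based on absolute correlation.
d  <- as.dist(1 - abs(cor(x_big)))
hc <- hclust(d, method = "average")
plot(hc, labels = colnames(x_big), cex = 0.5)
```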

To demonstrate the clustering, I add an additional feature strongly correlated with age and rerun the above steps.
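For example (assuming the column is named `age` in the expanded design matrix; the noise level is arbitrary):

```r
## Add a near-duplicate of age so the group tests have an obvious cluster.
set.seed(4)
x_corr <- cbind(x_big, age2 = x_big[, "age"] + rnorm(nrow(x_big), sd = 1))
cor(x_corr[, "age"], x_corr[, "age2"])
```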

Finally, let's just apply the multiple testing procedures to the raw p-values from the full fit. Note that we can do so here because $n > p$; otherwise we would have to use the methods above.
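A sketch of that final step on the expanded data (assuming the objects defined above):

```r
## Full (unpenalized) logistic fit -- possible only because n > p here --
## followed by standard multiple-testing corrections of the raw p-values.
fit_full <- glm(chd ~ ., data = data.frame(chd = y, x_big), family = binomial)
p_raw <- summary(fit_full)$coefficients[-1, "Pr(>|z|)"]

adj <- cbind(bonferroni = p.adjust(p_raw, method = "bonferroni"),
             holm       = p.adjust(p_raw, method = "holm"),
             BH         = p.adjust(p_raw, method = "BH"))
adj[p_raw < 0.05, ]
```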

Summary

If $n<p$ we can use regularized methods for feature selection.