# below is the function using Binary Search to approximate root L. # m is the rank of supspace (generically should be group size) # Z is the observed norm # survive is the set of Monte Carlo samples that are in R_Y # sigma is the same as the globle one # alpha is the level # maxL is the maximum considered L. # output: L (left endpoint of CI) BinarySearch_Sampling=function(m,Z,survive,sigma,alpha,tol=0.01,maxL=2*Z){ err=1+tol; L_Upper=maxL; L_Lower=-m; index=(survive>Z); C=dnorm(survive,mean=Z,sd=sigma); while (err>tol){ L=(L_Upper+L_Lower)/2; weights=(survive^(m-1))*(exp(-(survive^2-2*survive*L)/2))/C; f1=sum(weights[index]); f2=sum(weights); f=f1/f2-alpha; if ((f>=0) || is.nan(f) ){ L_Upper=L; } else { L_Lower=L; } err=L_Upper-L_Lower; } return(L) } # Group Lasso # input: # (X,y): data; # G: number of groups, where groups are assumed to have equal size and are in natural order # lambda : tuning parameter in group lasso # alpha: (1-alpha) is the confidence level # B: number of samples in Importance Sampling # sigma: Standard deviation of error. Default is 1. # Output: # a list containing two elements: # pval: a vector of length G giving the p-values of each selected variable.(NA for Non-selected variable) # CI: a vector giving the left endpoint of one-sided confidence interval for each selected variable. (NA for Non-selected variable) GL=function(X,y,G,lambda,alpha=0.05,B=500,sigma=1){ if(!require(gglasso)){ install.packages('gglasso') } library('gglasso') n=nrow(X); p=ncol(X); m=p/G; group=rep(1:G,each=m); lambda_unscaled=lambda/n; pval=rep(NA,G); CI=rep(NA,G); model=gglasso(X,y,group=group,pf=rep(1,G),lambda=lambda_unscaled,eps=1e-9,maxit=1e9,intercept=FALSE); betahat=model\$beta; E=unique(group[betahat!=0]); # selected group EC=setdiff(1:G,E); k=length(E); if (k==0) return(pval); XE=X[,is.element(rep(1:G,each=m),E)]; for (j in 1:k){ X0=X[,(1:m)+m*(E[j]-1)]; XSh0 = X[,is.element(rep(1:G,each=m),setdiff(E,E[j]))]; if (length(XSh0)==0){ X0perp = X0; } else { X0perp = X0 - XSh0%*%solve(t(XSh0)%*%XSh0,t(XSh0)%*%X0); } # choose direction u to test group E[j] u = X0perp%*%solve(t(X0perp)%*%X0perp,t(X0perp)%*%y) Z = sqrt(sum(u^2)); u = u / Z; yperp = y - Z*u; # Do Monte Carlo (Importance Sampling) sigma1=sqrt(m-2*(gamma((m+1)/2)/gamma(m/2))^2); simu_z=Z+rnorm(B)*sigma1*sigma; group1=rep(1:k,each=m); survive=c(); for (i in 1:B){ flag=T; if (simu_z[i]<0) next; y1=yperp+simu_z[i]*u; model1=gglasso(XE,y1,group=group1,pf=rep(1,k),lambda=lambda_unscaled,eps=1e-9,maxit=1e9,intercept=FALSE); betahat1=model1\$beta; if (length(unique(group1[betahat1!=0]))!=k) next resi=y1-XE%*%betahat1; for (g in EC){ Xtemp=X[,(1:m)+m*(g-1)]; if (sqrt(sum((t(Xtemp)%*%resi)^2))>lambda) {flag=F} } if (flag==T){survive=c(survive,simu_z[i])} } survive=survive/sigma; Z=Z/sigma; # calculate p-value if (length(survive)==0) { print('Warning: no sample survives') pval[E[j]]=NaN; CI[E[j]]=NaN; next; } weights=(survive^(m-1))*(exp(-survive^2/2))/dnorm(survive,mean=Z,sd=sigma1); pval[E[j]]=sum(weights[survive>Z])/sum(weights); # calculate (one-sided) confidence interval L=sigma*BinarySearch_Sampling(m,Z,survive,sigma1,alpha); CI[E[j]]=L; } output=list('pval'=pval,'CI'=CI); return(output) }