##################################################### # PCA gives the first k principal components of genotypes at genomic markers. # x represents genotypes at genomic markers with each row corresponding to # genotypes of one individual. ##################################################### PCA=function(x,k) { a=apply(x,2,var) x=(t(x)-colMeans(x))/a xx=t(x)%*%x BB=eigen(xx)$vectors BB[,1:k] } ################################################################# # choose_OPT_SMP chooses the optimal value of smoothing parameter ################################################################# choose_OPT_SMP=function(y, x, pc, permutation, numper) { # y is trait values # x is genotypes at genomic markers # pc is the first few principal components # permutation is a indicator. If permutation=1, we use permutation to # evaluate pvalues; otherwise, we use asymptotic distribution to evaluate pvalues. # numper is the number of permutation ####### select polymorphism variants ############ a=colSums(x) ind=which(a>0) x=x[,ind] ############################################### k=ncol(pc) # number of principal components n=length(y) # number of individuals m=ncol(x) # number of polymorphism variants xx=cbind(y,x) ######## rescale principal components to interval [0,1] ########### for(i in 1:k) pc[,i]=(pc[,i]-min(pc[,i]))/(max(pc[,i])-min(pc[,i])) ######## choose searching grids ################# hh=2**c(7:-22) hh=hh**(2/(5+k)) # these grids work well in general. ############################################### ww=matrix(0,n,n) pv=rep(0,length(hh)) pvalue=rep(0,m) Tper=matrix(0,m,numper) nn=(c(1:m)-0.5)/m iter=0 for(h in hh) { iter=iter+1 ######## nonparametric regression ############### for(i in 1:n) { w=rep(1,n) for(j in 1:k) { t=(pc[,j]-pc[i,j])/h t=(1-t^2)^2*(t^2<1) w=w*t } ww[i,]=w/sum(w) } x=xx-ww%*%xx # residuals of nonparametric regression yh=x[,1] x=x[,-1] ######### use permutation to evaluate pvalues ############# if(permutation==1) { T=cor(yh,x) for(i in 1:numper) { yper=sample(yh) Tper[,i]=cor(yper,x) } for(i in 1:m) pvalue[i]=sum(Tper[i,]>T[i])/numper+sum(Tper[i,]==T[i])/numper/2 } ######### use asymptotic distribution to evaluate pvalues ############# else pvalue=1-pchisq((n-1)*cor(yh,x)^2,1) pvalue=sort(pvalue) pv[iter]=max(abs(pvalue-nn)) } hh[which(pv==min(pv))[1]] #optimal value of smoothing parameter } ###################################################### # Given the value of smoothing parameter, Resid_Nonp calculates # residuals of trait values and genotypes at candidate region by # applying nonparametric regression for PCs of genotypes at genomic markers ###################################################### Resid_Nonp=function(y, x, pc, h) { # y is trait values # x is genotypes at variants in candidate region # pc is the first few principal components # h is the optimal value of smoothing parameter ####### select polymorphism variants ############ a=colSums(x) ind=which(a>0) x=x[,ind] ############################################### k=ncol(pc) # number of principal components n=length(y) # number of individuals m=ncol(x) # number of polymorphism variants x=cbind(y,x) ######## rescale principal components to interval [0,1] ########### for(i in 1:k) pc[,i]=(pc[,i]-min(pc[,i]))/(max(pc[,i])-min(pc[,i])) ww=matrix(0,n,n) ######## nonparametric regression ############### for(i in 1:n) { w=rep(1,n) for(j in 1:k) { t=(pc[,j]-pc[i,j])/h t=(1-t^2)^2*(t^2<1) w=w*t } ww[i,]=w/sum(w) } residual=x-ww%*%x # residuals of nonparametric regression }