#include "iqlaestimator.h" IQLAEstimator::IQLAEstimator(vector<Family *> & fam_list, vector<Genotype *> & complete_geno_list, vector<Genotype *> & geno_list, vector<Missing *> & missing_list, map<int, double> & hap_list, int size) :fam_list(fam_list), complete_geno_list(complete_geno_list), geno_list(geno_list), missing_list(missing_list), hap_freq_table(hap_list), loci_num(size){ sample_size=0; for(int i=0;i<fam_list.size();i++) sample_size+=fam_list[i]->N_typed; init_freq_table(); compute_count(); geno_missing_patten(); pair_missing_pattern(); flag_pd = 1; df = 0; pvalue = 0; test_perform = 0; } void IQLAEstimator::init_freq_table(){ int tot; if(hap_freq_table.size()==0){ tot = 1<<loci_num; for(int i=0;i<tot;i++) hap_freq_table[i] = 1.0/double(tot); } else{ tot = hap_freq_table.size(); map<int, double>::iterator iter=hap_freq_table.begin(); while(iter!=hap_freq_table.end()){ iter->second = 1.0/double(tot); iter++; } } geno_freq_table.clear(); map<int, double>::iterator iter1=hap_freq_table.begin(); map<int, double>::iterator iter2; while(iter1!=hap_freq_table.end()){ iter2 = iter1; while(iter2!=hap_freq_table.end()){ int h1 = iter1->first; int h2 = iter2->first; if(h1 == h2) geno_freq_table[pair<int, int>(h1,h2)] = iter1->second*iter2->second; else geno_freq_table[pair<int, int>(h1,h2)] = 2*iter1->second*iter2->second; iter2++; } iter1++; } } void IQLAEstimator::reset_freq_table(map<int,double> & hap_freq){ hap_freq_table.clear(); hap_freq_table = hap_freq; geno_freq_table.clear(); map<int, double>::iterator iter1=hap_freq_table.begin(); map<int, double>::iterator iter2; while(iter1!=hap_freq_table.end()){ iter2 = iter1; while(iter2!=hap_freq_table.end()){ int h1 = iter1->first; int h2 = iter2->first; if(h1 == h2) geno_freq_table[pair<int, int>(h1,h2)] = iter1->second*iter2->second; else geno_freq_table[pair<int, int>(h1,h2)] = 2*iter1->second*iter2->second; iter2++; } iter1++; } } void IQLAEstimator::show_incom_geno_freq(){ for(int i=0;i<geno_list.size();i++){ int *gcode = geno_list[i]->get_gcode(); printf(" "); for(int j=0;j<loci_num;j++){ if(gcode[j]==-1) printf("*"); else printf("%d", gcode[j]); if(j!=loci_num-1) printf("-"); } double freq = 0; const vector<pair<int, int> > hv = geno_list[i]->get_hap_vec(); for(int j=0;j<hv.size();j++) freq+=geno_freq_table[hv[j]]; printf(" %6d ", geno_list[i]->get_num()); printf(" %.4f \n", freq); } printf("\n"); } void IQLAEstimator::show_hap_freq(){ fprintf(outfp, "SNP haplotype frequency estimates in full sample\n"); fprintf(outfp, "Haplotype\t Frequency (choice a)\n"); map<int, double>::iterator iter = hap_freq_table.begin(); while(iter!=hap_freq_table.end()){ if(iter->second<0.00005){ iter++; continue; } translate_hap(outfp,iter->first,loci_num); fprintf(outfp, " \t %.4f\n", iter->second); iter++; } fprintf(outfp, "\n"); } void IQLAEstimator::show_hap_freq(vector<char> &a0v, vector<char> &a1v){ fprintf(outfp, "SNP haplotype frequency estimates in full sample\n"); fprintf(outfp, "Haplotype\t Frequency (choice a)\n"); map<int, double>::iterator iter = hap_freq_table.begin(); while(iter!=hap_freq_table.end()){ if(iter->second<0.00005){ iter++; continue; } translate_hap(outfp,iter->first,loci_num,a0v,a1v); if(loci_num==2) fprintf(outfp, "\t"); fprintf(outfp, " \t %.4f\n", iter->second); iter++; } fprintf(outfp, "\n"); } void IQLAEstimator::show_geno_freq(){ map<pair<int, int>, double>::iterator giter = geno_freq_table.begin(); while(giter!=geno_freq_table.end()){ if(giter->second<0.00005){ giter++; continue; } printf("(%2d,%2d) ", (giter->first).first,(giter->first).second); printf(" %.4f\n", giter->second); giter++; } printf("\n"); } void IQLAEstimator::compute_count(){ int row = hap_freq_table.size(); int col = geno_freq_table.size(); count = new int*[row-1]; for(int i=0;i<row-1;i++){ count[i] = new int[col]; memset(count[i],0,col*sizeof(int)); } map<int, int> hap_map; map<int, double>::iterator iter=hap_freq_table.begin(); int loc = 0; while(iter!=hap_freq_table.end()){ hap_map[iter->first] = loc; loc++; iter++; } loc = 0; map<pair<int, int>, double>::iterator giter=geno_freq_table.begin(); while(giter!=geno_freq_table.end()){ int h1=(giter->first).first; int h2=(giter->first).second; int loc1 = hap_map[h1]; int loc2 = hap_map[h2]; if(loc1!=row-1) count[loc1][loc]++; if(loc2!=row-1) count[loc2][loc]++; loc++; giter++; } hap_map.clear(); } void IQLAEstimator::geno_missing_patten(){ for(int i=0;i<geno_list.size();i++){ int *a=geno_list[i]->get_gcode(); for(int j=0;j<missing_list.size();j++){ int flag=1; int *b=missing_list[j]->get_gcode(); for(int k=0;k<loci_num;k++){ if((a[k]==-1 && b[k]!=-1) || (b[k]==-1 && a[k]!=-1)){ flag = 0; break; } } if(flag==0) continue; geno_missing_map[i] = j; break; } } if(geno_missing_map.size()!=geno_list.size()){ printf("geno_missing_map assignment is wrong. Please check.\n"); exit(1); } } void IQLAEstimator::show_test(vector<int *> *cluster, vector<char> &a0v, vector<char> &a1v){ map<int, int> trans; map<int, double>::iterator iter = hap_freq_table.begin(); int count = 0; while(iter!=hap_freq_table.end()){ trans[count] = iter->first; iter++; count++; } if(df<(1<<loci_num)-1){ fprintf(outfp, "Haplotype clusters in the full-df test\n"); for(int i=0;i<=df;i++){ fprintf(outfp, "cluster %d:\n", i+1); for(int j=1;j<=(*cluster)[i][0];j++){ fprintf(outfp, " "); translate_hap(outfp,trans[(*cluster)[i][j]],loci_num,a0v,a1v); fprintf(outfp, "\n"); } } fprintf(outfp, "\n"); } fprintf(outfp, "Full-df MIQLS_a test\n"); fprintf(outfp, "Statistic = %.6f\t pvalue = %g\t df = %d\n\n", test,pvalue,df); map<int, int> dis; for(int i=0;i<col_1df;i++) dis[trans[(*cluster)[i][1]]] = i; fprintf(outfp, "1-df MIQLS_a tests\n"); fprintf(outfp, "Haplotype tested\t Statistic\t P-value\n"); map<int, int>::iterator diter = dis.begin(); while(diter!=dis.end()){ translate_hap(outfp,diter->first,loci_num,a0v,a1v); fprintf(outfp, "\t"); if(loci_num==2) fprintf(outfp, "\t"); fprintf(outfp, " \t %.6f\t %g\n", Btest[diter->second],Bpvalue[diter->second]); diter++; } fprintf(outfp, "\n"); trans.clear(); dis.clear(); } void IQLAEstimator::update_hap_freq(){ map<int, double>::iterator iter = hap_freq_table.begin(); while(iter!=hap_freq_table.end()){ iter->second = 0; iter++; } map<pair<int, int>, double>::iterator giter = geno_freq_table.begin(); while(giter!= geno_freq_table.end()){ pair<int, int> conf = giter->first; if(conf.first==conf.second) hap_freq_table[conf.first] += geno_freq_table[conf]; else{ hap_freq_table[conf.first] += .5*geno_freq_table[conf]; hap_freq_table[conf.second] += .5*geno_freq_table[conf]; } giter++; } } void IQLAEstimator::update_geno_freq(){ map<pair<int, int>, double>::iterator giter = geno_freq_table.begin(); while(giter!=geno_freq_table.end()){ giter->second = 0; giter++; } giter = geno_freq_table.begin(); while(giter!=geno_freq_table.end()){ pair<int, int> conf = giter->first; int k1 = conf.first; int k2 = conf.second; if(k1==k2) giter->second = hap_freq_table[k1]*hap_freq_table[k2]; else giter->second = 2*hap_freq_table[k1]*hap_freq_table[k2]; giter++; } } IQLAEstimator::~IQLAEstimator(){ int row = hap_freq_table.size(); for(int k=0;k<row-1;k++) delete[] count[k]; delete[] count; hap_freq_table.clear(); geno_freq_table.clear(); geno_missing_map.clear(); aff_hap_freq_table.clear(); unaff_hap_freq_table.clear(); unknown_hap_freq_table.clear(); full_hap_freq_table.clear(); aff_hap_freq_table_EW.clear(); unaff_hap_freq_table_EW.clear(); unknown_hap_freq_table_EW.clear(); full_hap_freq_table_EW.clear(); if(test_perform==1){ delete[] Btest; delete[] Bpvalue; } } // single-marker analysis void IQLAEstimator::single(){ int h_size = hap_freq_table.size(); int col = geno_freq_table.size(); double denominator = 0; // D_p^T K^(-1) D_p double numerator = 0; // D_p^T K^(-1) Y for(int fam=0;fam<fam_list.size();fam++){ if(fam_list[fam]->N_typed>0){ int dim=fam_list[fam]->N_typed; int *typed = fam_list[fam]->typed; double **cholent = new double*[dim]; for(int i=0;i<dim;i++){ cholent[i] = new double[h_size]; memset(cholent[i],0,h_size*sizeof(double)); } double **chol = new double*[dim]; for(int i=0;i<dim;i++){ chol[i] = new double[dim]; memset(chol[i],0,dim*sizeof(double)); } double **cholaug = new double*[dim]; for(int i=0;i<dim;i++){ cholaug[i] = new double[h_size]; memset(cholaug[i],0,h_size*sizeof(double)); } double **covMatrix = new double*[dim]; for(int i=0;i<dim;i++){ covMatrix[i] = new double[dim]; memset(covMatrix[i],0,dim*sizeof(double)); } // populate covariance matrix int nb1 = 0; int nb2 = 0; int ind_1, ind_2; map<pair<int, int>, double>::iterator iterk; for(int i=0;i<fam_list[fam]->N;i++){ if(typed[i]==1){ nb2 = nb1; ind_1 = fam_list[fam]->fam_member[i][0]; ind_2 = ind_1; iterk = (fam_list[fam]->kinship_coeff).find(pair<int, int>(ind_1,ind_2)); if(iterk == (fam_list[fam]->kinship_coeff).end()){ printf("No IBD coefficient for individual %d from family %d. Please check...\n\n", ind_1,fam_list[fam]->fam_id); exit(1); } covMatrix[nb1][nb2] = 1; nb2++; for(int j=i+1;j<fam_list[fam]->N;j++){ if(typed[j]==1){ ind_2 = fam_list[fam]->fam_member[j][0]; iterk = (fam_list[fam]->kinship_coeff).find(pair<int, int>(ind_1,ind_2)); if(iterk == (fam_list[fam]->kinship_coeff).end()){ printf("No kinship coefficient between individual %d and individual %d from family %d. Please check...\n\n", ind_1,ind_2,fam_list[fam]->fam_id); exit(1); } if(iterk->second>1e-8){ // i is related to j covMatrix[nb1][nb2] = 2*iterk->second; covMatrix[nb2][nb1] = covMatrix[nb1][nb2]; } // i, j related nb2++; } // j is typed } // complete j cholent[nb1][0] = 1; cholent[nb1][h_size-1] = 1-fam_list[fam]->gcode[i][0]/2.0; nb1++; } // i is typed } // complete i if(cholesky(covMatrix,dim,cholent,h_size,chol,cholaug,0) != 1){ printf("cholesky decomposition of the cov matrix failed for family %d. \n", fam_list[fam]->fam_id); exit(1); } double **s = self_colmultiply(cholaug,dim,1,h_size-1,1,h_size-1); double **t = self_colmultiply(cholaug,dim,1,h_size-1,h_size,h_size); denominator+=s[0][0]; numerator+=t[0][0]; clear_matrix(cholent,dim,h_size); clear_matrix(chol,dim,dim); clear_matrix(cholaug,dim,h_size); clear_matrix(covMatrix,dim,dim); clear_matrix(s,h_size-1,h_size-1); clear_matrix(t,h_size-1,1); } } map<int, double>::iterator iter=hap_freq_table.begin(); double n = numerator/denominator; iter->second = n; iter++; iter->second = 1-n; update_geno_freq(); } void IQLAEstimator::single_Score_test() { int h_size = hap_freq_table.size(); int col = geno_freq_table.size(); double denominator = 0; // D_p^T K^(-1) D_p double A1 = 0; // D_r^T K^(-1) D_r double A2 = 0; // D_p^T K^(-1) D_r double numerator = 0; // D_r^T K^(-1) (Y-\mu) for(int fam=0;fam<fam_list.size();fam++){ if(fam_list[fam]->N_typed>0){ int dim=fam_list[fam]->N_typed; int *typed = fam_list[fam]->typed; double **cholent = new double*[dim]; for(int i=0;i<dim;i++){ cholent[i] = new double[2*h_size-1]; memset(cholent[i],0,(2*h_size-1)*sizeof(double)); } double **chol = new double*[dim]; for(int i=0;i<dim;i++){ chol[i] = new double[dim]; memset(chol[i],0,dim*sizeof(double)); } double **cholaug = new double*[dim]; for(int i=0;i<dim;i++){ cholaug[i] = new double[2*h_size-1]; memset(cholaug[i],0,(2*h_size-1)*sizeof(double)); } double **covMatrix = new double*[dim]; for(int i=0;i<dim;i++){ covMatrix[i] = new double[dim]; memset(covMatrix[i],0,dim*sizeof(double)); } // populate covariance matrix int nb1 = 0; int nb2 = 0; int ind_1, ind_2; map<pair<int, int>, double>::iterator iterk; double test_weight; for(int i=0;i<fam_list[fam]->N;i++){ if(typed[i]==1){ nb2 = nb1; ind_1 = fam_list[fam]->fam_member[i][0]; ind_2 = ind_1; iterk = (fam_list[fam]->kinship_coeff).find(pair<int, int>(ind_1,ind_2)); if(iterk == (fam_list[fam]->kinship_coeff).end()){ printf("No IBD coefficient for individual %d from family %d. Please check...\n\n", ind_1,fam_list[fam]->fam_id); exit(1); } covMatrix[nb1][nb2] = 1; nb2++; for(int j=i+1;j<fam_list[fam]->N;j++){ if(typed[j]==1){ ind_2 = fam_list[fam]->fam_member[j][0]; iterk = (fam_list[fam]->kinship_coeff).find(pair<int, int>(ind_1,ind_2)); if(iterk == (fam_list[fam]->kinship_coeff).end()){ printf("No IBD coefficient between individual %d and individual %d from family %d. Please check...\n\n", ind_1,ind_2,fam_list[fam]->fam_id); exit(1); } if(iterk->second>1e-8){ // i is related to j covMatrix[nb1][nb2] = 2*iterk->second; covMatrix[nb2][nb1] = covMatrix[nb1][nb2]; } // i, j related nb2++; } // j is typed } // complete j test_weight = fam_list[fam]->test_weight[i]; cholent[nb1][0] = 1; cholent[nb1][h_size] = test_weight; cholent[nb1][h_size-1] = 1-fam_list[fam]->gcode[i][0]/2.0-hap_freq_table[0]; nb1++; } // i is typed } // complete i if(cholesky(covMatrix,dim,cholent,(2*h_size-1),chol,cholaug,0) != 1){ printf("cholesky decomposition of the cov matrix failed for family %d. \n", fam_list[fam]->fam_id); exit(1); } double **s = self_colmultiply(cholaug,dim,1,h_size-1,1,h_size-1); double **t = self_colmultiply(cholaug,dim,h_size+1,2*h_size-1,h_size+1,2*h_size-1); double **u = self_colmultiply(cholaug,dim,1,h_size-1,h_size+1,2*h_size-1); double **v = self_colmultiply(cholaug,dim,h_size+1,2*h_size-1,h_size,h_size); denominator+=s[0][0]; A1+=t[0][0]; A2+=u[0][0]; numerator+=v[0][0]; clear_matrix(s,h_size-1,h_size-1); clear_matrix(t,h_size-1,h_size-1); clear_matrix(u,h_size-1,h_size-1); clear_matrix(v,h_size-1,1); clear_matrix(cholent,dim,2*h_size-1); clear_matrix(chol,dim,dim); clear_matrix(cholaug,dim,2*h_size-1); clear_matrix(covMatrix,dim,dim); } } // MQLS test double orig = A1-A2*A2/denominator; test = 2*numerator*numerator/orig/hap_freq_table[0]/hap_freq_table[1]; pvalue = pochisq(test,1); } // find all possible pair of missing pattern void IQLAEstimator::pair_missing_pattern(){ int ind_1, ind_2; int dim_1, dim_2; int cat_1, cat_2; int mis_cat_1, mis_cat_2; map<pair<int, int>, double>::iterator iterk; map<pair<int, int>, double>::iterator iter1; map<pair<int, int>, double>::iterator iter2; for(int fam=0;fam<fam_list.size();fam++){ if(fam_list[fam]->N_typed>0){ for(int i=0;i<fam_list[fam]->N;i++){ cat_1 = fam_list[fam]->cat[i]; mis_cat_1 = fam_list[fam]->mis_cat[i]; dim_1 = missing_list[mis_cat_1]->dim; if(dim_1 > 0 && geno_list[cat_1]->info==1){ ind_1 = fam_list[fam]->fam_member[i][0]; for(int j=i+1;j<fam_list[fam]->N;j++){ cat_2 = fam_list[fam]->cat[j]; mis_cat_2 = fam_list[fam]->mis_cat[j]; dim_2 = missing_list[mis_cat_2]->dim; if(dim_2 > 0 && geno_list[cat_2]->info==1){ ind_2 = fam_list[fam]->fam_member[j][0]; iterk = (fam_list[fam]->kinship_coeff).find(pair<int, int>(ind_1,ind_2)); if(iterk == (fam_list[fam]->kinship_coeff).end()){ printf("No IBD coefficient between individual %d and individual %d from family %d. Please check...\n\n", ind_1,ind_2,fam_list[fam]->fam_id); exit(1); } if(iterk->second>1e-8){ // i is related to j if(pair_missing_v.find(mis_cat_1)==pair_missing_v.end()) pair_missing_v[mis_cat_1] = 0; if(pair_missing_v.find(mis_cat_2)==pair_missing_v.end()) pair_missing_v[mis_cat_2] = 0; int min = mis_cat_1<=mis_cat_2 ? mis_cat_1 : mis_cat_2; int max = mis_cat_1+mis_cat_2-min; if(fam_list[fam]->IBD1_coeff[pair<int, int>(ind_1,ind_2)]>1e-8){ if(pair_missing_IBD1.find(pair<int, int>(min,max))==pair_missing_IBD1.end()) pair_missing_IBD1[pair<int, int>(min,max)] = 0; } if(fam_list[fam]->IBD2_coeff[pair<int, int>(ind_1,ind_2)]>1e-8){ if(pair_missing_IBD2.find(pair<int, int>(min,max))==pair_missing_IBD2.end()) pair_missing_IBD2[pair<int, int>(min,max)] = 0; } } // i, j related } // j is typed } // complete j } // i is typed } // complete i } } } void IQLAEstimator::allocate(){ map<pair<int, int>, double>::iterator giter=geno_freq_table.begin(); int loc = 0; while(giter!=geno_freq_table.end()){ hap_pair_map[giter->first] = loc; loc++; giter++; } int h_size = hap_freq_table.size(); int col = geno_freq_table.size(); rst = new double*[h_size-1]; for(int i=0;i<h_size-1;i++){ rst[i] = new double[col]; memset(rst[i],0,col*sizeof(double)); } // record derivative for each pair of haplotypes freq = new double[h_size]; memset(freq,0,h_size*sizeof(double)); int complete_geno = complete_geno_list.size(); L = new double*[complete_geno]; for(int i=0;i<complete_geno;i++){ L[i] = new double[complete_geno]; memset(L[i],0,complete_geno*sizeof(double)); } int v_size = pair_missing_v.size(); if(v_size>0){ V = new double **[v_size]; map<int, int>::iterator pi=pair_missing_v.begin(); for(int i=0;i<v_size;i++){ pi->second = i; int mis = pi->first; int dim_1 = missing_list[mis]->dim; V[i] = new double*[dim_1]; for(int j=0;j<dim_1;j++){ V[i][j] = new double[complete_geno]; memset(V[i][j],0,complete_geno*sizeof(double)); } pi++; } } int IBD1_size = pair_missing_IBD1.size(); int IBD2_size = pair_missing_IBD2.size(); if(IBD1_size>0){ mat_cov_1 = new double **[IBD1_size]; map<pair<int, int>, int>::iterator pmi=pair_missing_IBD1.begin(); for(int i=0;i<IBD1_size;i++){ pmi->second = i; pair<int, int> mispair = pmi->first; int dim_1 = missing_list[mispair.first]->dim; int dim_2 = missing_list[mispair.second]->dim; mat_cov_1[i] = new double*[dim_1]; for(int j=0;j<dim_1;j++){ mat_cov_1[i][j] = new double[dim_2]; memset(mat_cov_1[i][j],0,dim_2*sizeof(double)); } pmi++; } } if(IBD2_size>0){ mat_cov_2 = new double **[IBD2_size]; map<pair<int, int>, int>::iterator pmi=pair_missing_IBD2.begin(); for(int i=0;i<IBD2_size;i++){ pmi->second = i; pair<int, int> mispair = pmi->first; int dim_1 = missing_list[mispair.first]->dim; int dim_2 = missing_list[mispair.second]->dim; mat_cov_2[i] = new double*[dim_1]; for(int j=0;j<dim_1;j++){ mat_cov_2[i][j] = new double[dim_2]; memset(mat_cov_2[i][j],0,dim_2*sizeof(double)); } pmi++; } } denominator = new double*[h_size-1]; for(int i=0;i<h_size-1;i++){ denominator[i] = new double[h_size-1]; memset(denominator[i],0,(h_size-1)*sizeof(double)); } // F_p^T Omega^(-1) F_p numerator = new double[h_size-1]; memset(numerator,0,(h_size-1)*sizeof(double)); // F_p^T Omega^(-1) (Z-\mu) tot_dim = 0; for(int fam=0;fam<fam_list.size();fam++){ if(fam_list[fam]->N_typed>0){ int cat; int mis_cat; int dim=0; for(int i=0;i<fam_list[fam]->N;i++){ cat = fam_list[fam]->cat[i]; if(geno_list[cat]->info==1){ mis_cat = fam_list[fam]->mis_cat[i]; dim+=missing_list[mis_cat]->dim; } } if(dim>tot_dim) tot_dim = dim; } } cholent = new double*[tot_dim]; for(int i=0;i<tot_dim;i++){ cholent[i] = new double[h_size]; memset(cholent[i],0,h_size*sizeof(double)); } chol = new double*[tot_dim]; for(int i=0;i<tot_dim;i++){ chol[i] = new double[tot_dim]; memset(chol[i],0,tot_dim*sizeof(double)); } cholaug = new double*[tot_dim]; for(int i=0;i<tot_dim;i++){ cholaug[i] = new double[h_size]; memset(cholaug[i],0,h_size*sizeof(double)); } covMatrix = new double*[tot_dim]; for(int i=0;i<tot_dim;i++){ covMatrix[i] = new double[tot_dim]; memset(covMatrix[i],0,tot_dim*sizeof(double)); } } void IQLAEstimator::deallocate(){ hap_pair_map.clear(); int h_size = hap_freq_table.size(); int col = geno_freq_table.size(); clear_matrix(rst,h_size-1,col); delete[] freq; int complete_geno = complete_geno_list.size(); clear_matrix(L,complete_geno,complete_geno); int v_size = pair_missing_v.size(); if(v_size>0){ map<int, int>::iterator pi=pair_missing_v.begin(); for(int i=0;i<v_size;i++){ int dim_1 = missing_list[pi->first]->dim; for(int j=0;j<dim_1;j++) delete[] V[i][j]; delete[] V[i]; pi++; } delete[] V; } for(int i=0;i<tot_dim;i++){ delete[] cholent[i]; delete[] chol[i]; delete[] cholaug[i]; delete[] covMatrix[i]; } delete[] cholent; delete[] chol; delete[] cholaug; delete[] covMatrix; int IBD1_size = pair_missing_IBD1.size(); int IBD2_size = pair_missing_IBD2.size(); if(IBD1_size>0){ map<pair<int, int>, int>::iterator pmi=pair_missing_IBD1.begin(); for(int i=0;i<IBD1_size;i++){ pair<int, int> mispair = pmi->first; int dim_1 = missing_list[mispair.first]->dim; for(int j=0;j<dim_1;j++) delete[] mat_cov_1[i][j]; delete[] mat_cov_1[i]; pmi++; } delete[] mat_cov_1; } if(IBD2_size>0){ map<pair<int, int>, int>::iterator pmi=pair_missing_IBD2.begin(); for(int i=0;i<IBD2_size;i++){ pair<int, int> mispair = pmi->first; int dim_1 = missing_list[mispair.first]->dim; for(int j=0;j<dim_1;j++) delete[] mat_cov_2[i][j]; delete[] mat_cov_2[i]; pmi++; } delete[] mat_cov_2; } clear_matrix(denominator,h_size-1,h_size-1); delete[] numerator; } // a single Newton-Raphson iteration void IQLAEstimator::NR_Iteration_new(){ int h_size = hap_freq_table.size(); int col = geno_freq_table.size(); for(int i=0;i<h_size-1;i++) memset(rst[i],0,col*sizeof(double)); // record derivative for each pair of haplotypes int loc = 0; map<int, double>::iterator iter=hap_freq_table.begin(); while(iter!=hap_freq_table.end()){ freq[loc] = iter->second; loc++; iter++; } int h_last=0; iter = hap_freq_table.end(); iter--; h_last=iter->first; map<pair<int, int>, double>::iterator giter=geno_freq_table.begin(); while(giter!=geno_freq_table.end()){ pair<int, int> conf = giter->first; loc = 0; iter = hap_freq_table.begin(); while(loc<h_size-1){ int h=iter->first; if(conf.first == conf.second && conf.first == h) rst[loc][hap_pair_map[conf]] = 2*hap_freq_table[h]; else if(conf.first == conf.second && conf.first == h_last) rst[loc][hap_pair_map[conf]] = -2*hap_freq_table[h_last]; else if(conf.first < conf.second && conf.first == h && conf.second != h_last) rst[loc][hap_pair_map[conf]] = 2*hap_freq_table[conf.second]; else if(conf.first < conf.second && conf.second == h) rst[loc][hap_pair_map[conf]] = 2*hap_freq_table[conf.first]; else if(conf.first == h && conf.second == h_last) rst[loc][hap_pair_map[conf]] = 2*(hap_freq_table[h_last]-hap_freq_table[h]); else if(conf.first != h && conf.first != h_last && conf.second == h_last) rst[loc][hap_pair_map[conf]] = -2*hap_freq_table[conf.first]; loc++; iter++; } giter++; } for(int i=0;i<geno_list.size();i++){ if(geno_list[i]->get_num()>0 && geno_list[i]->info==1 && missing_list[geno_missing_map[i]]->dim>0) geno_list[i]->expected_hap_num(freq,geno_freq_table,hap_pair_map,count,missing_list[geno_missing_map[i]]->sites); } for(int i=0;i<missing_list.size();i++){ if(missing_list[i]->get_num()>0 && missing_list[i]->dim>0){ missing_list[i]->set_weight(geno_freq_table,hap_pair_map,count); missing_list[i]->compute_centmatrix(freq,geno_freq_table); missing_list[i]->compute_derivative(hap_freq_table,hap_pair_map,count,rst); } } // compute L matrix int complete_geno = complete_geno_list.size(); for(int i=0;i<complete_geno;i++){ complete_geno_list[i]->expectation(geno_freq_table); complete_geno_list[i]->expectation_IBD1(hap_freq_table); } for(int i=0;i<complete_geno;i++){ memset(L[i],0,complete_geno*sizeof(double)); L[i][i] = complete_geno_list[i]->prob_IBD1; const vector<pair<int, int> > hv_1 = complete_geno_list[i]->get_hap_vec(); for(int j=i+1;j<complete_geno;j++){ const vector<pair<int, int> > hv_2 = complete_geno_list[j]->get_hap_vec(); for(int s=0;s<hv_1.size();s++) for(int t=0;t<hv_2.size();t++) L[i][j]+=expfortwo(hv_1[s],hv_2[t],hap_freq_table); } } // construct V int v_size = pair_missing_v.size(); if(v_size>0){ map<int, int>::iterator pi=pair_missing_v.begin(); for(int i=0;i<v_size;i++){ pi->second = i; int mis = pi->first; int dim_1 = missing_list[mis]->dim; for(int j=0;j<dim_1;j++) for(int k=0;k<complete_geno;k++) V[i][j][k] = missing_list[mis]->weight[j][missing_list[mis]->mapping_function[complete_geno_list[k]->gname]]; pi++; } } // compute VLV^T-hh^T, and VDV^T-hh^T for all pair of missing pattern int IBD1_size = pair_missing_IBD1.size(); int IBD2_size = pair_missing_IBD2.size(); if(IBD1_size>0){ map<pair<int, int>, int>::iterator pmi=pair_missing_IBD1.begin(); for(int i=0;i<IBD1_size;i++){ pmi->second = i; pair<int, int> mispair = pmi->first; int dim_1 = missing_list[mispair.first]->dim; int dim_2 = missing_list[mispair.second]->dim; for(int j=0;j<dim_1;j++) memset(mat_cov_1[i][j],0,dim_2*sizeof(double)); pmi++; } pmi=pair_missing_IBD1.begin(); for(int i=0;i<IBD1_size;i++){ pair<int, int> mispair = pmi->first; if(mispair.first==mispair.second){ int dim_1 = missing_list[mispair.first]->dim; int it_1 = pair_missing_v[mispair.first]; for(int j=0;j<dim_1;j++){ for(int s=0;s<complete_geno;s++){ mat_cov_1[i][j][j]+=V[it_1][j][s]*V[it_1][j][s]*L[s][s]; for(int t=s+1;t<complete_geno;t++) mat_cov_1[i][j][j]+=2*V[it_1][j][s]*V[it_1][j][t]*L[s][t]; } mat_cov_1[i][j][j]-=freq[missing_list[mispair.first]->sites[j]]*freq[missing_list[mispair.second]->sites[j]]; for(int k=j+1;k<dim_1;k++){ for(int s=0;s<complete_geno;s++){ mat_cov_1[i][j][k]+=V[it_1][j][s]*V[it_1][k][s]*L[s][s]; for(int t=s+1;t<complete_geno;t++) mat_cov_1[i][j][k]+=(V[it_1][j][s]*V[it_1][k][t]+V[it_1][j][t]*V[it_1][k][s])*L[s][t]; } mat_cov_1[i][j][k]-=freq[missing_list[mispair.first]->sites[j]]*freq[missing_list[mispair.second]->sites[k]]; mat_cov_1[i][k][j] = mat_cov_1[i][j][k]; } } } else{ int dim_1 = missing_list[mispair.first]->dim; int dim_2 = missing_list[mispair.second]->dim; int it_1 = pair_missing_v[mispair.first]; int it_2 = pair_missing_v[mispair.second]; for(int j=0;j<dim_1;j++) for(int k=0;k<dim_2;k++){ for(int s=0;s<complete_geno;s++){ mat_cov_1[i][j][k]+=V[it_1][j][s]*V[it_2][k][s]*L[s][s]; for(int t=s+1;t<complete_geno;t++) mat_cov_1[i][j][k]+=(V[it_1][j][s]*V[it_2][k][t]+V[it_1][j][t]*V[it_2][k][s])*L[s][t]; } mat_cov_1[i][j][k]-=freq[missing_list[mispair.first]->sites[j]]*freq[missing_list[mispair.second]->sites[k]]; } } pmi++; } } if(IBD2_size>0){ map<pair<int, int>, int>::iterator pmi=pair_missing_IBD2.begin(); for(int i=0;i<IBD2_size;i++){ pmi->second = i; pair<int, int> mispair = pmi->first; int dim_1 = missing_list[mispair.first]->dim; int dim_2 = missing_list[mispair.second]->dim; for(int j=0;j<dim_1;j++) memset(mat_cov_2[i][j],0,dim_2*sizeof(double)); pmi++; } pmi=pair_missing_IBD2.begin(); for(int i=0;i<IBD2_size;i++){ pair<int, int> mispair = pmi->first; if(mispair.first==mispair.second){ int dim_1 = missing_list[mispair.first]->dim; int it_1 = pair_missing_v[mispair.first]; for(int j=0;j<dim_1;j++){ for(int s=0;s<complete_geno;s++) mat_cov_2[i][j][j]+=V[it_1][j][s]*V[it_1][j][s]*complete_geno_list[s]->prob; mat_cov_2[i][j][j]-=freq[missing_list[mispair.first]->sites[j]]*freq[missing_list[mispair.second]->sites[j]]; for(int k=j+1;k<dim_1;k++){ for(int s=0;s<complete_geno;s++) mat_cov_2[i][j][k]+=V[it_1][j][s]*V[it_1][k][s]*complete_geno_list[s]->prob; mat_cov_2[i][j][k]-=freq[missing_list[mispair.first]->sites[j]]*freq[missing_list[mispair.second]->sites[k]]; mat_cov_2[i][k][j] = mat_cov_2[i][j][k]; } } } else{ int dim_1 = missing_list[mispair.first]->dim; int dim_2 = missing_list[mispair.second]->dim; int it_1 = pair_missing_v[mispair.first]; int it_2 = pair_missing_v[mispair.second]; for(int j=0;j<dim_1;j++) for(int k=0;k<dim_2;k++){ for(int s=0;s<complete_geno;s++) mat_cov_2[i][j][k]+=V[it_1][j][s]*V[it_2][k][s]*complete_geno_list[s]->prob; mat_cov_2[i][j][k]-=freq[missing_list[mispair.first]->sites[j]]*freq[missing_list[mispair.second]->sites[k]]; } } pmi++; } } for(int i=0;i<h_size-1;i++) memset(denominator[i],0,(h_size-1)*sizeof(double)); // F_p^T Omega^(-1) F_p memset(numerator,0,(h_size-1)*sizeof(double)); // F_p^T Omega^(-1) (Z-\mu) for(int fam=0;fam<fam_list.size();fam++){ if(fam_list[fam]->N_typed>0){ int cat; int mis_cat; int dim=0; for(int i=0;i<fam_list[fam]->N;i++){ cat = fam_list[fam]->cat[i]; if(geno_list[cat]->info==1){ mis_cat = fam_list[fam]->mis_cat[i]; dim+=missing_list[mis_cat]->dim; } } if(dim<=0) continue; for(int i=0;i<tot_dim;i++){ memset(cholent[i],0,h_size*sizeof(double)); memset(chol[i],0,tot_dim*sizeof(double)); memset(cholaug[i],0,h_size*sizeof(double)); memset(covMatrix[i],0,tot_dim*sizeof(double)); } // populate covariance matrix int nb1 = 0; int nb2 = 0; int ind_1, ind_2; int dim_1, dim_2; int cat_1, cat_2; int mis_cat_1, mis_cat_2; double IBD_2, IBD_1; map<pair<int, int>, double>::iterator iterk; map<pair<int, int>, double>::iterator iter1; map<pair<int, int>, double>::iterator iter2; double value; flag_pd = 1; for(int i=0;i<fam_list[fam]->N;i++){ cat_1 = fam_list[fam]->cat[i]; mis_cat_1 = fam_list[fam]->mis_cat[i]; dim_1 = missing_list[mis_cat_1]->dim; if(dim_1 > 0 && geno_list[cat_1]->info==1){ nb2 = nb1; ind_1 = fam_list[fam]->fam_member[i][0]; ind_2 = ind_1; iterk = (fam_list[fam]->kinship_coeff).find(pair<int, int>(ind_1,ind_2)); if(iterk == (fam_list[fam]->kinship_coeff).end()){ printf("No IBD coefficient for individual %d from family %d. Please check...\n\n", ind_1,fam_list[fam]->fam_id); exit(1); } for(int j=0;j<dim_1;j++) for(int k=0;k<dim_1;k++) covMatrix[nb1+j][nb2+k] = missing_list[mis_cat_1]->centmatrix[j][k]; nb2+=dim_1; for(int j=i+1;j<fam_list[fam]->N;j++){ cat_2 = fam_list[fam]->cat[j]; mis_cat_2 = fam_list[fam]->mis_cat[j]; dim_2 = missing_list[mis_cat_2]->dim; if(dim_2 > 0 && geno_list[cat_2]->info==1){ ind_2 = fam_list[fam]->fam_member[j][0]; iterk = (fam_list[fam]->kinship_coeff).find(pair<int, int>(ind_1,ind_2)); if(iterk == (fam_list[fam]->kinship_coeff).end()){ printf("No IBD coefficient between individual %d and individual %d from family %d. Please check...\n\n", ind_1,ind_2,fam_list[fam]->fam_id); exit(1); } if(iterk->second>1e-8){ // i is related to j iter2 = (fam_list[fam]->IBD2_coeff).find(pair<int, int>(ind_1,ind_2)); iter1 = (fam_list[fam]->IBD1_coeff).find(pair<int, int>(ind_1,ind_2)); IBD_2 = iter2->second; IBD_1 = iter1->second; if(mis_cat_1==mis_cat_2){ if(IBD_1>1e-8){ int tic = pair_missing_IBD1[pair<int, int>(mis_cat_1,mis_cat_2)]; for(int k=0;k<dim_1;k++) for(int l=k;l<dim_2;l++) covMatrix[nb1+k][nb2+l]+=IBD_1*mat_cov_1[tic][k][l]; } if(IBD_2>1e-8){ int tic = pair_missing_IBD2[pair<int, int>(mis_cat_1,mis_cat_2)]; for(int k=0;k<dim_1;k++) for(int l=k;l<dim_2;l++) covMatrix[nb1+k][nb2+l]+=IBD_2*mat_cov_2[tic][k][l]; } for(int k=0;k<dim_1;k++) for(int l=k+1;l<dim_2;l++) covMatrix[nb1+l][nb2+k] = covMatrix[nb1+k][nb2+l]; } else if(mis_cat_1<mis_cat_2){ if(IBD_1>1e-8){ int tic = pair_missing_IBD1[pair<int, int>(mis_cat_1,mis_cat_2)]; for(int k=0;k<dim_1;k++) for(int l=0;l<dim_2;l++) covMatrix[nb1+k][nb2+l]+=IBD_1*mat_cov_1[tic][k][l]; } if(IBD_2>1e-8){ int tic = pair_missing_IBD2[pair<int, int>(mis_cat_1,mis_cat_2)]; for(int k=0;k<dim_1;k++) for(int l=0;l<dim_2;l++) covMatrix[nb1+k][nb2+l]+=IBD_2*mat_cov_2[tic][k][l]; } } else if(mis_cat_1>mis_cat_2){ if(IBD_1>1e-8){ int tic = pair_missing_IBD1[pair<int, int>(mis_cat_2,mis_cat_1)]; for(int k=0;k<dim_1;k++) for(int l=0;l<dim_2;l++) covMatrix[nb1+k][nb2+l]+=IBD_1*mat_cov_1[tic][l][k]; } if(IBD_2>1e-8){ int tic = pair_missing_IBD2[pair<int, int>(mis_cat_2,mis_cat_1)]; for(int k=0;k<dim_1;k++) for(int l=0;l<dim_2;l++) covMatrix[nb1+k][nb2+l]+=IBD_2*mat_cov_2[tic][l][k]; } } for(int k=0;k<dim_1;k++) for(int l=0;l<dim_2;l++) covMatrix[nb2+l][nb1+k] = covMatrix[nb1+k][nb2+l]; } // i, j related nb2+=dim_2; } // j is typed } // complete j const vector<double> hap_num = geno_list[cat_1]->get_hap_num(); for(int j=0;j<dim_1;j++){ for(int k=0;k<h_size-1;k++) cholent[nb1+j][k] = missing_list[mis_cat_1]->derivative[j][k]; cholent[nb1+j][h_size-1] = hap_num[j]; } nb1+=dim_1; } // i is typed } // complete i if(cholesky(covMatrix,dim,cholent,h_size,chol,cholaug,0) != 1){ printf("cholesky decomposition of the cov matrix failed for family %d. \n", fam_list[fam]->fam_id); flag_pd = 0; exit(1); } if(flag_pd==1){ double **s = self_colmultiply(cholaug,dim,1,h_size-1,1,h_size-1); double **t = self_colmultiply(cholaug,dim,1,h_size-1,h_size,h_size); for(int i=0;i<h_size-1;i++) for(int j=0;j<h_size-1;j++) denominator[i][j]+=s[i][j]; for(int i=0;i<h_size-1;i++) numerator[i]+=t[i][0]; clear_matrix(s,h_size-1,h_size-1); clear_matrix(t,h_size-1,1); } else{ double var; double &determ = var; double **v = inverse(covMatrix,dim,determ); double **s = quadratic_product(cholent,dim,h_size,v,dim,dim,cholent,dim,h_size); for(int i=0;i<h_size-1;i++) for(int j=0;j<h_size-1;j++) denominator[i][j]+=s[i][j]; for(int i=0;i<h_size-1;i++) numerator[i]+=s[i][h_size-1]; clear_matrix(s,h_size,h_size); clear_matrix(v,dim,dim); } } } flag_pd = 1; double **cholentmain = new double*[h_size-1]; for(int i=0;i<h_size-1;i++){ cholentmain[i] = new double[h_size]; memset(cholentmain[i],0,h_size*sizeof(double)); } double **cholmain = new double*[h_size-1]; for(int i=0;i<h_size-1;i++){ cholmain[i] = new double[h_size-1]; memset(cholmain[i],0,(h_size-1)*sizeof(double)); } double **cholaugmain = new double*[h_size-1]; for(int i=0;i<h_size-1;i++){ cholaugmain[i] = new double[h_size]; memset(cholaugmain[i],0,h_size*sizeof(double)); } for(int i=0;i<h_size-1;i++){ cholentmain[i][i] = 1; cholentmain[i][h_size-1] = numerator[i]; } if(cholesky(denominator,h_size-1,cholentmain,h_size,cholmain,cholaugmain,0) != 1){ printf("cholesky decomposition of the denominator matrix failed.\n"); flag_pd = 0; exit(1); } if(flag_pd==1){ double **s = self_colmultiply(cholaugmain,h_size-1,1,h_size-1,h_size,h_size); iter=hap_freq_table.begin(); double n=0; for(int i=0;i<h_size-1;i++){ iter->second+=s[i][0]; n+=iter->second; iter++; } iter->second = 1-n; clear_matrix(s,h_size-1,1); } else{ double var; double &determ = var; double **v = inverse(denominator,h_size-1,determ); double **w = new double*[h_size-1]; for(int i=0;i<h_size-1;i++){ w[i] = new double[1]; w[i][0] = numerator[i]; } double **s = multiply(v,h_size-1,h_size-1,w,h_size-1,1); iter=hap_freq_table.begin(); double n=0; for(int i=0;i<h_size-1;i++){ iter->second+=s[i][0]; n+=iter->second; iter++; } iter->second = 1-n; clear_matrix(s,h_size-1,1); clear_matrix(v,h_size-1,h_size-1); clear_matrix(w,h_size-1,1); } clear_matrix(cholentmain,h_size-1,h_size); clear_matrix(cholmain,h_size-1,h_size-1); clear_matrix(cholaugmain,h_size-1,h_size); update_geno_freq(); } void IQLAEstimator::Score_test_new(){ int h_size = hap_freq_table.size(); int col = geno_freq_table.size(); for(int i=0;i<h_size-1;i++) memset(rst[i],0,col*sizeof(double)); // record derivative for each pair of haplotypes int loc = 0; map<int, double>::iterator iter=hap_freq_table.begin(); while(iter!=hap_freq_table.end()){ freq[loc] = iter->second; loc++; iter++; } int h_last=0; iter = hap_freq_table.end(); iter--; h_last=iter->first; map<pair<int, int>, double>::iterator giter=geno_freq_table.begin(); while(giter!=geno_freq_table.end()){ pair<int, int> conf = giter->first; loc = 0; iter = hap_freq_table.begin(); while(loc<h_size-1){ int h=iter->first; if(conf.first == conf.second && conf.first == h) rst[loc][hap_pair_map[conf]] = 2*hap_freq_table[h]; else if(conf.first == conf.second && conf.first == h_last) rst[loc][hap_pair_map[conf]] = -2*hap_freq_table[h_last]; else if(conf.first < conf.second && conf.first == h && conf.second != h_last) rst[loc][hap_pair_map[conf]] = 2*hap_freq_table[conf.second]; else if(conf.first < conf.second && conf.second == h) rst[loc][hap_pair_map[conf]] = 2*hap_freq_table[conf.first]; else if(conf.first == h && conf.second == h_last) rst[loc][hap_pair_map[conf]] = 2*(hap_freq_table[h_last]-hap_freq_table[h]); else if(conf.first != h && conf.first != h_last && conf.second == h_last) rst[loc][hap_pair_map[conf]] = -2*hap_freq_table[conf.first]; loc++; iter++; } giter++; } for(int i=0;i<geno_list.size();i++){ if(geno_list[i]->get_num()>0 && geno_list[i]->info==1 && missing_list[geno_missing_map[i]]->dim>0) geno_list[i]->expected_hap_num(freq,geno_freq_table,hap_pair_map,count,missing_list[geno_missing_map[i]]->sites); } for(int i=0;i<missing_list.size();i++){ if(missing_list[i]->get_num()>0 && missing_list[i]->dim>0){ missing_list[i]->set_weight(geno_freq_table,hap_pair_map,count); missing_list[i]->compute_centmatrix(freq,geno_freq_table); missing_list[i]->compute_derivative(hap_freq_table,hap_pair_map,count,rst); } } // compute L matrix int complete_geno = complete_geno_list.size(); for(int i=0;i<complete_geno;i++){ complete_geno_list[i]->expectation(geno_freq_table); complete_geno_list[i]->expectation_IBD1(hap_freq_table); } for(int i=0;i<complete_geno;i++){ memset(L[i],0,complete_geno*sizeof(double)); L[i][i] = complete_geno_list[i]->prob_IBD1; const vector<pair<int, int> > hv_1 = complete_geno_list[i]->get_hap_vec(); for(int j=i+1;j<complete_geno;j++){ const vector<pair<int, int> > hv_2 = complete_geno_list[j]->get_hap_vec(); for(int s=0;s<hv_1.size();s++) for(int t=0;t<hv_2.size();t++) L[i][j]+=expfortwo(hv_1[s],hv_2[t],hap_freq_table); } } // construct V int v_size = pair_missing_v.size(); if(v_size>0){ map<int, int>::iterator pi=pair_missing_v.begin(); for(int i=0;i<v_size;i++){ pi->second = i; int mis = pi->first; int dim_1 = missing_list[mis]->dim; for(int j=0;j<dim_1;j++) for(int k=0;k<complete_geno;k++) V[i][j][k] = missing_list[mis]->weight[j][missing_list[mis]->mapping_function[complete_geno_list[k]->gname]]; pi++; } } // compute VLV^T-hh^T, and VDV^T-hh^T for all pair of missing pattern int IBD1_size = pair_missing_IBD1.size(); int IBD2_size = pair_missing_IBD2.size(); if(IBD1_size>0){ map<pair<int, int>, int>::iterator pmi=pair_missing_IBD1.begin(); for(int i=0;i<IBD1_size;i++){ pmi->second = i; pair<int, int> mispair = pmi->first; int dim_1 = missing_list[mispair.first]->dim; int dim_2 = missing_list[mispair.second]->dim; for(int j=0;j<dim_1;j++) memset(mat_cov_1[i][j],0,dim_2*sizeof(double)); pmi++; } pmi=pair_missing_IBD1.begin(); for(int i=0;i<IBD1_size;i++){ pair<int, int> mispair = pmi->first; if(mispair.first==mispair.second){ int dim_1 = missing_list[mispair.first]->dim; int it_1 = pair_missing_v[mispair.first]; for(int j=0;j<dim_1;j++){ for(int s=0;s<complete_geno;s++){ mat_cov_1[i][j][j]+=V[it_1][j][s]*V[it_1][j][s]*L[s][s]; for(int t=s+1;t<complete_geno;t++) mat_cov_1[i][j][j]+=2*V[it_1][j][s]*V[it_1][j][t]*L[s][t]; } mat_cov_1[i][j][j]-=freq[missing_list[mispair.first]->sites[j]]*freq[missing_list[mispair.second]->sites[j]]; for(int k=j+1;k<dim_1;k++){ for(int s=0;s<complete_geno;s++){ mat_cov_1[i][j][k]+=V[it_1][j][s]*V[it_1][k][s]*L[s][s]; for(int t=s+1;t<complete_geno;t++) mat_cov_1[i][j][k]+=(V[it_1][j][s]*V[it_1][k][t]+V[it_1][j][t]*V[it_1][k][s])*L[s][t]; } mat_cov_1[i][j][k]-=freq[missing_list[mispair.first]->sites[j]]*freq[missing_list[mispair.second]->sites[k]]; mat_cov_1[i][k][j] = mat_cov_1[i][j][k]; } } } else{ int dim_1 = missing_list[mispair.first]->dim; int dim_2 = missing_list[mispair.second]->dim; int it_1 = pair_missing_v[mispair.first]; int it_2 = pair_missing_v[mispair.second]; for(int j=0;j<dim_1;j++) for(int k=0;k<dim_2;k++){ for(int s=0;s<complete_geno;s++){ mat_cov_1[i][j][k]+=V[it_1][j][s]*V[it_2][k][s]*L[s][s]; for(int t=s+1;t<complete_geno;t++) mat_cov_1[i][j][k]+=(V[it_1][j][s]*V[it_2][k][t]+V[it_1][j][t]*V[it_2][k][s])*L[s][t]; } mat_cov_1[i][j][k]-=freq[missing_list[mispair.first]->sites[j]]*freq[missing_list[mispair.second]->sites[k]]; } } pmi++; } } if(IBD2_size>0){ map<pair<int, int>, int>::iterator pmi=pair_missing_IBD2.begin(); for(int i=0;i<IBD2_size;i++){ pmi->second = i; pair<int, int> mispair = pmi->first; int dim_1 = missing_list[mispair.first]->dim; int dim_2 = missing_list[mispair.second]->dim; for(int j=0;j<dim_1;j++) memset(mat_cov_2[i][j],0,dim_2*sizeof(double)); pmi++; } pmi=pair_missing_IBD2.begin(); for(int i=0;i<IBD2_size;i++){ pair<int, int> mispair = pmi->first; if(mispair.first==mispair.second){ int dim_1 = missing_list[mispair.first]->dim; int it_1 = pair_missing_v[mispair.first]; for(int j=0;j<dim_1;j++){ for(int s=0;s<complete_geno;s++) mat_cov_2[i][j][j]+=V[it_1][j][s]*V[it_1][j][s]*complete_geno_list[s]->prob; mat_cov_2[i][j][j]-=freq[missing_list[mispair.first]->sites[j]]*freq[missing_list[mispair.second]->sites[j]]; for(int k=j+1;k<dim_1;k++){ for(int s=0;s<complete_geno;s++) mat_cov_2[i][j][k]+=V[it_1][j][s]*V[it_1][k][s]*complete_geno_list[s]->prob; mat_cov_2[i][j][k]-=freq[missing_list[mispair.first]->sites[j]]*freq[missing_list[mispair.second]->sites[k]]; mat_cov_2[i][k][j] = mat_cov_2[i][j][k]; } } } else{ int dim_1 = missing_list[mispair.first]->dim; int dim_2 = missing_list[mispair.second]->dim; int it_1 = pair_missing_v[mispair.first]; int it_2 = pair_missing_v[mispair.second]; for(int j=0;j<dim_1;j++) for(int k=0;k<dim_2;k++){ for(int s=0;s<complete_geno;s++) mat_cov_2[i][j][k]+=V[it_1][j][s]*V[it_2][k][s]*complete_geno_list[s]->prob; mat_cov_2[i][j][k]-=freq[missing_list[mispair.first]->sites[j]]*freq[missing_list[mispair.second]->sites[k]]; } } pmi++; } } for(int i=0;i<h_size-1;i++) memset(denominator[i],0,(h_size-1)*sizeof(double)); // F_p^T Omega^(-1) F_p double **A1 = new double*[h_size-1]; for(int i=0;i<h_size-1;i++){ A1[i] = new double[h_size-1]; memset(A1[i],0,(h_size-1)*sizeof(double)); } // F_r^T Omega^(-1) F_r double **A2 = new double*[h_size-1]; for(int i=0;i<h_size-1;i++){ A2[i] = new double[h_size-1]; memset(A2[i],0,(h_size-1)*sizeof(double)); } // F_p^T Omega^(-1) F_r memset(numerator,0,(h_size-1)*sizeof(double)); // F_r^T Omega^(-1) (Z-\mu) for(int i=0;i<tot_dim;i++){ delete[] cholent[i]; cholent[i] = new double[2*h_size-1]; memset(cholent[i],0,(2*h_size-1)*sizeof(double)); } for(int i=0;i<tot_dim;i++){ delete[] cholaug[i]; cholaug[i] = new double[2*h_size-1]; memset(cholaug[i],0,(2*h_size-1)*sizeof(double)); } for(int fam=0;fam<fam_list.size();fam++){ if(fam_list[fam]->N_typed>0){ int cat; int mis_cat; int dim=0; for(int i=0;i<fam_list[fam]->N;i++){ cat = fam_list[fam]->cat[i]; if(geno_list[cat]->info==1){ mis_cat = fam_list[fam]->mis_cat[i]; dim+=missing_list[mis_cat]->dim; } } if(dim<=0) continue; for(int i=0;i<tot_dim;i++){ memset(cholent[i],0,(2*h_size-1)*sizeof(double)); memset(chol[i],0,tot_dim*sizeof(double)); memset(cholaug[i],0,(2*h_size-1)*sizeof(double)); memset(covMatrix[i],0,tot_dim*sizeof(double)); } // populate covariance matrix int nb1 = 0; int nb2 = 0; int ind_1, ind_2; int dim_1, dim_2; int cat_1, cat_2; int mis_cat_1, mis_cat_2; double IBD_2, IBD_1; map<pair<int, int>, double>::iterator iterk; map<pair<int, int>, double>::iterator iter1; map<pair<int, int>, double>::iterator iter2; double value; double test_weight; flag_pd = 1; for(int i=0;i<fam_list[fam]->N;i++){ cat_1 = fam_list[fam]->cat[i]; mis_cat_1 = fam_list[fam]->mis_cat[i]; dim_1 = missing_list[mis_cat_1]->dim; if(dim_1 > 0 && geno_list[cat_1]->info==1){ nb2 = nb1; ind_1 = fam_list[fam]->fam_member[i][0]; ind_2 = ind_1; iterk = (fam_list[fam]->kinship_coeff).find(pair<int, int>(ind_1,ind_2)); if(iterk == (fam_list[fam]->kinship_coeff).end()){ printf("No IBD coefficient for individual %d from family %d. Please check...\n\n", ind_1,fam_list[fam]->fam_id); exit(1); } for(int j=0;j<dim_1;j++) for(int k=0;k<dim_1;k++) covMatrix[nb1+j][nb2+k] = missing_list[mis_cat_1]->centmatrix[j][k]; nb2+=dim_1; for(int j=i+1;j<fam_list[fam]->N;j++){ cat_2 = fam_list[fam]->cat[j]; mis_cat_2 = fam_list[fam]->mis_cat[j]; dim_2 = missing_list[mis_cat_2]->dim; if(dim_2 > 0 && geno_list[cat_2]->info==1){ ind_2 = fam_list[fam]->fam_member[j][0]; iterk = (fam_list[fam]->kinship_coeff).find(pair<int, int>(ind_1,ind_2)); if(iterk == (fam_list[fam]->kinship_coeff).end()){ printf("No IBD coefficient between individual %d and individual %d from family %d. Please check...\n\n", ind_1,ind_2,fam_list[fam]->fam_id); exit(1); } if(iterk->second>1e-8){ // i is related to j iter2 = (fam_list[fam]->IBD2_coeff).find(pair<int, int>(ind_1,ind_2)); iter1 = (fam_list[fam]->IBD1_coeff).find(pair<int, int>(ind_1,ind_2)); IBD_2 = iter2->second; IBD_1 = iter1->second; if(mis_cat_1==mis_cat_2){ if(IBD_1>1e-8){ int tic = pair_missing_IBD1[pair<int, int>(mis_cat_1,mis_cat_2)]; for(int k=0;k<dim_1;k++) for(int l=k;l<dim_2;l++) covMatrix[nb1+k][nb2+l]+=IBD_1*mat_cov_1[tic][k][l]; } if(IBD_2>1e-8){ int tic = pair_missing_IBD2[pair<int, int>(mis_cat_1,mis_cat_2)]; for(int k=0;k<dim_1;k++) for(int l=k;l<dim_2;l++) covMatrix[nb1+k][nb2+l]+=IBD_2*mat_cov_2[tic][k][l]; } for(int k=0;k<dim_1;k++) for(int l=k+1;l<dim_2;l++) covMatrix[nb1+l][nb2+k] = covMatrix[nb1+k][nb2+l]; } else if(mis_cat_1<mis_cat_2){ if(IBD_1>1e-8){ int tic = pair_missing_IBD1[pair<int, int>(mis_cat_1,mis_cat_2)]; for(int k=0;k<dim_1;k++) for(int l=0;l<dim_2;l++) covMatrix[nb1+k][nb2+l]+=IBD_1*mat_cov_1[tic][k][l]; } if(IBD_2>1e-8){ int tic = pair_missing_IBD2[pair<int, int>(mis_cat_1,mis_cat_2)]; for(int k=0;k<dim_1;k++) for(int l=0;l<dim_2;l++) covMatrix[nb1+k][nb2+l]+=IBD_2*mat_cov_2[tic][k][l]; } } else if(mis_cat_1>mis_cat_2){ if(IBD_1>1e-8){ int tic = pair_missing_IBD1[pair<int, int>(mis_cat_2,mis_cat_1)]; for(int k=0;k<dim_1;k++) for(int l=0;l<dim_2;l++) covMatrix[nb1+k][nb2+l]+=IBD_1*mat_cov_1[tic][l][k]; } if(IBD_2>1e-8){ int tic = pair_missing_IBD2[pair<int, int>(mis_cat_2,mis_cat_1)]; for(int k=0;k<dim_1;k++) for(int l=0;l<dim_2;l++) covMatrix[nb1+k][nb2+l]+=IBD_2*mat_cov_2[tic][l][k]; } } for(int k=0;k<dim_1;k++) for(int l=0;l<dim_2;l++) covMatrix[nb2+l][nb1+k] = covMatrix[nb1+k][nb2+l]; } // i, j related nb2+=dim_2; } // j is typed } // complete j const vector<double> hap_num = geno_list[cat_1]->get_hap_num(); test_weight = fam_list[fam]->test_weight[i]; for(int j=0;j<dim_1;j++){ for(int k=0;k<h_size-1;k++){ cholent[nb1+j][k] = missing_list[mis_cat_1]->derivative[j][k]; cholent[nb1+j][h_size+k] = test_weight*cholent[nb1+j][k]; } cholent[nb1+j][h_size-1] = hap_num[j]; } nb1+=dim_1; } // i is typed } // complete i if(cholesky(covMatrix,dim,cholent,2*h_size-1,chol,cholaug,0) != 1){ printf("cholesky decomposition of the cov matrix failed for family %d. \n", fam_list[fam]->fam_id); flag_pd = 0; exit(1); } if(flag_pd==1){ double **s = self_colmultiply(cholaug,dim,1,h_size-1,1,h_size-1); double **t = self_colmultiply(cholaug,dim,h_size+1,2*h_size-1,h_size+1,2*h_size-1); double **u = self_colmultiply(cholaug,dim,1,h_size-1,h_size+1,2*h_size-1); double **v = self_colmultiply(cholaug,dim,h_size+1,2*h_size-1,h_size,h_size); for(int i=0;i<h_size-1;i++) for(int j=0;j<h_size-1;j++) denominator[i][j]+=s[i][j]; for(int i=0;i<h_size-1;i++) for(int j=0;j<h_size-1;j++) A1[i][j]+=t[i][j]; for(int i=0;i<h_size-1;i++) for(int j=0;j<h_size-1;j++) A2[i][j]+=u[i][j]; for(int i=0;i<h_size-1;i++) numerator[i]+=v[i][0]; clear_matrix(s,h_size-1,h_size-1); clear_matrix(t,h_size-1,h_size-1); clear_matrix(u,h_size-1,h_size-1); clear_matrix(v,h_size-1,1); } else{ double var; double &determ = var; double **v = inverse(covMatrix,dim,determ); double **s = quadratic_product(cholent,dim,2*h_size-1,v,dim,dim,cholent,dim,2*h_size-1); for(int i=0;i<h_size-1;i++) for(int j=0;j<h_size-1;j++) denominator[i][j]+=s[i][j]; for(int i=0;i<h_size-1;i++) for(int j=0;j<h_size-1;j++) A1[i][j]+=s[i+h_size][j+h_size]; for(int i=0;i<h_size-1;i++) for(int j=0;j<h_size-1;j++) A2[i][j]+=s[i][j+h_size]; for(int i=0;i<h_size-1;i++) numerator[i]+=s[i+h_size][h_size-1]; clear_matrix(s,2*h_size-1,2*h_size-1); clear_matrix(v,dim,dim); } } } // IQLS test flag_pd = 1; double **cholmain = new double*[h_size-1]; for(int i=0;i<h_size-1;i++){ cholmain[i] = new double[h_size-1]; memset(cholmain[i],0,(h_size-1)*sizeof(double)); } double **cholaugmain = new double*[h_size-1]; for(int i=0;i<h_size-1;i++){ cholaugmain[i] = new double[h_size-1]; memset(cholaugmain[i],0,(h_size-1)*sizeof(double)); } if(cholesky(denominator,h_size-1,A2,h_size-1,cholmain,cholaugmain,0) != 1){ printf("cholesky decomposition of the denominator matrix failed.\n"); flag_pd = 0; exit(1); } double **orig = new double*[h_size-1]; for(int i=0;i<h_size-1;i++){ orig[i] = new double[h_size-1]; memset(orig[i],0,(h_size-1)*sizeof(double)); } if(flag_pd==1){ double **s = self_colmultiply(cholaugmain,h_size-1,1,h_size-1,1,h_size-1); for(int i=0;i<h_size-1;i++) for(int j=0;j<h_size-1;j++) orig[i][j] = A1[i][j]-s[i][j]; clear_matrix(s,h_size-1,h_size-1); } else{ double var; double &determ = var; double **v = inverse(denominator,h_size-1,determ); double **s = quadratic_product(A2,h_size-1,h_size-1,v,h_size-1,h_size-1,A2,h_size-1,h_size-1); for(int i=0;i<h_size-1;i++) for(int j=0;j<h_size-1;j++) orig[i][j] = A1[i][j]-s[i][j]; clear_matrix(s,h_size-1,h_size-1); clear_matrix(v,h_size-1,h_size-1); } // MIQLS flag_pd = 1; double **cholentmain = new double*[h_size-1]; for(int i=0;i<h_size-1;i++){ cholentmain[i] = new double[1]; cholentmain[i][0] = numerator[i]; } for(int i=0;i<h_size-1;i++) memset(cholmain[i],0,(h_size-1)*sizeof(double)); for(int i=0;i<h_size-1;i++){ delete[] cholaugmain[i]; cholaugmain[i] = new double[1]; memset(cholaugmain[i],0,sizeof(double)); } if(cholesky(orig,h_size-1,cholentmain,1,cholmain,cholaugmain,0) != 1){ printf("cholesky decomposition of the denominator matrix failed.\n"); flag_pd = 0; exit(1); } if(flag_pd==1){ double **s = self_colmultiply(cholaugmain,h_size-1,1,1,1,1); test = s[0][0]; clear_matrix(s,1,1); } else{ double var; double &determ = var; double **v = inverse(orig,h_size-1,determ); double **s = quadratic_product(cholentmain,h_size-1,1,v,h_size-1,h_size-1,cholentmain,h_size-1,1); test = s[0][0]; clear_matrix(s,1,1); clear_matrix(v,h_size-1,h_size-1); } clear_matrix(cholentmain,h_size-1,1); clear_matrix(cholmain,h_size-1,h_size-1); clear_matrix(cholaugmain,h_size-1,1); clear_matrix(orig,h_size-1,h_size-1); df = h_size-1; pvalue = pochisq(test,df); clear_matrix(A1,h_size-1,h_size-1); clear_matrix(A2,h_size-1,h_size-1); } int IQLAEstimator::Score_test_cluster_new(vector<int *> *cluster){ col_1df = cluster->size()-1; int col_clus = col_1df; if((*cluster)[col_clus][0]==0) col_clus--; df = col_clus; if(df<=0){ fprintf(outfp, "The df is %d, please check!\n\n", df); return -1; } int h_size = hap_freq_table.size(); int col = geno_freq_table.size(); for(int i=0;i<h_size-1;i++) memset(rst[i],0,col*sizeof(double)); // record derivative for each pair of haplotypes int loc = 0; map<int, double>::iterator iter=hap_freq_table.begin(); while(iter!=hap_freq_table.end()){ freq[loc] = iter->second; loc++; iter++; } // create a matrix to cluster haplotypes double **mul_clus = new double*[h_size-1]; for(int i=0;i<h_size-1;i++){ mul_clus[i] = new double[col_clus]; memset(mul_clus[i],0,col_clus*sizeof(double)); } int num_hap; double sum_hap; for(int i=0;i<col_clus;i++){ num_hap = (*cluster)[i][0]; if(num_hap==1){ if((*cluster)[i][1]<h_size-1) mul_clus[(*cluster)[i][1]][i] = 1; } else{ sum_hap = 0; for(int j=1;j<=num_hap;j++) sum_hap+=freq[(*cluster)[i][j]]; for(int j=1;j<=num_hap;j++){ if((*cluster)[i][j]<h_size-1) mul_clus[(*cluster)[i][j]][i] = freq[(*cluster)[i][j]]/sum_hap; } } } num_hap = (*cluster)[col_clus][0]; if(num_hap==1){ if((*cluster)[col_clus][1]<h_size-1){ for(int i=0;i<col_clus;i++) mul_clus[(*cluster)[col_clus][1]][i] = -1; } } else{ sum_hap = 0; for(int j=1;j<=num_hap;j++) sum_hap+=freq[(*cluster)[col_clus][j]]; for(int j=1;j<=num_hap;j++){ if((*cluster)[col_clus][j]<h_size-1){ for(int i=0;i<col_clus;i++) mul_clus[(*cluster)[col_clus][j]][i] = -freq[(*cluster)[col_clus][j]]/sum_hap; } } } // create a matrix to use in 1df test double **mul_1df = new double*[h_size-1]; for(int i=0;i<h_size-1;i++){ mul_1df[i] = new double[col_1df]; memset(mul_1df[i],0,col_1df*sizeof(double)); } for(int i=0;i<col_1df;i++){ int h_cur = (*cluster)[i][1]; if(h_cur==h_size-1){ for(int j=0;j<h_size-1;j++) mul_1df[j][i] = -freq[j]/(1-freq[h_cur]); } else{ for(int j=0;j<h_size-1;j++){ if(j==h_cur) mul_1df[j][i] = 1; else mul_1df[j][i] = -freq[j]/(1-freq[h_cur]); } } } int h_last=0; iter = hap_freq_table.end(); iter--; h_last=iter->first; map<pair<int, int>, double>::iterator giter=geno_freq_table.begin(); while(giter!=geno_freq_table.end()){ pair<int, int> conf = giter->first; loc = 0; iter = hap_freq_table.begin(); while(loc<h_size-1){ int h=iter->first; if(conf.first == conf.second && conf.first == h) rst[loc][hap_pair_map[conf]] = 2*hap_freq_table[h]; else if(conf.first == conf.second && conf.first == h_last) rst[loc][hap_pair_map[conf]] = -2*hap_freq_table[h_last]; else if(conf.first < conf.second && conf.first == h && conf.second != h_last) rst[loc][hap_pair_map[conf]] = 2*hap_freq_table[conf.second]; else if(conf.first < conf.second && conf.second == h) rst[loc][hap_pair_map[conf]] = 2*hap_freq_table[conf.first]; else if(conf.first == h && conf.second == h_last) rst[loc][hap_pair_map[conf]] = 2*(hap_freq_table[h_last]-hap_freq_table[h]); else if(conf.first != h && conf.first != h_last && conf.second == h_last) rst[loc][hap_pair_map[conf]] = -2*hap_freq_table[conf.first]; loc++; iter++; } giter++; } for(int i=0;i<geno_list.size();i++){ if(geno_list[i]->get_num()>0 && geno_list[i]->info==1 && missing_list[geno_missing_map[i]]->dim>0) geno_list[i]->expected_hap_num(freq,geno_freq_table,hap_pair_map,count,missing_list[geno_missing_map[i]]->sites); } for(int i=0;i<missing_list.size();i++){ if(missing_list[i]->get_num()>0 && missing_list[i]->dim>0){ missing_list[i]->set_weight(geno_freq_table,hap_pair_map,count); missing_list[i]->compute_centmatrix(freq,geno_freq_table); missing_list[i]->compute_derivative(hap_freq_table,hap_pair_map,count,rst); missing_list[i]->compute_derivative_cluster(hap_freq_table,mul_clus,col_clus); missing_list[i]->compute_derivative_1df(hap_freq_table,mul_1df,col_1df); } } // compute L matrix int complete_geno = complete_geno_list.size(); for(int i=0;i<complete_geno;i++){ complete_geno_list[i]->expectation(geno_freq_table); complete_geno_list[i]->expectation_IBD1(hap_freq_table); } for(int i=0;i<complete_geno;i++){ memset(L[i],0,complete_geno*sizeof(double)); L[i][i] = complete_geno_list[i]->prob_IBD1; const vector<pair<int, int> > hv_1 = complete_geno_list[i]->get_hap_vec(); for(int j=i+1;j<complete_geno;j++){ const vector<pair<int, int> > hv_2 = complete_geno_list[j]->get_hap_vec(); for(int s=0;s<hv_1.size();s++) for(int t=0;t<hv_2.size();t++) L[i][j]+=expfortwo(hv_1[s],hv_2[t],hap_freq_table); } } // construct V int v_size = pair_missing_v.size(); if(v_size>0){ map<int, int>::iterator pi=pair_missing_v.begin(); for(int i=0;i<v_size;i++){ pi->second = i; int mis = pi->first; int dim_1 = missing_list[mis]->dim; for(int j=0;j<dim_1;j++) for(int k=0;k<complete_geno;k++) V[i][j][k] = missing_list[mis]->weight[j][missing_list[mis]->mapping_function[complete_geno_list[k]->gname]]; pi++; } } // compute VLV^T-hh^T, and VDV^T-hh^T for all pair of missing pattern int IBD1_size = pair_missing_IBD1.size(); int IBD2_size = pair_missing_IBD2.size(); if(IBD1_size>0){ map<pair<int, int>, int>::iterator pmi=pair_missing_IBD1.begin(); for(int i=0;i<IBD1_size;i++){ pmi->second = i; pair<int, int> mispair = pmi->first; int dim_1 = missing_list[mispair.first]->dim; int dim_2 = missing_list[mispair.second]->dim; for(int j=0;j<dim_1;j++) memset(mat_cov_1[i][j],0,dim_2*sizeof(double)); pmi++; } pmi=pair_missing_IBD1.begin(); for(int i=0;i<IBD1_size;i++){ pair<int, int> mispair = pmi->first; if(mispair.first==mispair.second){ int dim_1 = missing_list[mispair.first]->dim; int it_1 = pair_missing_v[mispair.first]; for(int j=0;j<dim_1;j++){ for(int s=0;s<complete_geno;s++){ mat_cov_1[i][j][j]+=V[it_1][j][s]*V[it_1][j][s]*L[s][s]; for(int t=s+1;t<complete_geno;t++) mat_cov_1[i][j][j]+=2*V[it_1][j][s]*V[it_1][j][t]*L[s][t]; } mat_cov_1[i][j][j]-=freq[missing_list[mispair.first]->sites[j]]*freq[missing_list[mispair.second]->sites[j]]; for(int k=j+1;k<dim_1;k++){ for(int s=0;s<complete_geno;s++){ mat_cov_1[i][j][k]+=V[it_1][j][s]*V[it_1][k][s]*L[s][s]; for(int t=s+1;t<complete_geno;t++) mat_cov_1[i][j][k]+=(V[it_1][j][s]*V[it_1][k][t]+V[it_1][j][t]*V[it_1][k][s])*L[s][t]; } mat_cov_1[i][j][k]-=freq[missing_list[mispair.first]->sites[j]]*freq[missing_list[mispair.second]->sites[k]]; mat_cov_1[i][k][j] = mat_cov_1[i][j][k]; } } } else{ int dim_1 = missing_list[mispair.first]->dim; int dim_2 = missing_list[mispair.second]->dim; int it_1 = pair_missing_v[mispair.first]; int it_2 = pair_missing_v[mispair.second]; for(int j=0;j<dim_1;j++) for(int k=0;k<dim_2;k++){ for(int s=0;s<complete_geno;s++){ mat_cov_1[i][j][k]+=V[it_1][j][s]*V[it_2][k][s]*L[s][s]; for(int t=s+1;t<complete_geno;t++) mat_cov_1[i][j][k]+=(V[it_1][j][s]*V[it_2][k][t]+V[it_1][j][t]*V[it_2][k][s])*L[s][t]; } mat_cov_1[i][j][k]-=freq[missing_list[mispair.first]->sites[j]]*freq[missing_list[mispair.second]->sites[k]]; } } pmi++; } } if(IBD2_size>0){ map<pair<int, int>, int>::iterator pmi=pair_missing_IBD2.begin(); for(int i=0;i<IBD2_size;i++){ pmi->second = i; pair<int, int> mispair = pmi->first; int dim_1 = missing_list[mispair.first]->dim; int dim_2 = missing_list[mispair.second]->dim; for(int j=0;j<dim_1;j++) memset(mat_cov_2[i][j],0,dim_2*sizeof(double)); pmi++; } pmi=pair_missing_IBD2.begin(); for(int i=0;i<IBD2_size;i++){ pair<int, int> mispair = pmi->first; if(mispair.first==mispair.second){ int dim_1 = missing_list[mispair.first]->dim; int it_1 = pair_missing_v[mispair.first]; for(int j=0;j<dim_1;j++){ for(int s=0;s<complete_geno;s++) mat_cov_2[i][j][j]+=V[it_1][j][s]*V[it_1][j][s]*complete_geno_list[s]->prob; mat_cov_2[i][j][j]-=freq[missing_list[mispair.first]->sites[j]]*freq[missing_list[mispair.second]->sites[j]]; for(int k=j+1;k<dim_1;k++){ for(int s=0;s<complete_geno;s++) mat_cov_2[i][j][k]+=V[it_1][j][s]*V[it_1][k][s]*complete_geno_list[s]->prob; mat_cov_2[i][j][k]-=freq[missing_list[mispair.first]->sites[j]]*freq[missing_list[mispair.second]->sites[k]]; mat_cov_2[i][k][j] = mat_cov_2[i][j][k]; } } } else{ int dim_1 = missing_list[mispair.first]->dim; int dim_2 = missing_list[mispair.second]->dim; int it_1 = pair_missing_v[mispair.first]; int it_2 = pair_missing_v[mispair.second]; for(int j=0;j<dim_1;j++) for(int k=0;k<dim_2;k++){ for(int s=0;s<complete_geno;s++) mat_cov_2[i][j][k]+=V[it_1][j][s]*V[it_2][k][s]*complete_geno_list[s]->prob; mat_cov_2[i][j][k]-=freq[missing_list[mispair.first]->sites[j]]*freq[missing_list[mispair.second]->sites[k]]; } } pmi++; } } for(int i=0;i<h_size-1;i++) memset(denominator[i],0,(h_size-1)*sizeof(double)); // F_p^T Omega^(-1) F_p double **A1 = new double*[col_clus]; for(int i=0;i<col_clus;i++){ A1[i] = new double[col_clus]; memset(A1[i],0,col_clus*sizeof(double)); } // F_r^T Omega^(-1) F_r double **A2 = new double*[h_size-1]; for(int i=0;i<h_size-1;i++){ A2[i] = new double[col_clus]; memset(A2[i],0,col_clus*sizeof(double)); } // F_p^T Omega^(-1) F_r double *numerator_1 = new double[col_clus]; memset(numerator_1,0,col_clus*sizeof(double)); // F_r^T Omega^(-1) (Z-\mu) double **B1 = new double*[col_1df]; for(int i=0;i<col_1df;i++){ B1[i] = new double[col_1df]; memset(B1[i],0,col_1df*sizeof(double)); } // F_r^T Omega^(-1) F_r double **B2 = new double*[h_size-1]; for(int i=0;i<h_size-1;i++){ B2[i] = new double[col_1df]; memset(B2[i],0,col_1df*sizeof(double)); } // F_p^T Omega^(-1) F_r double *Bnumerator = new double[col_1df]; memset(Bnumerator,0,col_1df*sizeof(double)); // F_r^T Omega^(-1) (Z-\mu) for(int i=0;i<tot_dim;i++){ delete[] cholent[i]; cholent[i] = new double[h_size+col_clus+col_1df]; memset(cholent[i],0,(h_size+col_clus+col_1df)*sizeof(double)); } for(int i=0;i<tot_dim;i++){ delete[] cholaug[i]; cholaug[i] = new double[h_size+col_clus+col_1df]; memset(cholaug[i],0,(h_size+col_clus+col_1df)*sizeof(double)); } for(int fam=0;fam<fam_list.size();fam++){ if(fam_list[fam]->N_typed>0){ int cat; int mis_cat; int dim=0; for(int i=0;i<fam_list[fam]->N;i++){ cat = fam_list[fam]->cat[i]; if(geno_list[cat]->info==1){ mis_cat = fam_list[fam]->mis_cat[i]; dim+=missing_list[mis_cat]->dim; } } if(dim<=0) continue; for(int i=0;i<tot_dim;i++){ memset(cholent[i],0,(h_size+col_clus+col_1df)*sizeof(double)); memset(chol[i],0,tot_dim*sizeof(double)); memset(cholaug[i],0,(h_size+col_clus+col_1df)*sizeof(double)); memset(covMatrix[i],0,tot_dim*sizeof(double)); } // populate covariance matrix int nb1 = 0; int nb2 = 0; int ind_1, ind_2; int dim_1, dim_2; int cat_1, cat_2; int mis_cat_1, mis_cat_2; double IBD_2, IBD_1; map<pair<int, int>, double>::iterator iterk; map<pair<int, int>, double>::iterator iter1; map<pair<int, int>, double>::iterator iter2; double value; double test_weight; flag_pd = 1; for(int i=0;i<fam_list[fam]->N;i++){ cat_1 = fam_list[fam]->cat[i]; mis_cat_1 = fam_list[fam]->mis_cat[i]; dim_1 = missing_list[mis_cat_1]->dim; if(dim_1 > 0 && geno_list[cat_1]->info==1){ nb2 = nb1; ind_1 = fam_list[fam]->fam_member[i][0]; ind_2 = ind_1; iterk = (fam_list[fam]->kinship_coeff).find(pair<int, int>(ind_1,ind_2)); if(iterk == (fam_list[fam]->kinship_coeff).end()){ printf("No IBD coefficient for individual %d from family %d. Please check...\n\n", ind_1,fam_list[fam]->fam_id); exit(1); } for(int j=0;j<dim_1;j++) for(int k=0;k<dim_1;k++) covMatrix[nb1+j][nb2+k] = missing_list[mis_cat_1]->centmatrix[j][k]; nb2+=dim_1; for(int j=i+1;j<fam_list[fam]->N;j++){ cat_2 = fam_list[fam]->cat[j]; mis_cat_2 = fam_list[fam]->mis_cat[j]; dim_2 = missing_list[mis_cat_2]->dim; if(dim_2 > 0 && geno_list[cat_2]->info==1){ ind_2 = fam_list[fam]->fam_member[j][0]; iterk = (fam_list[fam]->kinship_coeff).find(pair<int, int>(ind_1,ind_2)); if(iterk == (fam_list[fam]->kinship_coeff).end()){ printf("No IBD coefficient between individual %d and individual %d from family %d. Please check...\n\n", ind_1,ind_2,fam_list[fam]->fam_id); exit(1); } if(iterk->second>1e-8){ // i is related to j iter2 = (fam_list[fam]->IBD2_coeff).find(pair<int, int>(ind_1,ind_2)); iter1 = (fam_list[fam]->IBD1_coeff).find(pair<int, int>(ind_1,ind_2)); IBD_2 = iter2->second; IBD_1 = iter1->second; if(mis_cat_1==mis_cat_2){ if(IBD_1>1e-8){ int tic = pair_missing_IBD1[pair<int, int>(mis_cat_1,mis_cat_2)]; for(int k=0;k<dim_1;k++) for(int l=k;l<dim_2;l++) covMatrix[nb1+k][nb2+l]+=IBD_1*mat_cov_1[tic][k][l]; } if(IBD_2>1e-8){ int tic = pair_missing_IBD2[pair<int, int>(mis_cat_1,mis_cat_2)]; for(int k=0;k<dim_1;k++) for(int l=k;l<dim_2;l++) covMatrix[nb1+k][nb2+l]+=IBD_2*mat_cov_2[tic][k][l]; } for(int k=0;k<dim_1;k++) for(int l=k+1;l<dim_2;l++) covMatrix[nb1+l][nb2+k] = covMatrix[nb1+k][nb2+l]; } else if(mis_cat_1<mis_cat_2){ if(IBD_1>1e-8){ int tic = pair_missing_IBD1[pair<int, int>(mis_cat_1,mis_cat_2)]; for(int k=0;k<dim_1;k++) for(int l=0;l<dim_2;l++) covMatrix[nb1+k][nb2+l]+=IBD_1*mat_cov_1[tic][k][l]; } if(IBD_2>1e-8){ int tic = pair_missing_IBD2[pair<int, int>(mis_cat_1,mis_cat_2)]; for(int k=0;k<dim_1;k++) for(int l=0;l<dim_2;l++) covMatrix[nb1+k][nb2+l]+=IBD_2*mat_cov_2[tic][k][l]; } } else if(mis_cat_1>mis_cat_2){ if(IBD_1>1e-8){ int tic = pair_missing_IBD1[pair<int, int>(mis_cat_2,mis_cat_1)]; for(int k=0;k<dim_1;k++) for(int l=0;l<dim_2;l++) covMatrix[nb1+k][nb2+l]+=IBD_1*mat_cov_1[tic][l][k]; } if(IBD_2>1e-8){ int tic = pair_missing_IBD2[pair<int, int>(mis_cat_2,mis_cat_1)]; for(int k=0;k<dim_1;k++) for(int l=0;l<dim_2;l++) covMatrix[nb1+k][nb2+l]+=IBD_2*mat_cov_2[tic][l][k]; } } for(int k=0;k<dim_1;k++) for(int l=0;l<dim_2;l++) covMatrix[nb2+l][nb1+k] = covMatrix[nb1+k][nb2+l]; } // i, j related nb2+=dim_2; } // j is typed } // complete j const vector<double> hap_num = geno_list[cat_1]->get_hap_num(); test_weight = fam_list[fam]->test_weight[i]; for(int j=0;j<dim_1;j++){ for(int k=0;k<h_size-1;k++) cholent[nb1+j][k] = missing_list[mis_cat_1]->derivative[j][k]; cholent[nb1+j][h_size-1] = hap_num[j]; } for(int j=0;j<dim_1;j++){ for(int k=0;k<col_clus;k++) cholent[nb1+j][h_size+k] = test_weight*missing_list[mis_cat_1]->derivative_clus[j][k]; for(int k=0;k<col_1df;k++) cholent[nb1+j][h_size+col_clus+k] = test_weight*missing_list[mis_cat_1]->derivative_1df[j][k]; } nb1+=dim_1; } // i is typed } // complete i if(cholesky(covMatrix,dim,cholent,h_size+col_clus+col_1df,chol,cholaug,0) != 1){ printf("cholesky decomposition of the cov matrix failed for family %d. \n", fam_list[fam]->fam_id); flag_pd = 0; exit(1); } if(flag_pd==1){ double **s = self_colmultiply(cholaug,dim,1,h_size-1,1,h_size-1); double **t = self_colmultiply(cholaug,dim,h_size+1,h_size+col_clus,h_size+1,h_size+col_clus); double **u = self_colmultiply(cholaug,dim,1,h_size-1,h_size+1,h_size+col_clus); double **v = self_colmultiply(cholaug,dim,h_size+1,h_size+col_clus,h_size,h_size); for(int i=0;i<h_size-1;i++) for(int j=0;j<h_size-1;j++) denominator[i][j]+=s[i][j]; for(int i=0;i<col_clus;i++) for(int j=0;j<col_clus;j++) A1[i][j]+=t[i][j]; for(int i=0;i<h_size-1;i++) for(int j=0;j<col_clus;j++) A2[i][j]+=u[i][j]; for(int i=0;i<col_clus;i++) numerator_1[i]+=v[i][0]; clear_matrix(s,h_size-1,h_size-1); clear_matrix(t,col_clus,col_clus); clear_matrix(u,h_size-1,col_clus); clear_matrix(v,col_clus,1); t = self_colmultiply(cholaug,dim,h_size+col_clus+1,h_size+col_clus+col_1df,h_size+col_clus+1,h_size+col_clus+col_1df); u = self_colmultiply(cholaug,dim,1,h_size-1,h_size+col_clus+1,h_size+col_clus+col_1df); v = self_colmultiply(cholaug,dim,h_size+col_clus+1,h_size+col_clus+col_1df,h_size,h_size); for(int i=0;i<col_1df;i++) for(int j=0;j<col_1df;j++) B1[i][j]+=t[i][j]; for(int i=0;i<h_size-1;i++) for(int j=0;j<col_1df;j++) B2[i][j]+=u[i][j]; for(int i=0;i<col_1df;i++) Bnumerator[i]+=v[i][0]; clear_matrix(t,col_1df,col_1df); clear_matrix(u,h_size-1,col_1df); clear_matrix(v,col_1df,1); } else{ double var; double &determ = var; double **v = inverse(covMatrix,dim,determ); double **s = quadratic_product(cholent,dim,h_size+col_clus+col_1df,v,dim,dim,cholent,dim,h_size+col_clus+col_1df); for(int i=0;i<h_size-1;i++) for(int j=0;j<h_size-1;j++) denominator[i][j]+=s[i][j]; for(int i=0;i<col_clus;i++) for(int j=0;j<col_clus;j++) A1[i][j]+=s[i+h_size][j+h_size]; for(int i=0;i<h_size-1;i++) for(int j=0;j<col_clus;j++) A2[i][j]+=s[i][j+h_size]; for(int i=0;i<col_clus;i++) numerator_1[i]+=s[i+h_size][h_size-1]; for(int i=0;i<col_1df;i++) for(int j=0;j<col_1df;j++) B1[i][j]+=s[i+h_size+col_clus][j+h_size+col_clus]; for(int i=0;i<h_size-1;i++) for(int j=0;j<col_1df;j++) B2[i][j]+=s[i][j+h_size+col_clus]; for(int i=0;i<col_1df;i++) Bnumerator[i]+=s[i+h_size+col_clus][h_size-1]; clear_matrix(s,h_size+col_clus+col_1df,h_size+col_clus+col_1df); clear_matrix(v,dim,dim); } } } clear_matrix(mul_clus,h_size-1,col_clus); clear_matrix(mul_1df,h_size-1,col_1df); // IQLS test flag_pd = 1; double **cholentmain = new double*[h_size-1]; for(int i=0;i<h_size-1;i++){ cholentmain[i] = new double[col_clus+col_1df]; memset(cholentmain[i],0,(col_clus+col_1df)*sizeof(double)); } double **cholmain = new double*[h_size-1]; for(int i=0;i<h_size-1;i++){ cholmain[i] = new double[h_size-1]; memset(cholmain[i],0,(h_size-1)*sizeof(double)); } double **cholaugmain = new double*[h_size-1]; for(int i=0;i<h_size-1;i++){ cholaugmain[i] = new double[col_clus+col_1df]; memset(cholaugmain[i],0,(col_clus+col_1df)*sizeof(double)); } for(int i=0;i<h_size-1;i++){ for(int j=0;j<col_clus;j++) cholentmain[i][j] = A2[i][j]; for(int j=0;j<col_1df;j++) cholentmain[i][col_clus+j] = B2[i][j]; } if(cholesky(denominator,h_size-1,cholentmain,col_clus+col_1df,cholmain,cholaugmain,0) != 1){ printf("cholesky decomposition of the denominator matrix failed.\n"); flag_pd = 0; exit(1); } double **orig = new double*[col_clus]; for(int i=0;i<col_clus;i++){ orig[i] = new double[col_clus]; memset(orig[i],0,col_clus*sizeof(double)); } double **Borig = new double*[col_1df]; for(int i=0;i<col_1df;i++){ Borig[i] = new double[col_1df]; memset(Borig[i],0,col_1df*sizeof(double)); } if(flag_pd==1){ double **s = self_colmultiply(cholaugmain,h_size-1,1,col_clus,1,col_clus); for(int i=0;i<col_clus;i++) for(int j=0;j<col_clus;j++) orig[i][j] = A1[i][j]-s[i][j]; clear_matrix(s,col_clus,col_clus); s = self_colmultiply(cholaugmain,h_size-1,col_clus+1,col_clus+col_1df,col_clus+1,col_clus+col_1df); for(int i=0;i<col_1df;i++) for(int j=0;j<col_1df;j++) Borig[i][j] = B1[i][j]-s[i][j]; clear_matrix(s,col_1df,col_1df); } else{ double var; double &determ = var; double **v = inverse(denominator,h_size-1,determ); double **s = quadratic_product(A2,h_size-1,col_clus,v,h_size-1,h_size-1,A2,h_size-1,col_clus); for(int i=0;i<col_clus;i++) for(int j=0;j<col_clus;j++) orig[i][j] = A1[i][j]-s[i][j]; clear_matrix(s,col_clus,col_clus); s = quadratic_product(B2,h_size-1,col_1df,v,h_size-1,h_size-1,B2,h_size-1,col_1df); for(int i=0;i<col_1df;i++) for(int j=0;j<col_1df;j++) Borig[i][j] = B1[i][j]-s[i][j]; clear_matrix(s,col_1df,col_1df); clear_matrix(v,h_size-1,h_size-1); } // MIQLS flag_pd = 1; clear_matrix(cholentmain,h_size-1,col_clus+col_1df); clear_matrix(cholmain,h_size-1,h_size-1); clear_matrix(cholaugmain,h_size-1,col_clus+col_1df); cholentmain = new double*[col_clus]; for(int i=0;i<col_clus;i++){ cholentmain[i] = new double[1]; cholentmain[i][0] = numerator_1[i]; } cholmain = new double*[col_clus]; for(int i=0;i<col_clus;i++){ cholmain[i] = new double[col_clus]; memset(cholmain[i],0,col_clus*sizeof(double)); } cholaugmain = new double*[col_clus]; for(int i=0;i<col_clus;i++){ cholaugmain[i] = new double[1]; memset(cholaugmain[i],0,sizeof(double)); } if(cholesky(orig,col_clus,cholentmain,1,cholmain,cholaugmain,0) != 1){ printf("cholesky decomposition of the denominator matrix failed.\n"); flag_pd = 0; exit(1); } if(flag_pd==1){ double **s = self_colmultiply(cholaugmain,col_clus,1,1,1,1); test = s[0][0]; clear_matrix(s,1,1); } else{ double var; double &determ = var; double **v = inverse(orig,col_clus,determ); double **s = quadratic_product(cholentmain,col_clus,1,v,col_clus,col_clus,cholentmain,col_clus,1); test = s[0][0]; clear_matrix(s,1,1); clear_matrix(v,col_clus,col_clus); } test_perform = 1; Btest = new double[col_1df]; Bpvalue = new double[col_1df]; for(int i=0;i<col_1df;i++){ Btest[i] = Bnumerator[i]*Bnumerator[i]/Borig[i][i]; Bpvalue[i] = pochisq(Btest[i],1); } minpvalue = Bpvalue[0]; for(int i=1;i<col_1df;i++){ if(Bpvalue[i]<minpvalue) minpvalue = Bpvalue[i]; } clear_matrix(cholentmain,col_clus,1); clear_matrix(cholmain,col_clus,col_clus); clear_matrix(cholaugmain,col_clus,1); clear_matrix(orig,col_clus,col_clus); clear_matrix(Borig,col_1df,col_1df); pvalue = pochisq(test,df); clear_matrix(A1,col_clus,col_clus); clear_matrix(A2,h_size-1,col_clus); delete[] numerator_1; clear_matrix(B1,col_1df,col_1df); clear_matrix(B2,h_size-1,col_1df); delete[] Bnumerator; return 1; } void IQLAEstimator::estimate_subgroup(){ full_hap_freq_table = hap_freq_table; // affected // unaffected // unknown aff=0; unaff=0; unknown=0; int cat_1; int mis_cat_1; int dim_1; for(int fam=0;fam<fam_list.size();fam++){ if(fam_list[fam]->N_typed>0){ for(int i=0;i<fam_list[fam]->N;i++){ cat_1 = fam_list[fam]->cat[i]; mis_cat_1 = fam_list[fam]->mis_cat[i]; dim_1 = missing_list[mis_cat_1]->dim; if(dim_1 > 0 && geno_list[cat_1]->info==1){ if(fam_list[fam]->fam_member[i][4]==0) unknown++; else if(fam_list[fam]->fam_member[i][4]==1) unaff++; else if(fam_list[fam]->fam_member[i][4]==2) aff++; } } } } if(aff>=15){ reset_freq_table(full_hap_freq_table); int count=1; int flag; while(1){ map<int, double> of = hap_freq_table; NR_Iteration_subgroup(2); map<int, double> nf = hap_freq_table; double diff = 0; flag = 1; map<int, double>::iterator iter=hap_freq_table.begin(); while(iter!=hap_freq_table.end()){ diff += (nf[iter->first]-of[iter->first])*(nf[iter->first]-of[iter->first]); if(nf[iter->first]<0.0000001){ flag = 0; break; } iter++; } if(diff<1e-5 || count>5 || flag==0) break; count++; } aff_hap_freq_table = hap_freq_table; } if(unaff>=15){ reset_freq_table(full_hap_freq_table); int count=1; int flag; while(1){ map<int, double> of = hap_freq_table; NR_Iteration_subgroup(1); map<int, double> nf = hap_freq_table; double diff = 0; flag = 1; map<int, double>::iterator iter=hap_freq_table.begin(); while(iter!=hap_freq_table.end()){ diff += (nf[iter->first]-of[iter->first])*(nf[iter->first]-of[iter->first]); if(nf[iter->first]<0.0000001){ flag = 0; break; } iter++; } if(diff<1e-5 || count>5 || flag==0) break; count++; } unaff_hap_freq_table = hap_freq_table; } if(unknown>=15){ reset_freq_table(full_hap_freq_table); int count=1; int flag; while(1){ map<int, double> of = hap_freq_table; NR_Iteration_subgroup(0); map<int, double> nf = hap_freq_table; double diff = 0; flag = 1; map<int, double>::iterator iter=hap_freq_table.begin(); while(iter!=hap_freq_table.end()){ diff += (nf[iter->first]-of[iter->first])*(nf[iter->first]-of[iter->first]); if(nf[iter->first]<0.0000001){ flag = 0; break; } iter++; } if(diff<1e-5 || count>5 || flag==0) break; count++; } unknown_hap_freq_table = hap_freq_table; } reset_freq_table(full_hap_freq_table); } void IQLAEstimator::NR_Iteration_subgroup(int group){ int h_size = hap_freq_table.size(); int col = geno_freq_table.size(); for(int i=0;i<h_size-1;i++) memset(rst[i],0,col*sizeof(double)); // record derivative for each pair of haplotypes int loc = 0; map<int, double>::iterator iter=hap_freq_table.begin(); while(iter!=hap_freq_table.end()){ freq[loc] = iter->second; loc++; iter++; } int h_last=0; iter = hap_freq_table.end(); iter--; h_last=iter->first; map<pair<int, int>, double>::iterator giter=geno_freq_table.begin(); while(giter!=geno_freq_table.end()){ pair<int, int> conf = giter->first; loc = 0; iter = hap_freq_table.begin(); while(loc<h_size-1){ int h=iter->first; if(conf.first == conf.second && conf.first == h) rst[loc][hap_pair_map[conf]] = 2*hap_freq_table[h]; else if(conf.first == conf.second && conf.first == h_last) rst[loc][hap_pair_map[conf]] = -2*hap_freq_table[h_last]; else if(conf.first < conf.second && conf.first == h && conf.second != h_last) rst[loc][hap_pair_map[conf]] = 2*hap_freq_table[conf.second]; else if(conf.first < conf.second && conf.second == h) rst[loc][hap_pair_map[conf]] = 2*hap_freq_table[conf.first]; else if(conf.first == h && conf.second == h_last) rst[loc][hap_pair_map[conf]] = 2*(hap_freq_table[h_last]-hap_freq_table[h]); else if(conf.first != h && conf.first != h_last && conf.second == h_last) rst[loc][hap_pair_map[conf]] = -2*hap_freq_table[conf.first]; loc++; iter++; } giter++; } for(int i=0;i<geno_list.size();i++){ if(geno_list[i]->get_num()>0 && geno_list[i]->info==1 && missing_list[geno_missing_map[i]]->dim>0) geno_list[i]->expected_hap_num(freq,geno_freq_table,hap_pair_map,count,missing_list[geno_missing_map[i]]->sites); } for(int i=0;i<missing_list.size();i++){ if(missing_list[i]->get_num()>0 && missing_list[i]->dim>0){ missing_list[i]->set_weight(geno_freq_table,hap_pair_map,count); missing_list[i]->compute_centmatrix(freq,geno_freq_table); missing_list[i]->compute_derivative(hap_freq_table,hap_pair_map,count,rst); } } // compute L matrix int complete_geno = complete_geno_list.size(); for(int i=0;i<complete_geno;i++){ complete_geno_list[i]->expectation(geno_freq_table); complete_geno_list[i]->expectation_IBD1(hap_freq_table); } for(int i=0;i<complete_geno;i++){ memset(L[i],0,complete_geno*sizeof(double)); L[i][i] = complete_geno_list[i]->prob_IBD1; const vector<pair<int, int> > hv_1 = complete_geno_list[i]->get_hap_vec(); for(int j=i+1;j<complete_geno;j++){ const vector<pair<int, int> > hv_2 = complete_geno_list[j]->get_hap_vec(); for(int s=0;s<hv_1.size();s++) for(int t=0;t<hv_2.size();t++) L[i][j]+=expfortwo(hv_1[s],hv_2[t],hap_freq_table); } } // construct V int v_size = pair_missing_v.size(); if(v_size>0){ map<int, int>::iterator pi=pair_missing_v.begin(); for(int i=0;i<v_size;i++){ pi->second = i; int mis = pi->first; int dim_1 = missing_list[mis]->dim; for(int j=0;j<dim_1;j++) for(int k=0;k<complete_geno;k++) V[i][j][k] = missing_list[mis]->weight[j][missing_list[mis]->mapping_function[complete_geno_list[k]->gname]]; pi++; } } // compute VLV^T-hh^T, and VDV^T-hh^T for all pair of missing pattern int IBD1_size = pair_missing_IBD1.size(); int IBD2_size = pair_missing_IBD2.size(); if(IBD1_size>0){ map<pair<int, int>, int>::iterator pmi=pair_missing_IBD1.begin(); for(int i=0;i<IBD1_size;i++){ pmi->second = i; pair<int, int> mispair = pmi->first; int dim_1 = missing_list[mispair.first]->dim; int dim_2 = missing_list[mispair.second]->dim; for(int j=0;j<dim_1;j++) memset(mat_cov_1[i][j],0,dim_2*sizeof(double)); pmi++; } pmi=pair_missing_IBD1.begin(); for(int i=0;i<IBD1_size;i++){ pair<int, int> mispair = pmi->first; if(mispair.first==mispair.second){ int dim_1 = missing_list[mispair.first]->dim; int it_1 = pair_missing_v[mispair.first]; for(int j=0;j<dim_1;j++){ for(int s=0;s<complete_geno;s++){ mat_cov_1[i][j][j]+=V[it_1][j][s]*V[it_1][j][s]*L[s][s]; for(int t=s+1;t<complete_geno;t++) mat_cov_1[i][j][j]+=2*V[it_1][j][s]*V[it_1][j][t]*L[s][t]; } mat_cov_1[i][j][j]-=freq[missing_list[mispair.first]->sites[j]]*freq[missing_list[mispair.second]->sites[j]]; for(int k=j+1;k<dim_1;k++){ for(int s=0;s<complete_geno;s++){ mat_cov_1[i][j][k]+=V[it_1][j][s]*V[it_1][k][s]*L[s][s]; for(int t=s+1;t<complete_geno;t++) mat_cov_1[i][j][k]+=(V[it_1][j][s]*V[it_1][k][t]+V[it_1][j][t]*V[it_1][k][s])*L[s][t]; } mat_cov_1[i][j][k]-=freq[missing_list[mispair.first]->sites[j]]*freq[missing_list[mispair.second]->sites[k]]; mat_cov_1[i][k][j] = mat_cov_1[i][j][k]; } } } else{ int dim_1 = missing_list[mispair.first]->dim; int dim_2 = missing_list[mispair.second]->dim; int it_1 = pair_missing_v[mispair.first]; int it_2 = pair_missing_v[mispair.second]; for(int j=0;j<dim_1;j++) for(int k=0;k<dim_2;k++){ for(int s=0;s<complete_geno;s++){ mat_cov_1[i][j][k]+=V[it_1][j][s]*V[it_2][k][s]*L[s][s]; for(int t=s+1;t<complete_geno;t++) mat_cov_1[i][j][k]+=(V[it_1][j][s]*V[it_2][k][t]+V[it_1][j][t]*V[it_2][k][s])*L[s][t]; } mat_cov_1[i][j][k]-=freq[missing_list[mispair.first]->sites[j]]*freq[missing_list[mispair.second]->sites[k]]; } } pmi++; } } if(IBD2_size>0){ map<pair<int, int>, int>::iterator pmi=pair_missing_IBD2.begin(); for(int i=0;i<IBD2_size;i++){ pmi->second = i; pair<int, int> mispair = pmi->first; int dim_1 = missing_list[mispair.first]->dim; int dim_2 = missing_list[mispair.second]->dim; for(int j=0;j<dim_1;j++) memset(mat_cov_2[i][j],0,dim_2*sizeof(double)); pmi++; } pmi=pair_missing_IBD2.begin(); for(int i=0;i<IBD2_size;i++){ pair<int, int> mispair = pmi->first; if(mispair.first==mispair.second){ int dim_1 = missing_list[mispair.first]->dim; int it_1 = pair_missing_v[mispair.first]; for(int j=0;j<dim_1;j++){ for(int s=0;s<complete_geno;s++) mat_cov_2[i][j][j]+=V[it_1][j][s]*V[it_1][j][s]*complete_geno_list[s]->prob; mat_cov_2[i][j][j]-=freq[missing_list[mispair.first]->sites[j]]*freq[missing_list[mispair.second]->sites[j]]; for(int k=j+1;k<dim_1;k++){ for(int s=0;s<complete_geno;s++) mat_cov_2[i][j][k]+=V[it_1][j][s]*V[it_1][k][s]*complete_geno_list[s]->prob; mat_cov_2[i][j][k]-=freq[missing_list[mispair.first]->sites[j]]*freq[missing_list[mispair.second]->sites[k]]; mat_cov_2[i][k][j] = mat_cov_2[i][j][k]; } } } else{ int dim_1 = missing_list[mispair.first]->dim; int dim_2 = missing_list[mispair.second]->dim; int it_1 = pair_missing_v[mispair.first]; int it_2 = pair_missing_v[mispair.second]; for(int j=0;j<dim_1;j++) for(int k=0;k<dim_2;k++){ for(int s=0;s<complete_geno;s++) mat_cov_2[i][j][k]+=V[it_1][j][s]*V[it_2][k][s]*complete_geno_list[s]->prob; mat_cov_2[i][j][k]-=freq[missing_list[mispair.first]->sites[j]]*freq[missing_list[mispair.second]->sites[k]]; } } pmi++; } } for(int i=0;i<h_size-1;i++) memset(denominator[i],0,(h_size-1)*sizeof(double)); // F_p^T Omega^(-1) F_p memset(numerator,0,(h_size-1)*sizeof(double)); // F_p^T Omega^(-1) (Z-\mu) for(int fam=0;fam<fam_list.size();fam++){ if(fam_list[fam]->N_typed>0){ int cat; int mis_cat; int dim=0; for(int i=0;i<fam_list[fam]->N;i++){ cat = fam_list[fam]->cat[i]; if(geno_list[cat]->info==1 && fam_list[fam]->fam_member[i][4]==group){ mis_cat = fam_list[fam]->mis_cat[i]; dim+=missing_list[mis_cat]->dim; } } if(dim<=0) continue; for(int i=0;i<tot_dim;i++){ memset(cholent[i],0,h_size*sizeof(double)); memset(chol[i],0,tot_dim*sizeof(double)); memset(cholaug[i],0,h_size*sizeof(double)); memset(covMatrix[i],0,tot_dim*sizeof(double)); } // populate covariance matrix int nb1 = 0; int nb2 = 0; int ind_1, ind_2; int dim_1, dim_2; int cat_1, cat_2; int mis_cat_1, mis_cat_2; double IBD_2, IBD_1; map<pair<int, int>, double>::iterator iterk; map<pair<int, int>, double>::iterator iter1; map<pair<int, int>, double>::iterator iter2; double value; flag_pd = 1; for(int i=0;i<fam_list[fam]->N;i++){ cat_1 = fam_list[fam]->cat[i]; mis_cat_1 = fam_list[fam]->mis_cat[i]; dim_1 = missing_list[mis_cat_1]->dim; if(dim_1 > 0 && geno_list[cat_1]->info==1 && fam_list[fam]->fam_member[i][4]==group){ nb2 = nb1; ind_1 = fam_list[fam]->fam_member[i][0]; ind_2 = ind_1; iterk = (fam_list[fam]->kinship_coeff).find(pair<int, int>(ind_1,ind_2)); if(iterk == (fam_list[fam]->kinship_coeff).end()){ printf("No IBD coefficient for individual %d from family %d. Please check...\n\n", ind_1,fam_list[fam]->fam_id); exit(1); } for(int j=0;j<dim_1;j++) for(int k=0;k<dim_1;k++) covMatrix[nb1+j][nb2+k] = missing_list[mis_cat_1]->centmatrix[j][k]; nb2+=dim_1; for(int j=i+1;j<fam_list[fam]->N;j++){ cat_2 = fam_list[fam]->cat[j]; mis_cat_2 = fam_list[fam]->mis_cat[j]; dim_2 = missing_list[mis_cat_2]->dim; if(dim_2 > 0 && geno_list[cat_2]->info==1 && fam_list[fam]->fam_member[j][4]==group){ ind_2 = fam_list[fam]->fam_member[j][0]; iterk = (fam_list[fam]->kinship_coeff).find(pair<int, int>(ind_1,ind_2)); if(iterk == (fam_list[fam]->kinship_coeff).end()){ printf("No IBD coefficient between individual %d and individual %d from family %d. Please check...\n\n", ind_1,ind_2,fam_list[fam]->fam_id); exit(1); } if(iterk->second>1e-8){ // i is related to j iter2 = (fam_list[fam]->IBD2_coeff).find(pair<int, int>(ind_1,ind_2)); iter1 = (fam_list[fam]->IBD1_coeff).find(pair<int, int>(ind_1,ind_2)); IBD_2 = iter2->second; IBD_1 = iter1->second; if(mis_cat_1==mis_cat_2){ if(IBD_1>1e-8){ int tic = pair_missing_IBD1[pair<int, int>(mis_cat_1,mis_cat_2)]; for(int k=0;k<dim_1;k++) for(int l=k;l<dim_2;l++) covMatrix[nb1+k][nb2+l]+=IBD_1*mat_cov_1[tic][k][l]; } if(IBD_2>1e-8){ int tic = pair_missing_IBD2[pair<int, int>(mis_cat_1,mis_cat_2)]; for(int k=0;k<dim_1;k++) for(int l=k;l<dim_2;l++) covMatrix[nb1+k][nb2+l]+=IBD_2*mat_cov_2[tic][k][l]; } for(int k=0;k<dim_1;k++) for(int l=k+1;l<dim_2;l++) covMatrix[nb1+l][nb2+k] = covMatrix[nb1+k][nb2+l]; } else if(mis_cat_1<mis_cat_2){ if(IBD_1>1e-8){ int tic = pair_missing_IBD1[pair<int, int>(mis_cat_1,mis_cat_2)]; for(int k=0;k<dim_1;k++) for(int l=0;l<dim_2;l++) covMatrix[nb1+k][nb2+l]+=IBD_1*mat_cov_1[tic][k][l]; } if(IBD_2>1e-8){ int tic = pair_missing_IBD2[pair<int, int>(mis_cat_1,mis_cat_2)]; for(int k=0;k<dim_1;k++) for(int l=0;l<dim_2;l++) covMatrix[nb1+k][nb2+l]+=IBD_2*mat_cov_2[tic][k][l]; } } else if(mis_cat_1>mis_cat_2){ if(IBD_1>1e-8){ int tic = pair_missing_IBD1[pair<int, int>(mis_cat_2,mis_cat_1)]; for(int k=0;k<dim_1;k++) for(int l=0;l<dim_2;l++) covMatrix[nb1+k][nb2+l]+=IBD_1*mat_cov_1[tic][l][k]; } if(IBD_2>1e-8){ int tic = pair_missing_IBD2[pair<int, int>(mis_cat_2,mis_cat_1)]; for(int k=0;k<dim_1;k++) for(int l=0;l<dim_2;l++) covMatrix[nb1+k][nb2+l]+=IBD_2*mat_cov_2[tic][l][k]; } } for(int k=0;k<dim_1;k++) for(int l=0;l<dim_2;l++) covMatrix[nb2+l][nb1+k] = covMatrix[nb1+k][nb2+l]; } // i, j related nb2+=dim_2; } // j is typed } // complete j const vector<double> hap_num = geno_list[cat_1]->get_hap_num(); for(int j=0;j<dim_1;j++){ for(int k=0;k<h_size-1;k++) cholent[nb1+j][k] = missing_list[mis_cat_1]->derivative[j][k]; cholent[nb1+j][h_size-1] = hap_num[j]; } nb1+=dim_1; } // i is typed } // complete i if(cholesky(covMatrix,dim,cholent,h_size,chol,cholaug,0) != 1){ printf("cholesky decomposition of the cov matrix failed for family %d. \n", fam_list[fam]->fam_id); flag_pd = 0; exit(1); } if(flag_pd==1){ double **s = self_colmultiply(cholaug,dim,1,h_size-1,1,h_size-1); double **t = self_colmultiply(cholaug,dim,1,h_size-1,h_size,h_size); for(int i=0;i<h_size-1;i++) for(int j=0;j<h_size-1;j++) denominator[i][j]+=s[i][j]; for(int i=0;i<h_size-1;i++) numerator[i]+=t[i][0]; clear_matrix(s,h_size-1,h_size-1); clear_matrix(t,h_size-1,1); } else{ double var; double &determ = var; double **v = inverse(covMatrix,dim,determ); double **s = quadratic_product(cholent,dim,h_size,v,dim,dim,cholent,dim,h_size); for(int i=0;i<h_size-1;i++) for(int j=0;j<h_size-1;j++) denominator[i][j]+=s[i][j]; for(int i=0;i<h_size-1;i++) numerator[i]+=s[i][h_size-1]; clear_matrix(s,h_size,h_size); clear_matrix(v,dim,dim); } } } flag_pd = 1; double **cholentmain = new double*[h_size-1]; for(int i=0;i<h_size-1;i++){ cholentmain[i] = new double[h_size]; memset(cholentmain[i],0,h_size*sizeof(double)); } double **cholmain = new double*[h_size-1]; for(int i=0;i<h_size-1;i++){ cholmain[i] = new double[h_size-1]; memset(cholmain[i],0,(h_size-1)*sizeof(double)); } double **cholaugmain = new double*[h_size-1]; for(int i=0;i<h_size-1;i++){ cholaugmain[i] = new double[h_size]; memset(cholaugmain[i],0,h_size*sizeof(double)); } for(int i=0;i<h_size-1;i++){ cholentmain[i][i] = 1; cholentmain[i][h_size-1] = numerator[i]; } if(cholesky(denominator,h_size-1,cholentmain,h_size,cholmain,cholaugmain,0) != 1){ printf("cholesky decomposition of the denominator matrix failed.\n"); flag_pd = 0; exit(1); } if(flag_pd==1){ double **s = self_colmultiply(cholaugmain,h_size-1,1,h_size-1,h_size,h_size); iter=hap_freq_table.begin(); double n=0; for(int i=0;i<h_size-1;i++){ iter->second+=s[i][0]; n+=iter->second; iter++; } iter->second = 1-n; clear_matrix(s,h_size-1,1); } else{ double var; double &determ = var; double **v = inverse(denominator,h_size-1,determ); double **w = new double*[h_size-1]; for(int i=0;i<h_size-1;i++){ w[i] = new double[1]; w[i][0] = numerator[i]; } double **s = multiply(v,h_size-1,h_size-1,w,h_size-1,1); iter=hap_freq_table.begin(); double n=0; for(int i=0;i<h_size-1;i++){ iter->second+=s[i][0]; n+=iter->second; iter++; } iter->second = 1-n; clear_matrix(s,h_size-1,1); clear_matrix(v,h_size-1,h_size-1); clear_matrix(w,h_size-1,1); } clear_matrix(cholentmain,h_size-1,h_size); clear_matrix(cholmain,h_size-1,h_size-1); clear_matrix(cholaugmain,h_size-1,h_size); update_geno_freq(); } void IQLAEstimator::print_subgroup(vector<char> &a0v, vector<char> &a1v){ fprintf(outfp, "Haplotype frequency estimates by phenotype category (IQL_a estimator)\n"); fprintf(outfp, "Haplotype\tOverall\t"); if(aff>=15) fprintf(outfp, "\tAffected"); if(unaff>=15) fprintf(outfp, "\tUnaffected"); if(unknown>=15) fprintf(outfp, "\tUnknown"); fprintf(outfp, "\n"); double sum=0, sum_aff=0, sum_unaff=0, sum_unknown=0; int ind=0, ind_aff=0, ind_unaff=0, ind_unknown=0; map<int, double>::iterator iter = hap_freq_table.begin(); while(iter!=hap_freq_table.end()){ if(full_hap_freq_table[iter->first]>=0) sum+=full_hap_freq_table[iter->first]; else ind=1; if(aff>=15){ if(aff_hap_freq_table[iter->first]>=0) sum_aff+=aff_hap_freq_table[iter->first]; else ind_aff=1; } if(unaff>=15){ if(unaff_hap_freq_table[iter->first]>=0) sum_unaff+=unaff_hap_freq_table[iter->first]; else ind_unaff=1; } if(unknown>=15){ if(unknown_hap_freq_table[iter->first]>=0) sum_unknown+=unknown_hap_freq_table[iter->first]; else ind_unknown=1; } iter++; } iter = hap_freq_table.begin(); while(iter!=hap_freq_table.end()){ translate_hap(outfp,iter->first,loci_num,a0v,a1v); if(loci_num==2) fprintf(outfp, "\t"); if(full_hap_freq_table[iter->first]>=0){ if(ind==0) fprintf(outfp, "\t%.4f\t", full_hap_freq_table[iter->first]); else fprintf(outfp, "\t%.4f\t", full_hap_freq_table[iter->first]/sum); } else fprintf(outfp, "\t0.0000\t"); if(aff>=15){ if(aff_hap_freq_table[iter->first]>=0){ if(ind_aff==0) fprintf(outfp, "\t%.4f\t", aff_hap_freq_table[iter->first]); else fprintf(outfp, "\t%.4f\t", aff_hap_freq_table[iter->first]/sum_aff); } else fprintf(outfp, "\t0.0000\t"); } if(unaff>=15){ if(unaff_hap_freq_table[iter->first]>=0){ if(ind_unaff==0) fprintf(outfp, "\t%.4f\t", unaff_hap_freq_table[iter->first]); else fprintf(outfp, "\t%.4f\t", unaff_hap_freq_table[iter->first]/sum_unaff); } else fprintf(outfp, "\t0.0000\t"); } if(unknown>=15){ if(unknown_hap_freq_table[iter->first]>=0){ if(ind_unknown==0) fprintf(outfp, "\t%.4f", unknown_hap_freq_table[iter->first]); else fprintf(outfp, "\t%.4f", unknown_hap_freq_table[iter->first]/sum_unknown); } else fprintf(outfp, "\t0.0000"); } fprintf(outfp, "\n"); iter++; } fprintf(outfp, "\n"); } void IQLAEstimator::NR_Iteration_EW(){ int h_size = hap_freq_table.size(); int col = geno_freq_table.size(); for(int i=0;i<h_size-1;i++) memset(rst[i],0,col*sizeof(double)); // record derivative for each pair of haplotypes int loc = 0; map<int, double>::iterator iter=hap_freq_table.begin(); while(iter!=hap_freq_table.end()){ freq[loc] = iter->second; loc++; iter++; } int h_last=0; iter = hap_freq_table.end(); iter--; h_last=iter->first; map<pair<int, int>, double>::iterator giter=geno_freq_table.begin(); while(giter!=geno_freq_table.end()){ pair<int, int> conf = giter->first; loc = 0; iter = hap_freq_table.begin(); while(loc<h_size-1){ int h=iter->first; if(conf.first == conf.second && conf.first == h) rst[loc][hap_pair_map[conf]] = 2*hap_freq_table[h]; else if(conf.first == conf.second && conf.first == h_last) rst[loc][hap_pair_map[conf]] = -2*hap_freq_table[h_last]; else if(conf.first < conf.second && conf.first == h && conf.second != h_last) rst[loc][hap_pair_map[conf]] = 2*hap_freq_table[conf.second]; else if(conf.first < conf.second && conf.second == h) rst[loc][hap_pair_map[conf]] = 2*hap_freq_table[conf.first]; else if(conf.first == h && conf.second == h_last) rst[loc][hap_pair_map[conf]] = 2*(hap_freq_table[h_last]-hap_freq_table[h]); else if(conf.first != h && conf.first != h_last && conf.second == h_last) rst[loc][hap_pair_map[conf]] = -2*hap_freq_table[conf.first]; loc++; iter++; } giter++; } for(int i=0;i<geno_list.size();i++){ if(geno_list[i]->get_num()>0 && geno_list[i]->info==1 && missing_list[geno_missing_map[i]]->dim>0) geno_list[i]->expected_hap_num(freq,geno_freq_table,hap_pair_map,count,missing_list[geno_missing_map[i]]->sites); } for(int i=0;i<missing_list.size();i++){ if(missing_list[i]->get_num()>0 && missing_list[i]->dim>0){ missing_list[i]->set_weight(geno_freq_table,hap_pair_map,count); missing_list[i]->compute_centmatrix(freq,geno_freq_table); missing_list[i]->compute_derivative(hap_freq_table,hap_pair_map,count,rst); } } for(int i=0;i<h_size-1;i++) memset(denominator[i],0,(h_size-1)*sizeof(double)); // F_p^T Omega^(-1) F_p memset(numerator,0,(h_size-1)*sizeof(double)); // F_p^T Omega^(-1) (Z-\mu) for(int fam=0;fam<fam_list.size();fam++){ if(fam_list[fam]->N_typed>0){ int cat; int mis_cat; int dim=0; for(int i=0;i<fam_list[fam]->N;i++){ cat = fam_list[fam]->cat[i]; if(geno_list[cat]->info==1){ mis_cat = fam_list[fam]->mis_cat[i]; dim+=missing_list[mis_cat]->dim; } } if(dim<=0) continue; for(int i=0;i<tot_dim;i++){ memset(cholent[i],0,h_size*sizeof(double)); memset(chol[i],0,tot_dim*sizeof(double)); memset(cholaug[i],0,h_size*sizeof(double)); memset(covMatrix[i],0,tot_dim*sizeof(double)); } // populate covariance matrix int nb1 = 0; int ind_1; int dim_1; int cat_1; int mis_cat_1; map<pair<int, int>, double>::iterator iterk; flag_pd = 1; for(int i=0;i<fam_list[fam]->N;i++){ cat_1 = fam_list[fam]->cat[i]; mis_cat_1 = fam_list[fam]->mis_cat[i]; dim_1 = missing_list[mis_cat_1]->dim; if(dim_1 > 0 && geno_list[cat_1]->info==1){ ind_1 = fam_list[fam]->fam_member[i][0]; iterk = (fam_list[fam]->kinship_coeff).find(pair<int, int>(ind_1,ind_1)); if(iterk == (fam_list[fam]->kinship_coeff).end()){ printf("No IBD coefficient for individual %d from family %d. Please check...\n\n", ind_1,fam_list[fam]->fam_id); exit(1); } for(int j=0;j<dim_1;j++) for(int k=0;k<dim_1;k++) covMatrix[nb1+j][nb1+k] = missing_list[mis_cat_1]->centmatrix[j][k]; const vector<double> hap_num = geno_list[cat_1]->get_hap_num(); for(int j=0;j<dim_1;j++){ for(int k=0;k<h_size-1;k++) cholent[nb1+j][k] = missing_list[mis_cat_1]->derivative[j][k]; cholent[nb1+j][h_size-1] = hap_num[j]; } nb1+=dim_1; } // i is typed } // complete i if(cholesky(covMatrix,dim,cholent,h_size,chol,cholaug,0) != 1){ printf("cholesky decomposition of the cov matrix failed for family %d. \n", fam_list[fam]->fam_id); flag_pd = 0; exit(1); } if(flag_pd==1){ double **s = self_colmultiply(cholaug,dim,1,h_size-1,1,h_size-1); double **t = self_colmultiply(cholaug,dim,1,h_size-1,h_size,h_size); for(int i=0;i<h_size-1;i++) for(int j=0;j<h_size-1;j++) denominator[i][j]+=s[i][j]; for(int i=0;i<h_size-1;i++) numerator[i]+=t[i][0]; clear_matrix(s,h_size-1,h_size-1); clear_matrix(t,h_size-1,1); } else{ double var; double &determ = var; double **v = inverse(covMatrix,dim,determ); double **s = quadratic_product(cholent,dim,h_size,v,dim,dim,cholent,dim,h_size); for(int i=0;i<h_size-1;i++) for(int j=0;j<h_size-1;j++) denominator[i][j]+=s[i][j]; for(int i=0;i<h_size-1;i++) numerator[i]+=s[i][h_size-1]; clear_matrix(s,h_size,h_size); clear_matrix(v,dim,dim); } } } flag_pd = 1; double **cholentmain = new double*[h_size-1]; for(int i=0;i<h_size-1;i++){ cholentmain[i] = new double[h_size]; memset(cholentmain[i],0,h_size*sizeof(double)); } double **cholmain = new double*[h_size-1]; for(int i=0;i<h_size-1;i++){ cholmain[i] = new double[h_size-1]; memset(cholmain[i],0,(h_size-1)*sizeof(double)); } double **cholaugmain = new double*[h_size-1]; for(int i=0;i<h_size-1;i++){ cholaugmain[i] = new double[h_size]; memset(cholaugmain[i],0,h_size*sizeof(double)); } for(int i=0;i<h_size-1;i++){ cholentmain[i][i] = 1; cholentmain[i][h_size-1] = numerator[i]; } if(cholesky(denominator,h_size-1,cholentmain,h_size,cholmain,cholaugmain,0) != 1){ printf("cholesky decomposition of the denominator matrix failed.\n"); flag_pd = 0; exit(1); } if(flag_pd==1){ double **s = self_colmultiply(cholaugmain,h_size-1,1,h_size-1,h_size,h_size); iter=hap_freq_table.begin(); double n=0; for(int i=0;i<h_size-1;i++){ iter->second+=s[i][0]; n+=iter->second; iter++; } iter->second = 1-n; clear_matrix(s,h_size-1,1); } else{ double var; double &determ = var; double **v = inverse(denominator,h_size-1,determ); double **w = new double*[h_size-1]; for(int i=0;i<h_size-1;i++){ w[i] = new double[1]; w[i][0] = numerator[i]; } double **s = multiply(v,h_size-1,h_size-1,w,h_size-1,1); iter=hap_freq_table.begin(); double n=0; for(int i=0;i<h_size-1;i++){ iter->second+=s[i][0]; n+=iter->second; iter++; } iter->second = 1-n; clear_matrix(s,h_size-1,1); clear_matrix(v,h_size-1,h_size-1); clear_matrix(w,h_size-1,1); } clear_matrix(cholentmain,h_size-1,h_size); clear_matrix(cholmain,h_size-1,h_size-1); clear_matrix(cholaugmain,h_size-1,h_size); update_geno_freq(); } void IQLAEstimator::NR_Iteration_subgroup_EW(int group){ int h_size = hap_freq_table.size(); int col = geno_freq_table.size(); for(int i=0;i<h_size-1;i++) memset(rst[i],0,col*sizeof(double)); // record derivative for each pair of haplotypes int loc = 0; map<int, double>::iterator iter=hap_freq_table.begin(); while(iter!=hap_freq_table.end()){ freq[loc] = iter->second; loc++; iter++; } int h_last=0; iter = hap_freq_table.end(); iter--; h_last=iter->first; map<pair<int, int>, double>::iterator giter=geno_freq_table.begin(); while(giter!=geno_freq_table.end()){ pair<int, int> conf = giter->first; loc = 0; iter = hap_freq_table.begin(); while(loc<h_size-1){ int h=iter->first; if(conf.first == conf.second && conf.first == h) rst[loc][hap_pair_map[conf]] = 2*hap_freq_table[h]; else if(conf.first == conf.second && conf.first == h_last) rst[loc][hap_pair_map[conf]] = -2*hap_freq_table[h_last]; else if(conf.first < conf.second && conf.first == h && conf.second != h_last) rst[loc][hap_pair_map[conf]] = 2*hap_freq_table[conf.second]; else if(conf.first < conf.second && conf.second == h) rst[loc][hap_pair_map[conf]] = 2*hap_freq_table[conf.first]; else if(conf.first == h && conf.second == h_last) rst[loc][hap_pair_map[conf]] = 2*(hap_freq_table[h_last]-hap_freq_table[h]); else if(conf.first != h && conf.first != h_last && conf.second == h_last) rst[loc][hap_pair_map[conf]] = -2*hap_freq_table[conf.first]; loc++; iter++; } giter++; } for(int i=0;i<geno_list.size();i++){ if(geno_list[i]->get_num()>0 && geno_list[i]->info==1 && missing_list[geno_missing_map[i]]->dim>0) geno_list[i]->expected_hap_num(freq,geno_freq_table,hap_pair_map,count,missing_list[geno_missing_map[i]]->sites); } for(int i=0;i<missing_list.size();i++){ if(missing_list[i]->get_num()>0 && missing_list[i]->dim>0){ missing_list[i]->set_weight(geno_freq_table,hap_pair_map,count); missing_list[i]->compute_centmatrix(freq,geno_freq_table); missing_list[i]->compute_derivative(hap_freq_table,hap_pair_map,count,rst); } } for(int i=0;i<h_size-1;i++) memset(denominator[i],0,(h_size-1)*sizeof(double)); // F_p^T Omega^(-1) F_p memset(numerator,0,(h_size-1)*sizeof(double)); // F_p^T Omega^(-1) (Z-\mu) for(int fam=0;fam<fam_list.size();fam++){ if(fam_list[fam]->N_typed>0){ int cat; int mis_cat; int dim=0; for(int i=0;i<fam_list[fam]->N;i++){ cat = fam_list[fam]->cat[i]; if(geno_list[cat]->info==1 && fam_list[fam]->fam_member[i][4]==group){ mis_cat = fam_list[fam]->mis_cat[i]; dim+=missing_list[mis_cat]->dim; } } if(dim<=0) continue; for(int i=0;i<tot_dim;i++){ memset(cholent[i],0,h_size*sizeof(double)); memset(chol[i],0,tot_dim*sizeof(double)); memset(cholaug[i],0,h_size*sizeof(double)); memset(covMatrix[i],0,tot_dim*sizeof(double)); } // populate covariance matrix int nb1 = 0; int ind_1; int dim_1; int cat_1; int mis_cat_1; map<pair<int, int>, double>::iterator iterk; flag_pd = 1; for(int i=0;i<fam_list[fam]->N;i++){ cat_1 = fam_list[fam]->cat[i]; mis_cat_1 = fam_list[fam]->mis_cat[i]; dim_1 = missing_list[mis_cat_1]->dim; if(dim_1 > 0 && geno_list[cat_1]->info==1 && fam_list[fam]->fam_member[i][4]==group){ ind_1 = fam_list[fam]->fam_member[i][0]; iterk = (fam_list[fam]->kinship_coeff).find(pair<int, int>(ind_1,ind_1)); if(iterk == (fam_list[fam]->kinship_coeff).end()){ printf("No IBD coefficient for individual %d from family %d. Please check...\n\n", ind_1,fam_list[fam]->fam_id); exit(1); } for(int j=0;j<dim_1;j++) for(int k=0;k<dim_1;k++) covMatrix[nb1+j][nb1+k] = missing_list[mis_cat_1]->centmatrix[j][k]; const vector<double> hap_num = geno_list[cat_1]->get_hap_num(); for(int j=0;j<dim_1;j++){ for(int k=0;k<h_size-1;k++) cholent[nb1+j][k] = missing_list[mis_cat_1]->derivative[j][k]; cholent[nb1+j][h_size-1] = hap_num[j]; } nb1+=dim_1; } // i is typed } // complete i if(cholesky(covMatrix,dim,cholent,h_size,chol,cholaug,0) != 1){ printf("cholesky decomposition of the cov matrix failed for family %d. \n", fam_list[fam]->fam_id); flag_pd = 0; exit(1); } if(flag_pd==1){ double **s = self_colmultiply(cholaug,dim,1,h_size-1,1,h_size-1); double **t = self_colmultiply(cholaug,dim,1,h_size-1,h_size,h_size); for(int i=0;i<h_size-1;i++) for(int j=0;j<h_size-1;j++) denominator[i][j]+=s[i][j]; for(int i=0;i<h_size-1;i++) numerator[i]+=t[i][0]; clear_matrix(s,h_size-1,h_size-1); clear_matrix(t,h_size-1,1); } else{ double var; double &determ = var; double **v = inverse(covMatrix,dim,determ); double **s = quadratic_product(cholent,dim,h_size,v,dim,dim,cholent,dim,h_size); for(int i=0;i<h_size-1;i++) for(int j=0;j<h_size-1;j++) denominator[i][j]+=s[i][j]; for(int i=0;i<h_size-1;i++) numerator[i]+=s[i][h_size-1]; clear_matrix(s,h_size,h_size); clear_matrix(v,dim,dim); } } } flag_pd = 1; double **cholentmain = new double*[h_size-1]; for(int i=0;i<h_size-1;i++){ cholentmain[i] = new double[h_size]; memset(cholentmain[i],0,h_size*sizeof(double)); } double **cholmain = new double*[h_size-1]; for(int i=0;i<h_size-1;i++){ cholmain[i] = new double[h_size-1]; memset(cholmain[i],0,(h_size-1)*sizeof(double)); } double **cholaugmain = new double*[h_size-1]; for(int i=0;i<h_size-1;i++){ cholaugmain[i] = new double[h_size]; memset(cholaugmain[i],0,h_size*sizeof(double)); } for(int i=0;i<h_size-1;i++){ cholentmain[i][i] = 1; cholentmain[i][h_size-1] = numerator[i]; } if(cholesky(denominator,h_size-1,cholentmain,h_size,cholmain,cholaugmain,0) != 1){ printf("cholesky decomposition of the denominator matrix failed.\n"); flag_pd = 0; exit(1); } if(flag_pd==1){ double **s = self_colmultiply(cholaugmain,h_size-1,1,h_size-1,h_size,h_size); iter=hap_freq_table.begin(); double n=0; for(int i=0;i<h_size-1;i++){ iter->second+=s[i][0]; n+=iter->second; iter++; } iter->second = 1-n; clear_matrix(s,h_size-1,1); } else{ double var; double &determ = var; double **v = inverse(denominator,h_size-1,determ); double **w = new double*[h_size-1]; for(int i=0;i<h_size-1;i++){ w[i] = new double[1]; w[i][0] = numerator[i]; } double **s = multiply(v,h_size-1,h_size-1,w,h_size-1,1); iter=hap_freq_table.begin(); double n=0; for(int i=0;i<h_size-1;i++){ iter->second+=s[i][0]; n+=iter->second; iter++; } iter->second = 1-n; clear_matrix(s,h_size-1,1); clear_matrix(v,h_size-1,h_size-1); clear_matrix(w,h_size-1,1); } clear_matrix(cholentmain,h_size-1,h_size); clear_matrix(cholmain,h_size-1,h_size-1); clear_matrix(cholaugmain,h_size-1,h_size); update_geno_freq(); } void IQLAEstimator::estimate_subgroup_EW(){ reset_freq_table(full_hap_freq_table); int count=1; int flag; while(1){ map<int, double> of = hap_freq_table; NR_Iteration_EW(); map<int, double> nf = hap_freq_table; double diff = 0; flag = 1; map<int, double>::iterator iter=hap_freq_table.begin(); while(iter!=hap_freq_table.end()){ diff += (nf[iter->first]-of[iter->first])*(nf[iter->first]-of[iter->first]); if(nf[iter->first]<0.0000001){ flag = 0; break; } iter++; } if(diff<1e-5 || count>5 || flag==0) break; count++; } full_hap_freq_table_EW = hap_freq_table; if(aff>=15){ reset_freq_table(full_hap_freq_table); int count=1; int flag; while(1){ map<int, double> of = hap_freq_table; NR_Iteration_subgroup_EW(2); map<int, double> nf = hap_freq_table; double diff = 0; flag = 1; map<int, double>::iterator iter=hap_freq_table.begin(); while(iter!=hap_freq_table.end()){ diff += (nf[iter->first]-of[iter->first])*(nf[iter->first]-of[iter->first]); if(nf[iter->first]<0.0000001){ flag = 0; break; } iter++; } if(diff<1e-5 || count>5 || flag==0) break; count++; } aff_hap_freq_table_EW = hap_freq_table; } if(unaff>=15){ reset_freq_table(full_hap_freq_table); int count=1; int flag; while(1){ map<int, double> of = hap_freq_table; NR_Iteration_subgroup_EW(1); map<int, double> nf = hap_freq_table; double diff = 0; flag = 1; map<int, double>::iterator iter=hap_freq_table.begin(); while(iter!=hap_freq_table.end()){ diff += (nf[iter->first]-of[iter->first])*(nf[iter->first]-of[iter->first]); if(nf[iter->first]<0.0000001){ flag = 0; break; } iter++; } if(diff<1e-5 || count>5 || flag==0) break; count++; } unaff_hap_freq_table_EW = hap_freq_table; } if(unknown>=15){ reset_freq_table(full_hap_freq_table); int count=1; int flag; while(1){ map<int, double> of = hap_freq_table; NR_Iteration_subgroup_EW(0); map<int, double> nf = hap_freq_table; double diff = 0; flag = 1; map<int, double>::iterator iter=hap_freq_table.begin(); while(iter!=hap_freq_table.end()){ diff += (nf[iter->first]-of[iter->first])*(nf[iter->first]-of[iter->first]); if(nf[iter->first]<0.0000001){ flag = 0; break; } iter++; } if(diff<1e-5 || count>5 || flag==0) break; count++; } unknown_hap_freq_table_EW = hap_freq_table; } reset_freq_table(full_hap_freq_table); } void IQLAEstimator::print_subgroup_EW(vector<char> &a0v, vector<char> &a1v){ fprintf(outfp, "Haplotype frequency estimates by phenotype category (EW_a estimator)\n"); fprintf(outfp, "Haplotype\tOverall\t"); if(aff>=15) fprintf(outfp, "\tAffected"); if(unaff>=15) fprintf(outfp, "\tUnaffected"); if(unknown>=15) fprintf(outfp, "\tUnknown"); fprintf(outfp, "\n"); double sum=0, sum_aff=0, sum_unaff=0, sum_unknown=0; int ind=0, ind_aff=0, ind_unaff=0, ind_unknown=0; map<int, double>::iterator iter = hap_freq_table.begin(); while(iter!=hap_freq_table.end()){ if(full_hap_freq_table_EW[iter->first]>=0) sum+=full_hap_freq_table_EW[iter->first]; else ind=1; if(aff>=15){ if(aff_hap_freq_table_EW[iter->first]>=0) sum_aff+=aff_hap_freq_table_EW[iter->first]; else ind_aff=1; } if(unaff>=15){ if(unaff_hap_freq_table_EW[iter->first]>=0) sum_unaff+=unaff_hap_freq_table_EW[iter->first]; else ind_unaff=1; } if(unknown>=15){ if(unknown_hap_freq_table_EW[iter->first]>=0) sum_unknown+=unknown_hap_freq_table_EW[iter->first]; else ind_unknown=1; } iter++; } iter = hap_freq_table.begin(); while(iter!=hap_freq_table.end()){ translate_hap(outfp,iter->first,loci_num,a0v,a1v); if(loci_num==2) fprintf(outfp, "\t"); if(full_hap_freq_table_EW[iter->first]>=0){ if(ind==0) fprintf(outfp, "\t%.4f\t", full_hap_freq_table_EW[iter->first]); else fprintf(outfp, "\t%.4f\t", full_hap_freq_table_EW[iter->first]/sum); } else fprintf(outfp, "\t0.0000\t"); if(aff>=15){ if(aff_hap_freq_table_EW[iter->first]>=0){ if(ind_aff==0) fprintf(outfp, "\t%.4f\t", aff_hap_freq_table_EW[iter->first]); else fprintf(outfp, "\t%.4f\t", aff_hap_freq_table_EW[iter->first]/sum_aff); } else fprintf(outfp, "\t0.0000\t"); } if(unaff>=15){ if(unaff_hap_freq_table_EW[iter->first]>=0){ if(ind_unaff==0) fprintf(outfp, "\t%.4f\t", unaff_hap_freq_table_EW[iter->first]); else fprintf(outfp, "\t%.4f\t", unaff_hap_freq_table_EW[iter->first]/sum_unaff); } else fprintf(outfp, "\t0.0000\t"); } if(unknown>=15){ if(unknown_hap_freq_table_EW[iter->first]>=0){ if(ind_unknown==0) fprintf(outfp, "\t%.4f", unknown_hap_freq_table_EW[iter->first]); else fprintf(outfp, "\t%.4f", unknown_hap_freq_table_EW[iter->first]/sum_unknown); } else fprintf(outfp, "\t0.0000"); } fprintf(outfp, "\n"); iter++; } fprintf(outfp, "\n\n"); }