#include "peddata.h" PedData::PedData(){ MAXTOP = 20; snp_size = 0; sample_size = 0; chrom = 0; outfp = stdout; sigfp = stdout; sigfp_min = stdout; beg = -1; end = -1; outoption = 98; // a(97) -- choice a; b(98) -- choice b full_thresh = 15; } int PedData::run(ParamSet &ps){ fprintf(outfp, "****** Results of the Case-control Association Tests ******\n\n"); fprintf(stderr, "Loading phenotype data ... \n"); if(read_pheno(ps.phenofile)<0) return -1; fprintf(stderr, "Loading IBD coefficients ... \n"); if(load_ibdcoeff(ps.ibdfile)<0) return -1; fprintf(stderr, "Loading genotype data ... \n"); if(read_geno(ps.genofile)<0) return -1; this->beg = ps.begin; this->end = ps.end; this->outoption = ps.outoption; this->full_thresh = ps.full_thresh; fprintf(stderr, "Loading parameter ... \n"); if(load_parameter(ps.parameterfile)<0) return -1; fprintf(outfp, "The prevalence value used in the MIQLS calculations is %.6f.\n\n", prevalence); fprintf(outfp, "The number of markers in each haplotype window is %d.\n\n", nmark); if(signal_scan()<0) return -1; return 1; } int PedData::signal_scan(){ fprintf(stderr, "Starting IQLS ... \n"); int st=0, en=0; // record the order of start and end SNPs if(beg == -1 && end == -1){ st = 0; en = snp_size-1; // by default } else{ if(beg<0 || end<0 || end=marker_info[snp_size-1].position) en = snp_size-1; if(beg>marker_info[0].position){ for(int i=1;i=beg){ st = i; break; } } } if(end0;i--){ if(marker_info[i].position<=end && marker_info[i+1].position>end){ en = i; break; } } } if(en-st+1 cand_list; for(int i=st;i<=en;i++) cand_list.push_back(i); // vector * vec = genCombSet(cand_list,nmark,nmark-1); vector * vec = windSet(cand_list,nmark); vector marker_list; vector marker_list_min; for(int iter=0;itersize();iter++){ fprintf(stderr, "markers "); for(int i=0;i a0v; vector a1v; for(int i=0;i::iterator hiter = hap_freq_table.begin(); while(hiter!=hap_freq_table.end()){ if(hiter->second>=0.0000001) hap_list[hiter->first] = 0; hiter++; } vector hpcontent; hiter = hap_list.begin(); while(hiter!=hap_list.end()){ char *pl = new char[nmark+1]; memset(pl,0,nmark+1); int h = hiter->first; for(int j=0;j>1; } hpcontent.push_back(pl); hiter++; } for(int i=0;i::iterator hiter = hap_freq_table.begin(); while(hiter!=hap_freq_table.end()){ if(hiter->second<0.0000001){ flag = 0; break; } hiter++; } if(flag==1) break; else{ hap_list.clear(); for(int i=0;isecond>=0.0000001) hap_list[hiter->first] = 0; hiter++; } hiter = hap_list.begin(); while(hiter!=hap_list.end()){ char *pl = new char[nmark+1]; memset(pl,0,nmark+1); int h = hiter->first; for(int j=0;j>1; } hpcontent.push_back(pl); hiter++; } for(int i=0;i::iterator hiter = hap_freq_table.begin(); while(hiter!=hap_freq_table.end()){ if(hiter->second<0.00005){ flag = 0; break; } hiter++; } if(flag==1) break; else{ hap_list.clear(); for(int i=0;isecond>=0.00005) hap_list[hiter->first] = 0; hiter++; } hiter = hap_list.begin(); while(hiter!=hap_list.end()){ char *pl = new char[nmark+1]; memset(pl,0,nmark+1); int h = hiter->first; for(int j=0;j>1; } hpcontent.push_back(pl); hiter++; } for(int i=0;i::iterator hiter = hap_freq_table.begin(); while(hiter!=hap_freq_table.end()){ if(hiter->second<0.00005){ flag = 0; break; } hiter++; } if(flag==1) break; else{ hap_list.clear(); for(int i=0;isecond>=0.00005) hap_list[hiter->first] = 0; hiter++; } hiter = hap_list.begin(); while(hiter!=hap_list.end()){ char *pl = new char[nmark+1]; memset(pl,0,nmark+1); int h = hiter->first; for(int j=0;j>1; } hpcontent.push_back(pl); hiter++; } for(int i=0;i= marker_list.size()) top = marker_list.size(); if(outoption==97) fprintf(sigfp, "Top %d results for the full-df MIQLS_a test (list of the SNP haplotype windows having the %d smallest p-values among all SNP haplotype windows that were tested) \n\n", top,top); else if(outoption==98) fprintf(sigfp, "Top %d results for the full-df MIQLS_b test (list of the SNP haplotype windows having the %d smallest p-values among all SNP haplotype windows that were tested) \n\n", top,top); fprintf(sigfp, "**************************************************\n\n"); // printf("size of p is %d\n", marker_list.size()); std::sort(marker_list.begin(),marker_list.end(),inc_sort); for(int j=0;j fam_member; while(getline(phenodata,line)){ lc++; ins.clear(); ins.str(line); ins>>ws; if(ins.eof()) continue; if(ins >> fam_id >> ind_id >> father_id >> mother_id >> sex >> phenotype){ if(phenotype < 0 || phenotype > 2) fprintf(stderr, "individual %d from family %d does not have a phenotype value of 0, 1, or 2 ...skipped\n\n", ind_id,fam_id); else individual_list[ind_id] = -1; if(fam_id!=fam_old){ if(fam_old!=-1){ fam_list.push_back(new Family(fam_member,fam_old)); fam_member.clear(); } if(fam_map.find(fam_id)!=fam_map.end()){ fprintf(stderr, "family %d is not well ordered\n\n", fam_id); return 0; } else{ fam_map[fam_id] = fam_list.size(); fam_old=fam_id; } } int *a = new int[5]; memset(a,0,5*sizeof(int)); a[0] = ind_id; a[1] = father_id; a[2] = mother_id; a[3] = sex; a[4] = phenotype; fam_member.push_back(a); } } fam_list.push_back(new Family(fam_member,fam_old)); fam_member.clear(); for(int i=0;iAffected; n_unaff+=fam_list[i]->UnAffected; n_unknown+=fam_list[i]->Unknown; } sample_size=n_aff+n_unaff+n_unknown; fprintf(outfp, "There are %d individuals from %d independent families in the phenotype data file.\n", sample_size,fam_list.size()); fprintf(outfp, "Of these, %d are affected, %d are unaffected and %d are of unknown phenotype.\n\n", n_aff,n_unaff,n_unknown); return 1; } int PedData::load_ibdcoeff(char *ibdco_file){ int ind_1, ind_2; double coef8, coef7, coef6, coef5, coef4, coef3, coef2, coef1, coef0; int loc; map ind_fam; for(int i=0;iN;j++) ind_fam[fam_list[i]->fam_member[j][0]] = i; ifstream ibdcodata(ibdco_file); istringstream ins; string line; int lc=0; // line number in the file while(getline(ibdcodata,line)){ lc++; ins.clear(); ins.str(line); ins>>ws; if(ins.eof()) continue; if(ins >> ind_1 >> ind_2 >> coef8 >> coef7 >> coef6>> coef5 >> coef4 >> coef3 >> coef2 >> coef1 >> coef0){ if(ind_1==ind_2){ if(ind_fam.find(ind_1)==ind_fam.end()) fprintf(stderr, "Skip line %d in ibd coefficients file \"%s\"\n", lc,ibdco_file); else{ loc = ind_fam[ind_1]; if(coef8>0 || coef7>0 || coef6>0 || coef5>0 || coef4>0 || coef3>0 || coef1>0 || coef0>0 || fabs(coef2-1.0)>1e-8){ fprintf(stderr, "Skip line %d in ibd coefficients file \"%s\"\n", lc,ibdco_file); if(individual_list.find(ind_1)!=individual_list.end()) individual_list.erase(ind_1); } else fam_list[loc]->kinship_coeff[pair(ind_1,ind_2)] = 0; } } else{ if(ind_fam.find(ind_1)==ind_fam.end() || ind_fam.find(ind_2)==ind_fam.end()) fprintf(stderr, "Skip line %d in ibd coefficients file \"%s\"\n", lc,ibdco_file); else{ loc = ind_fam[ind_1]; if(ind_fam[ind_2]!=loc) fprintf(stderr, "Skip line %d in ibd coefficients file \"%s\"\n", lc,ibdco_file); else{ if(coef8>0 || coef7>0 || coef6>0 || coef5>0 || coef4>0 || coef3>0) fprintf(stderr, "Skip line %d in ibd coefficients file \"%s\"\n", lc,ibdco_file); else{ fam_list[loc]->IBD2_coeff[pair(ind_1,ind_2)] = coef2; fam_list[loc]->IBD1_coeff[pair(ind_1,ind_2)] = coef1; fam_list[loc]->IBD0_coeff[pair(ind_1,ind_2)] = coef0; fam_list[loc]->kinship_coeff[pair(ind_1,ind_2)] = coef2/2.0+coef1/4.0; fam_list[loc]->IBD2_coeff[pair(ind_2,ind_1)] = coef2; fam_list[loc]->IBD1_coeff[pair(ind_2,ind_1)] = coef1; fam_list[loc]->IBD0_coeff[pair(ind_2,ind_1)] = coef0; fam_list[loc]->kinship_coeff[pair(ind_2,ind_1)] = coef2/2.0+coef1/4.0; } } } } } } ind_fam.clear(); return 1; } int PedData::read_geno(char *geno_file){ string marker, alias; int chr, pos; char orientation, allele0, allele1; string geno; vector genotype_line; int ind_num, count=0; vector record; ifstream genodata(geno_file); istringstream ins; string line; int lc=0; // line number in the file if(getline(genodata,line)){ // get the first line ins.clear(); ins.str(line); ins>>ws; if(ins >> alias >> alias >> alias >> alias >> alias >> alias){ // rsnumber; chromosome; position; orientation; alleleA; alleleB while(ins >> ind_num){ if(individual_list.find(ind_num)!=individual_list.end()){ individual_list[ind_num] = count; count++; record.push_back(1); } else // individual is not in phenotype file record.push_back(0); } people_size = count; fprintf(outfp, "There are %d individuals listed in the genotype file, %d of whom are also in the phenotype file (excluding inbred individuals).\n\n", record.size(),people_size); } } while(getline(genodata,line)){ lc++; ins.clear(); ins.str(line); ins>>ws; if(ins.eof()) continue; if(ins >> marker >> chr >> pos >> orientation >> allele0 >> allele1){ if(chrom == 0) chrom = chr; MarkerInfo mi; mi.rsnumber = marker; mi.position = pos; if(allele0>=97) // convert to capital letter allele0-=32; if(allele1>=97) allele1-=32; // direct assignment, no assumption about allele0 < allele1 mi.allele0 = allele0; mi.allele1 = allele1; marker_info.push_back(mi); snp_size++; count = 0; while(ins >> geno){ if(record[count]==1) genotype_line.push_back(geno); count++; } if(count!=record.size()) printf("error in genofile\n"); if(people_size != genotype_line.size()){ fprintf(stderr, "Error: Unequal number of persons recorded in genotyo data file\n"); return -1; } string *cl = new string[genotype_line.size()]; for(int i=0;i geno_code; geno_code.clear(); geno_code["NN"] = -1; if(ta0=='A' && ta1=='T'){ geno_code["AA"] = 0; geno_code["AT"] = geno_code["TA"] = 1; geno_code["TT"] = 2; } else if(ta0=='A' && ta1=='C'){ geno_code["AA"] = 0; geno_code["AC"] = geno_code["CA"] = 1; geno_code["CC"] = 2; } else if(ta0=='A' && ta1=='G'){ geno_code["AA"] = 0; geno_code["AG"] = geno_code["GA"] = 1; geno_code["GG"] = 2; } else if(ta0=='T' && ta1=='A'){ geno_code["TT"] = 0; geno_code["TA"] = geno_code["AT"] = 1; geno_code["AA"] = 2; } else if(ta0=='T' && ta1=='C'){ geno_code["TT"] = 0; geno_code["TC"] = geno_code["CT"] = 1; geno_code["CC"] = 2; } else if(ta0=='T' && ta1=='G'){ geno_code["TT"] = 0; geno_code["TG"] = geno_code["GT"] = 1; geno_code["GG"] = 2; } else if(ta0=='C' && ta1=='A'){ geno_code["CC"] = 0; geno_code["CA"] = geno_code["AC"] = 1; geno_code["AA"] = 2; } else if(ta0=='C' && ta1=='T'){ geno_code["CC"] = 0; geno_code["CT"] = geno_code["TC"] = 1; geno_code["TT"] = 2; } else if(ta0=='C' && ta1=='G'){ geno_code["CC"] = 0; geno_code["CG"] = geno_code["GC"] = 1; geno_code["GG"] = 2; } else if(ta0=='G' && ta1=='A'){ geno_code["GG"] = 0; geno_code["GA"] = geno_code["AG"] = 1; geno_code["AA"] = 2; } else if(ta0=='G' && ta1=='T'){ geno_code["GG"] = 0; geno_code["GT"] = geno_code["TG"] = 1; geno_code["TT"] = 2; } else if(ta0=='G' && ta1=='C'){ geno_code["GG"] = 0; geno_code["GC"] = geno_code["CG"] = 1; geno_code["CC"] = 2; } int *data = new int[people_size]; for(int j=0;j>ws; if(ins >> kp) prevalence = kp; getline(parameterdata,line); ins.clear(); ins.str(line); ins>>ws; if(ins >> loci) nmark = loci; return 1; } void PedData::assign_gcode(){ for(int i=0;iset_gcode(nmark); } void PedData::assign_gcode(int *loci){ int *gd = new int[nmark]; int count=0; int count_aff=0; int count_unaff=0; int count_unknown=0; for(int i=0;iset_gcode(); for(int j=0;jN;j++){ int ind_num = fam_list[i]->fam_member[j][0]; if(individual_list.find(ind_num)!=individual_list.end()){ int col = individual_list[ind_num]; if(col>=0){ for(int k=0;kset_gcode(gd,j); for(int k=0;kfam_member[j][4]==2) count_aff++; else if(fam_list[i]->fam_member[j][4]==1) count_unaff++; else if(fam_list[i]->fam_member[j][4]==0) count_unknown++; break; } } } } } fam_list[i]->fill_typed(); } delete[] gd; fprintf(outfp, "There are %d individuals with genotype data for at least one SNP in the current haplotype window.\n", count); fprintf(outfp, "Of these, %d are affected, %d are unaffected and %d are of unknown phenotype. \n\n", count_aff,count_unaff,count_unknown); } void PedData::assign_info(){ for(int i=0;ifill_info_gcode(geno_list); } void PedData::unique_gcode(){ complete_geno_list.clear(); int count=0; int *gd = new int[nmark]; for(int k=0;k *complete_geno_code = geno->get_geno_code(); // complete-data genotypes count = complete_geno_code->size(); // printf("There are %d complete genotypes\n", count); for(int i=0;i > hv = complete_geno->get_hap_vec(); if(hv.size()>0){ complete_geno->compute_gname(); complete_geno_list.push_back(complete_geno); } else delete complete_geno; } delete geno; // printf("There are %d complete genotypes stored\n", complete_geno_list.size()); geno_list.clear(); map gmap; for(int i=0;iN;j++){ int code=0; for(int k=0;kgcode[j][k]; gd[k] = gc; code = 10*code+gc; } if(gmap.find(code)==gmap.end()){ gmap[code] = geno_list.size(); geno_list.push_back(new Genotype(gd,hap_list,nmark)); } fam_list[i]->set_cat(gmap[code],j); geno_list[gmap[code]]->increase_num(); } delete[] gd; /* for(int i=0;ishow_gstat(); printf("\n"); for(int i=0;ishow_gstat(); */ } void PedData::EW_a_estimator(){ HFEstimator est(fam_list,geno_list,hap_list,nmark); int count = 1; int flag; while(1){ map of = est.get_hap_freq(); est.EM_Iteration(0); map nf = est.get_hap_freq(); double diff = 0; flag = 1; map::iterator iter=nf.begin(); while(iter!=nf.end()){ diff += (nf[iter->first]-of[iter->first])*(nf[iter->first]-of[iter->first]); if(nf[iter->first]<0){ flag = 0; break; } iter++; } if(diff<1e-8 || count>100 || flag==0) break; count++; } hap_freq_table = est.get_hap_freq(); // printf("It takes %d iterations.\n", count); // est.show_hap_freq(); /* printf("EW estimator:\n"); map::iterator iter = hap_freq_table.begin(); while(iter!=hap_freq_table.end()){ printf("%d \t %.10f\n", iter->first,iter->second); iter++; } printf("\n"); */ } void PedData::CDW_a_estimator(){ // compute complete-data weight for(int i=0;icompute_weight(); // sum up for(int i=0;iN;j++){ int cat=fam_list[i]->cat[j]; geno_list[cat]->increase_count(fam_list[i]->weight[j]); } HFEstimator est(fam_list,geno_list,hap_list,nmark); est.reset_freq_table(hap_freq_table); int count = 1; int flag; while(1){ map of = est.get_hap_freq(); est.EM_Iteration(1); map nf = est.get_hap_freq(); double diff = 0; flag = 1; map::iterator iter=nf.begin(); while(iter!=nf.end()){ diff += (nf[iter->first]-of[iter->first])*(nf[iter->first]-of[iter->first]); if(nf[iter->first]<0){ flag = 0; break; } iter++; } if(diff<1e-6 || count>20 || flag==0) break; count++; } if(flag==1) hap_freq_table = est.get_hap_freq(); // printf("It takes %d iterations.\n", count); // est.show_hap_freq(); } void PedData::unique_mis_patten(const vector & hpcontent){ missing_list.clear(); map mis_map; for(int i=0;iN;j++){ int code=0; vector missing_site; for(int k=0;kgcode[j][k]; if(gc==-1){ code = 10*code+gc; missing_site.push_back(k); } else code*=10; } if(mis_map.find(code)==mis_map.end()){ mis_map[code] = missing_list.size(); missing_list.push_back(new Missing(missing_site,hap_list,hpcontent,nmark)); } fam_list[i]->set_mis_cat(mis_map[code],j); missing_list[mis_map[code]]->increase_num(); missing_site.clear(); } /* for(int i=0;idim,missing_list[i]->mapping_function.size()); printf("\n"); */ } void PedData::IQL_a_estimator(){ for(int i=0;icompute_gname(); // IQLa IQLAEstimator est(fam_list,complete_geno_list,geno_list,missing_list,hap_list,nmark); est.set_output(outfp); est.reset_freq_table(hap_freq_table); int count = 1; int flag; est.allocate(); while(1){ map of = est.get_hap_freq(); est.NR_Iteration_new(); map nf = est.get_hap_freq(); double diff = 0; flag = 1; map::iterator iter=nf.begin(); while(iter!=nf.end()){ diff += (nf[iter->first]-of[iter->first])*(nf[iter->first]-of[iter->first]); if(nf[iter->first]<0.0000001){ flag = 0; break; } iter++; } if(diff<1e-5 || count>7 || flag==0) break; count++; } hap_freq_table = est.get_hap_freq(); est.deallocate(); } void PedData::IQL_a_estimator(vector &a0v, vector &a1v){ // IQLa IQLAEstimator est(fam_list,complete_geno_list,geno_list,missing_list,hap_list,nmark); est.set_output(outfp); est.reset_freq_table(hap_freq_table); est.allocate(); // need frequencies in affected, unaffected, and unknown group est.estimate_subgroup(); est.estimate_subgroup_EW(); freq_thresh = double(full_thresh)/double(est.aff+est.unaff+est.unknown)/2.0; // compute test weight double mean = -prevalence/(1-prevalence); for(int i=0;icompute_test_weight(mean); int row = hap_freq_table.size(); hap_clus_list.clear(); int count = 0; map::iterator iter=hap_freq_table.begin(); while(iter!=hap_freq_table.end()){ hap_unit hu; hu.name = iter->first; hu.order = count; hu.freq = iter->second; hap_clus_list.push_back(hu); count++; iter++; } std::sort(hap_clus_list.begin(),hap_clus_list.end(),dec_sort); // sort based on haplotype frequencies if(row!=hap_clus_list.size()){ printf("error, missing haplotypes\n"); exit(1); } cluster = new vector(); hap_cluster(); if(est.Score_test_cluster_new(cluster)>0){ est.show_test(cluster,a0v,a1v); p = est.get_p_value(); p_min = est.get_min_p_value(); } else{ p = 1; p_min = 1; } est.deallocate(); cluster->clear(); delete cluster; /* if(hap_clus_list[row-1].freq(); hap_cluster(); est.Score_test_cluster_new(cluster); cluster->clear(); delete cluster; } else{ // association test // IQLS_a test est.Score_test_new(); } */ est.print_subgroup(a0v,a1v); est.print_subgroup_EW(a0v,a1v); } void PedData::unique_trio_gcode(){ geno_trio_list.clear(); vector triolist; oneparent = 0; for(int i=0;iN;j++){ trio m; int id_1, id_2, ind_1, ind_2; int loc; int founder=0; int cat_1, cat_2; int typed_1, typed_2; int swi=0; m.cat_o = fam_list[i]->cat[j]; m.typed_o = fam_list[i]->typed[j]; ind_1 = fam_list[i]->fam_member[j][1]; ind_2 = fam_list[i]->fam_member[j][2]; if(ind_1==0 && ind_2==0){ // j is a founder m.cat_par1 = -1; m.typed_par1 = 0; m.cat_par2 = -1; m.typed_par2 = 0; founder = 1; } else{ id_1 = fam_list[i]->member_map[ind_1]; cat_1 = fam_list[i]->cat[id_1]; typed_1 = fam_list[i]->typed[id_1]; id_2 = fam_list[i]->member_map[ind_2]; cat_2 = fam_list[i]->cat[id_2]; typed_2 = fam_list[i]->typed[id_2]; if(typed_1==typed_2){ // 00 or 11 if(cat_1<=cat_2){ m.cat_par1 = cat_1; m.typed_par1 = typed_1; m.cat_par2 = cat_2; m.typed_par2 = typed_2; } else{ m.cat_par1 = cat_2; m.typed_par1 = typed_2; m.cat_par2 = cat_1; m.typed_par2 = typed_1; swi = 1; } } else if(typed_1set_trio_cat(geno_trio_list.size(),j); triolist.push_back(m); if(founder==1) geno_trio_list.push_back(new Genotypetrio(geno_list[m.cat_o],m.typed_o,0,0,nmark)); else geno_trio_list.push_back(new Genotypetrio(geno_list[m.cat_o],geno_list[m.cat_par1],geno_list[m.cat_par2],m.typed_o,m.typed_par1,m.typed_par2,nmark)); geno_trio_list[geno_trio_list.size()-1]->increase_num(); if(oneparent==0){ if((typed_1==1 && typed_2==0) || (typed_1==0 && typed_2==1)) oneparent = 1; } } else{ fam_list[i]->set_trio_cat(loc,j); geno_trio_list[loc]->increase_num(); } } } triolist.clear(); } void PedData::unique_mis_trio_patten(const vector & hpcontent){ missing_trio_list.clear(); vector mis_triolist; for(int i=0;iN;j++){ trio m; int id_1, id_2, ind_1, ind_2; int loc; int founder=0; int cat_1, cat_2; int typed_1, typed_2; int swi=0; m.cat_o = fam_list[i]->mis_cat[j]; m.typed_o = fam_list[i]->typed[j]; ind_1 = fam_list[i]->fam_member[j][1]; ind_2 = fam_list[i]->fam_member[j][2]; if(ind_1==0 && ind_2==0){ // j is a founder m.cat_par1 = -1; m.typed_par1 = 0; m.cat_par2 = -1; m.typed_par2 = 0; founder = 1; } else{ id_1 = fam_list[i]->member_map[ind_1]; cat_1 = fam_list[i]->mis_cat[id_1]; typed_1 = fam_list[i]->typed[id_1]; id_2 = fam_list[i]->member_map[ind_2]; cat_2 = fam_list[i]->mis_cat[id_2]; typed_2 = fam_list[i]->typed[id_2]; if(typed_1==typed_2){ // 00 or 11 if(cat_1<=cat_2){ m.cat_par1 = cat_1; m.typed_par1 = typed_1; m.cat_par2 = cat_2; m.typed_par2 = typed_2; } else{ m.cat_par1 = cat_2; m.typed_par1 = typed_2; m.cat_par2 = cat_1; m.typed_par2 = typed_1; swi = 1; } } else if(typed_1set_mis_trio_cat(missing_trio_list.size(),j); mis_triolist.push_back(m); if(founder==1) missing_trio_list.push_back(new Missingtrio(missing_list[m.cat_o],m.typed_o,0,0,hap_list,hpcontent,nmark)); else missing_trio_list.push_back(new Missingtrio(missing_list[m.cat_o],missing_list[m.cat_par1],missing_list[m.cat_par2],m.typed_o,m.typed_par1,m.typed_par2,hap_list,hpcontent,nmark)); missing_trio_list[missing_trio_list.size()-1]->increase_num(); } else{ fam_list[i]->set_mis_trio_cat(loc,j); missing_trio_list[loc]->increase_num(); } fam_list[i]->set_mis_par_order(swi,j); } } mis_triolist.clear(); } void PedData::IQL_b_estimator(){ IQLBEstimator est(fam_list,geno_trio_list,missing_trio_list,hap_list,nmark,oneparent); est.set_output(outfp); est.reset_freq_table(hap_freq_table); int count = 1; int flag; est.allocate(); while(1){ map of = est.get_hap_freq(); est.NR_Iteration_new(); map nf = est.get_hap_freq(); double diff = 0; flag = 1; map::iterator iter=nf.begin(); while(iter!=nf.end()){ diff += (nf[iter->first]-of[iter->first])*(nf[iter->first]-of[iter->first]); if(nf[iter->first]<0.0000001){ flag = 0; break; } iter++; } if(diff<1e-5 || count>5 || flag==0) break; count++; } hap_freq_table = est.get_hap_freq(); est.deallocate(); } void PedData::IQL_b_estimator(vector &a0v, vector &a1v){ IQLBEstimator est(fam_list,geno_trio_list,missing_trio_list,hap_list,nmark,oneparent); est.set_output(outfp); est.reset_freq_table(hap_freq_table); est.allocate(); // need frequencies in affected, unaffected, and unknown group est.estimate_subgroup(); est.estimate_subgroup_EW(); freq_thresh = double(full_thresh)/double(est.aff+est.unaff+est.unknown)/2.0; // compute test weight double mean = -prevalence/(1-prevalence); for(int i=0;icompute_test_weight(mean); fam_list[i]->compute_test_trio_weight(mean); } /* // compute test trio weight double mean = -prevalence/(1-prevalence); for(int i=0;icompute_test_trio_weight(mean); */ int row = hap_freq_table.size(); hap_clus_list.clear(); int count = 0; map::iterator iter=hap_freq_table.begin(); while(iter!=hap_freq_table.end()){ hap_unit hu; hu.name = iter->first; hu.order = count; hu.freq = iter->second; hap_clus_list.push_back(hu); count++; iter++; } std::sort(hap_clus_list.begin(),hap_clus_list.end(),dec_sort); // sort based on haplotype frequencies if(row!=hap_clus_list.size()){ printf("error, missing haplotypes\n"); exit(1); } cluster = new vector(); hap_cluster(); if(est.Score_test_cluster_new(cluster)>0){ est.show_test(cluster,a0v,a1v); p = est.get_p_value(); p_min = est.get_min_p_value(); } else{ p = 1; p_min = 1; } est.deallocate(); cluster->clear(); delete cluster; /* if(hap_clus_list[row-1].freq(); hap_cluster(); est.Score_test_cluster_new(cluster); cluster->clear(); delete cluster; } else{ // association test // IQLS_b test est.Score_test_new(); } */ /* if(hap_clus_list[row-1].freq>=freq_thresh){ if(est.aff>0){ iter = hap_freq_table.begin(); double min_hap = est.aff_hap_freq_table[iter->first]; iter++; while(iter!=hap_freq_table.end()){ if(est.aff_hap_freq_table[iter->first]first]; iter++; } if(2*est.aff*min_hap<5) fprintf(outfp, "The full-df p-value might not be exact because of the small number of expected haplotype counts in affecteds\n"); } if(est.unaff>=est.unknown){ if(est.unaff>0){ iter = hap_freq_table.begin(); double min_hap = est.unaff_hap_freq_table[iter->first]; iter++; while(iter!=hap_freq_table.end()){ if(est.unaff_hap_freq_table[iter->first]first]; iter++; } if(2*est.unaff*min_hap<5) fprintf(outfp, "The full-df p-value might not be exact because of the small number of expected haplotype counts in unaffecteds\n"); } } else{ if(est.unknown>0){ iter = hap_freq_table.begin(); double min_hap = est.unknown_hap_freq_table[iter->first]; iter++; while(iter!=hap_freq_table.end()){ if(est.unknown_hap_freq_table[iter->first]first]; iter++; } if(2*est.unknown*min_hap<5) fprintf(outfp, "The full-df p-value might not be exact because of the small number of expected haplotype counts in unknowns\n"); } } fprintf(outfp, "\n"); } */ est.print_subgroup(a0v,a1v); est.print_subgroup_EW(a0v,a1v); } void PedData::hap_cluster(){ int row = hap_freq_table.size(); // determin the common haplotypes double max_freq = hap_clus_list[0].freq; double cut_off = freq_thresh; if(cut_off>max_freq) cut_off = max_freq; int count = 1; for(int i=1;i=cut_off) count++; else break; } // printf("There are %d common haplotypes\n", count); // fprintf(outfp, "%d\t", count); for(int i=0;ipush_back(a); } int *a = new int[row-count+1]; memset(a,-1,(row-count+1)*sizeof(int)); a[0] = 0; // record how many haplotypes in the last cluster cluster->push_back(a); // cluster rare haplotypes for(int i=count;i>k; int n = (hap_clus_list[j].name&mask)>>k; if(s!=n) mutation++; } if(mutation==0) printf("error, the two haplotypes are the same\n"); else if(mutation==1){ // sign i to j int loc = (*cluster)[j][0]; (*cluster)[j][loc+1] = hap_clus_list[i].order; (*cluster)[j][0]++; sign = 1; break; } } if(sign==0){ // sign i to the last cluster int loc = (*cluster)[count][0]; (*cluster)[count][loc+1] = hap_clus_list[i].order; (*cluster)[count][0]++; } } /* for(int i=0;i<=count;i++){ printf("cluster %d: ", i+1); for(int j=0;j<=(*cluster)[i][0];j++) printf("%d, ", (*cluster)[i][j]); printf("\n"); } printf("\n"); */ } void PedData::print_data() { for(int i=0;iprint(); } vector *PedData::genCombSet(vector & all, const int tsize, int index) { // terminating condition if(index==0) { vector *vec = new vector(); for(int i=0;ipush_back(a1); } //printf("returned\n"); return vec; } // else recursive run vector *nvec = new vector(); for(int i=0;i nall = all; nall.erase(nall.begin(),nall.begin()+i+1); vector *vec = genCombSet(nall,tsize,index-1); for(int j=0;jsize();j++) { (*vec)[j][index] = all[i]; nvec->push_back((*vec)[j]); } vec->clear(); delete vec; } return nvec; } vector *PedData::windSet(vector & all, const int tsize) { vector *vec = new vector(); int stop = all.size()-tsize+1; for(int i=0;ipush_back(a1); } return vec; } void PedData::mem_clean() { for(int i=0;i & s_vec, const trio & m) { for(int i=0;i b.freq; }