#include "includes.h" #include "grid.h" #include "vector.h" #include <fstream.h> extern ofstream errorfs; #include "declare.h" void makejf3(dgrid &jointfreq3mkv2, const dgrid &jointfreq2, const dgrid &freqfreq, const ivectors &allele_list, const int &debug) { int i,j,k,l; const int numloc=allele_list.dim(); int dum; double value; //initialize three-locus-haplotype frequencies object //find number of rows needed int max=0; for (l=0;l<numloc-2;l++) { dum=allele_list[l].dim()*allele_list[l+1].dim()*allele_list[l+2].dim(); if (dum>max) max=dum; } //cout << "max " << max << endl; jointfreq3mkv2.init(max,numloc-2); //cout << jointfreq3mkv2.dimx() << " " << jointfreq3mkv2.dimy() << endl; jointfreq3mkv2.clear(0); //fill in for (l=0;l<numloc-2;l++) { for (i=0;i<allele_list[l].dim();i++) { for (j=0;j<allele_list[l+1].dim();j++) { for (k=0;k<allele_list[l+2].dim();k++) { if (freqfreq(j,l+1)>0) value= jointfreq2(i*allele_list[l+1].dim()+j,l)* jointfreq2(j*allele_list[l+2].dim()+k,l+1)/freqfreq(j,l+1); else value=0.0; jointfreq3mkv2(i*allele_list[l+1].dim()*allele_list[l+2].dim()+ j*allele_list[l+2].dim()+ k,l)=value; } } } } //check that all columns sum to 1 for (l=0;l<jointfreq3mkv2.dimy();l++) { value=0.0; for (i=0;i<jointfreq3mkv2.dimx();i++) value+=jointfreq3mkv2(i,l); if ( !(0.999<value && value <1.001) ) { errorfs << "ERROR: jointfreq3mkv2 hap freqs at loc " << l <<" (+1) and two following loci do not sum to 1 (" << value << ")" << endl; exit(1); } } if (debug) { cout << "jointfreq3mkv2 (from two-loc hap freqs) " << endl; for (j=0;j<jointfreq3mkv2.dimy();j++) { for (i=0;i<jointfreq3mkv2.dimx();i++) cout << jointfreq3mkv2(i,j) << " "; cout << endl; } } } void makejf3f(dgrid &jointfreq3fmkv2, const dgrid &jointfreq3mkv2, const ivectors &allele_list, const int &debug) { int i,j,k,l; const int numloc=allele_list.dim(); int dum; double value; //initialize object //find number of rows needed int max=0; for (l=0;l<numloc-2;l++) { dum=allele_list[l].dim()*allele_list[l+2].dim(); if (dum>max) max=dum; } //cout << "max " << max << endl; jointfreq3fmkv2.init(max,numloc-2); // cout << jointfreq3mkv2.dimx() << " " << jointfreq3mkv2.dimy() << endl; jointfreq3fmkv2.clear(0); //fill in for (l=0;l<numloc-2;l++) { for (i=0;i<allele_list[l].dim();i++) { for (k=0;k<allele_list[l+2].dim();k++) { value=0.0; for (j=0;j<allele_list[l+1].dim();j++) value+= jointfreq3mkv2(i*allele_list[l+1].dim()*allele_list[l+2].dim()+ j*allele_list[l+2].dim()+ k,l); jointfreq3fmkv2(i*allele_list[l+2].dim()+k,l)=value; } } } if (debug) { cout << "jointfreq3fmkv2 (from three-loc hap freqs) " << endl; for (j=0;j<jointfreq3fmkv2.dimy();j++) { for (i=0;i<jointfreq3fmkv2.dimx();i++) cout << jointfreq3fmkv2(i,j) << " "; cout << endl; } } //check that all columns sum to 1 for (l=0;l<jointfreq3fmkv2.dimy();l++) { value=0.0; for (i=0;i<jointfreq3fmkv2.dimx();i++) value+=jointfreq3fmkv2(i,l); if ( !(0.999<value && value <1.001) ) { errorfs << "ERROR: jointfreq3fmkv2 hap freqs at loc " << l <<" (+1) and two following loci do not sum to 1 (" << value << ")" << endl; exit(1); } } } void makeaugjointfreq4mkv2(dgrid &jointfreq4mkv2, const dgrid &jointfreq3mkv2, const int &C) { int i; for (i=0;i<jointfreq3mkv2.dimx();i++) { if (C>1) jointfreq4mkv2(i,0)=jointfreq3mkv2(i,C-2); if (C<jointfreq3mkv2.dimy()+1) jointfreq4mkv2(i,1)=jointfreq3mkv2(i,C-1); } } void makeaugjointfreq4fmkv2(dgrid &jointfreq4fmkv2, const dgrid &jointfreq3fmkv2, const int &C) { int i; for (i=0;i<jointfreq3fmkv2.dimx();i++) { if (C>1) jointfreq4fmkv2(i,0)=jointfreq3fmkv2(i,C-2); if (C<jointfreq3fmkv2.dimy()+1) jointfreq4fmkv2(i,1)=jointfreq3fmkv2(i,C-1); } } double getjointfreq4fmkv2(const igrid &freqallele, const ivector &allelecounts, const dgrid &jointfreq4fmkv2, const int &ilocus, const int &iallele, const int &jallele, const int &C) { //this function retrieves a conditional allele frequency int k; int ipos=-1, jpos=-1; int pos4=1; if (C-ilocus==2) pos4=0; //find positions for iallele and jallele for (k=0;k<allelecounts(ilocus);k++) { if (freqallele(k,ilocus)==iallele) { ipos=k; break; } } for (k=0;k<allelecounts(ilocus+3);k++) { if (freqallele(k,ilocus+3)==jallele) { jpos=k; break; } } if (ipos==-1) errorfs << "WARNING: Allele " << iallele << " not found at locus " << ilocus << " +1 by getjointfreq4fmkv2()" << endl; if (jpos==-1) errorfs << "WARNING: Allele " << jallele << " not found at locus " << ilocus+3 << " +1 by getjointfreq4fmkv2()" << endl; if ( !(-0.0001 < jointfreq4fmkv2(ipos*allelecounts(ilocus+3)+jpos,pos4) && 1.0001 > jointfreq4fmkv2(ipos*allelecounts(ilocus+3)+jpos,pos4) ) ) errorfs <<"WARNING: freq. retrieved from jointfreq4fmkv2 is <=0" << endl; return jointfreq4fmkv2(ipos*allelecounts(ilocus+3)+jpos,pos4); } double getjointfreq4mkv2(const igrid &freqallele, const ivector &allelecounts, const dgrid &jointfreq4mkv2, const int &ilocus, const int &iallele, const int &jallele, const int &kallele, const int &C) { //this function retrieves a conditional allele frequency int l; int ipos=-1, jpos=-1, kpos=-1; int pos4=1; if (C-ilocus==2) pos4=0; //find positions for iallele and jallele and kallele for (l=0;l<allelecounts(ilocus );l++) { if (freqallele(l,ilocus )==iallele) { ipos=l; break; } } for (l=0;l<allelecounts(ilocus+1+pos4);l++) { if (freqallele(l,ilocus+1+pos4)==jallele) { jpos=l; break; } } for (l=0;l<allelecounts(ilocus+3);l++) { if (freqallele(l,ilocus+3)==kallele) { kpos=l; break; } } if (ipos==-1) errorfs << "WARNING: Allele " << iallele << " not found at locus " << ilocus << " +1 by getjointfreq4mkv2()" << endl; if (jpos==-1) errorfs << "WARNING: Allele " << jallele << " not found at locus " << ilocus+1+pos4 << " +1 by getjointfreq4mkv2()" << endl; if (kpos==-1) errorfs << "WARNING: Allele " << kallele << " not found at locus " << ilocus+3 << " +1 by getjointfreq4mkv2()" << endl; if ( !(-0.0001<jointfreq4mkv2(ipos* allelecounts(ilocus+1+pos4)* allelecounts(ilocus+3)+ jpos* allelecounts(ilocus+3)+ kpos ,pos4) && 1.0001>jointfreq4mkv2(ipos* allelecounts(ilocus+1+pos4)* allelecounts(ilocus+3)+ jpos* allelecounts(ilocus+3)+ kpos ,pos4) ) ) errorfs <<"WARNING: freq. retrieved from jointfreq4mkv2 is <=0" << endl; return jointfreq4mkv2(ipos* allelecounts(ilocus+1+pos4)* allelecounts(ilocus+3)+ jpos* allelecounts(ilocus+3)+ kpos ,pos4); } void augmentdatamkv2(dgrid &augjointfreq3mkv2, const dgrid &jointfreq3mkv2, dgrid &augjointfreq3fmkv2, const dgrid &jointfreq3fmkv2, const dgrid &jointfreq2, const dgrid &freqfreq, const ivector &allelecounts, const int &C, const int &debug) { int i,j,k,l; double value=0.0; //MAKE augjointfreq3mkv2 augjointfreq3mkv2.clear(0.0); //make augjointfreq3mkv2 //make columns 0,...,C-3 for (j = 0; j < C-2; j++) { for (i=0; i< jointfreq3mkv2.dimx(); i++) { augjointfreq3mkv2(i,j)=jointfreq3mkv2(i,j); } } //make column C-2 if (C>1) { for (i=0; i<jointfreq3mkv2.dimx();i++) { augjointfreq3mkv2(i,C-2)=0; } for (i=0;i<allelecounts(C-2);i++) { for (j=0; j<allelecounts(C-1); j++) { augjointfreq3mkv2(i*allelecounts(C-1)+j,C-2)= jointfreq2(i*allelecounts(C-1)+j,C-2); } } } //make column C-1 for (i=0; i<jointfreq3mkv2.dimx();i++) { augjointfreq3mkv2(i,C-1)=0; } for (i=0;i<allelecounts(C-1);i++) { for (k=0;k<allelecounts(C);k++) { augjointfreq3mkv2(i*allelecounts(C)+k,C-1)= jointfreq2(i*allelecounts(C)+k,C-1); } } //make column C if (C<allelecounts.dim()-1) { for (i=0; i<jointfreq3mkv2.dimx();i++) { augjointfreq3mkv2(i,C)=0; } for (i=0;i<allelecounts(C);i++) { for (j=0;j<allelecounts(C+1);j++) { augjointfreq3mkv2(i*allelecounts(C+1)+j,C)= jointfreq2(i*allelecounts(C+1)+j,C); } } } //make rest of object for (j=C+1; j<augjointfreq3mkv2.dimy(); j++) { for (i=0; i< jointfreq3mkv2.dimx(); i++) { augjointfreq3mkv2(i,j)=jointfreq3mkv2(i,j-1); } } //check that all columns sum to 1 for (l=0;l<augjointfreq3mkv2.dimy();l++) { value=0.0; for (i=0;i<augjointfreq3mkv2.dimx();i++) value += augjointfreq3mkv2(i,l); if ( !(0.999 < value && value < 1.001) ) { errorfs << "ERROR: column " << l << " (+1) in augjointfreq3mkv2 " << "does not sum to 1 (" << value << ")" << endl; exit(1); } } if (debug==1) { cout << "augjointfreq3mkv2" << endl; for (i=0;i<augjointfreq3mkv2.dimy();i++) { for (j=0;j<augjointfreq3mkv2.dimx();j++) { cout << augjointfreq3mkv2(j,i) << " "; } cout << endl; } } //MAKE augjointfreq3fmkv2 augjointfreq3fmkv2.clear(0.0); //make columns 0,...,C-3 for (j = 0; j < C-2; j++) { for (i=0; i< jointfreq3fmkv2.dimx(); i++) { augjointfreq3fmkv2(i,j)=jointfreq3fmkv2(i,j); } } //make column C-2 if (C>1) { for (i=0; i<jointfreq3fmkv2.dimx();i++) { augjointfreq3fmkv2(i,C-2)=0; } for (i=0;i<allelecounts(C-2);i++) { augjointfreq3fmkv2(i,C-2)= freqfreq(i,C-2); } } //make column C-1 for (i=0; i<jointfreq3fmkv2.dimx();i++) { augjointfreq3fmkv2(i,C-1)=0; } for (i=0;i<allelecounts(C-1);i++) { for (k=0;k<allelecounts(C);k++) { //cout << i*allelecounts(C)+k << " " << C-1 << " " // << augjointfreq3fmkv2.dimx() << " " << augjointfreq3fmkv2.dimy() <<" " // << i*allelecounts(C)+k << " " << C-1 << " " // << jointfreq2.dimx() << " " << jointfreq2.dimy() << endl; augjointfreq3fmkv2(i*allelecounts(C)+k,C-1)= jointfreq2(i*allelecounts(C)+k,C-1); } } //make column C if (C<allelecounts.dim()-1) { for (i=0; i<jointfreq3fmkv2.dimx();i++) { augjointfreq3fmkv2(i,C)=0; } for (i=0;i<allelecounts(C+1);i++) { augjointfreq3fmkv2(i,C)=freqfreq(i,C+1); } } //make rest of object for (j=C+1; j<augjointfreq3fmkv2.dimy(); j++) { for (i=0; i< jointfreq3fmkv2.dimx(); i++) { augjointfreq3fmkv2(i,j)=jointfreq3fmkv2(i,j-1); } } if (debug==1) { cout << "augjointfreq3fmkv2" << endl; for (i=0;i<augjointfreq3fmkv2.dimy();i++) { for (j=0;j<augjointfreq3fmkv2.dimx();j++) { cout << augjointfreq3fmkv2(j,i) << " "; } cout << endl; } } //check that all columns sum to 1 for (l=0;l<augjointfreq3fmkv2.dimy();l++) { value=0.0; for (i=0;i<augjointfreq3fmkv2.dimx();i++) value += augjointfreq3fmkv2(i,l); if ( !(0.999 < value && value < 1.001) ) { errorfs << "ERROR: column " << l << " (+1) in augjointfreq3fmkv2 " << "does not sum to 1 (" << value << ")" << endl; exit(1); } } } void makecondfreq3lmkv2(dgrid &condfreq3lmkv2, const igrid &freqallele, const dgrid &jointfreq2, const dgrid &jointfreq3mkv2, const ivector &allelecounts, const int &C, const int &debug) { //this function takes 3-marker hap. frequencies and marginal //allele frequencies and forms conditional haplotype frequencies //where the allele on which we condition is on the left condfreq3lmkv2.clear(); int i,j,k,t; double sum=0.0; for (t=0;t<C-1;t++) { for (i=0;i<allelecounts(t);i++) { for (j=0;j<allelecounts(t+1);j++) { for (k=0;k<allelecounts(t+2);k++) { if (jointfreq2(i*allelecounts(t+1)+j,t) >0.0) condfreq3lmkv2(i*allelecounts(t+1)*allelecounts(t+2)+ j*allelecounts(t+2)+ k,t) =jointfreq3mkv2(i*allelecounts(t+1)*allelecounts(t+2)+ j*allelecounts(t+2)+ k,t)/ jointfreq2(i*allelecounts(t+1)+j,t); } } } } if (debug==1) { cout << "condfreq3lmkv2" << endl; for (i=0;i<condfreq3lmkv2.dimy();i++) { for (j=0;j<condfreq3lmkv2.dimx();j++) { cout << condfreq3lmkv2(j,i) << " "; } cout << endl; } } //assert that all entries are between 0 and 1 for (i=0;i<condfreq3lmkv2.dimx();i++) { for (j=0;j<condfreq3lmkv2.dimy();j++) { if ( !(-0.00001<condfreq3lmkv2(i,j) && condfreq3lmkv2(i,j)<1.00001) ) { errorfs << "ERROR: condfreq3lmkv2 has entry outside unit interval" << endl; exit(1); } } } //check that all sum to 1 for (t=0;t<C-1;t++) { for (i=0;i<allelecounts(t);i++) { for (j=0;j<allelecounts(t+1);j++) { sum=0.0; for (k=0;k<allelecounts(t+2);k++) sum+=condfreq3lmkv2(i*allelecounts(t+1)*allelecounts(t+2)+ j*allelecounts(t+2)+ k,t); if ( !( (0.999<sum && sum<1.001) || sum==0.0 ) ) { errorfs<< "ERROR: condfreq3lmkv2 does not sum to 1.0 (" << sum << ") for some haplotype" << endl; exit(1); } } } } } void makecondfreq3rmkv2(dgrid &condfreq3rmkv2, const igrid &freqallele, const dgrid &jointfreq2, const dgrid &jointfreq3mkv2, const ivector &allelecounts, const int &C, const int &debug) { //this function takes 3-marker hap. frequencies and marginal //haplotype frequencies and forms conditional allele frequencies //where the allele on which we condition is on the right int i,j,k,t; double sum=0.0; condfreq3rmkv2.clear(0); const int numloc=freqallele.dimy(); for (t=C;t<numloc-2;t++) { for (i=0;i<allelecounts(t);i++) { for (j=0;j<allelecounts(t+1);j++) { for (k=0;k<allelecounts(t+1);k++) { if ( (jointfreq2(j*allelecounts(t+2)+k,t+1)>0.0) ) condfreq3rmkv2(i*allelecounts(t+1)*allelecounts(t+2)+ j*allelecounts(t+2)+ k,t) =jointfreq3mkv2(i*allelecounts(t+1)*allelecounts(t+2)+ j*allelecounts(t+2)+ k,t)/ jointfreq2(j*allelecounts(t+2)+k,t+1); } } } } if (debug==1) { cout << "condfreq3rmkv2" << endl; for (i=0;i<condfreq3rmkv2.dimy();i++) { for (j=0;j<condfreq3rmkv2.dimx();j++) { cout << condfreq3rmkv2(j,i) << " "; } cout << endl; } } //assert that all entries are between 0 and 1 for (i=0;i<condfreq3rmkv2.dimx();i++) { for (j=0;j<condfreq3rmkv2.dimy();j++) { if ( !(-0.00001<condfreq3rmkv2(i,j) && condfreq3rmkv2(i,j)<1.00001) ) { errorfs << "ERROR: condfreq3rmkv2 has entry outside unit interval:" << condfreq3rmkv2(i,j) << " " << i << " " << j << endl; exit(1); } } } //check that all sum to 1 for (t=C;t<numloc-2;t++) { for (j=0;j<allelecounts(t+1);j++) { for (k=0;k<allelecounts(t+2);k++) { sum=0.0; for (i=0;i<allelecounts(t);i++) sum+=condfreq3rmkv2(i*allelecounts(t+1)*allelecounts(t+2)+ j*allelecounts(t+2)+ k,t); if ( !( (0.999<sum && sum<1.001) || sum==0.0) ) { errorfs<< "ERROR: condfreq3rmkv2 does not sum to 1.0 (" << sum << ") for some haplotype" << endl; exit(1); } } } } } void makecondfreq3fmkv2(dgrid &condfreq3fmkv2, const dgrid &jointfreq3fmkv2, const dgrid &freqfreq, const ivector &allelecounts, const int &C, const int &debug) { int i,j,k,l; condfreq3fmkv2.clear(0.0); const int numloc=allelecounts.dim(); double sum=0.0; for (l=0; l<C-2;l++) { for (i=0;i<allelecounts(l);i++) { for (k=0;k<allelecounts(l+2);k++) { condfreq3fmkv2(i*allelecounts(l+2)+k,l)= jointfreq3fmkv2(i*allelecounts(l+2)+k,l)/ freqfreq(i,l); } } } for (l=C+1;l<numloc-2;l++) { for (i=0;i<allelecounts(l);i++) { for (k=0;k<allelecounts(l+2);k++) { condfreq3fmkv2(i*allelecounts(l+2)+k,l)= jointfreq3fmkv2(i*allelecounts(l+2)+k,l)/ freqfreq(k,l+2); } } } if (debug) { cout << "condfreq3fmkv2" << endl; for (j=0;j<condfreq3fmkv2.dimy();j++) { for (i=0;i<condfreq3fmkv2.dimx();i++) cout << condfreq3fmkv2(i,j) << " "; cout << endl; } } //check that sum to 1 for each allele for (l=0;l<condfreq3fmkv2.dimy();l++) { if ( l<C-2 ) { for (i=0;i<allelecounts(l);i++) { sum=0.0; for (k=0;k<allelecounts(l+2);k++) sum+=condfreq3fmkv2(i*allelecounts(l+2)+k,l); if ( !(0.999<sum && sum <1.001) ) { errorfs << "ERROR: condfreq3fmkv2 has allele for which probs. do not " << "sum to 1.0 (" << sum << ")" << endl; exit(1); } } } if (l>C) { for (k=0;k<allelecounts(l+2);k++) { sum=0.0; for (i=0;i<allelecounts(l);i++) sum+=condfreq3fmkv2(i*allelecounts(l+2)+k,l); if ( !(0.999<sum && sum <1.001) ) { errorfs << "ERROR: condfreq3fmkv2 has allele for which probs. do not " << "sum to 1.0 (" << sum << ")" << endl; exit(1); } } } } } void makenullsharemkv2(dgrid &obsprobs, const igrid &data, const igrid &freqallele, const dgrid &freqfreq, const dgrid &condfreq2l, const dgrid &condfreq2r, const dgrid &condfreq3fmkv2, const dgrid &condfreq3lmkv2, const dgrid &condfreq3rmkv2, const ivector &allelecounts, const int &LE, const int &RE, const int &C, const int &debug) { //this function generates parts of observation probabilities for HMM //that do not involve parameters, i.e., conditional allele frequencies const int numhap=data.dimx(); int l,h; int LHSle, LHSre, RHSle, RHSre; obsprobs.clear(0.0); if (LE<C && C<RE) { LHSle=LE; LHSre=C-1; RHSle=C+1; RHSre=RE; } else { if (C<LE) { LHSle=-1,LHSre=-1; RHSle=LE; RHSre=RE; } else {//RE<C LHSle=LE; LHSre=RE; RHSle=-1,RHSre=-1; } } for (h=0;h<numhap;h++) { //make LHS if (LHSle>=0) { //make column of allele frequencies if (data(h,LHSle)==0) obsprobs(2*h+1,LHSle)=1; else obsprobs(2*h+1,LHSle)= getfreq(freqallele, freqfreq, data(h,LHSle), LHSle); //make column of conditional allele frequencies if (LHSre-LHSle>0) { if (data(h,LHSle+1)==0) obsprobs(2*h+1,LHSle+1)=1; else { if (data(h,LHSle)==0) obsprobs(2*h+1,LHSle+1)= getfreq(freqallele, freqfreq, data(h,LHSle+1), LHSle+1); else obsprobs(2*h+1,LHSle+1)=getcondfreq2l(freqallele, allelecounts, condfreq2l, LHSle, data(h,LHSle), data(h,LHSle+1)); } if (LHSre-LHSle>1) { for (l=LHSle+2;l<=LHSre;l++) { if (data(h,l)==0) obsprobs(2*h+1,l)=1; else { if (data(h,l-1)==0) { if (data(h,l-2)==0) { obsprobs(2*h+1,l)= getfreq(freqallele, freqfreq, data(h,l), l); } else {//data(h,l-2)!=0) obsprobs(2*h+1,l)= getcondfreq3fmkv2(freqallele, allelecounts, condfreq3fmkv2, l-2, data(h,l-2), data(h,l) ); } } else {//data(h,l-1)!=0 if (data(h,l-2)==0) { obsprobs(2*h+1,l)= getcondfreq2l(freqallele, allelecounts, condfreq2l, l-1, data(h,l-1), data(h,l)); } else {//data(h,l-2)!=0) obsprobs(2*h+1,l)= getcondfreq3mkv2(freqallele, allelecounts, condfreq3lmkv2, l-2, data(h,l-2), data(h,l-1), data(h,l)); } } } } } } } //make RHS if (RHSre>=0) { //make column of allele frequencies if (data(h,RHSre)==0) obsprobs(2*h+1,RHSre)=1; else obsprobs(2*h+1,RHSre)= getfreq(freqallele, freqfreq, data(h,RHSre), RHSre); //make column of conditional allele frequencies if (RHSre-RHSle>0) { if (data(h,RHSre-1)==0) obsprobs(2*h+1,RHSre-1)=1; else { if (data(h,RHSre)==0) obsprobs(2*h+1,RHSre-1)= getfreq(freqallele, freqfreq, data(h,RHSre-1), RHSre-1); else obsprobs(2*h+1,RHSre-1)=getcondfreq2r(freqallele, allelecounts, condfreq2r, RHSre-1, data(h,RHSre-1), data(h,RHSre), C); } if (RHSre-RHSle>1) { for (l=RHSre-2;l>=RHSle;l--) { if (data(h,l)==0) obsprobs(2*h+1,l)=1; else { if (data(h,l+1)==0) { if (data(h,l+2)==0) { obsprobs(2*h+1,l)= getfreq(freqallele, freqfreq, data(h,l), l); } else {//data(h,l+2)!=0) //delete me obsprobs(2*h+1,l)= getcondfreq3fmkv2(freqallele, allelecounts, condfreq3fmkv2, l, data(h,l), data(h,l+2) ); } } else {//data(h,l+1)!=0 if (data(h,l+2)==0) { obsprobs(2*h+1,l)= getcondfreq2r(freqallele, allelecounts, condfreq2r, l, data(h,l), data(h,l+1), C); } else {//data(h,l+2)!=0) obsprobs(2*h+1,l)= getcondfreq3mkv2(freqallele, allelecounts, condfreq3rmkv2, l, data(h,l), data(h,l+1), data(h,l+2)); } } } } } } } } if (debug) { cout << "obsprobs (N: haplotype frequencies)" << endl; for (l=0;l<obsprobs.dimy();l++) { for (h=0;h<obsprobs.dimx()/2;h++) cout << obsprobs(2*h+1,l) << " "; cout << endl; } } } double getcondfreq3fmkv2(const igrid &freqallele, const ivector &allelecounts, const dgrid &condfreq3fmkv2, const int &ilocus, const int &iallele, const int &jallele) { //this function retrieves a conditional allele frequency int k; int ipos=-1, jpos=-1; //find positions for iallele and jallele for (k=0;k<allelecounts(ilocus);k++) { if (freqallele(k,ilocus)==iallele) { ipos=k; break; } } for (k=0;k<allelecounts(ilocus+2);k++) { if (freqallele(k,ilocus+2)==jallele) { jpos=k; break; } } if (ipos==-1) errorfs << "WARNING: Allele " << iallele << " not found at locus " << ilocus << " +1 by getcondfreq2l()" << endl; if (jpos==-1) errorfs << "WARNING: Allele " << jallele << " not found at locus " << ilocus+2 << " +1 by getcondfreq2l()" << endl; if ( !(-0.0001<condfreq3fmkv2(ipos*allelecounts(ilocus+2)+jpos,ilocus) && 1.0001>condfreq3fmkv2(ipos*allelecounts(ilocus+2)+jpos,ilocus) ) ) errorfs <<"WARNING: freq. retrieved from condfreq3fmkv2 is <=0" << endl; return condfreq3fmkv2(ipos*allelecounts(ilocus+2)+jpos,ilocus); } double getcondfreq3mkv2(const igrid &freqallele, const ivector &allelecounts, const dgrid &condfreq3mkv2, const int &ilocus, const int &iallele, const int &jallele, const int &kallele) { //this function retrieves a conditional allele frequency int l; int ipos=-1, jpos=-1, kpos=-1; //find positions for iallele and jallele and kallele for (l=0;l<allelecounts(ilocus );l++) { if (freqallele(l,ilocus )==iallele) { ipos=l; break; } } for (l=0;l<allelecounts(ilocus+1);l++) { if (freqallele(l,ilocus+1)==jallele) { jpos=l; break; } } for (l=0;l<allelecounts(ilocus+2);l++) { if (freqallele(l,ilocus+2)==kallele) { kpos=l; break; } } if (ipos==-1) errorfs << "WARNING: Allele " << iallele << " not found at locus " << ilocus << " +1 by getcondfreq3mkv2()" << endl; if (jpos==-1) errorfs << "WARNING: Allele " << jallele << " not found at locus " << ilocus+1 << " +1 by getcondfreq3mkv2()" << endl; if (kpos==-1) errorfs << "WARNING: Allele " << kallele << " not found at locus " << ilocus+2 << " +1 by getcondfreq3mkv2()" << endl; //delete me // cout << condfreq3mkv2.dimx() << " " << condfreq3mkv2.dimy() << " " // << ipos << " " << jpos << " " << kpos << " " // << allelecounts(ilocus+1) << " " << allelecounts(ilocus+2) << " " // << ipos*allelecounts(ilocus+1)*allelecounts(ilocus+2)+ //jpos*allelecounts(ilocus+2)+kpos << endl; assert(condfreq3mkv2(ipos* allelecounts(ilocus+1)* allelecounts(ilocus+2)+ jpos* allelecounts(ilocus+2)+ kpos ,ilocus)>0); if ( !(-0.0001<condfreq3mkv2(ipos* allelecounts(ilocus+1)* allelecounts(ilocus+2)+ jpos* allelecounts(ilocus+2)+ kpos ,ilocus) && 1.0001>condfreq3mkv2(ipos* allelecounts(ilocus+1)* allelecounts(ilocus+2)+ jpos* allelecounts(ilocus+2)+ kpos ,ilocus) ) ) { errorfs << ilocus << " " << ipos << " " << jpos << " " << kpos << " " << iallele << " " << jallele << " " << kallele << " " << condfreq3mkv2(ipos*allelecounts(ilocus+1)* allelecounts(ilocus+2)+ jpos*allelecounts(ilocus+2)+kpos,ilocus) << endl; errorfs <<"WARNING: freq. retrieved from condfreq3mkv2 is " << endl << "not in unit interval" << endl; exit(1);} return condfreq3mkv2(ipos* allelecounts(ilocus+1)* allelecounts(ilocus+2)+ jpos* allelecounts(ilocus+2)+ kpos ,ilocus); } void makealphamkv2(dgrid &alpha, const dgrid &initstore, const dgrid &transstore, const dgrid &obsprobs, const dvector &obsprobsAstore, const int &C, const int &LE, const int &RE) { //assembles alpha object int i,j,h,t; const int numhap=alpha.dimx()/2; int LHSle, LHSre, RHSle, RHSre; double sum; //these are endpoints for t, rather than t+1 if (LE<C && C<RE) { LHSle=LE; LHSre=C-2; RHSle=C; RHSre=RE-1; } else { if (C<LE) { LHSle=0,LHSre=-1; RHSle=LE; RHSre=RE-1; } else {//RE<C LHSle=LE; LHSre=RE-1; RHSle=0,RHSre=-1; } } for (h=0;h<numhap;h++) { if (C<LE) { //make column(s) C for (i=0;i<2;i++) { alpha(2*h+i,C)=1.0; } //make column(s) LE for (j=0;j<2;j++) { sum=0; for (i=0;i<2;i++) { sum+= alpha(2*h+i,C)*transstore(2*i+j,C)*obsprobs(2*h+j,LE); } alpha(2*h+j,LE)=sum; } } else {//LE<C //make column(s) LE for (i=0;i<2;i++) { alpha(2*h+i,LE)=initstore(i,0)*obsprobs(2*h+i,LE); } } //make column(s) LE+1:C-1 or LE+1:RE for (t=LHSle;t<=LHSre;t++) { for (j=0;j<2;j++) { sum=0; for (i=0;i<2;i++) { sum+= alpha(2*h+i,t)*transstore(2*i+j,t)*obsprobs(2*h+j,t+1); } alpha(2*h+j,t+1)=sum; } } if (LE<C && C<RE) { //make column(s) C for (j=0;j<2;j++) { sum=0; for (i=0;i<2;i++) { sum+=alpha(2*h+i,C-1)*transstore(2*i+j,C-1); } sum*=obsprobsAstore(2*h+j); if (initstore(j,1)!=0) sum/=initstore(j,1); alpha(2*h+j,C)=sum; } /* //delete me if (C==1 && h==45){ cout << "blue " << alpha(2*h+1,0) << " " << transstore(2*1+1,0) << " " << initstore(1,1) << " " << obsprobsAstore(2*h+1) << endl; exit(1);} */ } //make column(s) C+1:RE or LE+1:RE for (t=RHSle;t<=RHSre;t++) { for (j=0;j<2;j++) { sum=0; for (i=0;i<2;i++) { sum+= alpha(2*h+i,t)*transstore(2*i+j,t)*obsprobs(2*h+j,t+1); } alpha(2*h+j,t+1)=sum; } } if (RE<C) { //make column(s) C for (j=0;j<2;j++) { if (initstore(j,1)==0) alpha(2*h+j,C)=0; else { sum=0; for (i=0;i<2;i++) { sum+= alpha(2*h+i,RE)*transstore(2*i+j,RE); } sum/=initstore(j,1); alpha(2*h+j,C)=sum; } } } } for (h=0;h<alpha.dimx()/2;h++) { for (i=0;i<2;i++) { if ( C<LE || RE<C) if ( !(-0.00001<alpha(2*h+i,C) && alpha(2*h+i,C)<1.00001) ) { errorfs << "alp out of bounds " << h << " " << alpha(2*h+i,C) << endl; exit(1); } for (j=LE;j<=RE;j++) { if ( !(-0.00001<alpha(2*h+i,j) && alpha(2*h+i,j)<1.00001) ) { errorfs << "alp out of bounds " << h << " " << j << " " << alpha(2*h+i,j) << endl; exit(1); } } } } for (h=0;h<numhap;h++) { for (i=0;i<2;i++) { if (C<LE || RE<C) if ( !(-0.0001<=alpha(2*h+i,C) && alpha(2*h+i,C)<=1.0001) ) { errorfs << "ERROR: alpha(2*" << h << "+" << i << ",C) is not between 0 and 1 (" << alpha(2*h+i,C) << ")" << endl; exit(1); } for (j=LE;j<=RE;j++) if ( !(-0.0001<=alpha(2*h+i,j) && alpha(2*h+i,j)<=1.0001) ) { errorfs << "ERROR: alpha(2*" << h << "+" << i << "," << j <<") is not between 0 and 1 (" << alpha(2*h+i,j) << ")" << endl; exit(1); } } } } void makebetamkv2(dgrid &beta, const dgrid &initstore, const dgrid &transstore, const dgrid &transstoreC, const dgrid &obsprobs, const dvector &obsprobsAstore, const int &C, const int &LE, const int &RE) { //assembles beta object int i,j,h,t; const int numhap=beta.dimx()/2; int LHSle, LHSre, RHSle, RHSre; double sum; if (LE<C && C<RE) { LHSle=LE; LHSre=C-2; RHSle=C; RHSre=RE-1; } else { if (C<LE) { LHSle=0,LHSre=-1; RHSle=LE; RHSre=RE-1; } else {//RE<C LHSle=LE; LHSre=RE-1; RHSle=0,RHSre=-1; } } for (h=0;h<numhap;h++) { if (RE<C) { //make column(s) C for (i=0;i<2;i++) { beta(2*h+i,C)=initstore(i,2); } //make column(s) RE for (i=0;i<2;i++) { beta(2*h+i,RE)=1; }//bc condition on matching at C in haps. See notes! } else {//C<RE //make column(s) RE for (i=0;i<2;i++) { beta(2*h+i,RE)= initstore(i,2); } } //make column(s) RE-1:C or RE-1:LE for (t=RHSre;t>=RHSle;t--) { for (i=0;i<2;i++) { sum=0; for (j=0;j<2;j++) { sum+= beta(2*h+j,t+1)*transstore(2*i+j,t)*obsprobs(2*h+j,t+1); } beta(2*h+i,t)=sum; } } if (LE<C && C<RE) { //make column(s) C-1 for (i=0;i<2;i++) { sum=0; for (j=0;j<2;j++) { sum+=beta(2*h+j,C)*transstoreC(2*i+j,0)*obsprobsAstore(2*h+j); } if (initstore(i,3)!=0) { sum/=initstore(i,3); } beta(2*h+i,C-1)=sum; } } //make column(s) C-2:LE or RE-1:LE for (t=LHSre;t>=LHSle;t--) { for (i=0;i<2;i++) { sum=0; for (j=0;j<2;j++) { sum+= beta(2*h+j,t+1)*transstore(2*i+j,t)*obsprobs(2*h+j,t+1); } beta(2*h+i,t)=sum; } } if (C<LE) { //make column(s) C for (i=0;i<2;i++) { sum=0; for (j=0;j<2;j++) { sum+= beta(2*h+j,LE)*transstore(2*i+j,C)*obsprobs(2*h+j,LE); } beta(2*h+i,C)=sum; } } } for (h=0;h<numhap;h++) { for (i=0;i<2;i++) { if (C<LE || RE<C) if ( !(-0.0001<=beta(2*h+i,C) && beta(2*h+i,C)<=1.0001) ) { errorfs << "ERROR: beta(2*" << h << "+" << i << ",C) is not between 0 and 1 (" << beta(2*h+i,C) << ")" << endl; exit(1); } for (j=LE;j<=RE;j++) if ( !(-0.0001<=beta(2*h+i,j) && beta(2*h+i,j)<=1.0001) ) { errorfs << "ERROR: beta(2*" << h << "+" << i << "," << j <<") is not between 0 and 1 (" << beta(2*h+i,j) << ")" << endl; exit(1); } } } } double getfreqSMART(const igrid &freqallele, const dgrid &freqfreq, const int &allele, const int &locus) { if (allele==0) return 1.0; else return getfreq(freqallele, freqfreq, allele, locus); } double getcondfreq2lSMART(const igrid &freqallele, const dgrid &freqfreq, const ivector &allelecounts, const dgrid &jointfreq2, const int &l, const int &allelel, const int &allelelp1) { if (allelelp1==0) return 1.0; else { if (allelel==0) return getfreq(freqallele, freqfreq, allelelp1, l+1); else { if (getcondfreq2l(freqallele, allelecounts, jointfreq2, l, allelel, allelelp1)<1e-100 && getfreq(freqallele, freqfreq, allelel, l)<1e-100) return 0.0; else return getcondfreq2l(freqallele, allelecounts, jointfreq2, l, allelel, allelelp1)/ getfreq(freqallele, freqfreq, allelel, l); } } } double getcondfreq2rSMART(const igrid &freqallele, const dgrid &freqfreq, const ivector &allelecounts, const dgrid &jointfreq2, const int &l, const int &allelel, const int &allelelp1) { if (allelel==0) return 1.0; else { if (allelelp1==0) return getfreq(freqallele, freqfreq, allelel, l); else { if (getcondfreq2l(freqallele, allelecounts, jointfreq2, l, allelel, allelelp1)<1e-100 && getfreq(freqallele, freqfreq, allelelp1, l+1)<1e-100) return 0.0; else return getcondfreq2l(freqallele, allelecounts, jointfreq2, l, allelel, allelelp1)/ getfreq(freqallele, freqfreq, allelelp1, l+1); } } } double getcondfreq3lSMART(const igrid &freqallele, const dgrid &freqfreq, const ivector &allelecounts, const dgrid &jointfreq2, const dgrid &jointfreq3mkv2, const dgrid &jointfreq3fmkv2, const int &l, const int &allelel, const int &allelelp1, const int &allelelp2) { if (allelelp2==0) return 1.0; else { if (allelelp1==0) { if (allelel==0) { return getfreq(freqallele, freqfreq, allelelp2, l+2); } else { if (getcondfreq3fmkv2(freqallele, allelecounts, jointfreq3fmkv2, l, allelel, allelelp2 )<1e-100 && getfreq(freqallele, freqfreq, allelel, l)<1e-100) return 0; else return getcondfreq3fmkv2(freqallele, allelecounts, jointfreq3fmkv2, l, allelel, allelelp2 )/ getfreq(freqallele, freqfreq, allelel, l); } } else { if (allelel==0) { if (getcondfreq2l(freqallele, allelecounts, jointfreq2, l+1, allelelp1, allelelp2)<1e-100 && getfreq(freqallele, freqfreq, allelelp1, l+1)<1e-100) return 0.0; else return getcondfreq2l(freqallele, allelecounts, jointfreq2, l+1, allelelp1, allelelp2)/ getfreq(freqallele, freqfreq, allelelp1, l+1); } else { if (getcondfreq3mkv2(freqallele, allelecounts, jointfreq3mkv2, l, allelel, allelelp1, allelelp2)<1e-100 && getcondfreq2l(freqallele, allelecounts, jointfreq2, l, allelel, allelelp1)<1e-100) return 0; else return getcondfreq3mkv2(freqallele, allelecounts, jointfreq3mkv2, l, allelel, allelelp1, allelelp2)/ getcondfreq2l(freqallele, allelecounts, jointfreq2, l, allelel, allelelp1); } } } } double getcondfreq3rSMART(const igrid &freqallele, const dgrid &freqfreq, const ivector &allelecounts, const dgrid &jointfreq2, const dgrid &jointfreq3mkv2, const dgrid &jointfreq3fmkv2, const int &l, const int &allelel, const int &allelelp1, const int &allelelp2) { if (allelel==0) return 1.0; else { if (allelelp1==0) { if (allelelp2==0) { return getfreq(freqallele, freqfreq, allelel, l); } else { if (getcondfreq3fmkv2(freqallele, allelecounts, jointfreq3fmkv2, l, allelel, allelelp2 )<1e-100 && getfreq(freqallele, freqfreq, allelelp2, l+2)<1e-100) return 0.0; else return getcondfreq3fmkv2(freqallele, allelecounts, jointfreq3fmkv2, l, allelel, allelelp2 )/ getfreq(freqallele, freqfreq, allelelp2, l+2); } } else { if (allelelp2==0) { if (getcondfreq2l(freqallele, allelecounts, jointfreq2, l, allelel, allelelp1) <1e-100 && getfreq(freqallele, freqfreq, allelelp1, l+1)<1e-100) return 0.0; else return getcondfreq2l(freqallele, allelecounts, jointfreq2, l, allelel, allelelp1)/ getfreq(freqallele, freqfreq, allelelp1, l+1); } else { if (getcondfreq3mkv2(freqallele, allelecounts, jointfreq3mkv2, l, allelel, allelelp1, allelelp2)<1e-100 && getcondfreq2l(freqallele, allelecounts, jointfreq2, l+1, allelelp1, allelelp2)<1e-100) return 0.0; else return getcondfreq3mkv2(freqallele, allelecounts, jointfreq3mkv2, l, allelel, allelelp1, allelelp2)/ getcondfreq2l(freqallele, allelecounts, jointfreq2, l+1, allelelp1, allelelp2); } } } } double getcondfreq4lSMART(const igrid &freqallele, const dgrid &freqfreq, const ivector &allelecounts, const dgrid &jointfreq2, const dgrid &jointfreq3mkv2, const dgrid &jointfreq4mkv2, const dgrid &jointfreq4fmkv2, const int &l, const int &allelel, const int &allelelp1, const int &allelelp2, const int &allelelp3, const int &C) { if (allelelp3==0) return 1.0; else { if (C-l==2) { if (allelelp1==0) { if (allelel==0) { return getfreq(freqallele, freqfreq, allelelp3, l+3); } else { if (getjointfreq4fmkv2(freqallele, allelecounts, jointfreq4fmkv2, l, allelel, allelelp3, C)<1e-100 && getfreq(freqallele, freqfreq, allelel, l)<1e-100) return 0.0; else return getjointfreq4fmkv2(freqallele, allelecounts, jointfreq4fmkv2, l, allelel, allelelp3, C)/ getfreq(freqallele, freqfreq, allelel, l); } } else { if (allelel==0) { if (getcondfreq3mkv2(freqallele, allelecounts, jointfreq3mkv2, l+1, allelelp1, allelelp2, allelelp3)<1e-100 && getfreq(freqallele, freqfreq, allelelp1, l+1)<1e-100) return 0.0; else return getcondfreq3mkv2(freqallele, allelecounts, jointfreq3mkv2, l+1, allelelp1, allelelp2, allelelp3)/ getfreq(freqallele, freqfreq, allelelp1, l+1); } else { if (getjointfreq4mkv2(freqallele, allelecounts, jointfreq4mkv2, l, allelel, allelelp1, allelelp3, C)<1e-100 && getcondfreq2l(freqallele, allelecounts, jointfreq2, l, allelel, allelelp1)<1e-100) return 0.0; else return getjointfreq4mkv2(freqallele, allelecounts, jointfreq4mkv2, l, allelel, allelelp1, allelelp3, C)/ getcondfreq2l(freqallele, allelecounts, jointfreq2, l, allelel, allelelp1); } } } else {//C-l==1 if (allelelp2==0) { if (allelel==0) { return getfreq(freqallele, freqfreq, allelelp3, l+3); } else { if (getjointfreq4fmkv2(freqallele, allelecounts, jointfreq4fmkv2, l, allelel, allelelp3, C)<1e-100 && getfreq(freqallele, freqfreq, allelel, l)<1e-100) return 0.0; else return getjointfreq4fmkv2(freqallele, allelecounts, jointfreq4fmkv2, l, allelel, allelelp3, C)/ getfreq(freqallele, freqfreq, allelel, l); } } else { if (allelel==0) { if (getcondfreq3mkv2(freqallele, allelecounts, jointfreq3mkv2, l+1, allelelp1, allelelp2, allelelp3)<1e-100 && getfreq(freqallele, freqfreq, allelelp2, l+2)<1e-100) return 0.0; else return getcondfreq3mkv2(freqallele, allelecounts, jointfreq3mkv2, l+1, allelelp1, allelelp2, allelelp3)/ getfreq(freqallele, freqfreq, allelelp2, l+2); } else { if (getjointfreq4mkv2(freqallele, allelecounts, jointfreq4mkv2, l, allelel, allelelp2, allelelp3, C)<1e-100 && getcondfreq3mkv2(freqallele, allelecounts, jointfreq3mkv2, l, allelel, allelelp1, allelelp2)<1e-100) return 0.0; else return getjointfreq4mkv2(freqallele, allelecounts, jointfreq4mkv2, l, allelel, allelelp2, allelelp3, C)/ getcondfreq3mkv2(freqallele, allelecounts, jointfreq3mkv2, l, allelel, allelelp1, allelelp2);} } } } } double getcondfreq4rSMART(const igrid &freqallele, const dgrid &freqfreq, const ivector &allelecounts, const dgrid &jointfreq2, const dgrid &jointfreq3mkv2, const dgrid &jointfreq4mkv2, const dgrid &jointfreq4fmkv2, const int &l, const int &allelel, const int &allelelp1, const int &allelelp2, const int &allelelp3, const int &C) { if (allelel==0) return 1.0; else { if (C-l==2) { if (allelelp1==0) { if (allelelp3==0) { return getfreq(freqallele, freqfreq, allelel, l); } else { if (getjointfreq4fmkv2(freqallele, allelecounts, jointfreq4fmkv2, l, allelel, allelelp3, C)<1e-100 && getfreq(freqallele, freqfreq, allelelp3, l+3)<1e-100) return 0.0; else return getjointfreq4fmkv2(freqallele, allelecounts, jointfreq4fmkv2, l, allelel, allelelp3, C)/ getfreq(freqallele, freqfreq, allelelp3, l+3); } } else { if (allelelp3==0) { if (getcondfreq2l(freqallele, allelecounts, jointfreq2, l, allelel, allelelp1)<1e-100 && getfreq(freqallele, freqfreq, allelelp1, l+1)<1e-100) return 0.0; else return getcondfreq2l(freqallele, allelecounts, jointfreq2, l, allelel, allelelp1)/ getfreq(freqallele, freqfreq, allelelp1, l+1); } else { if (getjointfreq4mkv2(freqallele, allelecounts, jointfreq4mkv2, l, allelel, allelelp1, allelelp3, C)<1e-100 && getcondfreq3mkv2(freqallele, allelecounts, jointfreq3mkv2, l+1, allelelp1, allelelp2, allelelp3)<1e-100) return 0.0; else return getjointfreq4mkv2(freqallele, allelecounts, jointfreq4mkv2, l, allelel, allelelp1, allelelp3, C)/ getcondfreq3mkv2(freqallele, allelecounts, jointfreq3mkv2, l+1, allelelp1, allelelp2, allelelp3); } } } else {//C-l==1 if (allelelp2==0) { if (allelelp3==0) { return getfreq(freqallele, freqfreq, allelel, l); } else { if (getjointfreq4fmkv2(freqallele, allelecounts, jointfreq4fmkv2, l, allelel, allelelp3, C)<1e-100 && getfreq(freqallele, freqfreq, allelelp3, l+3)<1e-100) return 0.0; else return getjointfreq4fmkv2(freqallele, allelecounts, jointfreq4fmkv2, l, allelel, allelelp3, C)/ getfreq(freqallele, freqfreq, allelelp3, l+3); } } else { if (allelelp3==0) { if (getcondfreq3mkv2(freqallele, allelecounts, jointfreq3mkv2, l, allelel, allelelp1, allelelp2)<1e-100 && getfreq(freqallele, freqfreq, allelelp2, l+2)<1e-100) return 0.0; else return getcondfreq3mkv2(freqallele, allelecounts, jointfreq3mkv2, l, allelel, allelelp1, allelelp2)/ getfreq(freqallele, freqfreq, allelelp2, l+2); } else { if (getjointfreq4mkv2(freqallele, allelecounts, jointfreq4mkv2, l, allelel, allelelp2, allelelp3, C)<1e-100 && getcondfreq3mkv2(freqallele, allelecounts, jointfreq3mkv2, l+1, allelelp1, allelelp2, allelelp3)<1e-100) return 0.0; else return getjointfreq4mkv2(freqallele, allelecounts, jointfreq4mkv2, l, allelel, allelelp2, allelelp3, C)/ getcondfreq3mkv2(freqallele, allelecounts, jointfreq3mkv2, l+1, allelelp1, allelelp2, allelelp3); } } } } } void makeobsprobsAstore(dvector &obsprobsAstore, const igrid &data, const ivector &allelecounts, const igrid &freqallele, const dgrid &freqfreq, const dgrid &jointfreq2, const dgrid &jointfreq3mkv2, const dgrid &jointfreq3fmkv2, const dgrid &jointfreq4mkv2, const dgrid &jointfreq4fmkv2, const int &LE, const int &RE, const int &C, const int &debug) { //assumes LE<C<RE, assumes every haplotype shares common allele at 0 (C) int h; const int numhap=obsprobsAstore.dim()/2; for (h=0;h<numhap;h++) { obsprobsAstore(2*h)=1.0; if (LE<C-1 && RE>C+1) { /* //delete me if (C==16 && h==11) { cout << data(h,C-2) << " " << data(h,C-1) << " " << data(h,C) << " " << data(h,C+1) << " " << data(h,C+2) << " " << "obsA " << getcondfreq4lSMART(freqallele, freqfreq, allelecounts, jointfreq2, jointfreq3mkv2, jointfreq4mkv2, jointfreq4fmkv2, C-1, data(h,C-1), data(h,C), data(h,C+1), data(h,C+2), C) << " " << getcondfreq4lSMART(freqallele, freqfreq, allelecounts, jointfreq2, jointfreq3mkv2, jointfreq4mkv2, jointfreq4fmkv2, C-2, data(h,C-2), data(h,C-1), data(h,C), data(h,C+1), C) << " " << getfreqSMART(freqallele,freqfreq,data(h,C+1),C+1) << " " << getcondfreq2lSMART(freqallele,freqfreq, allelecounts,jointfreq2, C+1,data(h,C+1),data(h,C+2)) << endl; exit(1);} */ obsprobsAstore(2*h+1)= getcondfreq4lSMART(freqallele, freqfreq, allelecounts, jointfreq2, jointfreq3mkv2, jointfreq4mkv2, jointfreq4fmkv2, C-2, data(h,C-2), data(h,C-1), data(h,C), data(h,C+1), C)* getcondfreq4lSMART(freqallele, freqfreq, allelecounts, jointfreq2, jointfreq3mkv2, jointfreq4mkv2, jointfreq4fmkv2, C-1, data(h,C-1), data(h,C), data(h,C+1), data(h,C+2), C)/ ( getfreqSMART(freqallele, freqfreq, data(h,C+1), C+1)* getcondfreq2lSMART(freqallele, freqfreq, allelecounts, jointfreq2, C+1, data(h,C+1), data(h,C+2)) ); } else { if (LE<C-1 && RE==C+1) { obsprobsAstore(2*h+1)= getcondfreq4lSMART(freqallele, freqfreq, allelecounts, jointfreq2, jointfreq3mkv2, jointfreq4mkv2, jointfreq4fmkv2, C-2, data(h,C-2), data(h,C-1), data(h,C), data(h,C+1), C)/ getfreqSMART(freqallele, freqfreq, data(h,C+1), C+1); } else { if (LE==C-1 && RE>C+1) { obsprobsAstore(2*h+1)= getcondfreq3lSMART(freqallele,freqfreq,allelecounts, jointfreq2, jointfreq3mkv2,jointfreq3fmkv2, C-1, data(h,C-1),data(h,C),data(h,C+1))* getcondfreq4lSMART(freqallele, freqfreq, allelecounts, jointfreq2, jointfreq3mkv2, jointfreq4mkv2, jointfreq4fmkv2, C-1, data(h,C-1), data(h,C), data(h,C+1), data(h,C+2),C)/ ( getfreqSMART(freqallele, freqfreq, data(h,C+1), C+1)* getcondfreq2lSMART(freqallele, freqfreq, allelecounts, jointfreq2, C+1, data(h,C+1), data(h,C+2)) ); } else { obsprobsAstore(2*h+1)= getcondfreq3lSMART(freqallele, freqfreq, allelecounts, jointfreq2, jointfreq3mkv2, jointfreq3fmkv2, C-1, data(h,C-1),data(h,C),data(h,C+1))/ getfreqSMART(freqallele, freqfreq, data(h,C+1), C+1); } } } } if (debug) { cout << "ObsprobsAstore" << endl; for (h=0;h<obsprobsAstore.dim();h++) cout << obsprobsAstore(h) << " "; cout << endl; } //check that all are positive for (h=0;h<obsprobsAstore.dim();h++) { if ( !(0<obsprobsAstore(h)) ) { errorfs << "ERROR: obsprobsAstore(" << h << ") is nonpositive (" << obsprobsAstore(h) << ")" << endl; exit(1); } } } void makejf3fromh3fl(dgrid &jointfreq3mkv2, const dvectors &h3_freq_lists, const int &debug) { int l,i,j; const int numloc=h3_freq_lists.dim()+2; int dum=0; double value=0.0; //initialize three-locus-haplotype frequencies object //find number of rows needed int max=0; for (l=0;l<numloc-2;l++) { dum=h3_freq_lists[l].dim(); if (dum>max) max=dum; } //cout << "max " << max << endl; jointfreq3mkv2.init(max,numloc-2); // cout << jointfreq3mkv2.dimx() << " " << jointfreq3mkv2.dimy() << endl; jointfreq3mkv2.clear(0); //fill in for (l=0;l<numloc-2;l++) { for (i=0;i<h3_freq_lists[l].dim();i++) { jointfreq3mkv2(i,l)=h3_freq_lists[l][i]; } } //check that all columns sum to 1 for (l=0;l<jointfreq3mkv2.dimy();l++) { value=0.0; for (i=0;i<jointfreq3mkv2.dimx();i++) value+=jointfreq3mkv2(i,l); if ( !(0.999<value && value <1.001) ) { errorfs << "ERROR: jointfreq3mkv2 hap freqs at loc " << l <<" (+1) and two following loci do not sum to 1 (" << value << ")" << endl; exit(1); } } if (debug) { cout << "jointfreq3mkv2" << endl; for (j=0;j<jointfreq3mkv2.dimy();j++) { for (i=0;i<jointfreq3mkv2.dimx();i++) cout << jointfreq3mkv2(i,j) << " "; cout << endl; } } } void makejf2fromjf3(dgrid &jointfreq2, const dgrid &jointfreq3mkv2, ivectors &a_list, const int &debug) { int i,j,k,l; const int numloc=a_list.dim(); jointfreq2.clear(0.0); double sum=0.0; int dum; //initialize jointfreq2 //find number of rows needed int max=0; for (l=0;l<numloc-1;l++) { dum=a_list[l].dim()*a_list[l+1].dim(); if (dum>max) max=dum; } //cout << "max " << max << endl; jointfreq2.init(max,numloc-1); // cout << jointfreq3mkv2.dimx() << " " << jointfreq3mkv2.dimy() << endl; jointfreq2.clear(0); for (l=0;l<numloc-2;l++) { for (i=0;i<a_list[l ].dim();i++) { for (j=0;j<a_list[l+1].dim();j++) { for (k=0;k<a_list[l+2].dim();k++) { jointfreq2(i*a_list[l+1].dim()+j,l)+= jointfreq3mkv2(i*a_list[l+1].dim()*a_list[l+2].dim()+ j*a_list[l+2].dim()+ k,l); if (l==numloc-3) jointfreq2(j*a_list[l+2].dim()+k,l+1)+= jointfreq3mkv2(i*a_list[l+1].dim()*a_list[l+2].dim()+ j*a_list[l+2].dim()+ k,l); } } } } if (debug) { cout << "jointfreq2" << endl; for (i=0;i<jointfreq2.dimx();i++) { for (l=0;l<jointfreq2.dimy();l++) { cout << jointfreq2(i,l) << " "; } cout << endl; } } //check that sum to 1 for (l=0;l<numloc-1;l++) { sum=0.0; for (i=0;i<jointfreq2.dimx();i++) sum+=jointfreq2(i,l); if ( !(0.999<sum && sum<1.001) ) { errorfs << "ERROR: jointfreq2 column " << l << "does not sum to 1.0 (" << sum << ")" << endl; exit(1); } } } void makeindjointfreq3(dgrid &jointfreq3, const ivector &allelecountsff, const dgrid &freqfreq, const int &num_markers, const int &debug) { //when only allele frequencies are available, this creates array //of 3-marker haplotype frequencies ASSUMING LINKAGE EQUILIBRIUM int t,rw,rwtemp,i,j,k; //find number of rows needed for jointfreq3 rw=allelecountsff(0)*allelecountsff(1)*allelecountsff(2); for (t=1;t<num_markers-2;t++) { rwtemp=allelecountsff(t)*allelecountsff(t+1)*allelecountsff(t+2); if (rwtemp>rw) rw=rwtemp; } //initialize jointfreq2; jointfreq3.init(rw,num_markers-2); //fill in jointfreq3 for (t=0;t<num_markers-2;t++) { for (i=0;i<allelecountsff(t);i++) { for (j=0;j<allelecountsff(t+1);j++) { for (k=0;k<allelecountsff(t+2);k++) { jointfreq3( allelecountsff(t+1)*allelecountsff(t+2)*i+ allelecountsff(t+2)*j+k,t )= freqfreq(i,t)*freqfreq(j,t+1)*freqfreq(k,t+2); } } } } if (debug==1) { cout << "jointfreq3" << endl; cout << jointfreq3.dimx() << " " << jointfreq3.dimy() << endl; for (i=0;i<jointfreq3.dimx();i++) { for (j=0;j<jointfreq3.dimy();j++) { cout << jointfreq3(i,j) << " "; } cout << endl; } } }//makeindjointfreq3