/* This version allows some parameters to be fixed, allows for no CIs, CIs just for segregation parameters, or CIs for all parameters. Allows specification of starting values for EM algorithm. Allows equality relationships to be set separately for the deposit 1 and deposit 2 parameters, simulations!!*/ #include #include #include #include int main(int argc,char *argv[]) { int i,j,k,l, **x, **y, **z, **code, **imatrix(), n2loc, n1loc, ndon, nal; void makexyzcode(int **x,int **y,int **z,int **code); int *sind2, *sind1, **pind2, **pind1; double **twoloc, **oneloc; double **twolocmp, **dmatrix(), *onelocrf, *dvector(); int **amal2, **coal2, **amal1, **coal1; double output[21], prpheno0[16], **prpheno1, **prpheno2, mpp[4]; double denom[16], tmp[16], tmpt[16], ltmp[16],*s, **p, *am, *co; double *pin, *amin, *coin, **prpheno11, **prpheno21; int *ivector(), **x1, **y1, **z1, **code1; void makeonexyzcode(int **x1,int **y1,int **z1,int **code1); double loglike[1]; double **exp2, **exp1, *sden, **pden, *amden, *coden, *maxloglike; int *sfix, **pfix, *amfix, *cofix; double *sval, **pval, *amval, *coval, *maxvals, quan[1]; int *ssetstart, **psetstart, *amsetstart, *cosetstart; double *sstart, **pstart, *amstart, *costart; void spermanalysisrfssdem(int ndon,int nal,int n2loc,double **twoloc, int *sind2,int **pind2,int **amal2,int **coal2,double **twolocmp, int n1loc,double **oneloc,int *sind1,int **pind1,int **amal1,int **coal1, double *onelocrf,int **x,int **y,int **z,int **code,double *output, double *prpheno0,double **prpheno1,double **prpheno2,double *mpp, double *denom,double *tmp,double *tmpt,double *ltmp,double *s, double **p,double *am,double *co,double *pin,double *amin,double *coin, double **prpheno11,double **prpheno21,int **x1,int **y1,int **z1, int **code1,double *loglike,double **exp2,double **exp1,double *sden, double **pden,double *amden,double *coden,double *maxloglike,int *sfix, int **pfix,int *amfix,int *cofix,double *sval,double **pval, double *amval,double *coval,double *sstart,double **pstart,double *amstart,double *costart,double *dconv,int *niter,int simind, int **ptype,double *dtmp); void spermanalysisfssdioemcis(int ndon,int nal,int n2loc,double **twoloc, int *sind2,int **pind2,int **amal2,int **coal2,double **twolocmp, int n1loc,double **oneloc,int *sind1,int **pind1,int **amal1,int **coal1, double *onelocrf,int **x,int **y,int **z,int **code,double *output, double *prpheno0,double **prpheno1,double **prpheno2,double *mpp, double *denom,double *tmp,double *tmpt,double *ltmp,double *s, double **p,double *am,double *co,double *pin,double *amin,double *coin, double **prpheno11,double **prpheno21,int **x1,int **y1,int **z1, int **code1,double *loglike,double **exp2,double **exp1,double *sden, double **pden,double *amden,double *coden,double *maxloglike,int *sfix, int **pfix,int *amfix,int *cofix,double *sval,double **pval, double *amval,double *coval,double *maxvals,double *quan, int *sfix2,int **pfix2,int *amfix2,int *cofix2,double *sval2, double **pval2,double *amval2,double *coval2,double *sstart,double **pstart,double *amstart,double *costart,double *dconv,int *niter, int nstep1,int nstep2,int nstep3,double *sstart2,double **pstart2,double *amstart2,double *costart2,int **ptype,int **ptype2, double *dtmp); int *sfix2, **pfix2, *amfix2, *cofix2,ciopt; double *sval2, **pval2, *amval2, *coval2; void spermanalysisfssdioemcisonly(int ndon,int nal,int n2loc,double **twoloc, int *sind2,int **pind2,int **amal2,int **coal2,double **twolocmp, int n1loc,double **oneloc,int *sind1,int **pind1,int **amal1,int **coal1, double *onelocrf,int **x,int **y,int **z,int **code,double *output, double *prpheno0,double **prpheno1,double **prpheno2,double *mpp, double *denom,double *tmp,double *tmpt,double *ltmp,double *s, double **p,double *am,double *co,double *pin,double *amin,double *coin, double **prpheno11,double **prpheno21,int **x1,int **y1,int **z1, int **code1,double *loglike,double **exp2,double **exp1,double *sden, double **pden,double *amden,double *coden,double *maxloglike,int *sfix, int **pfix,int *amfix,int *cofix,double *sval,double **pval, double *amval,double *coval,double *maxvals,double *quan, int *sfix2,int **pfix2,int *amfix2,int *cofix2,double *sval2, double **pval2,double *amval2,double *coval2,double *sstart,double **pstart,double *amstart,double *costart,double *dconv,int *niter, int nstep1,double *sstart2,double **pstart2,double *amstart2, double *costart2,int **ptype,double *dtmp); void chisqquan(double cilevel,double *quan,double *chitmp); double cilevel[1],chitmp[10], *nobs; int nq, n, nstart, itmp, sopt, lineco, nfix, nparam, niter[1]; FILE *finput, *fdata, *fmodel, *fstart, *fopt; char datafile[50], modelfile[50], str[1000], ctmp[50], startfile[50]; char *str2, optionsfile[50]; double dtmp[6], *dconv, **expcellprobs, *fml; int optopt, cpeopt, fmopt, sllopt, nq2, nbound, nstep1, nstep2; int nstep3, cisopt, simopt, nsim, *nsimexc; double **twolocsim, **onelocsim, *llsimnull, *llsimalt, *obslr; void get2expcellprobs(double *s, double *p,double *am,double *co,double *mp,int **x, int **y, int **z, int **code, double *prpheno0, double **prpheno1, double **prpheno2, double *mpp,double *denom,double *expcellprobs); void get1expcellprobs(double *s, double *p,double *am,double *co,double *rf,int **x, int **y, int **z, int **code, double *prpheno0, double **prpheno1, double **prpheno2, double *denom,double *expcellprobs); long seed, ab[32], cd; double *strue, **ptrue, *amtrue, *cotrue; double *sstart2, **pstart2, *amstart2, *costart2; int **ptype, **ptype2; double randomnum(long *seed,long *ab,long *cd); /* set seed */ seed = -1 * (long) time(NULL); /*Read from input file: data file name, model file name, options indicator and possible options file name*/ /*Data file name */ if(argc!=2){ printf("No input file name given on command line or more than\n"); printf("one argument given on command line.\n"); exit(1); } if((finput=fopen(argv[1],"r"))==NULL){ printf("Cannot open file %s for reading.\n",argv[1]); exit(1); } if(fgets(str,128,finput)==NULL){ printf("Data file name missing from first line of file %s.\n", argv[1]); exit(1); } if(sscanf(str,"%s",datafile)!=1){ printf("Data file name missing from first line of file %s.\n", argv[1]); exit(1); } if((fdata=fopen(datafile,"r"))==NULL){ printf("Cannot open file %s for reading.\n",datafile); exit(1); } /*Model file name */ if(fgets(str,128,finput)==NULL){ printf("Model file name missing from second line of file %s.\n", argv[1]); exit(1); } if(sscanf(str,"%s",modelfile)!=1){ printf("Model file name missing from second line of file %s.\n", argv[1]); exit(1); } if((fmodel=fopen(modelfile,"r"))==NULL){ printf("Cannot open file %s for reading.\n",modelfile); exit(1); } /* Options indicator and possibly file name */ if(fgets(str,128,finput)==NULL){ printf("Options info missing from fourth line\n"); printf("of file %s. Should be 0 to use defaults\n",argv[1]); exit(1); } if((n=sscanf(str,"%lf %s",dtmp,optionsfile))!=1 && n!=2){ printf("Options info missing from fourth line\n"); printf("of file %s. Should be 0 to use default values.\n",argv[1]); exit(1); } optopt=(int)*dtmp; if(*dtmp==1 && n==1){ printf("Name of options file missing from fourth line\n"); printf("of file %s.\n",argv[1]); exit(1); } if(*dtmp!=1. && *dtmp!=0.){ printf("Invalid options selection entered on line 4 of file\n"); printf("%s. Selection must be 0 or 1. Input read as %f.\n",argv[1], *dtmp); exit(1); } if(optopt==1) if((fopt=fopen(optionsfile,"r"))==NULL){ printf("Cannot open file %s for reading.\n",optionsfile); exit(1); } fclose(finput); /* End of input from input file */ /* Read from data file: number of two-marker data sets, then for each two-marker data set read the data set and 3 recombination fractions, number of one-marker data sets, for each one-marker data set read the data set and recombination fraction.*/ if(fgets(str,128,fdata)==NULL){ printf("Number of two-marker data sets missing from line 1\n"); printf("of file %s. Must be a nonnegative integer.\n",datafile); exit(1); } if((n=sscanf(str,"%lf",dtmp))!=1){ printf("Number of two-marker data sets missing from line 1\n"); printf("of file %s. Must be a nonnegative integer.\n",datafile); exit(1); } n2loc=(int)*dtmp; if(*dtmp<0. || (*dtmp-(double)n2loc)!=0.){ printf("Invalid number of two-marker data sets on line 1 of file\n"); printf("%s. Must be a nonnegative integer. Input read as %f.\n", datafile,*dtmp); exit(1); } if(n2loc>0){ twoloc=dmatrix(0,n2loc-1,0,15); twolocmp=dmatrix(0,n2loc-1,0,3); sind2=ivector(0,n2loc-1); pind2=imatrix(0,n2loc-1,0,1); amal2=imatrix(0,n2loc-1,0,3); coal2=imatrix(0,n2loc-1,0,3); exp2=dmatrix(0,n2loc-1,0,20); twolocsim=dmatrix(0,n2loc-1,0,15); } for(i=0;i=.5){ printf("Invalid recombination fraction entered for two-marker data\n"); printf("set number %d on line %d of file %s. Recombination\n", i+1,2*i+3,datafile); printf("fraction number %d read as %f. Must be between 0 and .5\n", j+1,dtmp[j]); printf("with 0 allowed, but not .5.\n"); exit(1); } } /* Check that recombination fractions are mathematically possible. */ if((*dtmp>dtmp[1]+dtmp[2]) || (dtmp[1]>*dtmp+dtmp[2]) || (dtmp[2]>*dtmp+dtmp[1])){ printf("Recombination fractions not mathematically possible for\n"); printf("two-marker data set number %d on line %d of file %s.\n", i+1,2*i+3,datafile); printf("Largest recombination fraction may not be greater than the\n"); printf("Sum of the other two\n"); exit(1); } /* Compute multilocus recombination probabilities. */ twolocmp[i][1]=(dtmp[0]+dtmp[1]-dtmp[2])/2.; twolocmp[i][2]=(dtmp[1]+dtmp[2]-dtmp[0])/2.; twolocmp[i][3]=(dtmp[0]+dtmp[2]-dtmp[1])/2.; twolocmp[i][0]=1.-twolocmp[i][1]-twolocmp[i][2]-twolocmp[i][3]; } /* Read in number of one-marker data sets */ if(fgets(str,128,fdata)==NULL){ printf("Number of one-marker data sets missing from line %d\n", 2+2*n2loc); printf("of file %s. Must be a nonnegative integer.\n",datafile); exit(1); } if((n=sscanf(str,"%lf",dtmp))!=1){ printf("Number of one-marker data sets missing from line %d\n", 2+2*n2loc); printf("of file %s. Must be a nonnegative integer.\n",datafile); exit(1); } n1loc=(int)*dtmp; if(*dtmp<0. || (*dtmp-(double)n1loc)!=0.){ printf("Invalid number of one-marker data sets on line %d of file\n", 2+2*n2loc); printf("%s. Must be a nonnegative integer. Input read as %f.\n", datafile,*dtmp); exit(1); } if(n1loc>0){ oneloc=dmatrix(0,n1loc-1,0,3); onelocrf=dvector(0,n1loc-1); sind1=ivector(0,n1loc-1); pind1=imatrix(0,n1loc-1,0,1); amal1=imatrix(0,n1loc-1,0,1); coal1=imatrix(0,n1loc-1,0,1); exp1=dmatrix(0,n1loc-1,0,12); onelocsim=dmatrix(0,n1loc-1,0,3); } for(i=0;i=.5){ printf("Invalid recombination fraction entered for one-marker data\n"); printf("set number %d on line %d of file %s. Recombination\n", i+1,2*n2loc+2*i+4,datafile); printf("fraction read as %f. Must be between 0 and .5\n", dtmp[0]); printf("with 0 allowed, but not .5.\n"); exit(1); } onelocrf[i]=dtmp[0]; } fclose(fdata); /* End of input from data file */ ndon=n2loc+n1loc+1; nal=4*n2loc+2*n1loc+1; dconv=dvector(0,ndon*3+nal*2); nobs=dvector(0,ndon-1); maxloglike=dvector(0,ndon); fml=dvector(0,ndon); expcellprobs=dmatrix(0,ndon-1,0,15); nsimexc=ivector(0,ndon); llsimnull=dvector(0,ndon); llsimalt=dvector(0,ndon); obslr=dvector(0,ndon); ptype=imatrix(0,ndon-1,0,7); ptype2=imatrix(0,ndon-1,0,7); /* Begin input from model file */ lineco=0; if(n2loc>0){ lineco++; if(fgets(str,1000,fmodel)==NULL){ printf("Segregation parameter indices for two-marker data sets"); printf(" missing\n"); printf("from line %d of file %s\n",lineco,modelfile); exit(1); } str2=str; for(i=0;i0){ lineco++; if(fgets(str,1000,fmodel)==NULL){ printf("Segregation parameter indices for one-marker data sets"); printf(" missing\n"); printf("from line %d of file %s\n",lineco,modelfile); exit(1); } str2=str; for(i=0;i0){ lineco++; if(fgets(str,1000,fmodel)==NULL){ printf("Indices for probability of 1 sperm deposited (d1) for two-marker\n"); printf("data sets missing "); printf("from line %d of file %s.\n",lineco,modelfile); exit(1); } str2=str; for(i=0;i0){ lineco++; if(fgets(str,1000,fmodel)==NULL){ printf("Indices for probability of 1 sperm deposited (d1) for "); printf("one-marker\n"); printf("data sets missing "); printf("from line %d of file %s.\n",lineco,modelfile); exit(1); } str2=str; for(i=0;i0){ lineco++; if(fgets(str,1000,fmodel)==NULL){ printf("Indices for probability of 2 sperm deposited (d2) for"); printf(" two-marker\n"); printf("data sets missing "); printf("from line %d of file %s.\n",lineco,modelfile); exit(1); } str2=str; for(i=0;i0){ lineco++; if(fgets(str,1000,fmodel)==NULL){ printf("Indices for probability of 2 sperm deposited (d2) for "); printf("one-marker\n"); printf("data sets missing "); printf("from line %d of file %s.\n",lineco,modelfile); exit(1); } str2=str; for(i=0;i0){ printf("Identifiability warning: parameter d%d %d appears in at\n", k+1,i); printf("least one one-marker data set, but does not appear in any\n"); printf("two-marker data set. Check the documentation for further\n"); printf("details (see \"Identifiability warning\").\n"); } } /*printf("Number of parameters is %d\n",nparam);*/ fflush(stdout); sfix=ivector(0,ndon-1); pfix=imatrix(0,ndon-1,0,1); amfix=ivector(0,nal-1); cofix=ivector(0,nal-1); sval=dvector(0,ndon-1); pval=dmatrix(0,ndon-1,0,1); amval=dvector(0,nal-1); coval=dvector(0,nal-1); for(i=0;inparam){ printf("Invalid number of parameters to be fixed, line %d of \n", lineco); printf("file %s. Must be integer between 0 and %d inclusive.\n", modelfile,nparam); exit(1); } for(i=0;i1.){ printf("Invalid parameter value for fixed value number %d out of\n", i+1); printf("%d on line %d of file %s. Input read as %f. Must\n", nfix,lineco,modelfile,dtmp[1]); printf("be between 0 and 1.\n"); exit(1); } if(strcmp(ctmp,"s")==0){ if(*dtmp>(ndon-1)){ printf("Invalid parameter index for fixed value number %d out of\n", i+1); printf("%d on line %d of file %s. Input read as %f. Must\n", nfix,lineco,modelfile,*dtmp); printf("be less than %d.\n",ndon); exit(1); } if(sfix[(int)*dtmp]==1){ printf("Warning on line %d of file %s: Parameter s %d has previously been\n", lineco,modelfile,(int)*dtmp); printf("fixed at %f. Resetting to new value %f.\n",sval[(int)*dtmp], dtmp[1]); } sfix[(int)*dtmp]=1; sval[(int)*dtmp]=dtmp[1]; } else if(strcmp(ctmp,"d1")==0){ if(*dtmp>(ndon-1)){ printf("Invalid parameter index for fixed value number %d out of\n", i+1); printf("%d on line %d of file %s. Input read as %f. Must\n", nfix,lineco,modelfile,*dtmp); printf("be less than %d.\n",ndon); exit(1); } if(pfix[(int)*dtmp][0]==1){ printf("Warning on line %d of file %s: Parameter d1 %d has previously been\n", lineco,modelfile,(int)*dtmp); printf("fixed at %f. Resetting to new value %f.\n", pval[(int)*dtmp][0],dtmp[1]); } pfix[(int)*dtmp][0]=1; pval[(int)*dtmp][0]=dtmp[1]; } else if(strcmp(ctmp,"d2")==0){ if(*dtmp>(ndon-1)){ printf("Invalid parameter index for fixed value number %d out of\n", i+1); printf("%d on line %d of file %s. Input read as %f. Must\n", nfix,lineco,modelfile,*dtmp); printf("be less than %d.\n",ndon); exit(1); } if(pfix[(int)*dtmp][1]==1){ printf("Warning on line %d of file %s: Parameter d2 %d has previously been\n", lineco,modelfile,(int)*dtmp); printf("fixed at %f. Resetting to new value %f.\n", pval[(int)*dtmp][1],dtmp[1]); } pfix[(int)*dtmp][1]=1; pval[(int)*dtmp][1]=dtmp[1]; } else if(strcmp(ctmp,"a")==0){ if(*dtmp>(nal-1)){ printf("Invalid parameter index for fixed value number %d out of\n", i+1); printf("%d on line %d of file %s. Input read as %f. Must\n", nfix,lineco,modelfile,*dtmp); printf("be less than %d.\n",nal); exit(1); } if(amfix[(int)*dtmp]==1){ printf("Warning on line %d of file %s: Parameter a %d has previously been\n", lineco,modelfile,(int)*dtmp); printf("fixed at %f. Resetting to new value %f.\n", amval[(int)*dtmp],dtmp[1]); } amfix[(int)*dtmp]=1; amval[(int)*dtmp]=dtmp[1]; } else if(strcmp(ctmp,"c")==0){ if(*dtmp>(nal-1)){ printf("Invalid parameter index for fixed value number %d out of\n", i+1); printf("%d on line %d of file %s. Input read as %f. Must\n", nfix,lineco,modelfile,*dtmp); printf("be less than %d.\n",nal); exit(1); } if(cofix[(int)*dtmp]==1){ printf("Warning on line %d of file %s: Parameter c %d has previously been\n", lineco,modelfile,(int)*dtmp); printf("fixed at %f. Resetting to new value %f.\n", coval[(int)*dtmp],dtmp[1]); } cofix[(int)*dtmp]=1; coval[(int)*dtmp]=dtmp[1]; } } for(i=0;i1.){ printf("Invalid fixed parameter choices: Two-marker data set %d\n", i+1); printf("uses deposit parameters d1 %d and d2 %d which are fixed to\n", pind2[i][0],pind2[i][1]); printf("values %f and %f. These values should sum to no more than 1.\n", pval[pind2[i][0]][0],pval[pind2[i][1]][1]); exit(1); } } for(i=0;i1.){ printf("Invalid fixed parameter choices: One-marker data set %d\n", i+1); printf("uses deposit parameters d1 %d and d2 %d which are fixed to\n", pind1[i][0],pind1[i][1]); printf("values %f and %f. These values should sum to no more than 1.\n", pval[pind1[i][0]][0],pval[pind1[i][1]][1]); exit(1); } } fclose(fmodel); /* End of input from model file */ /* Create ptype */ for(i=0;i0 && *cilevel<1)){ printf("Invalid confidence interval level entered on line %d of file", lineco); printf(" %s. Must be\n",optionsfile); printf("between 0 and 1 (exclusive). Input read as %f.\n",*cilevel); exit(1); } chisqquan(*cilevel,quan,chitmp); /* Information on cell probabilities or observed/expected counts */ lineco=2; if(fgets(str,128,fopt)==NULL){ printf("Cell probabilities/expected counts info missing from\n"); printf("line %d of file %s.\n",lineco,optionsfile); exit(1); } if((n=sscanf(str,"%lf",dtmp))!=1){ printf("Cell probabilities/expected counts info missing from\n"); printf("line %d of file %s. Must be 0, 1, or 2.\n",lineco, optionsfile); exit(1); } if(*dtmp!=1. && *dtmp!=2. && *dtmp!=0.){ printf("Invalid cell probabilities/expected counts info entered on line %d\n", lineco); printf("of file %s. Option must be 0, 1 or 2. Input read as %f.\n", optionsfile,*dtmp); exit(1); } cpeopt=(int)*dtmp; /* Output separate log-likelihoods for each data set? */ lineco=3; if(fgets(str,128,fopt)==NULL){ printf("Indicator of whether to calculate separate log-likelihoods\n"); printf("missing from line %d of file %s.\n",lineco,optionsfile); exit(1); } if((n=sscanf(str,"%lf",dtmp))!=1){ printf("Indicator of whether to calculate separate log-likelihoods\n"); printf("missing from line %d of file %s. Must be 0 or 1.\n",lineco, optionsfile); exit(1); } if(*dtmp!=1. && *dtmp!=0.){ printf("Invalid indicator of whether to calculate separate\n"); printf("log-likelihoods entered on line %d of file %s\n", lineco,optionsfile); printf("Indicator must be 0 or 1. Input read as %f.\n", *dtmp); exit(1); } sllopt=(int)*dtmp; /* Information on full multinomial likelihood calculation */ lineco=4; if(fgets(str,128,fopt)==NULL){ printf("Full multinomial likelihood info missing from\n"); printf("line %d of file %s.\n",lineco,optionsfile); exit(1); } if((n=sscanf(str,"%lf",dtmp))!=1){ printf("Full multinomial likelihood info missing from\n"); printf("line %d of file %s. Must be 0 or 1.\n",lineco, optionsfile); exit(1); } if(*dtmp!=1. && *dtmp!=0.){ printf("Invalid full multinomial likelihood info entered on line %d\n", lineco); printf("of file %s. Option must be 0 or 1. Input read as %f.\n", optionsfile,*dtmp); exit(1); } fmopt=(int)*dtmp; /* Numbers of steps for CI search */ lineco=5; if(fgets(str,128,fopt)==NULL){ printf("Info on number of steps for CI search missing from\n"); printf("line %d of file %s.\n",lineco,optionsfile); exit(1); } if((n=sscanf(str,"%lf %lf %lf %lf",dtmp,dtmp+1,dtmp+2,dtmp+3))!=1 && n!=4){ printf("Info on number of steps for CI search missing from\n"); printf("line %d of file %s. Must be 0 to use defaults\n",lineco, optionsfile); printf("or else 1 followed by 3 nonnegative integers\n"); exit(1); } cisopt=(int)*dtmp; if(*dtmp==1. && n==1){ printf("Numbers of steps for CI search missing from line %d\n", lineco); printf("of file %s. Must be three nonnegative integers.\n", optionsfile); exit(1); } if(*dtmp!=1. && *dtmp!=0.){ printf("Invalid CI search steps option entered on line %d\n", lineco); printf("of file %s. Option must be 0 or 1. Input read as %f.\n", optionsfile,*dtmp); exit(1); } if(cisopt==1){ nstep1=(int)dtmp[1]; nstep2=(int)dtmp[2]; nstep3=(int)dtmp[3]; } if((cisopt==1) && !(dtmp[1]>0. && dtmp[2]>0. && dtmp[3]>0. && (dtmp[1]-(double)nstep1)==0 && (dtmp[2]-(double)nstep2)==0 && (dtmp[3]-(double)nstep3)==0)){ printf("Invalid CI search step numbers entered on line %d of file", lineco); printf(" %s. Must be nonnegative integers.\n",optionsfile); printf("Input read as %f %f %f.\n",dtmp[1],dtmp[2],dtmp[3]); exit(1); } /* Starting value info and possibly file name */ lineco=6; if(fgets(str,128,fopt)==NULL){ printf("Starting value option info missing from line %d\n", lineco); printf("of file %s.\n",optionsfile); exit(1); } if((n=sscanf(str,"%lf %s",dtmp,startfile))!=1 && n!=2){ printf("Starting value option info missing from line %d\n", lineco); printf("of file %s. Should be 0 to use default values.\n", optionsfile); exit(1); } sopt=(int)*dtmp; if(*dtmp==1 && n==1){ printf("Name of starting values file missing from line %d\n", lineco); printf("of file %s.\n",optionsfile); exit(1); } if(*dtmp!=1. && *dtmp!=0.){ printf("Invalid starting value option entered on line %d of file\n", lineco); printf("%s. Option must be 0 or 1. Input read as %f.\n", optionsfile,*dtmp); exit(1); } if(sopt==1) if((fstart=fopen(startfile,"r"))==NULL){ printf("Cannot open file %s for reading.\n",startfile); exit(1); } /* Simulation info */ lineco=7; if(fgets(str,128,fopt)==NULL){ printf("Simulation option info missing from\n"); printf("line %d of file %s.\n",lineco,optionsfile); exit(1); } if((n=sscanf(str,"%lf %lf",dtmp,dtmp+1))!=1 && n!=2){ printf("Simulation option info missing from\n"); printf("line %d of file %s. Must be 0 or 1.\n", lineco,optionsfile); exit(1); } simopt=(int)*dtmp; nsim=(int)dtmp[1]; if(*dtmp==1 && n==1){ printf("Number of simulations missing from line %d\n", lineco); printf("of file %s. Must be a nonnegative integer.\n", optionsfile); exit(1); } if(*dtmp!=1. && *dtmp!=0.){ printf("Invalid simulation option entered on line %d\n", lineco); printf("of file %s. Option must be 0 or 1. Input read as %f.\n", optionsfile,*dtmp); exit(1); } if(simopt==1 && !(dtmp[1]-(double)nsim==0. && dtmp[1]>0.)){ printf("Invalid number of simulations entered on line %d of file", lineco); printf(" %s. Must be\n",optionsfile); printf("a nonnegative integer. Input read as %f.\n",dtmp[1]); exit(1); } fclose(fopt); } /* End of input from options file */ /* Begin input from starting value file */ ssetstart=ivector(0,ndon-1); psetstart=imatrix(0,ndon-1,0,1); amsetstart=ivector(0,nal-1); cosetstart=ivector(0,nal-1); sstart=dvector(0,ndon-1); pstart=dmatrix(0,ndon-1,0,1); amstart=dvector(0,nal-1); costart=dvector(0,nal-1); sstart2=dvector(0,ndon-1); pstart2=dmatrix(0,ndon-1,0,1); amstart2=dvector(0,nal-1); costart2=dvector(0,nal-1); if(sopt==1){ lineco=1; if(fgets(str,128,fstart)==NULL){ printf("Missing number of parameters for which starting values are\n"); printf("to be specified, line %d of file %s.\n", lineco,startfile); exit(1); } if((n=sscanf(str,"%lf",dtmp))!=1){ printf("Missing number of parameters for which starting values are\n"); printf("to be specified, line %d of file %s. Must\n", lineco,startfile); printf("be an integer between 0 and %d inclusive.\n",nparam); exit(1); } nstart=(int)*dtmp; if(*dtmp<0. || (*dtmp-(double)nstart)!=0. || *dtmp>nparam){ printf("Invalid number nstart (number of parameters for which user\n"); printf("will enter starting values) entered on line %d of file\n", lineco); printf("%s. Must be integer between 0 and %d inclusive.\n", startfile,nparam); printf("Input read as %f.\n", *dtmp); exit(1); } for(i=0;i=1.){ printf("Invalid parameter value for starting value number %d out of\n", i+1); printf("%d on line %d of file %s. Input read as %f.\n", nstart,lineco,startfile,dtmp[1]); printf("Must be between 0 and 1 (exclusive).\n"); exit(1); } if(strcmp(ctmp,"s")==0){ if(*dtmp>(ndon-1)){ printf("Invalid parameter index for starting value number %d out of\n", i+1); printf("%d on line %d of file %s. Input read as %f\n", nstart,lineco,startfile,*dtmp); printf("Must be less than %d.\n",ndon); exit(1); } if(ssetstart[(int)*dtmp]==1){ printf("Warning on line %d of file %s: Parameter s %d has previously had\n", lineco,startfile,(int)*dtmp); printf("starting value %f assigned. Resetting to new value %f.\n", sstart[(int)*dtmp],dtmp[1]); } ssetstart[(int)*dtmp]=1; sstart[(int)*dtmp]=dtmp[1]; } else if(strcmp(ctmp,"d1")==0){ if(*dtmp>(ndon-1)){ printf("Invalid parameter index for starting value number %d out of\n", i+1); printf("%d on line %d of file %s. Input read as %f\n", nstart,lineco,startfile,*dtmp); printf("Must be less than %d.\n",ndon); exit(1); } if(psetstart[(int)*dtmp][0]==1){ printf("Warning on line %d of file %s: Parameter d1 %d has previously had\n", lineco,startfile,(int)*dtmp); printf("starting value %f assigned. Resetting to new value %f.\n", pstart[(int)*dtmp][0],dtmp[1]); } psetstart[(int)*dtmp][0]=1; pstart[(int)*dtmp][0]=dtmp[1]; } else if(strcmp(ctmp,"d2")==0){ if(*dtmp>(ndon-1)){ printf("Invalid parameter index for starting value number %d out of\n", i+1); printf("%d on line %d of file %s. Input read as %f\n", nstart,lineco,startfile,*dtmp); printf("Must be less than %d.\n",ndon); exit(1); } if(psetstart[(int)*dtmp][1]==1){ printf("Warning on line %d of file %s: Parameter d2 %d has previously had\n", lineco,startfile,(int)*dtmp); printf("starting value %f assigned. Resetting to new value %f.\n", pstart[(int)*dtmp][1],dtmp[1]); } psetstart[(int)*dtmp][1]=1; pstart[(int)*dtmp][1]=dtmp[1]; } else if(strcmp(ctmp,"a")==0){ if(*dtmp>(nal-1)){ printf("Invalid parameter index for starting value number %d out of\n", i+1); printf("%d on line %d of file %s. Input read as %f\n", nstart,lineco,startfile,*dtmp); printf("Must be less than %d.\n",nal); exit(1); } if(amsetstart[(int)*dtmp]==1){ printf("Warning on line %d of file %s: Parameter a %d has previously had\n", lineco,startfile,(int)*dtmp); printf("starting value %f assigned. Resetting to new value %f.\n", amstart[(int)*dtmp],dtmp[1]); } amsetstart[(int)*dtmp]=1; amstart[(int)*dtmp]=dtmp[1]; } else if(strcmp(ctmp,"c")==0){ if(*dtmp>(nal-1)){ printf("Invalid parameter index for starting value number %d out of\n", i+1); printf("%d on line %d of file %s. Input read as %f\n", nstart,lineco,startfile,*dtmp); printf("Must be less than %d.\n",nal); exit(1); } if(cosetstart[(int)*dtmp]==1){ printf("Warning on line %d of file %s: Parameter c %d has previously had\n", lineco,startfile,(int)*dtmp); printf("starting value %f assigned. Resetting to new value %f.\n", costart[(int)*dtmp],dtmp[1]); } cosetstart[(int)*dtmp]=1; costart[(int)*dtmp]=dtmp[1]; } } /* Do not allow clashes between d1 and d2 parameters. Previously checked for clashes between fixed values */ for(i=0;i=1.){ printf("Invalid starting value choices: Two-marker data set %d\n", i+1); printf("uses deposit parameters d1 %d and d2 %d which are started at\n", pind2[i][0],pind2[i][1]); printf("values %f and %f. These values should sum to less than 1.\n", pstart[pind2[i][0]][0],pstart[pind2[i][1]][1]); exit(1); } /* d2 fixed and d1 not fixed, but d1 started. */ if(pfix[pind2[i][0]][0]==0 && psetstart[pind2[i][0]][0]==1 && pfix[pind2[i][1]][1]==1 && pstart[pind2[i][0]][0]+pval[pind2[i][1]][1]>=1.){ printf("Invalid starting value choices: Two-marker data set %d\n", i+1); printf("uses deposit parameters d1 %d and d2 %d, with the latter\n", pind2[i][0],pind2[i][1]); printf("fixed at %f while the former is started at %f. These values\n", pval[pind2[i][1]][1],pstart[pind2[i][0]][0]); printf("should sum to less than 1.\n"); exit(1); } /* d1 fixed and d2 not fixed, but d2 started */ if(pfix[pind2[i][1]][1]==0 && psetstart[pind2[i][1]][1]==1 && pfix[pind2[i][0]][0]==1 && pstart[pind2[i][1]][1]+pval[pind2[i][0]][0]>=1.){ printf("Invalid starting value choices: Two-marker data set %d\n", i+1); printf("uses deposit parameters d1 %d and d2 %d, with the former\n", pind2[i][0],pind2[i][1]); printf("fixed at %f while the latter is started at %f. These values\n", pval[pind2[i][0]][0],pstart[pind2[i][1]][1]); printf("should sum to less than 1.\n"); exit(1); } } for(i=0;i=1.){ printf("Invalid starting value choices: One-marker data set %d\n", i+1); printf("uses deposit parameters d1 %d and d2 %d which are started at\n", pind1[i][0],pind1[i][1]); printf("values %f and %f. These values should sum to less than 1.\n", pstart[pind1[i][0]][0],pstart[pind1[i][1]][1]); exit(1); } /* d2 fixed and d1 not fixed, but d1 started. */ if(pfix[pind1[i][0]][0]==0 && psetstart[pind1[i][0]][0]==1 && pfix[pind1[i][1]][1]==1 && pstart[pind1[i][0]][0]+pval[pind1[i][1]][1]>=1.){ printf("Invalid starting value choices: One-marker data set %d\n", i+1); printf("uses deposit parameters d1 %d and d2 %d, with the latter\n", pind1[i][0],pind1[i][1]); printf("fixed at %f while the former is started at %f. These values\n", pval[pind1[i][1]][1],pstart[pind1[i][0]][0]); printf("should sum to less than 1.\n"); exit(1); } /* d1 fixed and d2 not fixed, but d2 started */ if(pfix[pind1[i][1]][1]==0 && psetstart[pind1[i][1]][1]==1 && pfix[pind1[i][0]][0]==1 && pstart[pind1[i][1]][1]+pval[pind1[i][0]][0]>=1.){ printf("Invalid starting value choices: One-marker data set %d\n", i+1); printf("uses deposit parameters d1 %d and d2 %d, with the former\n", pind1[i][0],pind1[i][1]); printf("fixed at %f while the latter is started at %f. These values\n", pval[pind1[i][0]][0],pstart[pind1[i][1]][1]); printf("should sum to less than 1.\n"); exit(1); } } fclose(fstart); } /* End of input from starting value file */ prpheno1=dmatrix(0,15,0,7); prpheno2=dmatrix(0,15,0,63); prpheno11=dmatrix(0,3,0,3); prpheno21=dmatrix(0,3,0,15); x=imatrix(0,15,0,3); y=imatrix(0,7,0,1); z=imatrix(0,63,0,3); code=imatrix(0,3,0,3); makexyzcode(x,y,z,code); x1=imatrix(0,3,0,1); y1=imatrix(0,3,0,1); z1=imatrix(0,15,0,3); code1=imatrix(0,1,0,1); makeonexyzcode(x1,y1,z1,code1); s=dvector(0,ndon-1); sden=dvector(0,ndon-1); strue=dvector(0,ndon-1); p=dmatrix(0,ndon-1,0,1); pden=dmatrix(0,ndon-1,0,1); ptrue=dmatrix(0,ndon-1,0,1); pin=dvector(0,1); am=dvector(0,nal-1); amden=dvector(0,nal-1); amtrue=dvector(0,nal-1); amin=dvector(0,3); co=dvector(0,nal-1); coden=dvector(0,nal-1); cotrue=dvector(0,nal-1); coin=dvector(0,3); /* Output information on data and model used. Also calculate full multinomial log-likelihood */ *fml=0.; if(n2loc>0){ printf("\nTwo-marker data sets\n\n"); for(i=0;i0.){ *fml+=(twoloc[i][j]*log(twoloc[i][j]/nobs[i])); fml[i+1]+=(twoloc[i][j]*log(twoloc[i][j]/nobs[i])); } printf("Data set %d No. observations = %.2f Parameters used:\n", i+1,nobs[i]); printf("s d1 d2 a(A) a(B) a(a) a(b) c(A) c(B) "); printf("c(a) c(b)\n"); printf("%-3d %-4d %-4d %-6d %-6d %-6d %-6d %-6d %-6d %-6d %-6d\n", sind2[i],pind2[i][0],pind2[i][1],amal2[i][0],amal2[i][1],amal2[i][2], amal2[i][3],coal2[i][0],coal2[i][1],coal2[i][2],coal2[i][3]); } printf("\n"); fflush(stdout); } if(n1loc>0){ printf("One-marker data sets\n\n"); for(i=0;i0.){ *fml+=(oneloc[i][j]*log(oneloc[i][j]/ nobs[n2loc+i])); fml[n2loc+i+1]+=(oneloc[i][j]*log(oneloc[i][j]/ nobs[n2loc+i])); } printf("Data set %d No. observations = %.2f Parameters used:\n", i+1,nobs[n2loc+i]); printf("s d1 d2 a(C) a(c) c(C) c(c)\n"); printf("%-3d %-4d %-4d %-6d %-6d %-6d %-6d\n", sind1[i],pind1[i][0],pind1[i][1],amal1[i][0],amal1[i][1],coal1[i][0], coal1[i][1]); } printf("\n"); fflush(stdout); } /* Output information on fixed values and starting values */ if(nfix>0){ printf("Parameters fixed\n"); printf("\nParameter Value\n"); for(i=0;i0) printf("Two-marker data sets\n"); for(i=0;i0) printf("One-marker data sets\n"); for(i=0;i=1.) pstart[pind2[i][1]][1]=.2*(1.- pval[pind2[i][0]][0]); /* d2 fixed and d1 not fixed and d1 not started */ if(pfix[pind2[i][1]][1]==1 && pfix[pind2[i][0]][0]==0 && psetstart[pind2[i][0]][0]==0 && pval[pind2[i][1]][1]+ pstart[pind2[i][0]][0]>=1.) pstart[pind2[i][0]][0]=.95*(1.- pval[pind2[i][1]][1]); /* neither fixed and d1 started and d2 not started */ /*printf("pfix[pind2[i][1]][1] %d\n",pfix[pind2[i][1]][1]); printf("pfix[pind2[i][0]][0] %d\n",pfix[pind2[i][0]][0]); printf("psetstart[pind2[i][0]][0] %d\n",psetstart[pind2[i][0]][0]); printf("psetstart[pind2[i][1]][1] %d\n",psetstart[pind2[i][1]][1]); printf("pstart[pind2[i][0]][0] + pstart[pind2[i][1]][1] %f\n", pstart[pind2[i][0]][0] + pstart[pind2[i][1]][1]);*/ if(pfix[pind2[i][1]][1]==0 && pfix[pind2[i][0]][0]==0 && psetstart[pind2[i][0]][0]==1 && psetstart[pind2[i][1]][1]==0 && pstart[pind2[i][0]][0] + pstart[pind2[i][1]][1]>=1.){ pstart[pind2[i][1]][1]=.2*(1.-pstart[pind2[i][0]][0]); /*printf("pind2[i][1] %d pstart[pind2[i][1]][1] %f\n", pind2[i][1],pstart[pind2[i][1]][1]); printf("pstart[pind2[i][0]][0] %f\n", pstart[pind2[i][0]][0]);*/ } /* neither fixed and d2 started and d1 not started */ if(pfix[pind2[i][1]][1]==0 && pfix[pind2[i][0]][0]==0 && psetstart[pind2[i][0]][0]==0 && psetstart[pind2[i][1]][1]==1 && pstart[pind2[i][0]][0] + pstart[pind2[i][1]][1]>=1.) pstart[pind2[i][0]][0]=.95*(1.-pstart[pind2[i][1]][1]); } for(i=0;i=1.) pstart[pind1[i][1]][1]=.2*(1.- pval[pind1[i][0]][0]); /* d2 fixed and d1 not fixed and d1 not started */ if(pfix[pind1[i][1]][1]==1 && pfix[pind1[i][0]][0]==0 && psetstart[pind1[i][0]][0]==0 && pval[pind1[i][1]][1]+ pstart[pind1[i][0]][0]>=1.) pstart[pind1[i][0]][0]=.95*(1.- pval[pind1[i][1]][1]); /* neither fixed and d1 started and d2 not started */ if(pfix[pind1[i][1]][1]==0 && pfix[pind1[i][0]][0]==0 && psetstart[pind1[i][0]][0]==1 && psetstart[pind1[i][1]][1]==0 && pstart[pind1[i][0]][0] + pstart[pind1[i][1]][1]>=1.) pstart[pind1[i][1]][1]=.2*(1.-pstart[pind1[i][0]][0]); /* neither fixed and d2 started and d1 not started */ if(pfix[pind1[i][1]][1]==0 && pfix[pind1[i][0]][0]==0 && psetstart[pind1[i][0]][0]==0 && psetstart[pind1[i][1]][1]==1 && pstart[pind1[i][0]][0] + pstart[pind1[i][1]][1]>=1.) pstart[pind1[i][0]][0]=.95*(1.-pstart[pind1[i][1]][1]); } printf("Maximizing likelihood...\n"); /* maximize likelihood */ /*printf("ndon %d\n",ndon); printf("nal %d\n",nal); printf("n2loc %d\n",n2loc); printf("twoloc\n"); for(i=0;i90000){ printf("ERROR: the EM algorithm did not converge. Try using\n"); printf("different starting values.\n"); } printf("\nNumber of iterations used:\n"); printf("%d\n",*niter); printf("Segregation parameter estimates\n"); printf("\nParameter Estimate\n"); /* output parameter estimates */ for(i=0;i(1.-pow(10.,-10.))){ nbound++; printf(" (apparently on boundary)"); } printf("\n"); } } printf("\nDeposit parameter estimates\n\n"); printf("Parameter Estimate\n"); for(j=0;j<2;j++){ for(i=0;i(1.-pow(10.,-10.))){ nbound++; printf(" (apparently on boundary)"); } printf("\n"); } } printf("\n"); } printf("Amplification parameter estimates\n\n"); printf("Parameter Estimate\n"); for(i=0;i(1.-pow(10.,-10.))){ nbound++; printf(" (apparently on boundary)"); } printf("\n"); } } printf("\nContamination parameter estimates\n\n"); printf("Parameter Estimate\n"); for(i=0;i(1.-pow(10.,-10.))){ nbound++; printf(" (apparently on boundary)"); } printf("\n"); } } printf("\nLog-likelihood at maximum\n"); printf("%f\n",*maxloglike); fflush(stdout); if(nbound>0){ printf("\nWarning, %d parameters apparently maximized on the boundary.\n", nbound); printf("See documentation for details (under \"Estimation of\n"); printf("parameters on the boundary,\")\n\n"); } if(sllopt==1){ printf("Log-likelihoods for individual data sets\n"); if(n2loc>0) printf("Two-marker data sets:\n"); for(i=0;i0) printf("One-marker data sets:\n"); for(i=0;i0) printf("Two-marker data sets\n"); for(i=0;i0) printf("One-marker data sets\n"); for(i=0;i0){ maxvals=dvector(0,3*ndon+2*nal); for(i=0;idtmp[1] && l<15;l++, dtmp[1]+=expcellprobs[j][l]); /*printf("l %d\n",l);*/ twolocsim[j][l]+=1.; } llsimalt[j+1]=0.; for(k=0;k<16;k++) if(twolocsim[j][k]>0.){ llsimalt[j+1]+=(twolocsim[j][k]*log(twolocsim[j][k]/nobs[j])); llsimalt[0]+=(twolocsim[j][k]*log(twolocsim[j][k]/nobs[j])); } /*for(k=0;k<16;k++) printf("%f ",twolocsim[j][k]); printf("\n");*/ } for(j=0;jdtmp[1] && l<3; l++,dtmp[1]+=expcellprobs[n2loc+j][l]); onelocsim[j][l]+=1.; } llsimalt[n2loc+j+1]=0.; for(k=0;k<4;k++) if(onelocsim[j][k]>0.){ llsimalt[n2loc+j+1]+=(onelocsim[j][k]* log(onelocsim[j][k]/nobs[n2loc+j])); llsimalt[0]+=(onelocsim[j][k]*log(onelocsim[j][k]/ nobs[n2loc+j])); } /*for(k=0;k<4;k++) printf("%f ",onelocsim[j][k]); printf("\n");*/ } /*for(j=0;jpow(10.,-3.) && strue[j]<1.-pow(10.,-3.)) sstart[j]=strue[j]; else sstart[j]=.5; for(k=0;k<2;k++){ if(ptrue[j][k]>pow(10.,-3.) && ptrue[j][k]<1.- pow(10.,-2.)) pstart[j][k]=ptrue[j][k]; else{ if(ptrue[j][k]<=pow(10.,-3.)) pstart[j][k]=pow(10.,-3.); else if(ptrue[j][k]>=1-pow(10.,-2.)) pstart[j][k]=1.- pow(10.,-2.); } } } for(j=0;jpow(10.,-3.)&& amtrue[j]<1.-pow(10.,-3.)) amstart[j]=amtrue[j]; else amstart[j]=.8; if(cotrue[j]>pow(10.,-3.) && cotrue[j]<1.-pow(10.,-3.)) costart[j]=cotrue[j]; else costart[j]=.005; } for(j=0;j=1.) pstart[j][k]=.5*(1.-pstart[pind2[l][1-k]][1-k]); for(l=0;l=1.) pstart[j][k]=.5*(1.-pstart[pind1[l][1-k]][1-k]); }*/ spermanalysisrfssdem(ndon,nal,n2loc,twolocsim,sind2,pind2,amal2,coal2, twolocmp,n1loc,onelocsim,sind1,pind1,amal1,coal1,onelocrf,x,y,z,code, output,prpheno0,prpheno1,prpheno2,mpp,denom,tmp,tmpt,ltmp,s,p,am,co, pin,amin,coin,prpheno11,prpheno21,x1,y1,z1,code1,loglike,exp2,exp1,sden, pden,amden,coden,llsimnull,sfix,pfix,amfix,cofix,sval,pval, amval,coval,sstart,pstart,amstart,costart,dconv,niter,1,ptype,dtmp); for(j=0;j0.) printf("Two-marker data sets\n"); for(i=0;i0.) printf("One-marker data sets\n"); for(i=0;i