/* C-program for case control quasi-likelihood score test and case control corrected chi2 test in samples with related individuals * * Version 1.3 * * 09-13-2002 Catherine Bourgain 07-31-2003 Catherine Bourgain - correct small bug on lines 341 and 354 08-6-2003 Catherine Bourgain - add s.d. for allele frequency estimates based on QL and print S_allsample=2*(the sum of all entries in the inverse of the covariance matrix) - add the output of the score value for each allele when the p-value is smaller than 5%. The sign of this score allows to know which allele is associated with the cases 10-22-2003 Catherine Bourgain -name of data and kinship files must be entered in the command line. File name chosen by the user. 10-24-2003 Catherine Bourgain -each independent family is treated separatly. Kinship coefficients should be computed within each family only 11-05-2003 Catherine Bourgain -may analyze several markers at the same time. -creates an CC-QLStest.err file listing the individuals from the marker data file and from the kinship coefficient file that cannot be used in the analysis. -kinship coefficients do not need to be ordered following the order of the individuals in the marker data file. 01-07-04 Catherine Bourgain - correct a bug in the readkincoef function - able to compute the statistics when an allele numbered between 1 and M, has frequency zero 06-02-04 Catherine Bourgain - correct another bug in the readkincoef function 06-11-04 Catherine Bourgain - change several memory allocations to allow the analysis of larger family sets 06-13-04 Catherine Bourgain - add a parameter to check that QL-frequency estimates are non negative. Skip QL computation for such markers 08-09-05 Catherine Bourgain -correct memory allocation procedures and add a test that the number of families does not exceed MAXFAM 03-14-06 Catherine Bourgain - correct a bug in the makeFreqMat function - correct memory allocation problems throughout the program Two files are required : a linkage data file a file with the kinship coefficient of all ind of the markid file. Notes : marker alleles should be named from 1 to M Output is a CC-QLStest.out file with - allele frequency estimates in cases, controls, overall sample of cases and controls obtained by both naive counting and using the allele-based quasi-likelihood estimator - the statistic value of the corrected chi2 test for case-control association and the corresponding p-value - the statistic value of the Case-control quasi-likelihood score test and and the corresponding p-value Note that both p-values are computed using the chi2 distribution as the null distribution of the statistics, which is true as long as the allele frequency are not too small. In pratice, the chi2 approximation might hold as long as non of the observed counts of alleles are greater than 5 in cases and in controls. A warning message is printed in the output file if such a situation occurs */ #include #include #include "nrutil.h" #include #include "nrutil.c" #include "cholesky.c" #include "chisq.c" #define MAXFAM 1000 #define MAXMARK 1000 struct FAM { int N; //total number of individuals in a family int Pheno; //number of individuals phenotyped in a family int NoPheno; // number of individuals not phenotyped in a family double **cholent; double **chol; int *MissingVec; // list of individuals with missing genotype int *descri; // array of correspondance between rank in the family and Id's }; typedef struct Nsize { int N; //total number of ind int Nc; // number of cases int Nt; //number of controls }Nsize; struct MARKER { int Nball; //number of alleles int Nc; //number of cases with known genotype int Nt; //number of controls with known genotype int ***mark; //3-dim array. 1st dimension: family number; 2nd dim: indiv rank; 3rd dim has 3 components [1]:allele1,[2]:allele2 and [3]:status Nsize *typed; }; /* M is the number of alleles N is the number of individuals in the marker data file NPheno is the number of phenotyped individuals NnoPheno the number of not phenotyped individuals F number of independent families outfile is the result file, genofile is the marker data file and idfile is the kinshipcoef file famdata is an array of FAM with particular information regarding each family Mark is an array of MARKER with particular information for each marker Storekin is a 3-dim array storing the inbreeding and kinship coefficients in each family. */ int M=0,N,F,NbMark,NPheno=0,NnoPheno=0; FILE *outfile,*genofile,*idfile,*errfile; struct FAM famdata[MAXFAM]; struct MARKER Mark[MAXMARK]; double ***Storekin; void readcar(char *name); void readdata (struct MARKER *Mark,struct FAM *famdata,FILE *errfile); void readGenotypes(int fam,int m,int length,double **cholaug,double **cholaugCase,double **cholaugControl,int *MissingVec,int miss,double **kincoefmatrix,double **kincoefmatrixCase,double **kincoefmatrixControl); void readkincoef(char *name,FILE *errfile,struct FAM *famdata,double ***Storekin); void alleleFreq(double **cholaug,double *frequency,int size, double *denominator); void naiveCount(double **cholaug,double *Naive,double *Naivefreq,int size); void getfrequency(double *Naivefreq,double *NaivefreqCase,double *NaivefreqControl,double *frequency,double *frequencyCase,double *frequencyControl,double denominator,double denomcases,double denomcontrols,int Nall,int Ncase,int Ncontrol); void modifcholaug(double **cholent,double **cholaug,double **chol,double *frequency,int size); int makeFreqMat(double **freqMatrix,double *frequency); void comput_info_score(double **cholaug,double **freqMatrix,double *infoQL_rr,double *infoQL_rf,double *infoQL_ff,double *Rvector,int size); void from_info2_chi2(double info_rr,double info_rf,double info_ff,double *Naive,double *Naivefreq,double **freqNaive,double *chi2val,int Nall,int Ncase); void from_info2_score(double **freqMatrix,double infoQL_rr,double infoQL_rf,double infoQL_ff,double *Rvector,double *testval); void comput_info_chi2(double **kincoefMatrix,double **cholaug,double *info_rr,double *info_rf,double *info_ff,int size); int findInd(int Id1,int family); int main (int argc, char *argv[]) { /* cholent stores the allelic information in the M first columns. Column M+1 contains 2, column M+2 contains 2 for cases and 0 for controls cholaug stores cholent times the inverse of the cholesky decompo of the covariance matrix chol stores the cholesky decomposition of kincoefMatrix kincoefMatrix stores the kinship based covariance Matrix frequency stores the frequency estimates freqMatrix is a matrix that is a function of the M-1 allele frequencies MissingVec lists all individual number for whom genotype or phenotype info is missing dmatrix and dvector are available from nrutil.c */ double **cholaug,**cholentCase,**cholentControl; double **kincoefMatrix,**kincoefMatrixCase,**kincoefMatrixControl; double *frequency,*frequencyCase,*frequencyControl; double **freqMatrix; double testval=0,chi2val=0, denominator=0, denomcases=0, denomcontrols=0; double *Naive,*Naivefreq,**freqNaive,*NaiveCase,*NaiveControl,*NaivefreqCase,*NaivefreqControl,*Rvector; double info_rr=0,info_rf=0,info_ff=0,infoQL_rr=0,infoQL_rf=0,infoQL_ff=0; int i,j,fam,m,followcase=0,followcontrol=0,followall=0,df=0,Npas,negfreq=0; if (argc!=3) {printf("Wrong usage. Should be ./CC-QLStest markerdata_file kinshipcoef_file\n"); exit(1); } readcar(argv[1]); for (m=1;m<=NbMark;m++) { Mark[NbMark].Nball=0; Mark[NbMark].Nc=0; Mark[NbMark].Nt=0; Mark[m].typed = (Nsize *)malloc((size_t) ((MAXFAM+1)*sizeof(Nsize))); if (!Mark[m].typed) {printf("Error in memory allocation 1\n"); exit(1);} Mark[m].mark = (int ***)malloc((size_t) ((MAXFAM+1)*sizeof(int**))); if (!Mark[m].mark) {printf("Error in memory allocation 2\n"); exit(1);} for (i=0;i<=F;i++) { Mark[m].typed[i].N=0; Mark[m].typed[i].Nc=0; Mark[m].typed[i].Nt=0; Mark[m].mark[i] = (int **)malloc((size_t) ((N+1)*sizeof(int*))); if (!Mark[m].mark[i]) {printf("Error in memory allocation 3\n"); exit(1);} for (j=0;j<=famdata[i].N;j++) {Mark[m].mark[i][j]= (int *)malloc((size_t) ((5)*sizeof(int))); if (!Mark[m].mark[i][j]) {printf("Error in memory allocation 4 %d %d %d\n",famdata[i].N,i,j); exit(1);} } famdata[i].NoPheno=0; famdata[i].Pheno=0; famdata[i].descri=(int *)malloc((N+1)*sizeof(int)); if (!famdata[i].descri) {printf("Error in memory allocation 5\n"); exit(1);} } } if((errfile=fopen("CC-QLStest.err", "w"))==NULL) { printf("Can't open CC-QLStest.err .\n"); exit(1); } readdata(Mark,famdata,errfile); Storekin = (double ***)malloc((size_t) ((F+1)*sizeof(double**))); for (i=0;i<=F;i++) { if (i==0) Npas=N; else Npas=famdata[i].Pheno+famdata[i].NoPheno; Storekin[i] = (double **)malloc((size_t) ((Npas+1)*sizeof(double*))); if (!Storekin[i]) {printf("Error in memory allocation for kinship coefficient storage\n"); exit(1);} for (j=0;j<=Npas;j++) { Storekin[i][j]= (double *)malloc((size_t) ((Npas+1)*sizeof(double))); if (!Storekin[i][j]) {printf("Error in memory allocation for kinship coefficient storage\n"); exit(1);} } } readkincoef(argv[2],errfile,famdata,Storekin); if((outfile=fopen("CC-QLStest.out", "w"))==NULL) { printf("Can't open CC-QLStest.out .\n"); exit(1); } fprintf(outfile,"******Results of the Case-control Quasi-likelihood score test******\n\n"); fprintf(outfile,"Phenotype information is available for %d individuals from %d independent families\n\n",NPheno,F); for (m=1;m<=NbMark;m++) { negfreq=0; M=Mark[m].Nball; freqMatrix=dmatrix(1,M-1,1,M-1); frequency=dvector(1,M); frequencyCase=dvector(1,M); frequencyControl=dvector(1,M); Naive=dvector(1,M); NaiveCase=dvector(1,M); NaiveControl=dvector(1,M); Naivefreq=dvector(1,M); Rvector=dvector(1,M); NaivefreqCase=dvector(1,M); NaivefreqControl=dvector(1,M); freqNaive=dmatrix(1,M-1,1,M-1); testval=0; chi2val=0; denominator=0; denomcases=0; denomcontrols=0; info_rr=0; info_rf=0; info_ff=0; infoQL_rr=0; infoQL_rf=0; infoQL_ff=0; for (i=1;i<=M;i++) { frequency[i]=0; frequencyCase[i]=0; frequencyControl[i]=0; Naive[i]=0; NaiveCase[i]=0; NaiveControl[i]=0; Naivefreq[i]=0; NaivefreqCase[i]=0; NaivefreqControl[i]=0; if (i0) { followall=1; famdata[fam].cholent=dmatrix(1,Mark[m].typed[fam].N,1,M+2); famdata[fam].chol=dmatrix(1,Mark[m].typed[fam].N,1,Mark[m].typed[fam].N); cholaug=dmatrix(1,Mark[m].typed[fam].N,1,M+2); kincoefMatrix=dmatrix(1,Mark[m].typed[fam].N,1,Mark[m].typed[fam].N); } if ((famdata[fam].Pheno-Mark[m].typed[fam].N)>0) famdata[fam].MissingVec=ivector(1,famdata[fam].Pheno-Mark[m].typed[fam].N); if (Mark[m].typed[fam].Nc>0){ followcase=1; cholentCase=dmatrix(1,Mark[m].typed[fam].Nc,1,M+2); kincoefMatrixCase=dmatrix(1,Mark[m].typed[fam].Nc,1,Mark[m].typed[fam].Nc); } if (Mark[m].typed[fam].Nt>0){ followcontrol=1; cholentControl=dmatrix(1,Mark[m].typed[fam].Nt,1,M+2); kincoefMatrixControl=dmatrix(1,Mark[m].typed[fam].Nt,1,Mark[m].typed[fam].Nt); } readGenotypes(fam,m,famdata[fam].Pheno,famdata[fam].cholent,cholentCase,cholentControl,famdata[fam].MissingVec,famdata[fam].Pheno-Mark[m].typed[fam].N,kincoefMatrix,kincoefMatrixCase,kincoefMatrixControl); /*computing allele frequencies in the cases... */ if (followcase==1) { naiveCount(cholentCase,NaiveCase,NaivefreqCase,Mark[m].typed[fam].Nc); if (cholesky(kincoefMatrixCase,Mark[m].typed[fam].Nc,cholentCase,M+1,kincoefMatrixCase,cholentCase,1)!=1) {printf("cholesky decomposition of the case cov matrix failed for family %d. Might be due to inconsistent kinship coefficient values...\n",fam); exit(1); } alleleFreq(cholentCase,frequencyCase,Mark[m].typed[fam].Nc, &denomcases); } /*computing allele frequencies in the controls... */ if (followcontrol==1) { naiveCount(cholentControl,NaiveControl,NaivefreqControl,Mark[m].typed[fam].Nt); if (cholesky(kincoefMatrixControl,Mark[m].typed[fam].Nt,cholentControl,M+1,kincoefMatrixControl,cholentControl,1)!=1) { printf("cholesky decomposition of the control cov matrix failed for family %d. Might be due to inconsistent kinship coefficient values...\n",fam); exit(1); } alleleFreq(cholentControl,frequencyControl,Mark[m].typed[fam].Nt, &denomcontrols); } /*computing overall allele frequencies and statistics...*/ if (followall==1) { naiveCount(famdata[fam].cholent,Naive,Naivefreq,Mark[m].typed[fam].N); comput_info_chi2(kincoefMatrix,famdata[fam].cholent,&info_rr,&info_rf,&info_ff,Mark[m].typed[fam].N); if (cholesky(kincoefMatrix,Mark[m].typed[fam].N,famdata[fam].cholent,M+2,famdata[fam].chol,cholaug,1)!=1) {printf("cholesky decomposition of the cov matrix failed for family %d. Might be due to inconsistent kinship coefficient values...\n",fam); exit(1); } alleleFreq(cholaug,frequency,Mark[m].typed[fam].N, &denominator); } /* if (Mark[m].typed[fam].N>0) free_dmatrix(cholaug,1,Mark[m].typed[fam].N,1,M+2); if (Mark[m].typed[fam].N>0) free_dmatrix(kincoefMatrix,1,Mark[m].typed[fam].N,1,Mark[m].typed[fam].N); if (Mark[m].typed[fam].Nc>0) free_dmatrix(kincoefMatrixCase,1,Mark[m].typed[fam].Nc,1,Mark[m].typed[fam].Nc); if (Mark[m].typed[fam].Nt>0) free_dmatrix(kincoefMatrixControl,1,Mark[m].typed[fam].Nt,1,Mark[m].typed[fam].Nt); if (Mark[m].typed[fam].Nc>0) free_dmatrix(cholentCase,1,Mark[m].typed[fam].Nc,1,M+1); if (Mark[m].typed[fam].Nt>0) free_dmatrix(cholentControl,1,Mark[m].typed[fam].Nt,1,M+1); */ } getfrequency(Naivefreq,NaivefreqCase,NaivefreqControl,frequency, frequencyCase,frequencyControl,denominator,denomcases,denomcontrols,Mark[m].Nc+Mark[m].Nt,Mark[m].Nc,Mark[m].Nt); makeFreqMat(freqNaive,Naivefreq); from_info2_chi2(info_rr,info_rf,info_ff,Naive,Naivefreq,freqNaive,&chi2val,Mark[m].Nc+Mark[m].Nt,Mark[m].Nc); if (makeFreqMat(freqMatrix,frequency)==1) {negfreq=1;} if (negfreq==0) { for (fam=1;fam<=F;fam++) { cholaug=dmatrix(1,Mark[m].typed[fam].N,1,M+2); modifcholaug(famdata[fam].cholent,cholaug,famdata[fam].chol,frequency,Mark[m].typed[fam].N); comput_info_score(cholaug,freqMatrix,&infoQL_rr,&infoQL_rf,&infoQL_ff,Rvector,Mark[m].typed[fam].N); free_dmatrix(cholaug,1,Mark[m].typed[fam].N,1,M+1); } from_info2_score(freqMatrix,infoQL_rr,infoQL_rf,infoQL_ff,Rvector,&testval); } /*output printing ... */ fprintf(outfile,"****************************************"); fprintf(outfile,"\n\nAnalysis of Marker %d \n\n",m); fprintf(outfile,"****************************************\n"); fprintf(outfile,"%d cases and %d controls available\n\n",Mark[m].Nc,Mark[m].Nt); fprintf(outfile,"*****************************************\n"); fprintf(outfile,"allele frequency estimates using naive counting in\n"); fprintf(outfile,"\t\t cases \t\t controls \t\t all sample \n"); for (i=1;i<=M;i++) fprintf(outfile,"allele %d : freq = %.4f\t freq = %.4f\t\t freq = %.4f\n",i,NaivefreqCase[i],NaivefreqControl[i],Naivefreq[i]); fprintf(outfile,"\n*****************************************\n"); if (negfreq==0) { fprintf(outfile,"allele frequency estimates using the allele based quasi-likelihood estimator in\n"); fprintf(outfile,"\t\t\t cases \t\t\t controls \t\t\t all sample \n"); for (i=1;i<=M;i++) fprintf(outfile,"allele %d : freq = %.4f sd = %.4f\t freq = %.4f sd = %.4f\t\t freq = %.4f sd = %.4f\n",i,frequencyCase[i],sqrt(2*frequencyCase[i]*(1-frequencyCase[i])/denomcases),frequencyControl[i],sqrt(2*frequencyControl[i]*(1-frequencyControl[i])/denomcontrols),frequency[i],sqrt(2*frequency[i]*(1-frequency[i])/denominator)); } else fprintf(outfile,"QL computation of allele frequencies gives negative values....\n\nskipped... \n\nUse naive estimates\n\n"); if (Mark[m].Nt!=0 && Mark[m].Nc!=0) { df=0; for (i=1;i<=M-1;i++) if (frequency[i]!=0) df++; fprintf(outfile,"\n*****************************************\n"); fprintf(outfile,"Case-control corrected chi2 test \n\n"); fprintf(outfile,"Corrected chi2 statistic value = %f\t pvalue = %f df = %d\n\n",chi2val,pochisq(chi2val,df),df); for (i=1;i<=M;i++) {if (((Mark[m].Nc)*NaivefreqCase[i])<5) fprintf(outfile,"The p-value might not be exact because of the small number of allele %d in cases\n\n",i); if (((Mark[m].Nt)*NaivefreqControl[i])<5) fprintf(outfile,"The p-value might not be exact because of the small number of allele %d in controls\ni\n",i); } fprintf(outfile,"*****************************************\n"); fprintf(outfile,"Case-control quasi-likelihood score test \n\n"); if (negfreq==0) { fprintf(outfile,"CC-QL statistic value = %f\t pvalue = %f df = %d\n\n",testval,pochisq(testval,df),df); if (pochisq(testval,df)<=0.05) { for (i=1;i<=M-1;i++) { if (Rvector[i]>0) fprintf(outfile,"Frequency of allele %d is increased in the cases (quasi-score associated to this allele is %.4f)\n\n",i,Rvector[i]); if (Rvector[i]<0) fprintf(outfile,"Frequency of allele %d is increased in the controls (quasi-score associated to this allele is %.4f)\n\n",i,Rvector[i]); if (Rvector[i]==0) fprintf(outfile,"Frequency of allele %d is the same in cases and controls (quasi-score associated to this allele is 0)\n\n",i); } } for (i=1;i<=M;i++) {if (((Mark[m].Nc)*frequencyCase[i])<5) fprintf(outfile,"The p-value might not be exact because of the small number of type %d alleles in cases\n",i); if (((Mark[m].Nt)*frequencyControl[i])<5) fprintf(outfile,"The p-value might not be exact because of the small number of type %d alleles in controls\n",i); } } else printf("Computation of the CC-QL statistic is not possible\n\n"); } else { if ((Mark[m].Nc)==0) fprintf(outfile,"\n\nTest statistics not computed because there are no cases in the sample \n\n"); else fprintf(outfile,"\n\nTest statistics not computed because there are no controls in the sample \n\n"); } /*free_dmatrix(freqMatrix,1,M-1,1,M-1); free_dvector(frequency,1,M); free_dvector(frequencyCase,1,M); free_dvector(frequencyControl,1,M); free_dvector(Naive,1,M-1); free_dvector(NaiveCase,1,M-1); free_dvector(NaiveControl,1,M-1); free_dvector(Naivefreq,1,M-1); free_dvector(Rvector,1,M-1); free_dvector(NaivefreqCase,1,M-1); free_dvector(NaivefreqControl,1,M-1); free_dmatrix(freqNaive,1,M-1,1,M-1);*/ } fclose(outfile); //free_dvector(Naive,1,M-1); //free_dvector(Naivefreq,1,M-1); // free_dvector(frequency,1,M); free_dmatrix(freqMatrix,1,M-1,1,M-1); return 0; } /****************************************************** Reads the number of markers (NbMark), of families (F) and the total number of individuals listed in the linkage data file (N). The number of markers is calculated from the number of columns in the first line. *****************************************************/ void readcar (char *name) { int all1=0, all2=0,fam=0,famold=1,n=0; int indnumber=0,ind=0; char line[2*(MAXMARK+3)]; int length=2*(MAXMARK+3); if((genofile=fopen(name, "r"))==NULL) { printf("Can't open %s\n",name); exit(1); } NbMark=0; fscanf(genofile,"%*d %*d %*d %*d %*d %*d "); fgets(line,length,genofile); n=sscanf(line,"%d %d %[^\n]",&all1,&all2,line); if (n<2) { printf("No marker to test. Please check first line in marker data file\n\n"); exit(1); } else { NbMark++; while (n==3) { n=sscanf(line,"%d %d %[^\n]",&all1,&all2,line); if (n>=2) { NbMark++; } } } rewind(genofile); famdata[1].N=0; while((fscanf(genofile, "%d %d %*d %*d %*d %*[^\n]",&fam,&ind))==2) { if (fam==famold) famdata[fam].N++; else famdata[fam].N=1; if (fam!=famold && fam!=(famold+1)) {printf("Problems with family %d.\n Family should have following Id numbers from 1 to N\n",fam); exit(1);} indnumber++; famold=fam; } N=indnumber; F=fam; if (F>MAXFAM) { printf("Number of families exceeds the maximum number of families allowed\n. You should try to change the value of MAXFAM in the CC-QLStest.c file and recompile.\n\n"); exit(1);} rewind(genofile); } /****************************************************** Reads the marker data file. Family and individual information is stored in famdata[fam] and Mark[m].mark[fam] ranking the individuals in each family. famdata[fam].descri[rank]=Id gives the correspondence between rank and Id. *****************************************************/ void readdata (struct MARKER *Mark,struct FAM *famdata,FILE *errfile) { int all1=0, all2=0,i=0,fam=0,famold=1,n=0; int status=0,indnumber=0,ind=0,m=0,j,indiv=0; char line[2*(MAXMARK+3)]; int length=2*(MAXMARK+3); for (i=1;i<=NbMark;i++) Mark[i].Nball=0; while((fscanf(genofile, "%d %d %*d %*d %*d %d ",&fam,&ind,&status))==3) { if (fam!=famold && fam!=(famold+1)) { printf("Problems with family %d.\n Family should have following Id numbers from 1 to N\n ",fam); exit(1); } if (fam!=famold) {indnumber=0; NPheno+=famdata[famold].Pheno; NnoPheno+=famdata[famold].NoPheno; } if (status ==0) { fprintf(errfile,"individual %d from family %d is not phenotyped...skipped\n",ind,fam); famdata[fam].NoPheno++; fscanf(genofile,"%[^\n]",line); } else if (status==1 || status==2) { indnumber++; famdata[fam].Pheno++; famdata[fam].descri[indnumber]=ind; m=0; while (mMark[m].Nball) Mark[m].Nball=all1; if (all2>Mark[m].Nball) Mark[m].Nball=all2; } } } fscanf(genofile,"\n"); famold=fam; } NPheno+=famdata[fam].Pheno; NnoPheno+=famdata[fam].NoPheno; } /****************************************************** Construct the genotype and kinship coef matrices for the global sample, the case sample and the control sample. *****************************************************/ void readGenotypes(int fam,int m,int length,double **cholaug,double **cholaugCase,double **cholaugControl,int *MissingVec,int miss,double **kincoefmatrix,double **kincoefMatrixCase,double **kincoefmatrixControl) { int all1=0, all2=0,j=0,i=0,family=0,follow=0; int a1=0,tot=1,controls=1,cases=1; int nbc1=0,nbc2=0,nbt1=0,nbt2=0,nb1=0,nb2=0; for (i=1;i<=length;i++) //length is famdata[fam].NPheno { if (Mark[m].mark[fam][i][1]!=0 && Mark[m].mark[fam][i][2]!=0) { nb1++; nb2=nb1; if (Storekin[fam][i][i]==-1) { printf("No inbreeding coefficient for individual %d from family %d. Please check...\n\n",i,fam); exit(1); } kincoefmatrix[nb1][nb1]=Storekin[fam][i][i]+1; if (Mark[m].mark[fam][i][3]==2) { nbc1++; nbc2=nbc1; kincoefMatrixCase[nbc1][nbc1]=kincoefmatrix[nb2][nb1]; for (j=i+1;j<=length;j++) { if (Storekin[fam][i][j]==-1) { printf("No kinship coefficient between individual %d and individual %d from family %d. Please check...\n\n",famdata[fam].descri[i],famdata[fam].descri[j],fam); exit(1); } if (Mark[m].mark[fam][j][1]!=0 && Mark[m].mark[fam][j][2]!=0) { nb2++; kincoefmatrix[nb1][nb2]=2*Storekin[fam][i][j]; kincoefmatrix[nb2][nb1]=kincoefmatrix[nb1][nb2]; if (Mark[m].mark[fam][j][3]==2) { nbc2++; kincoefMatrixCase[nbc1][nbc2]=kincoefmatrix[nb1][nb2]; kincoefMatrixCase[nbc2][nbc1]=kincoefMatrixCase[nbc1][nbc2]; } } } cholaug[tot][M+1]=2; cholaugCase[cases][M+1]=2; cholaug[tot][M+2]=2; cholaugCase[cases][M+2]=2; for (a1=1;a1<=M;a1++) cholaug[tot][a1]=0; for(a1=1;a1<=M;a1++) { if (Mark[m].mark[fam][i][1]==a1) cholaug[tot][a1]++; if (Mark[m].mark[fam][i][2]==a1) cholaug[tot][a1]++; cholaugCase[cases][a1]=cholaug[tot][a1]; } cases++; tot++; } else if (Mark[m].mark[fam][i][3]==1) { nbt1++; nbt2=nbt1; kincoefmatrixControl[nbt1][nbt1]=kincoefmatrix[nb2][nb1]; for (j=i+1;j<=length;j++) { if (Mark[m].mark[fam][j][1]!=0 && Mark[m].mark[fam][j][2]!=0) { nb2++; kincoefmatrix[nb1][nb2]=2*Storekin[fam][i][j]; kincoefmatrix[nb2][nb1]=kincoefmatrix[nb1][nb2]; if (Mark[m].mark[fam][j][3]==1) { nbt2++; kincoefmatrixControl[nbt1][nbt2]=kincoefmatrix[nb1][nb2]; kincoefmatrixControl[nbt2][nbt1]=kincoefmatrixControl[nbt1][nbt2]; } } } cholaug[tot][M+1]=2; cholaugControl[controls][M+1]=2; cholaug[tot][M+2]=0; cholaugControl[controls][M+2]=0; for (a1=1;a1<=M;a1++) cholaug[tot][a1]=0; for (a1=1;a1<=M;a1++) { if (Mark[m].mark[fam][i][1]==a1) cholaug[tot][a1]++; if (Mark[m].mark[fam][i][2]==a1) cholaug[tot][a1]++; cholaugControl[controls][a1]=cholaug[tot][a1]; } controls++; tot++; } } else { follow++; MissingVec[follow]=i; } } if ((cases-1)!=Mark[m].typed[fam].Nc || (controls-1)!=Mark[m].typed[fam].Nt) {printf("Problems while rereading the data file...\n"); exit(1); } if (follow!=miss) {printf("problem with missing data in family %d follow=%d miss=%d\n",fam,follow,miss); exit(1);} } /****************************************************** Reads the kinship coefficients of the individual pairs from the kinshipcoef file and stores the values in the Storekin array. Storekin returns value -1 for pairs of individuals for which no coefficient is available *****************************************************/ void readkincoef(char *name,FILE *errfile, struct FAM *famdata,double ***Storekin) { double coef=0; int follow=0,j,i,n,famold=0; long int Id1,Id2; int family=0,ind1=0,ind2=0,indold1=0,indold2=0,Idold1=0,Idold2=0; int err_array[N]; int test=0,lim=0; for (i=1;i<=N;i++) err_array[i]=0; if((idfile=fopen(name, "r"))==NULL) { printf("Can't open %s\n",name); exit(1); } for (follow=1;follow<=F;follow++) for (i=1;i<=famdata[follow].Pheno;i++) for (j=i;j<=famdata[follow].Pheno;j++) { Storekin[follow][i][j]=-1; if (i!=j) Storekin[follow][j][i]=-1; } while (fscanf(idfile, "%d %ld %ld %lf\n",&family,&Id1,&Id2,&coef)==4) { if (family>F) { printf("Problem with family number %d. Family should have following Id numbers from 1 to N\n",family); exit(1); } if (family==famold) { if (Id1==Idold1) ind1=indold1; else if (Id1==Idold2) ind1=indold2; else ind1=findInd(Id1,family); if (Id2==Idold1) ind2=indold1; else if (Id2==Idold2) ind2=indold2; else ind2=findInd(Id2,family); } else { ind1=findInd(Id1,family); ind2=findInd(Id2,family); } famold=family; if (ind1!=0 && ind2!=0) { Storekin[family][ind1][ind2]=coef; Storekin[family][ind2][ind1]=coef; } if (ind1==0) { test=0; for (j=1;j<=lim;j++) if (Id1==err_array[j]) { test=1; j=lim+1; } if (test==0) { fprintf(errfile,"individual %ld from family %d is in the kinshipcoef file but is not phenotyped or not available from marker data file\n",Id1,family); lim++; err_array[lim]=Id1; } } if (ind2==0) { test=0; for (j=1;j<=lim;j++) if (Id2==err_array[j]) { test=1; j=lim+1; } if (test==0) { fprintf(errfile,"individual %ld from family %d is in the kinshipcoef file but is not phenotyped or not available from marker data file\n",Id2,family); lim++; err_array[lim]=Id2; } } Idold1=Id1; Idold2=Id2; indold1=ind1; indold2=ind2; } } /****************************************************** returns the rank of an individual in his family from its family number and Id *****************************************************/ int findInd(Id1,family) { int indpas=1; while (indpas<=famdata[family].Pheno) { if (famdata[family].descri[indpas]==Id1) return(indpas); indpas++; } return(0); } /****************************************************** Computes the frequency estimates using the quasilikelihood approach *****************************************************/ void alleleFreq(double **cholaug,double *frequency,int size, double *denominator) { int i=0, j=0; double pasden=0,numerator=0; for(i=1; i<=size; i++){ pasden=pasden+(cholaug[i][M+1])*(cholaug[i][M+1]); } for(j=1; j<=M; j++) { for(i=1; i<=size; i++) { numerator=numerator+cholaug[i][M+1]*cholaug[i][j]; } frequency[j]+=numerator; numerator=0; } *denominator+=pasden; } /****************************************************** Computes the frequency estimates using the naive counting *****************************************************/ void naiveCount(double **cholaug,double *Naive,double *Naivefreq,int size) { int i=0, j=0; double sum=0, contC=0; for(i=1; i<=M; i++){ contC=0; for(j=1; j<=size; j++) { sum+=cholaug[j][i]; if (cholaug[j][M+2]==2) contC+=cholaug[j][i]; } Naive[i]+=contC; Naivefreq[i]+=sum; sum=0; } } /****************************************************** To get from counts to frequencies *****************************************************/ void getfrequency(double *Naivefreq,double *NaivefreqCase,double *NaivefreqControl,double *frequency,double *frequencyCase,double *frequencyControl,double denominator,double denomcases,double denomcontrols,int Nall,int Ncase,int Ncontrol) { int i; for (i=1;i<=M;i++) { Naivefreq[i]/=(2*Nall); NaivefreqCase[i]/=(2*Ncase); NaivefreqControl[i]/=(2*Ncontrol); frequency[i]/=denominator; frequencyCase[i]/=denomcases; frequencyControl[i]/=denomcontrols; } } /****************************************************** Modify the cholaug matrix such as the M-1 first columns are equal to C-t*(Y-mu) instead of C-t*Y *****************************************************/ void modifcholaug(double **cholent,double **cholaug,double **chol,double *frequency,int size) { int i,j,k; for (i=1;i<=size;i++) for (j=1;j<=M+2;j++) { if (j0;i--) {invcholm[i][i]=1/cholm[i][i]; for (j=i-1;j>0;j--) {for (k=j+1;k<=i;k++) invcholm[j][i]-=cholm[j][k]*invcholm[k][i]; invcholm[j][i]/=cholm[j][j]; invcholm[i][j]=0; } } //inverse of transMat for (i=1;i<=Mpas-1;i++) for (j=1;j<=Mpas-1;j++) for (k=1;k<=Mpas-1;k++) { freqPasMat[i][j]+=invcholm[i][k]*invcholm[j][k]; } k=0; for (i=1;i<=M-1;i++) { if (Followfreq[i]==1) { k++; freqMatrix[i][i]=freqPasMat[k][k]; l=k; for (j=i+1;j<=M-1;j++) { if (Followfreq[j]==1) { l++; freqMatrix[i][j]=freqPasMat[k][l]; freqMatrix[j][i]=freqMatrix[i][j]; } else { freqMatrix[i][j]=0; freqMatrix[j][i]=0; } } } else { freqMatrix[i][i]=0; for (j=i+1;j<=M-1;j++) { freqMatrix[i][j]=0; freqMatrix[j][i]=0; } } } /* printf(""); */ return 0; } /****************************************************** Computes the information components required for the pseudo-score test *****************************************************/ void comput_info_score(double **cholaug,double **freqMatrix,double *infoQL_rr,double *infoQL_rf,double *infoQL_ff,double *Rvector,int size) { int i,j; double info_rr=0,info_rf=0,info=0,info_ff=0; for (i=1;i<=size;i++) {info_rr+=cholaug[i][M+2]*cholaug[i][M+2]; info_rf+=cholaug[i][M+2]*cholaug[i][M+1]; info_ff+=cholaug[i][M+1]*cholaug[i][M+1]; for (j=1;j<=M-1;j++) Rvector[j]+=cholaug[i][M+2]*cholaug[i][j]; } *infoQL_rr+=info_rr; *infoQL_rf+=info_rf; *infoQL_ff+=info_ff; } /****************************************************** Computes the pseudo-score test *****************************************************/ void from_info2_score(double **freqMatrix,double infoQL_rr,double infoQL_rf,double infoQL_ff,double *Rvector,double *testval) { int i,j; double info=0,score=0; info=infoQL_rr - infoQL_rf*infoQL_rf/infoQL_ff; for (i=1;i<=M-1;i++) for (j=1;j<=M-1;j++) score+=Rvector[i]*Rvector[j]/info*freqMatrix[i][j]/2; *testval=score; } /****************************************************** Computes the corrected chi2 test *****************************************************/ void from_info2_chi2(double info_rr, double info_rf, double info_ff, double *Naive,double *Naivefreq,double **freqNaive,double *chi2val,int Nall,int Ncase) { int i,j; double chi2old=0; double freqpas=0,corrfactor=0; corrfactor=2/((info_rr-2*Ncase*(double)info_rf/Nall+(double)Ncase/Nall*(double)Ncase/Nall*info_ff)); for (i=1;i<=M-1;i++) for (j=1;j<=M-1;j++) chi2old+=(Naive[i]-2*Ncase*Naivefreq[i])*(Naive[j]-2*Ncase*Naivefreq[j])*freqNaive[i][j]; *chi2val=chi2old*corrfactor; } /****************************************************** Computes the information components required to derive the correction factor for the chi2 test *****************************************************/ void comput_info_chi2(double **kincoefMatrix,double **cholaug,double *info_rr,double *info_rf,double *info_ff,int size) { int i,j; double infopas_rr=0,infopas_rf=0,infopas_ff=0; for (i=1;i<=size;i++) { for (j=1;j<=size;j++) { infopas_rr+=cholaug[i][M+2]*kincoefMatrix[i][j]*cholaug[j][M+2]; infopas_rf+=cholaug[i][M+2]*kincoefMatrix[i][j]*cholaug[j][M+1]; infopas_ff+=cholaug[i][M+1]*kincoefMatrix[i][j]*cholaug[j][M+1]; } } *info_rr+=infopas_rr; *info_rf+=infopas_rf; *info_ff+=infopas_ff; }