/*
 * Pedigree / phenotype reformatter.
 *
 * Usage (options must come first and each takes the following argument):
 *   -f <file>    input file            (default "pedphenofile")
 *   -o <prefix>  output file prefix    (default "outfile")
 * Any other dash option prints "Unknown option" and exits with status 1.
 *
 * The input is read as whitespace-separated records of six values, which are
 * stored, in order, into the arrays PED, STUID, FAID, MOID, SEX and PHENO.
 * Individuals are grouped by their PED string, families and family members
 * are renumbered 1..N, and father/mother IDs other than "0" are resolved to
 * the renumbered ID of the matching STUID within the same family.
 *
 * Files written (names built from the -o prefix in main()):
 *   <prefix>.pedpheno      family, id, father, mother (renumbered), sex, phenotype
 *   <prefix>.kinpedigree   family, id, father, mother (renumbered)
 *   <prefix>.kinlist       family, id (renumbered)
 *   <prefix>.kinpedigreeX  family, id, father, mother (renumbered), sex
 *   <prefix>.pedmatch      original family/study ID next to the new numbers
 *   <prefix>.err           warnings about parents that are not in the file
 *
 * NOTE(review): this copy of the file looks damaged and should be compared
 * against a pristine source before it is compiled or modified:
 *   - the bare "#include" directives have lost their header names;
 *   - text is missing inside converttfam() at "if(nstuMAXFAMILYSIZE)"; the
 *     allocation of NEWFAMNUM, NEWFAMSTUID, NEWFAMFAID, NEWFAMMOID, famtotal
 *     and FAMILY, the initialisation of MAXFAMILY and the head of the
 *     family-matching loop are not visible here;
 *   - physical line breaks fall inside several string literals.
 *
 * NOTE(review): defects visible in the remaining code:
 *   - "-f" / "-o" read argv[++arg] without checking that it exists;
 *   - strncpy() with the full buffer size does not guarantee termination;
 *   - every "%s" conversion is unbounded while the targets are 200 bytes;
 *   - sscanf() stores the "%[^\n]" remainder into the buffer it is scanning;
 *   - malloc() results are never checked;
 *   - errfile is opened in main() and never closed in the visible code.
 */
#include #include #include #include "nrutil.h" #include #include "nrutil.c" #define MAXLEN 1000 #define MAXMARK 200000 #define MAXLINE 1000 #define MAXFAMILYSIZE 1000 void converttfam(char *infilename); FILE *outgenofile,*genofile; FILE *errfile; char typedfile[MAXLEN]="pedphenofile"; char outfile[MAXLEN] = "outfile"; char kinoutfile1[MAXLEN]="KinInbcoef_software_input.pedinfo"; char kinoutfile1x[MAXLEN]="KinInbcoefX_software_input.pedinfo"; char kinoutfile2[MAXLEN]="KinInbcoef_software_input.list"; char kinoutfile2x[MAXLEN]="KinInbcoefX_software_input.list"; char roadoutfile[MAXLEN]="ROADTRIPS_software_input.pedinfo"; char roadoutfile2[MAXLEN]="ROADTRIPS_software_input_PED_STUDY_ID.pedinfo"; char mqlsoutfile[MAXLEN]="MQLS_XM_software_input.pedinfo"; char erroutfile[MAXLEN]="outfile.err"; char matchoutfile[MAXLEN]="outfile.pedmatch"; int gfile=0; int ofile=0; int rfile = 1; /* Change to 0 to not print file */ int kfile = 1,kfilex=1; int mfile=1; /* Entry point: parses -f/-o, reports the chosen names, builds the output file names from the prefix, opens the error log and calls converttfam(). */ int main (int argc, char *argv[]) { char str1[MAXLEN]; char *ch; int arg; if (argc > 1) { for (arg=1; arg < argc && argv[arg][0] == '-'; arg++) { switch (argv[arg][1]) { case 'f': /* NOTE(review): argv[++arg] is not checked against argc (same for -o) */ strncpy(typedfile, argv[++arg], MAXLEN); printf("user specified input file: %s \n", typedfile); gfile = 1; break; case 'o': strncpy(outfile, argv[++arg], MAXLEN); printf("user specified prefix file: %s \n", outfile); ofile = 1; break; /* case 'k': kfile = 1; break; case 'x': kfilex = 1; break; */ default: printf ("Unknown option \"%s\"\n", argv[arg]); exit(1); }} } if (gfile) { /* printf("Enter .tfam file (-f): "); fgets(typedfile, MAXLEN, stdin); ch = strchr(typedfile, '\n'); if (ch != NULL) *ch = '\0'; */ printf("\n user specified input file: %s \n", typedfile); } else{printf("\nuser did not specify input file. 
Default input file is used: %s \n", typedfile);} if (ofile) { /* printf("Enter output prefix file (-o): "); fgets(outfile, MAXLEN, stdin); ch = strchr(typedfile, '\n'); if (ch != NULL) *ch = '\0'; */ printf("user specified output prefix: %s \n\n", outfile); } else{printf("user did not specify output prefix. Default output prefix is used: %s \n\n", outfile);} sprintf(mqlsoutfile,"%s.pedpheno",outfile); sprintf(kinoutfile1,"%s.kinpedigree",outfile); sprintf(kinoutfile2,"%s.kinlist",outfile); sprintf(kinoutfile1x,"%s.kinpedigreeX",outfile); sprintf(erroutfile,"%s.err",outfile); sprintf(matchoutfile,"%s.pedmatch",outfile); /* sprintf(kinoutfile2x,"%s.kinlistX",outfile); */ if (mfile) { printf("output pedigree and phenotype file for MQLS_XM Software: %s\n", mqlsoutfile); } if (kfile==1 ) { printf("output pedigree and phenotype file for KinInbcoef Software: %s\n", kinoutfile1); printf("output pedigree and phenotype file for KinInbcoefX Software: %s\n", kinoutfile1x); printf("output list file of study individuals for KinInbcoef Software and KinInbcoefX Software: %s\n\n", kinoutfile2); } if (kfilex==1) { /* printf("output list of study individuals for KinInbcoefX Software: %s\n", kinoutfile2x); */ } if((errfile=fopen(erroutfile, "w"))==NULL) { printf("Can't open %s .\n",erroutfile); exit(1); } converttfam(typedfile); } /* converttfam: reads the six-column file named by typedfile, groups and renumbers individuals by family, resolves parent IDs and writes the output files named by the file-scope variables. Exits with status 1 when a file cannot be opened or the first line does not hold exactly six values. All per-individual arrays are indexed from 1. */ void converttfam(char *typedfile) { int i,j,k,l,n=0; int indnumber=0,ind=0,nstu,nloci; char c; /*char line[4*(MAXMARK+3)];*/ char mystr[200],str1[200],str2[200],fid[600][200],study[600][200]; char str3[200],str4[200],str5[200],str6[200],zero[200]; int all1=0, all2=0; int NEWPERS1; int **geno,count; char ***famindivid,**famname; char **PED,**STUID,**FAID,**MOID,**PHENO,**SEX; int *NEWFAMNUM,*NEWFAMSTUID,*NEWFAMFAID,*NEWFAMMOID,**REVERSE,*famtotal; FILE *fkin1,*fkin2,*fkin1x,*fkin2x,*fmqls,*froad,*ffam,*froad2; FILE *ftemp1,*ftemp2; int MAXFAMILY,num1,num2,match,matchfam,**FAMILY,*NEWSTUDYORDER; char line[MAXLINE],line2[MAXLINE]; int length=MAXLINE; 
if((genofile=fopen(typedfile, "r"))==NULL) { printf("Can't open %s\n",typedfile); exit(1); } /* GET THE TOTAL NUMBER OF individuals in the FILE */ printf("Getting the total number of sample individuals \n"); nstu=0; fgets(line,length,genofile); n=sscanf(line,"%s %s %s %s %s %s %[^\n]",str1,str2,str3,str4,str5,str6,line); /* printf("n is %d \n",n); printf("%s %s %s %s %s %s \n",str1,str2,str3,str4,str5,str6); */ if(n!=6) {printf("There is an error in the input file %s. The first line of the pedigree/phenotype information file does not contain exactly 6 values \n",typedfile); exit(1);} else{nstu++; /* First pass: one record per line; counting stops after the first line that does not hold exactly six values, but that line is still counted. NOTE(review): '&' is bitwise, so fscanf() is evaluated even when n != 6. */ while(n==6 & fscanf(genofile,"%s ",str1)!=EOF) {fgets(line,length,genofile); /* printf("str1 is %s and line is %s \n",str1,line); */ i=sscanf(line,"%s %s %s %s %s %[^\n]",str2,str3,str4,str5,str6,line); n=i+1; /* printf("n is %d \n",n); printf("%s %s %s %s %s %s \n",str1,str2,str3,str4,str5,str6); */ nstu++;} } /* rewind(genofile); printf("WITH NEW code there are %d sample individuals read in for formatting \n",nstu); nstu=0; while(fscanf(genofile,"%s %s %s %s %s %s",str1,str2,str3,str4,str5,str6)==6) { nstu++; } */ if(nstu==0) {printf("There is an error in the input file %s. The first line of this file does not contain 6 values \n",typedfile); exit(1); } /* while(c=getc(genofile)!=EOF) {fgets(line,length,genofile); nstu++; } */ printf("There are %d sample individuals that were read in for formatting \n",nstu); rewind(genofile); printf("Grouping individuals who are in the same pedigrees. 
\n"); PED=(char **)malloc((size_t) ((nstu+1)*sizeof(char*))); STUID=(char **)malloc((size_t) ((nstu+1)*sizeof(char*))); FAID=(char **)malloc((size_t) ((nstu+1)*sizeof(char*))); MOID=(char **)malloc((size_t) ((nstu+1)*sizeof(char*))); SEX=(char **)malloc((size_t) ((nstu+1)*sizeof(char*))); PHENO=(char **)malloc((size_t) ((nstu+1)*sizeof(char*))); for(j=1;j<=nstu;j++) {PED[j]=(char *)malloc((size_t) ((200)*sizeof(char))); STUID[j]=(char *)malloc((size_t) ((200)*sizeof(char))); FAID[j]=(char *)malloc((size_t) ((200)*sizeof(char))); MOID[j]=(char *)malloc((size_t) ((200)*sizeof(char))); SEX[j]=(char *)malloc((size_t) ((200)*sizeof(char))); PHENO[j]=(char *)malloc((size_t) ((200)*sizeof(char))); } /* Second pass: reads six tokens per record regardless of line boundaries. NOTE(review): the loop is not bounded by nstu, so a file on which the two passes disagree would write past the arrays allocated above. */ count=0; while(fscanf(genofile,"%s %s %s %s %s %s",str1,str2,str3,str4,str5,str6)==6) { count++; NEWPERS1=count; strncpy(PED[NEWPERS1],str1,200); strncpy(STUID[NEWPERS1],str2,200); strncpy(FAID[NEWPERS1],str3,200); strncpy(MOID[NEWPERS1],str4,200); strncpy(SEX[NEWPERS1],str5,200); strncpy(PHENO[NEWPERS1],str6,200); } fclose(genofile); famname=(char **)malloc((size_t) ((nstu+1)*sizeof(char*))); for(j=1;j<=nstu;j++) {famname[j]=(char *)malloc((size_t) ((200)*sizeof(char)));} /* NOTE(review): source text appears to be missing between "nstu" and "MAXFAMILYSIZE" in the next condition. What remains shows that a new family gets the next number in MAXFAMILY, famname[f] holds its PED string, famtotal[f] and FAMILY[f][0] hold its member count, FAMILY[f][k] holds the input index of its k-th member, and mystr keeps the PED string of the previous individual. */ if(nstuMAXFAMILYSIZE) {printf("number of individuals in family %s exceeds the maximum of %d.\n Change the value of MAXFAMILYSIZE in the source file and recompile\n",PED[i],MAXFAMILYSIZE); exit(1); } }} if(match==0) { MAXFAMILY=MAXFAMILY+1; NEWFAMNUM[i]=MAXFAMILY; strncpy(famname[MAXFAMILY],PED[i],200); famtotal[MAXFAMILY]=1; FAMILY[MAXFAMILY][famtotal[MAXFAMILY]]=i; FAMILY[MAXFAMILY][0]=famtotal[MAXFAMILY]; NEWFAMSTUID[i]=famtotal[MAXFAMILY]; } } strncpy(mystr,PED[i],200); } /* GET NEW FATHER AND MOTHER ID ORDERS WHEN DIFFERENT FROM 0 */ sprintf(zero,"0"); for(i=1;i<=nstu;i++) { NEWFAMFAID[i]=0; num1=strcmp(FAID[i],zero); if(num1!=0) {match=0; for(j=1;j<=nstu&&match==0;j++) {num2=strcmp(STUID[j],FAID[i]); if(num2==0&&NEWFAMNUM[i]==NEWFAMNUM[j]) {match=1; matchfam=j; NEWFAMFAID[i]=matchfam; }} if(match==0) 
{fprintf(errfile,"Warning: Individual %s from Family %s has person %s listed as the father, but %s is not in the study. Father ID is set to 0 for this individual. \n",STUID[i],PED[i],FAID[i],FAID[i]);} } NEWFAMMOID[i]=0; num1=strcmp(MOID[i],zero); if(num1!=0) {match=0; for(j=1;j<=nstu&&match==0;j++) {num2=strcmp(STUID[j],MOID[i]); if(num2==0&&NEWFAMNUM[i]==NEWFAMNUM[j]) {match=1; matchfam=j; NEWFAMMOID[i]=matchfam; }} if(match==0) {fprintf(errfile,"Warning: Individual %s from family %s has person %s listed as the mother, but %s is not in the study. Mother ID is set to 0 for this individual. \n",STUID[i],PED[i],MOID[i],MOID[i]);} } } /* if((ffam=fopen("FAMILY_NAMES_INFORMATION_FILE", "w"))==NULL) { printf("Can't open %s \n",outfile); exit(1); } fprintf(ffam,"Family_Name \t Family_Number\n"); for(i=1;i<=MAXFAMILY;i++) {fprintf(ffam,"%s \t %d \n",famname[i],i ); } fclose(ffam); */ if((ffam=fopen(matchoutfile, "w"))==NULL) { printf("Can't open %s \n",matchoutfile); exit(1); } fprintf(ffam,"Family_ID \t STUDY_ID \t New_Family_ID \t New_STUDY_ID \n"); for(i=1;i<=MAXFAMILY;i++) { for(j=1;j<=FAMILY[i][0];j++) { fprintf(ffam,"%s \t %s \t %d \t %d \n",famname[i],STUID[FAMILY[i][j]],i,j); } } fclose(ffam); /* Flatten the per-family member lists, family by family, into one output order. ivector() is presumably supplied by nrutil -- confirm. */ NEWSTUDYORDER=ivector(0,nstu); count=0; for(i=1;i<=MAXFAMILY;i++) {for(j=1;j<=FAMILY[i][0];j++) {count++; NEWSTUDYORDER[count]=FAMILY[i][j]; }} if(kfile==1) { if((fkin1=fopen(kinoutfile1, "w"))==NULL) { printf("Can't open %s \n",kinoutfile1); exit(1); } if((fkin2=fopen(kinoutfile2, "w"))==NULL) { printf("Can't open %s \n",kinoutfile2); exit(1); } } if(kfilex==1) { if((fkin1x=fopen(kinoutfile1x, "w"))==NULL) { printf("Can't open %s \n",kinoutfile1x); exit(1); } /* if((fkin2x=fopen(kinoutfile2x, "w"))==NULL) { printf("Can't open %s \n",kinoutfile2x); exit(1); } */ } if((fmqls=fopen(mqlsoutfile, "w"))==NULL) { printf("Can't open %s \n",mqlsoutfile); exit(1); } printf("Printing the pedigree and phenotype information to files for the formats requred by the various software 
packages \n"); /* if((ftemp1=fopen("ROADTRIPS_software_input_UNRELATED.pedinfo", "w"))==NULL) { printf("Can't open %s \n",outfile); exit(1); } if((ftemp2=fopen("ROADTRIPS_software_input_UNRELATED.kinship", "w"))==NULL) { printf("Can't open %s \n",outfile); exit(1); } */ /* One output row per individual, in NEWSTUDYORDER. A parent index of 0 (unknown or unmatched parent) is printed as NEWFAMSTUID[0]; NOTE(review): where NEWFAMSTUID[0] is set is not visible in this copy -- confirm it is 0. The kinpedigreeX row is only written when kfile is also 1. */ for(i=1;i<=nstu;i++) { /* fprintf(fmqls,"%d %s %s %s %s %s \n",NEWFAMNUM[NEWSTUDYORDER[i]],STUID[NEWSTUDYORDER[i]],FAID[NEWSTUDYORDER[i]],MOID[NEWSTUDYORDER[i]],SEX[NEWSTUDYORDER[i]],PHENO[NEWSTUDYORDER[i]]); */ fprintf(fmqls,"%d %d %d %d %s %s \n",NEWFAMNUM[NEWSTUDYORDER[i]],NEWFAMSTUID[NEWSTUDYORDER[i]],NEWFAMSTUID[NEWFAMFAID[NEWSTUDYORDER[i]]],NEWFAMSTUID[NEWFAMMOID[NEWSTUDYORDER[i]]],SEX[NEWSTUDYORDER[i]],PHENO[NEWSTUDYORDER[i]]); if(kfile==1) { fprintf(fkin1,"%d %d %d %d \n",NEWFAMNUM[NEWSTUDYORDER[i]],NEWFAMSTUID[NEWSTUDYORDER[i]],NEWFAMSTUID[NEWFAMFAID[NEWSTUDYORDER[i]]],NEWFAMSTUID[NEWFAMMOID[NEWSTUDYORDER[i]]]); fprintf(fkin2,"%d %d \n",NEWFAMNUM[NEWSTUDYORDER[i]],NEWFAMSTUID[NEWSTUDYORDER[i]]); if(kfilex==1) {fprintf(fkin1x,"%d %d %d %d %s \n",NEWFAMNUM[NEWSTUDYORDER[i]],NEWFAMSTUID[NEWSTUDYORDER[i]],NEWFAMSTUID[NEWFAMFAID[NEWSTUDYORDER[i]]],NEWFAMSTUID[NEWFAMMOID[NEWSTUDYORDER[i]]],SEX[NEWSTUDYORDER[i]]); /* fprintf(fkin2x,"%d %d \n",NEWFAMNUM[NEWSTUDYORDER[i]],NEWFAMSTUID[NEWSTUDYORDER[i]]); */ } /* fprintf(fkin1,"%d %s %s %s \n",NEWFAMNUM[NEWSTUDYORDER[i]],STUID[NEWSTUDYORDER[i]],FAID[NEWSTUDYORDER[i]],MOID[NEWSTUDYORDER[i]]); fprintf(fkin2,"%d %s \n",NEWFAMNUM[NEWSTUDYORDER[i]],STUID[NEWSTUDYORDER[i]]); */ } } fclose(fmqls); if(kfile==1) {fclose(fkin1); fclose(fkin2); } if(kfilex==1) {fclose(fkin1x); } printf("Formatting is complete. \n"); } /* ./FORMAT -f NEW_PEDIGREE_ASSOCIATION_TRANSPOSE.tfam */