#include <stdio.h>
#include <stdlib.h>
#include<string.h>
#include "nrutil.h" 
#include <math.h>
#include "nrutil.c"

/* Capacity, in bytes, of every file-name buffer declared below. */
#define MAXLEN 1000
/* Not referenced by the code visible in this file. */
#define MAXMARK 200000
/* Capacity, in bytes, of the line buffers used while reading the input. */
#define MAXLINE 1000
/* Largest number of individuals accepted in one family. */
#define MAXFAMILYSIZE 1000

void converttfam(char *infilename);

/* Stream handles shared between main() and converttfam(). */
FILE *outgenofile;
FILE *genofile;
FILE *errfile;

/* Input file name and output prefix; both can be replaced on the command
   line with -f and -o respectively. */
char typedfile[MAXLEN] = "pedphenofile";
char outfile[MAXLEN] = "outfile";

/* Output file names.  The values given here are only initial defaults. */
char kinoutfile1[MAXLEN] = "KinInbcoef_software_input.pedinfo";
char kinoutfile1x[MAXLEN] = "KinInbcoefX_software_input.pedinfo";
char kinoutfile2[MAXLEN] = "KinInbcoef_software_input.list";
char kinoutfile2x[MAXLEN] = "KinInbcoefX_software_input.list";
char roadoutfile[MAXLEN] = "ROADTRIPS_software_input.pedinfo";
char roadoutfile2[MAXLEN] = "ROADTRIPS_software_input_PED_STUDY_ID.pedinfo";
char mqlsoutfile[MAXLEN] = "MQLS_XM_software_input.pedinfo";
char erroutfile[MAXLEN] = "outfile.err";
char matchoutfile[MAXLEN] = "outfile.pedmatch";

/* Set to 1 once the matching command-line option has been seen. */
int gfile = 0;
int ofile = 0;

/* Output switches: 1 writes the corresponding file, 0 suppresses it. */
int rfile = 1;
int kfile = 1;
int kfilex = 1;
int mfile = 1;

/*
 * Store prefix followed by suffix in dst, a buffer of dstsize bytes.
 * Terminates the program with a message when the result, including its
 * terminating NUL, does not fit, so that a name is never silently truncated
 * or written past the end of the buffer.
 */
static void build_file_name(char *dst, size_t dstsize,
                            const char *prefix, const char *suffix)
{
    if (strlen(prefix) + strlen(suffix) >= dstsize) {
        printf("File name \"%s%s\" is too long (maximum is %lu characters)\n",
               prefix, suffix, (unsigned long)(dstsize - 1));
        exit(1);
    }
    strcpy(dst, prefix);
    strcat(dst, suffix);
}

/*
 * Program entry point.
 *
 * Recognised options:
 *   -f FILE    input pedigree/phenotype file (default "pedphenofile")
 *   -o PREFIX  prefix of every output file  (default "outfile")
 *
 * Option parsing stops at the first argument that does not start with '-'.
 * An unknown option, an option without its value, or a name that does not
 * fit in a MAXLEN-byte buffer ends the program with exit status 1.
 *
 * After the options are read, the output file names are derived from the
 * prefix, the error/warning file is opened, and converttfam() does the
 * actual conversion.  Returns 0 on success.
 */
int main (int argc, char *argv[])
{
    int arg;

    for (arg = 1; arg < argc && argv[arg][0] == '-'; arg++) {
        switch (argv[arg][1]) {
        case 'f':
            /* The value is the next argument; make sure there is one. */
            if (arg + 1 >= argc) {
                printf("Option \"%s\" requires a file name\n", argv[arg]);
                exit(1);
            }
            build_file_name(typedfile, sizeof typedfile, argv[++arg], "");
            printf("user specified input file: %s \n", typedfile);
            gfile = 1;
            break;

        case 'o':
            if (arg + 1 >= argc) {
                printf("Option \"%s\" requires a file prefix\n", argv[arg]);
                exit(1);
            }
            build_file_name(outfile, sizeof outfile, argv[++arg], "");
            printf("user specified prefix file: %s \n", outfile);
            ofile = 1;
            break;

        default:
            printf("Unknown option \"%s\"\n", argv[arg]);
            exit(1);
        }
    }

    if (gfile) {
        printf("\n user specified input file: %s \n", typedfile);
    } else {
        printf("\nuser did not specify input file.  Default input file is used: %s \n", typedfile);
    }

    if (ofile) {
        printf("user specified output prefix: %s \n\n", outfile);
    } else {
        printf("user did not specify output prefix.  Default output prefix is used: %s \n\n", outfile);
    }

    /* Every output name is the prefix plus a fixed extension. */
    build_file_name(mqlsoutfile, sizeof mqlsoutfile, outfile, ".pedpheno");
    build_file_name(kinoutfile1, sizeof kinoutfile1, outfile, ".kinpedigree");
    build_file_name(kinoutfile2, sizeof kinoutfile2, outfile, ".kinlist");
    build_file_name(kinoutfile1x, sizeof kinoutfile1x, outfile, ".kinpedigreeX");
    build_file_name(erroutfile, sizeof erroutfile, outfile, ".err");
    build_file_name(matchoutfile, sizeof matchoutfile, outfile, ".pedmatch");

    if (mfile) {
        printf("output pedigree and phenotype file for MQLS_XM Software: %s\n", mqlsoutfile);
    }

    if (kfile == 1) {
        printf("output pedigree and phenotype file for KinInbcoef Software: %s\n", kinoutfile1);
        printf("output pedigree and phenotype file for KinInbcoefX Software: %s\n", kinoutfile1x);
        printf("output list file of study individuals for KinInbcoef Software and KinInbcoefX Software: %s\n\n", kinoutfile2);
    }

    /* converttfam() writes its warnings and errors to this stream. */
    if ((errfile = fopen(erroutfile, "w")) == NULL) {
        printf("Can't open %s .\n", erroutfile);
        exit(1);
    }

    converttfam(typedfile);

    fclose(errfile);
    return 0;
}


void converttfam(char *typedfile)
{

  int i,j,k,l,n=0;
  int indnumber=0,ind=0,nstu,nloci;
    char c;
    /*char line[4*(MAXMARK+3)];*/
    
char mystr[200],str1[200],str2[200],fid[600][200],study[600][200];
 char str3[200],str4[200],str5[200],str6[200],zero[200];  
int all1=0, all2=0;
 int NEWPERS1;
 int **geno,count;
 char ***famindivid,**famname;
 char **PED,**STUID,**FAID,**MOID,**PHENO,**SEX;
 int *NEWFAMNUM,*NEWFAMSTUID,*NEWFAMFAID,*NEWFAMMOID,**REVERSE,*famtotal;
 FILE *fkin1,*fkin2,*fkin1x,*fkin2x,*fmqls,*froad,*ffam,*froad2;
 FILE *ftemp1,*ftemp2; 
int MAXFAMILY,num1,num2,match,matchfam,**FAMILY,*NEWSTUDYORDER;
 char line[MAXLINE],line2[MAXLINE];
    int length=MAXLINE;




 
if((genofile=fopen(typedfile, "r"))==NULL)
    {
	printf("Can't open %s\n",typedfile);
	exit(1);
	}





  

/* GET THE TOTAL NUMBER OF individuals in the  FILE */

 printf("Getting the total number of sample individuals \n");




 nstu=0;




fgets(line,length,genofile);
 n=sscanf(line,"%s %s %s %s %s %s %[^\n]",str1,str2,str3,str4,str5,str6,line);
 /*  
  printf("n is %d \n",n);
	printf("%s %s %s %s %s %s \n",str1,str2,str3,str4,str5,str6);  
 */
if(n!=6)
    {printf("There is an error in the input file %s.  The first line of the pedigree/phenotype information file does not contain exactly 6 values \n",typedfile);
     exit(1);}
  else{nstu++;
    while(n==6 & fscanf(genofile,"%s ",str1)!=EOF)
      {fgets(line,length,genofile);
	/*	printf("str1 is %s and line is %s \n",str1,line);	*/

i=sscanf(line,"%s %s %s %s %s %[^\n]",str2,str3,str4,str5,str6,line);
	n=i+1;
	/*	printf("n is %d \n",n);	  
	printf("%s %s %s %s %s %s  \n",str1,str2,str3,str4,str5,str6);
	*/
nstu++;}

  }



/*
 rewind(genofile);

 printf("WITH NEW code there are %d sample individuals read in for formatting \n",nstu);


 nstu=0;
 while(fscanf(genofile,"%s %s %s %s %s %s",str1,str2,str3,str4,str5,str6)==6)
   { nstu++;
   }
*/

 if(nstu==0)
   {printf("There is an error in the input file %s.  The first line of this file does not contain 6 values \n",typedfile);
	exit(1);
   }

 /*
 while(c=getc(genofile)!=EOF)
       {fgets(line,length,genofile);
 nstu++;
       }
 */

printf("There are %d sample individuals that were read in for formatting \n",nstu);


rewind(genofile);



 printf("Grouping individuals who are in the same pedigrees. \n");


PED=(char **)malloc((size_t) ((nstu+1)*sizeof(char*)));
STUID=(char **)malloc((size_t) ((nstu+1)*sizeof(char*)));
FAID=(char **)malloc((size_t) ((nstu+1)*sizeof(char*)));
MOID=(char **)malloc((size_t) ((nstu+1)*sizeof(char*)));
SEX=(char **)malloc((size_t) ((nstu+1)*sizeof(char*)));
PHENO=(char **)malloc((size_t) ((nstu+1)*sizeof(char*)));
 


 for(j=1;j<=nstu;j++)  
   {PED[j]=(char *)malloc((size_t) ((200)*sizeof(char)));
STUID[j]=(char *)malloc((size_t) ((200)*sizeof(char)));
FAID[j]=(char *)malloc((size_t) ((200)*sizeof(char)));
MOID[j]=(char *)malloc((size_t) ((200)*sizeof(char)));
SEX[j]=(char *)malloc((size_t) ((200)*sizeof(char)));
PHENO[j]=(char *)malloc((size_t) ((200)*sizeof(char)));


   }
 







 count=0;
 while(fscanf(genofile,"%s %s %s %s %s %s",str1,str2,str3,str4,str5,str6)==6)
   { count++;
NEWPERS1=count;

  
strncpy(PED[NEWPERS1],str1,200);  
strncpy(STUID[NEWPERS1],str2,200);  
strncpy(FAID[NEWPERS1],str3,200);  
strncpy(MOID[NEWPERS1],str4,200);  
strncpy(SEX[NEWPERS1],str5,200);  
strncpy(PHENO[NEWPERS1],str6,200);  

   }

 fclose(genofile);






famname=(char **)malloc((size_t) ((nstu+1)*sizeof(char*)));

 for(j=1;j<=nstu;j++)
   {famname[j]=(char *)malloc((size_t) ((200)*sizeof(char)));}



 if(nstu<MAXFAMILYSIZE)
   {n=nstu;}
 else
   {n=MAXFAMILYSIZE;}

NEWFAMNUM=ivector(0,nstu);
NEWFAMSTUID=ivector(0,nstu);
NEWFAMFAID=ivector(0,nstu);
NEWFAMMOID=ivector(0,nstu);
famtotal=ivector(0,nstu);
 FAMILY=imatrix(0,nstu,0,n);




sprintf(mystr,"FAMILY");


 MAXFAMILY=0;


 for(i=1;i<=nstu;i++)
   {
num1=strcmp(PED[i],mystr);
 
 if(num1==0&&MAXFAMILY==0)
   {MAXFAMILY++;
    famtotal[MAXFAMILY]=1;
     NEWFAMNUM[i]=MAXFAMILY;
FAMILY[MAXFAMILY][famtotal[MAXFAMILY]]=i; 
FAMILY[MAXFAMILY][0]=famtotal[MAXFAMILY];
NEWFAMSTUID[i]=famtotal[MAXFAMILY];
}

 else{

   match=0;
   for(j=1;j<=MAXFAMILY&&match==0;j++)
     {num1=strcmp(PED[i],famname[j]);
      
       if(num1==0 & j!=MAXFAMILY)
	 {printf("Error in the input file.  Individuals with the same Family ID Family must be appear together in the file without breaks.  Family %s has at least one individual, individual %s,  that does not appear together with other individuals in family %s \n",famname[j],STUID[i],famname[j]);

fprintf(errfile,"Error in the input file.  Individuals with the same Family ID Family must be appear together in the file without breaks.  Family %s has at least one individual, individual %s,  that does not appear together with other individuals in family %s \n",famname[j],STUID[i],famname[j]);

	   exit(1);
	     }

 if(num1==0& j==MAXFAMILY)
	   {match=1;
             matchfam=j;
            famtotal[matchfam]++;   
	    NEWFAMNUM[i]=matchfam;
FAMILY[matchfam][famtotal[matchfam]]=i; 
FAMILY[matchfam][0]=famtotal[matchfam];
NEWFAMSTUID[i]=famtotal[matchfam];

 if(famtotal[matchfam]>MAXFAMILYSIZE)
   {printf("number of individuals in family %s exceeds the maximum of %d.\n Change the value of MAXFAMILYSIZE in the source file and recompile\n",PED[i],MAXFAMILYSIZE);
		 exit(1);
}

	   }}

   if(match==0)
     {
MAXFAMILY=MAXFAMILY+1;
 NEWFAMNUM[i]=MAXFAMILY;
strncpy(famname[MAXFAMILY],PED[i],200);
famtotal[MAXFAMILY]=1;
FAMILY[MAXFAMILY][famtotal[MAXFAMILY]]=i; 
FAMILY[MAXFAMILY][0]=famtotal[MAXFAMILY];
 NEWFAMSTUID[i]=famtotal[MAXFAMILY];
     }
 }

 
strncpy(mystr,PED[i],200);  

 

   }

   

 /* GET NEW FATHER AND MOTHER ID ORDERS WHEN DIFFERENT FROM 0 */

sprintf(zero,"0");
 for(i=1;i<=nstu;i++)
   {

     NEWFAMFAID[i]=0;
     num1=strcmp(FAID[i],zero);
     if(num1!=0)
  {match=0;
    for(j=1;j<=nstu&&match==0;j++)
      {num2=strcmp(STUID[j],FAID[i]);
	if(num2==0&&NEWFAMNUM[i]==NEWFAMNUM[j])
	   {match=1;
             matchfam=j;  
	    NEWFAMFAID[i]=matchfam;
	   	 
  }}
    
    if(match==0)
      {fprintf(errfile,"Warning: Individual %s from Family %s has person %s listed as the father, but %s is not in the study.  Father ID is set to 0 for this individual. \n",STUID[i],PED[i],FAID[i],FAID[i]);}
    
}



NEWFAMMOID[i]=0;
     num1=strcmp(MOID[i],zero);
     if(num1!=0)
  {match=0;
    for(j=1;j<=nstu&&match==0;j++)
      {num2=strcmp(STUID[j],MOID[i]);
       if(num2==0&&NEWFAMNUM[i]==NEWFAMNUM[j])
	   {match=1;
             matchfam=j;  
	    NEWFAMMOID[i]=matchfam;
	   }}

 if(match==0)
   {fprintf(errfile,"Warning: Individual %s from family %s has person %s listed as the mother, but %s is not in the study.  Mother ID is set to 0 for this individual. \n",STUID[i],PED[i],MOID[i],MOID[i]);}


  }

   }











 
 /*
if((ffam=fopen("FAMILY_NAMES_INFORMATION_FILE", "w"))==NULL)
    {
      printf("Can't open %s \n",outfile);
	exit(1);
    }


 fprintf(ffam,"Family_Name  \t Family_Number\n");
 for(i=1;i<=MAXFAMILY;i++)
   {fprintf(ffam,"%s \t %d \n",famname[i],i );
   }
 
 fclose(ffam);

 */
 

if((ffam=fopen(matchoutfile, "w"))==NULL)
    {
      printf("Can't open %s \n",matchoutfile);
	exit(1);
    }


 fprintf(ffam,"Family_ID \t STUDY_ID \t New_Family_ID  \t New_STUDY_ID \n");


 for(i=1;i<=MAXFAMILY;i++)
   {

for(j=1;j<=FAMILY[i][0];j++)
       {

  fprintf(ffam,"%s \t %s \t  %d  \t %d  \n",famname[i],STUID[FAMILY[i][j]],i,j);


       }
   }

 fclose(ffam);










NEWSTUDYORDER=ivector(0,nstu);

 count=0;
 for(i=1;i<=MAXFAMILY;i++)
   {for(j=1;j<=FAMILY[i][0];j++)
       {count++;
         NEWSTUDYORDER[count]=FAMILY[i][j];
       }}




 if(kfile==1)
   {
 if((fkin1=fopen(kinoutfile1, "w"))==NULL)
    {
      printf("Can't open %s \n",kinoutfile1);
	exit(1);
    }


 if((fkin2=fopen(kinoutfile2, "w"))==NULL)
    {
      printf("Can't open %s \n",kinoutfile2);
	exit(1);
    }

   }


 if(kfilex==1)
   {
 if((fkin1x=fopen(kinoutfile1x, "w"))==NULL)
    {
      printf("Can't open %s \n",kinoutfile1x);
	exit(1);
    }
 /*
 if((fkin2x=fopen(kinoutfile2x, "w"))==NULL)
    {
      printf("Can't open %s \n",kinoutfile2x);
	exit(1);
    }
 */

}



if((fmqls=fopen(mqlsoutfile, "w"))==NULL)
    {
      printf("Can't open %s \n",mqlsoutfile);
	exit(1);
    }





 printf("Printing the pedigree and phenotype information to files for the formats requred by the various software packages \n");  

 /*

if((ftemp1=fopen("ROADTRIPS_software_input_UNRELATED.pedinfo", "w"))==NULL)
    {
      printf("Can't open %s \n",outfile);
	exit(1);
    }


if((ftemp2=fopen("ROADTRIPS_software_input_UNRELATED.kinship", "w"))==NULL)
    {
      printf("Can't open %s \n",outfile);
	exit(1);
    }


 */


for(i=1;i<=nstu;i++)
  {

    /*
 fprintf(fmqls,"%d %s %s %s %s %s \n",NEWFAMNUM[NEWSTUDYORDER[i]],STUID[NEWSTUDYORDER[i]],FAID[NEWSTUDYORDER[i]],MOID[NEWSTUDYORDER[i]],SEX[NEWSTUDYORDER[i]],PHENO[NEWSTUDYORDER[i]]);
    */
    


fprintf(fmqls,"%d %d %d %d %s %s \n",NEWFAMNUM[NEWSTUDYORDER[i]],NEWFAMSTUID[NEWSTUDYORDER[i]],NEWFAMSTUID[NEWFAMFAID[NEWSTUDYORDER[i]]],NEWFAMSTUID[NEWFAMMOID[NEWSTUDYORDER[i]]],SEX[NEWSTUDYORDER[i]],PHENO[NEWSTUDYORDER[i]]);
 


if(kfile==1)
  {

fprintf(fkin1,"%d %d %d %d  \n",NEWFAMNUM[NEWSTUDYORDER[i]],NEWFAMSTUID[NEWSTUDYORDER[i]],NEWFAMSTUID[NEWFAMFAID[NEWSTUDYORDER[i]]],NEWFAMSTUID[NEWFAMMOID[NEWSTUDYORDER[i]]]);
 
fprintf(fkin2,"%d %d   \n",NEWFAMNUM[NEWSTUDYORDER[i]],NEWFAMSTUID[NEWSTUDYORDER[i]]);
 
if(kfilex==1)
  {fprintf(fkin1x,"%d %d %d %d %s  \n",NEWFAMNUM[NEWSTUDYORDER[i]],NEWFAMSTUID[NEWSTUDYORDER[i]],NEWFAMSTUID[NEWFAMFAID[NEWSTUDYORDER[i]]],NEWFAMSTUID[NEWFAMMOID[NEWSTUDYORDER[i]]],SEX[NEWSTUDYORDER[i]]);
    /*
fprintf(fkin2x,"%d %d   \n",NEWFAMNUM[NEWSTUDYORDER[i]],NEWFAMSTUID[NEWSTUDYORDER[i]]);
    */

  }

    /*
fprintf(fkin1,"%d %s %s %s   \n",NEWFAMNUM[NEWSTUDYORDER[i]],STUID[NEWSTUDYORDER[i]],FAID[NEWSTUDYORDER[i]],MOID[NEWSTUDYORDER[i]]);

 fprintf(fkin2,"%d %s \n",NEWFAMNUM[NEWSTUDYORDER[i]],STUID[NEWSTUDYORDER[i]]);

    */

   }

 



  }

fclose(fmqls);

 if(kfile==1)
   {fclose(fkin1);
     fclose(fkin2);
   }


if(kfilex==1)
   {fclose(fkin1x);
     
   }


 

 printf("Formatting is complete. \n");







 







}





/* Example invocation:  ./FORMAT -f NEW_PEDIGREE_ASSOCIATION_TRANSPOSE.tfam  */