/******************************************************************************************
* Qian 5/23/2009
* Added function checkParentsSex to detect mismatch between parental status and sex value
* (e.g. male mother, female father). Errors are printed to screen and to file sexErrorFile.
*
* Qian Zhang 2/25/2009 
* X-kinship coefficients.
*
* Note: 
* - Individual ID's can be any integer. No order is assumed on the ID's.
*
* -----------------
* Catherine Bourgain 12/1/2003 
*  Computes inbreeding and kinship coefficients for pedigrees using KARIGL algorithm (1981,Ann. Hum Genet 45:299-305)
*  Usage : ./KinInbcoef pedigreefile listname outputfile
* where 
* - pedigreefile has 4 col. Fam indId fatherId motherId
* - listname is a file with the list of the individuals for whom computation of the coefficients is required. Should have 2 columns : Fam indId
* - outputfile has four col: Fam Id1 Id2 kinship/inbreeding coef
* for all possible pairs of indiv listed in the listname file
*
* Inbreeding and Kinship coefficients are computed only for individuals from the same family.
* 
* MAXFAM is the maximum number of families to be analyzed at the same time. Can be changed
* MAXIND is the maximum number of individuals in each family for whom inbreeding/kinship coefficients are to be computed. Can be changed.
*
* g++ should be used for compilation (to allow use of map class)
*
**************************************************************************************/

#include <stdio.h>
#include <stdlib.h>
#include <map>
using namespace std;

#define MAXFAM 100	// Maximum number of families
#define MAXIND 1000     // Maximum number of individuals per family

// Makes "parents" an alias for "struct data"
typedef struct data {
   int father;
   int mother;
   int generation;
}parents;

// Information for a family
// Maps are used instead of arrays because no order is assumed on the individual ID's,
// e.g. they need not be consecutive integers starting from 0 or 1, so this cuts down on 
// unused allocated memory.
struct info {
   map<int,parents> ped;  // Map between individual IDs and individuals' parents
   int Nsize;      	  // Number of people in this family
   map<int,int> sex;      // Map between individual IDs and sex (1 for male, 2 for female)
};

// Declare functions

struct info Global[MAXFAM+1];					// Holds most of the information in this program
void check(long *x, long *y,int boolfix);			 				
void create_generation(int fam,struct info *Global);
double phi2(int i, int j,int fam);
int make_generation(int ind,int per,int mer,struct info G);
int checkParentsSex();

// Declare variables:

int keep[MAXFAM+1];		
		
// list contains ID numbers. list[fam][i] is the ID of individual i of family fam.
int list[MAXFAM+1][MAXIND+1];	

// i is the index of array ped. i=0 never gets used.
// F is the number of families, fam is a family ID used by Global as an index (fam = 0 never gets used).		
int  i,j,F,fam=0;				
						
FILE *infile;       			// Pedigree file
FILE *listfile;				// List file, says who coefficients should be calculated for
FILE *outfile; 				// Output file

// Error file listing an individual's ID and his or her parent's ID, 
// where the parent does not have the correct sex value (i.e. female father or male mother)
FILE *sexErrorFile;	
			
// argc is the number of arguments that program takes + 1 (so including the name of the program).
// argv is an array of inputs to the program, where the name of the program is argv[0].
int main(int argc, char *argv[]) {
        int NbInd=0,Nblist=0;
        int famold=1;

        // May 22 add on sexErrorFile
        if (argc!= 5) {
                printf("wrong usage. Should be ./idcoef pedfile listfile outfile sexErrorFile\n");
                exit(1);
	}

        if ((infile=fopen(argv[1],"r"))==NULL) {
	        printf("Can't open pedigree file\n");
                exit(1);
        }

        if ((listfile=fopen(argv[2],"r"))==NULL) {
	        printf("Can't open file with the list of individuals\n");
                exit(1);
        }

	if ((outfile=fopen(argv[3],"w"))==NULL) {
                printf("Can't open outfile\n");
                exit(1);
        }
        
	// May 22: Add on sexErrorFile
   	if ((sexErrorFile=fopen(argv[4],"w"))==NULL) {
		printf("Can't open sexErrorFile\n");
   		exit(1);
	}

	// Intialize Nsize for each family
	for (fam=1;fam<=MAXFAM;fam++) {
	        Global[fam].Nsize=0;
	}
   
        // Read in family number, individual ID, father, mother, and sex for the 1st person 
        // in the pedigree file. Intialize values in Global for this person.

        NbInd++;
        fscanf(infile,"%d ",&fam);  
        fscanf(infile,"%d ",&i);
        Global[fam].Nsize++;
        Global[fam].ped[i].mother=0;
        Global[fam].ped[i].father=0;
        Global[fam].ped[i].generation=0;
        Global[fam].sex[i] = 0;

        fscanf(infile,"%d %d %d\n", &Global[fam].ped[i].father, &Global[fam].ped[i].mother, &Global[fam].sex[i]);   

        // Continue reading in family #, individual ID, father, mother, and sex data for every individual in the pedigree file.
        while (!feof(infile)) {
    	
                fscanf(infile,"%d ",&fam);
     	
                if (fam > MAXFAM) {
      	                printf("number of families exceeds the maximum.\n Change the value of MAXFAM in the source file and recompile\n");
       	                exit(1);
	        }
        
                // if statement is true if we have come across the first person (as listed in the pedigree file) of a new family
                if (fam!=famold) {
                        if  (fam!=(famold+1)) {
                              printf("families should have following Id's from 1 to F\n\n");
                              exit(1);
                        }
                        else { 
                              famold = fam;	    	 
                        }
                }

                Global[fam].Nsize++; // Update the size of the family as we read in info for each individual in the family

                if (Global[fam].Nsize > MAXIND) {
                        printf("number of individuals in family %d exceeds the maximum.\n Change the value of MAXIND in the source file and recompile\n",fam);
                        exit(1);
                }

		// Update count of total number of individuals in the pedigree file
		NbInd++;  
   
		fscanf(infile, "%d", &i);
		Global[fam].ped[i].mother=0;
		Global[fam].ped[i].father=0;
		Global[fam].ped[i].generation=0;
		Global[fam].sex[i] = 0; 

                fscanf(infile,"%d %d %d\n", &Global[fam].ped[i].father, &Global[fam].ped[i].mother, &Global[fam].sex[i]);   

    }// end while loop

     // After the while has finished, famold should be the number of families in the pedigree.
     // F gets assigned this number. 	  
     F = famold;

     // May 22 Qian: Global should now hold all mother, father, sex info. 
     // Check that all fathers have sex male and all mothers have sex female.
     // If there is an error, quit the program.
     if (checkParentsSex() == 1) {
       	exit(1);
     }

     for (fam=1;fam<=F;fam++) {
         create_generation(fam,Global);		// Determine the generation numbers of parents in family fam
     
         // Initialize keep and list
         keep[fam]=0;
         for (j=0; j<=Global[fam].Nsize; j++) {
	         list[fam][j]=0;
		 }
     }

  /*  keep is an array whose indices are family IDs and
  * whose entries are the # of individuals of the family in the
  * listfile. Array list keeps individual ID numbers 
  */

  while (!feof(listfile)) {
      fscanf(listfile,"%d\n",&fam);
      fscanf(listfile,"%d\n",&i);

      // find returns the iterator corresponding to individual i in this family's map of individuals to parents.
      // end returns the iterator corresponding to one past the last element of map.
      if (Global[fam].ped.find(i)==Global[fam].ped.end()) { 
		printf("Individual %d not in pedigree file...\n Skipped\n",i);
	  }
      else {
        // Update count of the number of individuals in the list file
		Nblist++;

		if (Nblist<=NbInd) {
	      keep[fam]++;
	      list[fam][keep[fam]-1]=i;
		}
		else {
	      printf("too many individuals in the list file\n");
	      exit(1);
		}	  
      }// end else
  }// end while

  // Calculate kinship and inbreeding coefficients for individuals in each family
  for (fam=1;fam<=F;fam++){
       // Continue so long as family fam has individuals for whom coefficients need to be calculated 
       // (i.e. this family has individuals listed in the list file)
       if (keep[fam]>0){

	  // Iterate through individuals in the family
  	  for (i=0;i<=keep[fam]-1;i++){  
	        
              // Print inbreeding coefficient to output file: 
              // 1 for male i, 1/2 + 1/2(phi2 between parents of i) for female i
              if(Global[fam].sex[list[fam][i]] == 1){// i is male
                   fprintf(outfile,"%d %d %d %f\n",fam,list[fam][i],list[fam][i],1.0);
              }
              else{ // i is female
                   fprintf(outfile,"%d %d %d %f\n",fam,list[fam][i],list[fam][i],
                            phi2(Global[fam].ped[list[fam][i]].father,Global[fam].ped[list[fam][i]].mother,fam));
              }

              // Print kinship coefficients to output file:
              // For i with every other individual j in the family, so long as the coefficient hasn't already been calculated.
              // Note coefficient(i,j) = coefficient(j,i).
    	      for (j=i+1;j<=keep[fam]-1;j++){
       	         fprintf(outfile,"%d %d %d %f\n",fam,list[fam][i],list[fam][j],phi2(list[fam][i],list[fam][j],fam));
              }   
	   }
       }// end if
  } // end for

  // close all files
  fclose(infile);
  fclose(listfile);
  fclose(outfile);
  fclose(sexErrorFile);
} // end main function

/*----------------------------------*/

// Switch ID numbers *x and *y if individual *x is of an older generation than y.
// Dereference memory addresses x and y to get their contents *x and *y,
// which will be integers, ID numbers.
void check(int *x, int *y,int fam) {
    int z;

    if (Global[fam].ped[*x].generation >= Global[fam].ped[*y].generation) {
        return;
    }
    else {
	    // If individual *x is of generation < *y's generation (i.e. of an older generation than y)
	    // swap the ID values *x and *y
	    z = *x;
	    *x = *y;
	    *y = z;
    }
}

/*------------------------------------- */
// Calculates coefficients (inbreeding if i = j, kinship if i != j)
// between individual of ID i, individual of ID j, of family fam.
double phi2(int i, int j,int fam){

  if (i * j == 0) {
       return 0.0;
  }
  else if (i == j) {  
      // If i male, return inbreeding coeff 1
      if(Global[fam].sex[i] == 1) {
          return 1;
      }
      else { 
      // i is female, return inbreeding coeff 1/2 + 1/2phi(i's dad, i's mom)
          return ((1 + phi2(Global[fam].ped[i].father, Global[fam].ped[i].mother, fam)) / 2);
      }
  }
  else { // i != j
      // After the check function, i should denote the individual of the same or newer
      // generation than the other individual in the pair of individuals {i,j} 
      check(&i, &j,fam);
     
      if (Global[fam].sex[i] == 1) {
          // i is male, calculate phi(i's mom, j, fam)
          return(phi2(Global[fam].ped[i].mother, j, fam)); 
      }
      else {
    	  // i is female
          return ((phi2(Global[fam].ped[i].father, j, fam) + phi2(Global[fam].ped[i].mother, j, fam)) / 2);
      }
  }
}

/*--------------------------------------*/
void create_generation(int fam,struct info *Global) {
    int i=0, per=0, mer=0, generation=0, pas=0, ind=0;

    // :: is scope operator
    // Declare iter and iterend, of type iterator, which is an attribute of the map class
    map<int,parents>::iterator iter,iterend;

    iter = Global[fam].ped.begin();	// iterator corresponding to 1st individual in the domain of the map
    iterend = Global[fam].ped.end();	// iterator corresponding to element that is one after the last element of the map
    
    while (iter!=iterend) {
		// Iterator is like a pointer to a pair (first, second).
		// first is an individual ID, an element of the domain of the map between individuals and parents.
		// second is the thing of type parents that first maps to.
		ind = iter->first;     	 	// Dereference iterator iter, assign first to ind			
		per = iter->second.father;	
		mer = iter->second.mother;

        	// if parents second of individual first have unassigned generation, assign a generation
		if (iter->second.generation==0) { 	
			iter->second.generation=make_generation(ind, per, mer, Global[fam]);
		}
		
		++iter; // Make iter be the next iterator, the one corresponding to the next element in the domain of the map
    }
}

/*  -------------------------------------  */

// ind is individual ID, per is father ID, mer is mother ID.
// generations are consecutive integers ordered from 1 to whatever, where 1 is the oldest generation, 
// and the next integer is the next generation.
int make_generation(int ind, int per, int mer, struct info G) { 
    if (per!=0 && G.ped[per].generation==0) {
	    G.ped[per].generation=make_generation(per, G.ped[per].father, G.ped[per].mother, G);
    }
		
    if (mer!=0 && G.ped[mer].generation==0) {
	    G.ped[mer].generation=make_generation(mer, G.ped[mer].father, G.ped[mer].mother, G);
    }

    // If mom and dad are from the 1st generation, assign them generation 1
    if (per==0 && mer==0) {
    	    return(1);
    } 
    else { // Return the newest generation #
	    if (G.ped[per].generation >= G.ped[mer].generation) {
	        return(G.ped[per].generation+1);
            }
	    else {
                return(G.ped[mer].generation+1);
            }
    }
}

// May 22: Checks that each individual ind of family fam has a father whose sex is male and a mother whose sex is female.
// If there is an error (either father listed as female or mother listed as male),
// then print the individual's ID and ID of parent with incorrectly listed sex to the screen, as well as to sexErrorFile.
// Each row of sexErrorFile gives data ordered: family ID, individual ID, ID of parent with mismatched sex,
// parent's status (mother or father), and parent's sex (male or female). 
int checkParentsSex() {	
	// Integer error is 1 if there is an error, 0 if there is no error
    	int father=0,mother=0,ind=0, error = 0;
    	map<int,parents>::iterator iter,iterend;

	for(fam = 1; fam <= F; fam++) {
                iter = Global[fam].ped.begin();         // iterator corresponding to 1st individual in the domain of the map
                iterend = Global[fam].ped.end();        // iterator corresponding to element that is one after the last element of the map

		while (iter != iterend) {
			ind = iter->first;			// ID # for this individual
			father = iter->second.father;		// ID # of this individuals' father
			mother = iter->second.mother;		// ID # of this individuals' mother

			// If individual ind is not a founder (father ID  not 0) 
			// and ind's father is not male (encoded 1), then error
			if (father != 0 && Global[fam].sex[father] != 1) {
				printf("Error: Individual %d in family %d has female father %d \n", ind, fam, father);
				fprintf(sexErrorFile,"%d %d %d %d %d\n", fam, ind, father, 1, 2);
				error = 1;
			}

			// If individual ind is not a founder (mother ID not 0)
			// and ind's mother is not female, then error
			if (mother != 0 && Global[fam].sex[mother] == 1) {
				printf("Error: Individual %d in family %d has male mother %d \n ", ind, fam, mother);
				fprintf(sexErrorFile,"%d %d %d %d %d\n", fam, ind, mother, 2, 1);
				error = 1;
			}
			
			++iter; // Make iter be the next iterator, the one corresponding to the next element in the domain of the map
		}
	}
	
	return(error);
}
