#ifndef __PEDDATA_H_ #define __PEDDATA_H_ #include "hfestimator.h" #include "iqlaestimator.h" #include "iqlbestimator.h" #include using namespace std; class ParamSet { public: // input files char phenofile[128]; char genofile[128]; char ibdfile[128]; char parameterfile[128]; int begin; int end; int outoption; int full_thresh; // threshold for haplotype pooling ParamSet() { memset(phenofile,0,128); strcpy(phenofile,"pedigree"); memset(genofile,0,128); strcpy(genofile,"markid"); memset(ibdfile,0,128); strcpy(ibdfile,"ibdcoef"); memset(parameterfile,0,128); strcpy(parameterfile,"parameter"); begin = -1; end = -1; outoption = 98; // a(97) -- choice a; b(98) -- choice b full_thresh = 15; } }; class trio { public: int cat_o; int cat_par1; int cat_par2; int typed_o; int typed_par1; int typed_par2; }; class MarkerInfo { public: string rsnumber; int position; // Nucleotide in case-control samples char allele0; char allele1; }; class punit { public: int place; double p; }; class hap_unit { public: int name; int order; // start from 0 double freq; }; class PedData { private: int MAXTOP; int snp_size; // the total number of SNPs in the genotype data int sample_size; // the total number of individuals FILE *outfp; FILE *sigfp; FILE *sigfp_min; map fam_map; // the value records the order starting from 0 vector fam_list; int people_size; // how many people in the genotype file, also in phenotype file vector marker_info; // markers in case-control data vector raw_content; vector content; // record the integer genotypes of all individuals map individual_list; // dimension is sample_size, value is location in genotype file int chrom; double beg; double end; int outoption; int full_thresh; double freq_thresh; // the # of markers and location (which compose haplotype) int nmark; double prevalence; map hap_list; // store the unique haplotypes vector complete_geno_list; vector geno_list; vector missing_list; // store the unique missing pattern, unit is one genotype vector geno_trio_list; vector missing_trio_list; int oneparent; // record whether there is one parent case in the data double p; // full-df p-value double p_min; // min 1-df p-value map hap_freq_table; // record the CDW_a estimator, starting point for Newton-Raphson vector hap_clus_list; vector *cluster; public: PedData(); int run(ParamSet &ps); int signal_scan(); /* loading data file */ int read_pheno(char *pheno_file); // read pedigree and phenotype int read_geno(char *geno_file); // read genotype file int load_ibdcoeff(char *ibdco_file); // read IBD coefficients file int load_parameter(char *parameter_file); // read parameter file (prevalence and size) void assign_gcode(); void assign_gcode(int *loci); void assign_info(); void unique_gcode(); // populate geno_list void unique_mis_patten(const vector & hpcontent); // populate missing_list void unique_trio_gcode(); // populate geno_list void unique_mis_trio_patten(const vector & hpcontent); // populate missing_trio_list void EW_a_estimator(); void CDW_a_estimator(); void IQL_a_estimator(); void IQL_a_estimator(vector &a0v, vector &a1v); void IQL_b_estimator(); void IQL_b_estimator(vector &a0v, vector &a1v); void hap_cluster(); void print_data(); void set_output(FILE *fp, FILE *fpp, FILE *fppp) { outfp = fp; sigfp = fpp; sigfp_min = fppp; } vector *genCombSet(vector & all, const int tsize, int index); vector *windSet(vector & all, const int tsize); void mem_clean(); ~PedData(); private: int convert_coding(); }; int find_loc(const vector & s_vec, const trio & m); bool inc_sort(const punit& a, const punit& b); bool dec_sort(const hap_unit & a, const hap_unit & b); #endif