#ifndef __PEDDATA_H_ #define __PEDDATA_H_ #include "hfestimator.h" #include "iqlaestimator.h" #include "iqlbestimator.h" #include using namespace std; class ParamSet { public: // input files char phenofile[128]; char genofile[128]; char ibdfile[128]; char dbfile[128]; char parameterfile[128]; double md_thresh; double r2_thresh; ParamSet() { memset(phenofile,0,128); strcpy(phenofile,"pedigree"); memset(genofile,0,128); strcpy(genofile,"markid"); memset(ibdfile,0,128); strcpy(ibdfile,"ibdcoef"); memset(dbfile,0,128); strcpy(dbfile,"database"); memset(parameterfile,0,128); strcpy(parameterfile,"parameter"); md_thresh = 0.4; r2_thresh = 1.0; } }; class trio { public: int cat_o; int cat_par1; int cat_par2; int typed_o; int typed_par1; int typed_par2; }; class MarkerInfo { public: string rsnumber; int onchip; int position; int order; int anchor_num; // how many tag SNPs int strand_switch; // Nucleotide by reference map char allele0; char allele1; // Nucleotide in case-control samples char alleleA; char alleleB; vector anchor_list; // record the rs numbers for tag SNPs map weight_table; map phap_freq_table; // population hap frequency table in HapMap double Md; double mr2; }; class punit { public: int place; double p; }; class PedData { private: int MAXTOP; int snp_size; // the total number of SNPs with position in the genotype data, also in database int sample_size; // the total number of individuals in the phenotype file FILE *outfp; FILE *sigfp; map fam_map; // the value records the order starting from 0 vector fam_list; int people_size; // how many people in the genotype file, also in phenotype file vector raw_content; vector content; // record the integer genotypes of all individuals map individual_list; // dimension is sample_size, value is location in genotype file // information in db map index_map; // value is the order in marker_info, from 0 to n-1 map order2index; // key: order in genotype file, value: order in marker_info vector marker_info; // markers in hapmap int chrom; // threshhold double md_thresh; double r2_thresh; int nmark; double prevalence; map hap_list; // store the unique haplotypes vector complete_geno_list; vector geno_list; vector missing_list; // store the unique missing pattern, unit is one genotype vector geno_trio_list; vector missing_trio_list; int oneparent; // record whether there is one parent case in the data double p; // p-value double stat; map weight_table; // estimated weights in Dan's paper map hap_weight; map hap_freq_table; // record the WL estimator, starting point for Newton-Raphson public: PedData(); int run(ParamSet &ps); void signal_scan(); /* loading data file */ int read_pheno(char *pheno_file); // read pedigree and phenotype int read_geno(char *geno_file); // read genotype file int load_ibdcoeff(char *ibdco_file); // read IBD coefficients file int load_db(char *db_file); int load_parameter(char *parameter_file); // read parameter file (prevalence) void assign(); void assign_gcode(int iter); void assign_info(); void unique_gcode(); // populate geno_list void unique_mis_patten(const vector & hpcontent); // populate missing_list void unique_trio_gcode(); // populate geno_list void unique_mis_trio_patten(const vector & hpcontent); // populate missing_trio_list void EW_a_estimator(); void CDW_a_estimator(); void ATRIUM_a_estimator(map &trans_map, vector &a0v, vector &a1v, int loc); void ATRIUM_b_estimator(map &trans_map, vector &a0v, vector &a1v, int loc); void print_data(); void set_output(FILE *fp, FILE *fpp) { outfp = fp; sigfp = fpp; } void mem_clean(); ~PedData(); private: int convert_coding(); }; vector tokenize(const string& str, const string& delimiters); int find_loc(const vector & s_vec, const trio & m); bool inc_sort(const punit& a, const punit& b); #endif