From 22ca2649f1d367bb340d5c5df354ecfecdea0550 Mon Sep 17 00:00:00 2001 From: Dariusz Murakowski Date: Thu, 20 Nov 2014 15:00:01 -0500 Subject: [PATCH] Monte Carlo simulation of Boltzmann-distributed energy. Start Gillespie code. More build rules. --- Makefile | 18 ++- hamiltonian.cpp | 32 +++++ hamiltonian.h | 9 ++ mc.cpp | 129 +++++++++++++++-- mc.h | 20 ++- ss.cpp | 439 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ ss.h | 192 +++++++++++++++++++++++++ 7 files changed, 811 insertions(+), 28 deletions(-) create mode 100644 ss.cpp create mode 100644 ss.h diff --git a/Makefile b/Makefile index 94dce1e..d2adedf 100644 --- a/Makefile +++ b/Makefile @@ -1,10 +1,13 @@ -SRCS = wf.cpp hamiltonian.cpp population.cpp virus.cpp -EXECNAME = wf +SRCS_WF = wf.cpp hamiltonian.cpp population.cpp virus.cpp +EXECNAME_WF = wf SRCS_MC = mc.cpp hamiltonian.cpp population.cpp virus.cpp EXECNAME_MC = mc +SRCS_SS = ss.cpp hamiltonian.cpp population.cpp virus.cpp +EXECNAME_SS = ss + CXX = c++ CFLAGS = $(DBGFLAG) -Wall -Wextra -pedantic -Wno-unused-parameter @@ -25,15 +28,18 @@ endif .PHONY: clean -all: $(EXECNAME) $(EXECNAME_MC) +all: $(EXECNAME_WF) $(EXECNAME_MC) $(EXECNAME_SS) # @echo done making $(EXECNAME) -$(EXECNAME): $(SRCS) - $(CXX) $(SRCS) $(CFLAGS) $(LIBDIR) $(INCLUDEDIR) -o $(EXECNAME) $(LIBS) +$(EXECNAME_WF): $(SRCS_WF) + $(CXX) $(SRCS_WF) $(CFLAGS) $(LIBDIR) $(INCLUDEDIR) -o $(EXECNAME_WF) $(LIBS) $(EXECNAME_MC): $(SRCS_MC) $(CXX) $(SRCS_MC) $(CFLAGS) $(LIBDIR) $(INCLUDEDIR) -o $(EXECNAME_MC) $(LIBS) +$(EXECNAME_SS): $(SRCS_SS) + $(CXX) $(SRCS_SS) $(CFLAGS) $(LIBDIR) $(INCLUDEDIR) -o $(EXECNAME_SS) $(LIBS) + # concatenate all the source files before compiling # ./concat.sh $(EXECNAME) $(SRCS) # $(CXX) -x c++ $(EXECNAME).combined $(CFLAGS) $(LIBDIR) $(INCLUDEDIR) -o $(EXECNAME) $(LIBS) @@ -46,6 +52,6 @@ $(EXECNAME_MC): $(SRCS_MC) # $(CXX) -c $(CFLAGS) $(INCLUDES) $< -o $@ clean: - $(RM) *.o $(EXECNAME) $(EXECNAME_MC) + $(RM) *.o $(EXECNAME_WF) $(EXECNAME_MC) $(EXECNAME_SS) # $(RM) *.o *.combined $(EXECNAME) diff --git a/hamiltonian.cpp b/hamiltonian.cpp index 40a3697..eff464c 100644 --- a/hamiltonian.cpp +++ b/hamiltonian.cpp @@ -351,6 +351,38 @@ double EpitopeHamiltonian::get_energy(const std::set &mutated_site } +// helper function to test whether vector contains a value +// based on http://stackoverflow.com/questions/6194797/what-is-a-c-container-with-a-contains-operation +template +inline bool contains(const std::vector &vec, const T &value) +{ + return std::find(vec.begin(), vec.end(), value) != vec.end(); +} + +// helper function to test whether set contains a value +// based on http://stackoverflow.com/questions/6194797/what-is-a-c-container-with-a-contains-operation +template +inline bool contains(const std::set &container, const T &value) +{ + return container.find(value) != container.end(); +} + +void EpitopeHamiltonian::generate_allsites_set() +{ + this->epitopeAllSites = MutatedSiteSequence(); + for (unsigned ep=0; epnotEpitopeAllSites = MutatedSiteSequence(); + for (unsigned i = 0; i < static_cast(this->size); ++i) { + if (!contains(epitopeAllSites,i)) { + notEpitopeAllSites.insert(i); + } + } +} + /*********************************************** diff --git a/hamiltonian.h b/hamiltonian.h index a0ba314..fbdd6ad 100644 --- a/hamiltonian.h +++ b/hamiltonian.h @@ -11,9 +11,14 @@ typedef std::vector > AdjacencyList; typedef std::vector > Coupling; +typedef std::set MutatedSiteSequence; + class Virus; +//template bool contains(const std::vector &vec, const T &value); +//template bool contains(const std::set &container, const T &value); + class Hamiltonian { @@ -48,10 +53,14 @@ public: std::vector penalty; std::vector > epitopeWT; std::vector > epitopeMut; + + MutatedSiteSequence epitopeAllSites; // all site indices belonging to an epitope (WT or mut ant) + MutatedSiteSequence notEpitopeAllSites; // all sites not involved in an epitope EpitopeHamiltonian(std::string &FILENAME); void set_epitope(const std::vector &eWT, const std::vector &eMut, double d); + void generate_allsites_set(); bool escaped(const Virus &v); bool escaped(const std::set &mutated_sites) const; diff --git a/mc.cpp b/mc.cpp index d0ddcf0..df03df2 100644 --- a/mc.cpp +++ b/mc.cpp @@ -8,11 +8,28 @@ #include #include "hamiltonian.h" -#include "population.h" +// #include "population.h" #include "virus.h" #include "mc.h" +// helper function to test whether vector contains a value +// based on http://stackoverflow.com/questions/6194797/what-is-a-c-container-with-a-contains-operation +template +inline bool contains(const std::vector &vec, const T &value) +{ + return std::find(vec.begin(), vec.end(), value) != vec.end(); +} + +// helper function to test whether set contains a value +// based on http://stackoverflow.com/questions/6194797/what-is-a-c-container-with-a-contains-operation +template +inline bool contains(const std::set &container, const T &value) +{ + return container.find(value) != container.end(); +} + + // generate a random number generator seed from /dev/urandom // borrowed from SSC src/runtime/ssc_rts.c static unsigned sim_random_seed() { @@ -34,9 +51,62 @@ static unsigned sim_random_seed() { } +// initial sequence (given epitope constraints in H) +MutatedSiteSequence initial_sequence(const EpitopeHamiltonian &H, const MutatedSiteSequence &initState) +{ + MutatedSiteSequence s(initState); // copy + + for (unsigned ep=0; ep esc_var; unsigned int esc_num=P.escape_variant(H, esc_var); @@ -141,6 +232,7 @@ void run(RunParameters &r, unsigned seed) { fprintf(supout,"\n"); fflush(supout); + */ } @@ -155,6 +247,10 @@ void run(RunParameters &r, unsigned seed) { Hamiltonian H(r.couplingsInfile); H.set_temp(r.bh, r.bJ); + + std::cerr << "unsupported simulation scheme" << std::endl; + exit(1); + /* Population P(H, r.n, r.mu); unsigned int i; @@ -166,6 +262,7 @@ void run(RunParameters &r, unsigned seed) { if (r.runUntilEscape && P.escaped(H)) break; } + */ //} @@ -183,7 +280,7 @@ void run(RunParameters &r, unsigned seed) { // Import initial state from a state file -void importState(RunParameters &r) { +void importState(RunParameters_MC &r) { FILE *input=fopen(r.stateInfile.c_str(),"r"); // .st if (input == NULL) { perror((std::string("ERROR in importState: ") + r.stateInfile).c_str()); exit(1); } @@ -196,10 +293,12 @@ void importState(RunParameters &r) { while (fscanf(input,"%le",&frac)==1) { + if (frac != 1.0) { + std::cerr << "ERROR in importState: " << r.stateInfile << ": population fraction must be unity (1.0) for a Monte Carlo run. Received: " << frac; exit(1); + } + std::cout << frac; // XXX - r.initFrac.push_back(frac); - std::set tempSet; while (fscanf(input,"%c",&o)==1) { @@ -218,7 +317,7 @@ void importState(RunParameters &r) { } - r.initPop.push_back(tempSet); + r.initState = MutatedSiteSequence(tempSet); if (o==';') break; @@ -231,7 +330,7 @@ void importState(RunParameters &r) { // load epitope definitions from file -void importEpitope(RunParameters &r) { +void importEpitope(RunParameters_MC &r) { std::ifstream input(r.epitopeInfile.c_str()); // .ep if (!input) { perror((std::string("ERROR in importEpitope: ") + r.epitopeInfile).c_str()); exit(1); } @@ -286,7 +385,7 @@ void importEpitope(RunParameters &r) { // insert into initial state of population -void add_to_two_site_pop(RunParameters &r, bool s1, bool s2, double frac) { +void add_to_two_site_pop(RunParameters_MC &r, bool s1, bool s2, double frac) { r.initFrac.push_back(frac); @@ -347,7 +446,7 @@ void usage() int main(int argc, char *argv[]) { - RunParameters r; + RunParameters_MC r; unsigned seed = sim_random_seed(); diff --git a/mc.h b/mc.h index 724da81..33daf8d 100644 --- a/mc.h +++ b/mc.h @@ -11,6 +11,9 @@ #include +typedef std::set MutatedSiteSequence; + + // STRING MANIPULATION // Converts generic to string @@ -69,7 +72,7 @@ inline double strtodouble(const std::string &s) { // This class holds the parameters needed for running the simulation -class RunParameters { +class RunParameters_MC { public: @@ -99,8 +102,10 @@ public: double h2; double J12; - unsigned int write_mod; // Write state of the population every __ generations + unsigned int write_mod; // Write state of the population every __ MC steps + MutatedSiteSequence initState; // initial state of system + std::vector > initPop; // Initial population sequences std::vector initFrac; // Initial population fraction std::vector > eWT; // Sites that are consensus (WT) in the targeted epitope @@ -115,7 +120,7 @@ public: std::string epitopeInfile; - RunParameters() { + RunParameters_MC() { directory=""; @@ -172,14 +177,15 @@ public: //printf("%s\n%s\n%s\n",couplingsInfile.c_str(),trajectoryOutfile.c_str(),stateInfile.c_str()); } - ~RunParameters() {} + ~RunParameters_MC() {} }; -void run(RunParameters &r); -void importState(RunParameters &r); -void importEpitope(RunParameters &r); +void run(RunParameters_MC &r); +void importState(RunParameters_MC &r); +void importEpitope(RunParameters_MC &r); +MutatedSiteSequence initSeq(const EpitopeHamiltonian &H, const MutatedSiteSequence &initState); #endif // MC_H diff --git a/ss.cpp b/ss.cpp new file mode 100644 index 0000000..8f69a3c --- /dev/null +++ b/ss.cpp @@ -0,0 +1,439 @@ +#include +#include +#include +#include +#include +#include + +#include + +#include "hamiltonian.h" +#include "population.h" +#include "virus.h" +#include "ss.h" + + +// generate a random number generator seed from /dev/urandom +// borrowed from SSC src/runtime/ssc_rts.c +static unsigned sim_random_seed() { + unsigned random_seed; + FILE *rng = fopen("/dev/urandom", "r"); + if (rng == NULL) { + fprintf(stderr, + "error: cannot read /dev/urandom randomness generator\n"); + exit(1); + } + size_t num_random_read = fread(&random_seed, sizeof(random_seed), 1, rng); + if (num_random_read != 1) { + fprintf(stderr, + "error: cannot read /dev/urandom randomness generator\n"); + exit(1); + } + fclose(rng); + return random_seed; +} + + +// Run the program + +void run(RunParameters &r, unsigned seed) { + + // Initialize RNG and set initial state, if importing from file + + static gsl_rng *rnd = gsl_rng_alloc(gsl_rng_taus2); //pointer to random number generator state + + //srand((unsigned)time(0)); + //gsl_rng_set(rnd,rand()); + gsl_rng_set(rnd,seed); + + r.setFiles(); + FILE *popout=fopen(r.trajectoryOutfile.c_str(),"w"); // .dat + FILE *supout=r.useTwoSite ? NULL : fopen(r.supplementaryOutfile.c_str(),"w"); // .sum + if (popout == NULL) { perror("File error"); exit(1); } + if (r.useTwoSite && supout == NULL) { perror("File error"); exit(1); } + + if (r.importState) importState(r); // .st + if (r.useEpitope) importEpitope(r); // .ep + fflush(stdout); + + + if(r.useTwoSite) { + + fprintf(popout,"gen\tV00\tV10\tV01\tV11\n"); + + for (unsigned int n=0;n esc_var; + unsigned int esc_num = 0; + if (!r.runUntilEscape && !r.runUntilEscape_all) esc_num = P.escape_variant(H, esc_var); // default behavior for backwards compatibility + if (r.runUntilEscape_all) esc_num = P.escape_variant_all(H, esc_var); + if (r.runUntilEscape) esc_num = P.escape_variant(H, esc_var); + // should behave better if both are true + // but more likely to escape one than all, so that one is output + + fprintf(supout,"%d\t%d",i,esc_num); + + //for (std::set::iterator iter=esc_var.begin(); iter!=esc_var.end(); ++iter) fprintf(supout,"\t%d",*iter); + + // print sequence as CSV (comma-separated) + fprintf(supout,"\t"); + std::set::iterator ms_iter=esc_var.begin(); + if (ms_iter != esc_var.end()) { + fprintf(supout,"%d",*ms_iter); + ++ms_iter; + } + for (; ms_iter!=esc_var.end(); ++ms_iter) + fprintf(supout,",%d",*ms_iter); + + fprintf(supout,"\n"); + + fflush(supout); + + } + + } + + + // Run (w/out targeted epitope) + + else { + + //for (unsigned int n=0;n.st + if (input == NULL) { perror((std::string("ERROR in importState: ") + r.stateInfile).c_str()); exit(1); } + + char o; + double frac; + unsigned int site; + + // Import initial state of the population + + while (fscanf(input,"%le",&frac)==1) { + + std::cout << frac; // XXX + + r.initFrac.push_back(frac); + + std::set tempSet; + + while (fscanf(input,"%c",&o)==1) { + + std::cout << o; // XXX + + if (o=='\n' || o==';') break; + + else { + + fscanf(input,"%u",&site); + std::cout << site; // XXX + tempSet.insert(site); + + } + + } + + r.initPop.push_back(tempSet); + + if (o==';') break; + + } + + std::cout << "\n"; // XXX + +} + + +// load epitope definitions from file + +void importEpitope(RunParameters &r) { + + std::ifstream input(r.epitopeInfile.c_str()); // .ep + if (!input) { perror((std::string("ERROR in importEpitope: ") + r.epitopeInfile).c_str()); exit(1); } + + std::string readStr; + while (std::getline(input,readStr)) { + + std::string word; + unsigned int site; + std::vector tmpEp; + size_t pos0 = 0; + size_t posBar = readStr.find('|',pos0); + size_t posEnd = std::string::npos; //readStr.size(); + + // could use std::noskipws to keep tab ('\t') characters? + + std::stringstream readStrStrm(std::string(readStr,pos0,posBar-pos0)); + while (readStrStrm >> word) { + std::cout << word << " "; // XXX + std::istringstream i(word); + if (i >> site) + tmpEp.push_back(site); + else // must have encountered '|' + break; + } + r.eWT.push_back(tmpEp); + + tmpEp.clear(); // reset temp epitope + readStrStrm.str(""); readStrStrm.clear(); // reset stream + + std::cout << "| "; // XXX + + readStrStrm.str(std::string(readStr,posBar+1,posEnd/*-posBar-1*/)); + while (readStrStrm >> word) { + std::cout << word << " "; // XXX + std::istringstream i(word); + if (i >> site) + tmpEp.push_back(site); + else + break; + } + r.eMut.push_back(tmpEp); + + r.numEpitopes++; + + std::cout << "\n"; // XXX + + } + +} + + +// insert into initial state of population + +void add_to_two_site_pop(RunParameters &r, bool s1, bool s2, double frac) { + + r.initFrac.push_back(frac); + + std::set tempSet; + if (s1) tempSet.insert(0); + if (s2) tempSet.insert(1); + r.initPop.push_back(tempSet); + +} + + +void usage() +{ + std::cout << +"Command line rules:\n" +"\n" +" -d (string) working directory\n" +" -i (string) input couplings file\n" +" -o (string) output file stem\n" +" -s (string) input state file, containing initial population fraction\n" +" -e (string) input file containing targeted epitope\n" +" -n (int/double) population size\n" +" -g (int/double) number of generations\n" +" -mu (double) mutation rate\n" +" -b (double) \"inverse temperature\" multiplier\n" +" -r flag to use relative energy condition for survival probability\n" +" -esc flag to run until escape observed (or max number of generations reached)\n" +" -esc_all flag to run until escape from *all* immune pressure (or max num gen)\n" +" -v flag for verbose output\n" +"\n" +" -2site flag for two-site two-allele system\n" +" -h1 (double) value of field at site 1\n" +" -h2 (double) value of field at site 2\n" +" -J12 (double) value of coupling between sites 1 and 2\n" +" (note the sign convention on these is P ~ exp(-H),\n" +" H = sum_i h_i s_i + sum_{i +#include +#include +#include +#include +#include +#include +#include + + +// STRING MANIPULATION + +// Converts generic to string + +template + +inline std::string tostring (const T& t) { + + std::stringstream ss; + ss << t; + return ss.str(); + +} + + +// Converts a string to an integer + +inline int strtoint(const std::string &s) { + + std::istringstream i(s); + int x; + + if (!(i >> x)) printf("String to integer conversion failed!"); + return x; + +} + + +// Converts a string to an unsigned integer + +inline int strtouint(const std::string &s) { + + std::istringstream i(s); + unsigned int x; + + if (!(i >> x)) printf("String to unsigned integer conversion failed!"); + return x; + +} + + +// Converts a string to a double + +inline double strtodouble(const std::string &s) { + + std::istringstream i(s); + double x; + + if (!(i >> x)) printf("String to double conversion failed!"); + return x; + +} + + +// PROGRAM SETTINGS + +// This class holds the parameters needed for running the simulation + +class RunParameters { + +public: + + std::string directory; // Path to the directory where the inut file is located + // Output will also be sent to this directory + std::string infile; // Input file from which couplings are to be read + std::string outfile; // Output file (prefix) where data is to be written + std::string statefile; // Input state file which contains initial population fractions + std::string epitopefile; // Input file name for targeted epitopes + + unsigned int n; // Population size + unsigned int g; // Number of generations + unsigned int num_runs; // Number of times to run the simulation for gathering overall statistics + double mu; // Mutation rate + double bh; // Field "inverse temperature" multiplier + double bJ; // Coupling "inverse temperature" multiplier + int estart; // Epitope start position + int eend; // Epitope end position + bool runUntilEscape; // If true, run the simulation until the population escapes + bool runUntilEscape_all; // If true, run the simulation until the population escapes *all* immune pressure + bool importState; // If true, import state from a state file + bool useEpitope; // Include selective pressure on an epitope + bool useRelative; // If true, use relative energy for survival probability + bool useVerbose; // If true, print extra information while program is running + + bool useTwoSite; // If true, use two-site two-allele model + double h1; + double h2; + double J12; + + unsigned int write_mod; // Write state of the population every __ generations + + std::vector > initPop; // Initial population sequences + std::vector initFrac; // Initial population fraction + std::vector > eWT; // Sites that are consensus (WT) in the targeted epitope + std::vector > eMut; // Sites that are mutant in the targeted epitope + double penalty; // Energy penalty if sequence contains the targeted epitope + unsigned int numEpitopes; // how many epitopes are targeted + + std::string couplingsInfile; + std::string trajectoryOutfile; + std::string supplementaryOutfile; + std::string stateInfile; + std::string epitopeInfile; + + enum PenaltyTYPE { PenaltyEACH, PenaltyTOTAL }; + + PenaltyTYPE penaltyType; + + + RunParameters() { + + directory=""; + + n = 100000; + g = 150; + num_runs = 1000; + mu = 6.0e-5; + bh = 1.0; + bJ = 1.0; + + estart = 0; + eend = 0; + + penalty = 10.0; + penaltyType = PenaltyTOTAL; + + runUntilEscape=false; + runUntilEscape_all=false; + importState=false; + useEpitope=false; + useRelative=false; + useVerbose=false; + + useTwoSite = false; + h1 = 0.0; + h2 = 0.0; + J12 = 0.0; + + write_mod = 1; + + numEpitopes = 0; + + } + void setFiles() { + + if (strcmp(directory.c_str(),"")!=0) { + + couplingsInfile=directory+"/"+infile+".j"; + trajectoryOutfile=directory+"/"+outfile+".dat"; + supplementaryOutfile=directory+"/"+outfile+".sum"; + stateInfile=directory+"/"+statefile+".st"; + epitopeInfile=directory+"/"+epitopefile+".ep"; + + } + + else { + + couplingsInfile=infile+".j"; + trajectoryOutfile=outfile+".dat"; + supplementaryOutfile=outfile+".sum"; + stateInfile=statefile+".st"; + epitopeInfile=epitopefile+".ep"; + + } + + //printf("%s\n%s\n%s\n",couplingsInfile.c_str(),trajectoryOutfile.c_str(),stateInfile.c_str()); + + } + ~RunParameters() {} + +}; + + +void run(RunParameters &r); +void importState(RunParameters &r); +void importEpitope(RunParameters &r); + + +#endif -- 2.7.4