Monte Carlo simulation of Boltzmann-distributed energy. Start Gillespie code. More...
authorDariusz Murakowski <murakdar@mit.edu>
Thu, 20 Nov 2014 20:00:01 +0000 (15:00 -0500)
committerDariusz Murakowski <murakdar@mit.edu>
Thu, 20 Nov 2014 20:00:01 +0000 (15:00 -0500)
Makefile
hamiltonian.cpp
hamiltonian.h
mc.cpp
mc.h
ss.cpp [new file with mode: 0644]
ss.h [new file with mode: 0644]

index 94dce1e..d2adedf 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -1,10 +1,13 @@
 
-SRCS = wf.cpp hamiltonian.cpp population.cpp virus.cpp
-EXECNAME = wf
+SRCS_WF = wf.cpp hamiltonian.cpp population.cpp virus.cpp
+EXECNAME_WF = wf
 
 SRCS_MC = mc.cpp hamiltonian.cpp population.cpp virus.cpp
 EXECNAME_MC = mc
 
+SRCS_SS = ss.cpp hamiltonian.cpp population.cpp virus.cpp
+EXECNAME_SS = ss
+
 
 CXX = c++
 CFLAGS = $(DBGFLAG) -Wall -Wextra -pedantic -Wno-unused-parameter
@@ -25,15 +28,18 @@ endif
 
 .PHONY: clean
 
-all: $(EXECNAME) $(EXECNAME_MC)
+all: $(EXECNAME_WF) $(EXECNAME_MC) $(EXECNAME_SS)
 #      @echo done making $(EXECNAME)
 
-$(EXECNAME): $(SRCS)
-       $(CXX) $(SRCS) $(CFLAGS) $(LIBDIR) $(INCLUDEDIR) -o $(EXECNAME) $(LIBS)
+$(EXECNAME_WF): $(SRCS_WF)
+       $(CXX) $(SRCS_WF) $(CFLAGS) $(LIBDIR) $(INCLUDEDIR) -o $(EXECNAME_WF) $(LIBS)
 
 $(EXECNAME_MC): $(SRCS_MC)
        $(CXX) $(SRCS_MC) $(CFLAGS) $(LIBDIR) $(INCLUDEDIR) -o $(EXECNAME_MC) $(LIBS)
 
+$(EXECNAME_SS): $(SRCS_SS)
+       $(CXX) $(SRCS_SS) $(CFLAGS) $(LIBDIR) $(INCLUDEDIR) -o $(EXECNAME_SS) $(LIBS)
+
 # concatenate all the source files before compiling
 #      ./concat.sh $(EXECNAME) $(SRCS)
 #      $(CXX) -x c++ $(EXECNAME).combined $(CFLAGS) $(LIBDIR) $(INCLUDEDIR) -o $(EXECNAME) $(LIBS)
@@ -46,6 +52,6 @@ $(EXECNAME_MC): $(SRCS_MC)
 #      $(CXX) -c $(CFLAGS) $(INCLUDES) $< -o $@
 
 clean:
-       $(RM) *.o $(EXECNAME) $(EXECNAME_MC)
+       $(RM) *.o $(EXECNAME_WF) $(EXECNAME_MC) $(EXECNAME_SS)
 #      $(RM) *.o *.combined $(EXECNAME)
 
index 40a3697..eff464c 100644 (file)
@@ -351,6 +351,38 @@ double EpitopeHamiltonian::get_energy(const std::set<unsigned int> &mutated_site
     
 }
 
+// helper function to test whether vector contains a value
+// based on http://stackoverflow.com/questions/6194797/what-is-a-c-container-with-a-contains-operation
+template <class T>
+inline bool contains(const std::vector<T> &vec, const T &value)
+{
+        return std::find(vec.begin(), vec.end(), value) != vec.end();
+}
+
+// helper function to test whether set contains a value
+// based on http://stackoverflow.com/questions/6194797/what-is-a-c-container-with-a-contains-operation
+template <class T>
+inline bool contains(const std::set<T> &container, const T &value)
+{
+        return container.find(value) != container.end();
+}
+
+void EpitopeHamiltonian::generate_allsites_set()
+{
+    this->epitopeAllSites = MutatedSiteSequence();
+    for (unsigned ep=0; ep<penalty.size(); ++ep) {
+        epitopeAllSites.insert(epitopeWT[ep].begin(), epitopeWT[ep].end());
+        epitopeAllSites.insert(epitopeMut[ep].begin(), epitopeMut[ep].end());
+    }
+
+    this->notEpitopeAllSites = MutatedSiteSequence();
+    for (unsigned i = 0; i < static_cast<unsigned int>(this->size); ++i) {
+        if (!contains(epitopeAllSites,i)) {
+            notEpitopeAllSites.insert(i);
+        }
+    }
+}
+
 
 
 /***********************************************
index a0ba314..fbdd6ad 100644 (file)
 typedef std::vector<std::vector<int> > AdjacencyList;
 typedef std::vector<std::vector<double> > Coupling;
 
+typedef std::set<unsigned int> MutatedSiteSequence;
+
 
 class Virus;
 
+//template <class T> bool contains(const std::vector<T> &vec, const T &value);
+//template <class T> bool contains(const std::set<T> &container, const T &value);
+
 
 class Hamiltonian {
 
@@ -48,10 +53,14 @@ public:
     std::vector<double> penalty;
     std::vector<std::vector<unsigned int> > epitopeWT;
     std::vector<std::vector<unsigned int> > epitopeMut;
+
+    MutatedSiteSequence epitopeAllSites;    // all site indices belonging to an epitope (WT or mut ant)
+    MutatedSiteSequence notEpitopeAllSites; // all sites not involved in an epitope
     
     EpitopeHamiltonian(std::string &FILENAME);
     
     void set_epitope(const std::vector<unsigned int> &eWT, const std::vector<unsigned int> &eMut, double d);
+    void generate_allsites_set();
     
     bool escaped(const Virus &v);
     bool escaped(const std::set<unsigned int> &mutated_sites) const;
diff --git a/mc.cpp b/mc.cpp
index d0ddcf0..df03df2 100644 (file)
--- a/mc.cpp
+++ b/mc.cpp
@@ -8,11 +8,28 @@
 #include <gsl/gsl_rng.h>
 
 #include "hamiltonian.h"
-#include "population.h"
+// #include "population.h"
 #include "virus.h"
 #include "mc.h"
 
 
+// helper function to test whether vector contains a value
+// based on http://stackoverflow.com/questions/6194797/what-is-a-c-container-with-a-contains-operation
+template <class T>
+inline bool contains(const std::vector<T> &vec, const T &value)
+{
+        return std::find(vec.begin(), vec.end(), value) != vec.end();
+}
+
+// helper function to test whether set contains a value
+// based on http://stackoverflow.com/questions/6194797/what-is-a-c-container-with-a-contains-operation
+template <class T>
+inline bool contains(const std::set<T> &container, const T &value)
+{
+        return container.find(value) != container.end();
+}
+
+
 // generate a random number generator seed from /dev/urandom
 // borrowed from SSC src/runtime/ssc_rts.c
 static unsigned sim_random_seed() {
@@ -34,9 +51,62 @@ static unsigned sim_random_seed() {
 }
 
 
+// initial sequence (given epitope constraints in H)
+MutatedSiteSequence initial_sequence(const EpitopeHamiltonian &H, const MutatedSiteSequence &initState)
+{
+    MutatedSiteSequence s(initState);  // copy
+
+    for (unsigned ep=0; ep<H.penalty.size(); ++ep) {
+        for (unsigned i=0; i<H.epitopeWT[ep].size(); ++i) {
+            s.erase(H.epitopeWT[ep][i]);  // sites that are wildtype in sequence
+        }
+        for (unsigned i=0; i<H.epitopeMut[ep].size(); ++i) {
+            s.insert(H.epitopeMut[ep][i]);  // sites that should be mutated in sequence
+        }
+    }
+
+    return s;
+}
+
+
+bool do_flip_or_not(const EpitopeHamiltonian &H, MutatedSiteSequence &seq, gsl_rng *r, unsigned &site)
+{
+    // select site
+    unsigned num_allowed_sites = H.notEpitopeAllSites.size();
+    unsigned site_num = gsl_rng_uniform_int(r, num_allowed_sites);
+    MutatedSiteSequence::const_iterator it = H.notEpitopeAllSites.begin();
+    std::advance(it,site_num);
+    site = *it;
+
+    bool seq_contains_site = contains(seq,site);
+
+    // deltaE of flipping site
+    double deltaE = 0.0;
+    deltaE += H.bh * H.J[site][site] * (seq_contains_site ? -1 : 1);
+    double deltaE_J = 0.0;
+    for (MutatedSiteSequence::iterator iter = seq.begin(); iter != seq.end(); ++iter) {
+        deltaE_J += H.bJ * H.J[site][*iter] * (*iter); // TODO
+    }
+    deltaE_J *= (seq_contains_site ? -1 : 1);
+    deltaE += deltaE_J;
+
+    bool flipped = false;
+    if (gsl_rng_uniform(r) < exp(-deltaE)) {
+        flipped = true;
+        
+        if (seq_contains_site)
+            seq.erase(site);
+        else
+            seq.insert(site);
+    }
+
+    return flipped;     // TODO return deltaE instead?
+}
+
+
 // Run the program
 
-void run(RunParameters &r, unsigned seed) {
+void run(RunParameters_MC &r, unsigned seed) {
     
     // Initialize RNG and set initial state, if importing from file
     
@@ -67,6 +137,10 @@ void run(RunParameters &r, unsigned seed) {
             
                 TwoSiteHamiltonian H(r.h1,r.h2,r.J12);
                 H.set_temp(r.bh, r.bJ);
+
+                std::cerr << "unsupported simulation scheme" << std::endl;
+                exit(1);
+                /*
                 Population P(H, r.n, r.mu, r.initPop, r.initFrac);
                 
                 P.write_two_site_population(popout,H,0);
@@ -84,6 +158,7 @@ void run(RunParameters &r, unsigned seed) {
                     //if (r.runUntilEscape && P.escaped(H)) break;
 
                 }
+                */
                 
             }
 
@@ -97,12 +172,19 @@ void run(RunParameters &r, unsigned seed) {
         if (r.useEpitope) {
         
             for (unsigned int n=0;n<r.num_runs;n++) {
-            
+
+                // initialize H
+
                 EpitopeHamiltonian H(r.couplingsInfile);
                 for (unsigned ep=0; ep<r.numEpitopes; ++ep)
                     H.set_epitope(r.eWT[ep], r.eMut[ep], r.penalty);
+                H.generate_allsites_set();
+
                 H.set_temp(r.bh, r.bJ);
-                Population P(H, r.n, r.mu, r.initPop, r.initFrac);
+
+                // initialize sequence such that it satisfies
+                // constraint of fixed epitope
+                MutatedSiteSequence seq = initial_sequence(H, r.initState); //, r.eWT, r.eMut);
 
                 // print epitopes
                 /*
@@ -122,17 +204,26 @@ void run(RunParameters &r, unsigned seed) {
                 
                 unsigned int i;
                 for (i=0; i<r.g; ++i) {
-            
-                    P.next_generation(H, rnd, r.useRelative, r.useVerbose);
 
+                    unsigned site;
+                    do_flip_or_not(H,seq,rnd,site);
+
+                    // write sequence as CSV (comma-separated)
                     if ((i+1) % r.write_mod == 0) {
-                        P.write_population(popout,i+1);
+                        MutatedSiteSequence::iterator seq_iter=seq.begin();
+                        if (seq_iter != seq.end()) {
+                            fprintf(popout,"%d",*seq_iter);
+                            ++seq_iter;
+                        }
+                        for (; seq_iter!=seq.end(); ++seq_iter)
+                            fprintf(popout,",%d",*seq_iter);
                     }
                 
-                    if (r.runUntilEscape && P.escaped(H)) break;
+                    if (r.runUntilEscape && H.escaped(seq)) break;
                 
                 }
                 
+                /*
                 std::set<unsigned int> esc_var;
                 unsigned int esc_num=P.escape_variant(H, esc_var);
                 
@@ -141,6 +232,7 @@ void run(RunParameters &r, unsigned seed) {
                 fprintf(supout,"\n");
                 
                 fflush(supout);
+                */
             
             }
             
@@ -155,6 +247,10 @@ void run(RunParameters &r, unsigned seed) {
             
                 Hamiltonian H(r.couplingsInfile);
                 H.set_temp(r.bh, r.bJ);
+
+                std::cerr << "unsupported simulation scheme" << std::endl;
+                exit(1);
+                /*
                 Population P(H, r.n, r.mu);
                 
                 unsigned int i;
@@ -166,6 +262,7 @@ void run(RunParameters &r, unsigned seed) {
                     if (r.runUntilEscape && P.escaped(H)) break;
 
                 }
+                */
                 
             //}
             
@@ -183,7 +280,7 @@ void run(RunParameters &r, unsigned seed) {
 
 // Import initial state from a state file
 
-void importState(RunParameters &r) {
+void importState(RunParameters_MC &r) {
 
     FILE *input=fopen(r.stateInfile.c_str(),"r");   // <infile>.st
     if (input == NULL) { perror((std::string("ERROR in importState: ") + r.stateInfile).c_str()); exit(1); }
@@ -196,10 +293,12 @@ void importState(RunParameters &r) {
     
        while (fscanf(input,"%le",&frac)==1) {
 
+        if (frac != 1.0) {
+            std::cerr << "ERROR in importState: " << r.stateInfile << ": population fraction must be unity (1.0) for a Monte Carlo run. Received: " << frac; exit(1);
+        }
+
         std::cout << frac; // XXX
         
-        r.initFrac.push_back(frac);
-        
         std::set<unsigned int> tempSet;
         
                while (fscanf(input,"%c",&o)==1) {
@@ -218,7 +317,7 @@ void importState(RunParameters &r) {
                        
                }
         
-        r.initPop.push_back(tempSet);
+        r.initState = MutatedSiteSequence(tempSet);
         
         if (o==';') break;
         
@@ -231,7 +330,7 @@ void importState(RunParameters &r) {
 
 // load epitope definitions from file
 
-void importEpitope(RunParameters &r) {
+void importEpitope(RunParameters_MC &r) {
 
     std::ifstream input(r.epitopeInfile.c_str());   // <infile>.ep
     if (!input) { perror((std::string("ERROR in importEpitope: ") + r.epitopeInfile).c_str()); exit(1); }
@@ -286,7 +385,7 @@ void importEpitope(RunParameters &r) {
 
 // insert into initial state of population
 
-void add_to_two_site_pop(RunParameters &r, bool s1, bool s2, double frac) {
+void add_to_two_site_pop(RunParameters_MC &r, bool s1, bool s2, double frac) {
 
     r.initFrac.push_back(frac);
 
@@ -347,7 +446,7 @@ void usage()
 
 int main(int argc, char *argv[]) {
     
-    RunParameters r;
+    RunParameters_MC r;
 
     unsigned seed = sim_random_seed();
     
diff --git a/mc.h b/mc.h
index 724da81..33daf8d 100644 (file)
--- a/mc.h
+++ b/mc.h
@@ -11,6 +11,9 @@
 #include <stdio.h>
 
 
+typedef std::set<unsigned int> MutatedSiteSequence;
+
+
 // STRING MANIPULATION
 
 // Converts generic to string
@@ -69,7 +72,7 @@ inline double strtodouble(const std::string &s) {
 
 // This class holds the parameters needed for running the simulation
 
-class RunParameters {
+class RunParameters_MC {
     
 public:
     
@@ -99,8 +102,10 @@ public:
     double h2;
     double J12;
 
-    unsigned int write_mod;     // Write state of the population every __ generations
+    unsigned int write_mod;     // Write state of the population every __ MC steps
     
+    MutatedSiteSequence initState;  // initial state of system
+
     std::vector<std::set<unsigned int> > initPop;   // Initial population sequences
     std::vector<double>                  initFrac;  // Initial population fraction
     std::vector<std::vector<unsigned int> >           eWT;       // Sites that are consensus (WT) in the targeted epitope
@@ -115,7 +120,7 @@ public:
     std::string epitopeInfile;
     
     
-    RunParameters() {
+    RunParameters_MC() {
     
         directory="";
         
@@ -172,14 +177,15 @@ public:
         //printf("%s\n%s\n%s\n",couplingsInfile.c_str(),trajectoryOutfile.c_str(),stateInfile.c_str());
     
     }
-    ~RunParameters() {}
+    ~RunParameters_MC() {}
     
 };
 
 
-void run(RunParameters &r);
-void importState(RunParameters &r);
-void importEpitope(RunParameters &r);
+void run(RunParameters_MC &r);
+void importState(RunParameters_MC &r);
+void importEpitope(RunParameters_MC &r);
+MutatedSiteSequence initSeq(const EpitopeHamiltonian &H, const MutatedSiteSequence &initState);
 
 
 #endif // MC_H
diff --git a/ss.cpp b/ss.cpp
new file mode 100644 (file)
index 0000000..8f69a3c
--- /dev/null
+++ b/ss.cpp
@@ -0,0 +1,439 @@
+#include <iostream>
+#include <string>
+#include <fstream>
+#include <cstdio>
+#include <cstdlib>
+#include <cmath>
+
+#include <gsl/gsl_rng.h>
+
+#include "hamiltonian.h"
+#include "population.h"
+#include "virus.h"
+#include "ss.h"
+
+
+// generate a random number generator seed from /dev/urandom
+// borrowed from SSC src/runtime/ssc_rts.c
+static unsigned sim_random_seed() {
+    unsigned random_seed;
+    FILE *rng = fopen("/dev/urandom", "r");
+    if (rng == NULL) {
+        fprintf(stderr,
+                "error: cannot read /dev/urandom randomness generator\n");
+        exit(1);
+    }
+    size_t num_random_read = fread(&random_seed, sizeof(random_seed), 1, rng);
+    if (num_random_read != 1) {
+        fprintf(stderr,
+                "error: cannot read /dev/urandom randomness generator\n");
+        exit(1);
+    }
+    fclose(rng);
+    return random_seed;
+}
+
+
+// Run the program
+
+void run(RunParameters &r, unsigned seed) {
+    
+    // Initialize RNG and set initial state, if importing from file
+    
+    static gsl_rng *rnd = gsl_rng_alloc(gsl_rng_taus2);        //pointer to random number generator state
+    
+    //srand((unsigned)time(0));
+    //gsl_rng_set(rnd,rand());
+    gsl_rng_set(rnd,seed);
+    
+    r.setFiles();
+    FILE *popout=fopen(r.trajectoryOutfile.c_str(),"w");        // <outfile>.dat
+    FILE *supout=r.useTwoSite ? NULL : fopen(r.supplementaryOutfile.c_str(),"w");     // <outfile>.sum
+    if (popout == NULL) { perror("File error"); exit(1); }
+    if (r.useTwoSite && supout == NULL) { perror("File error"); exit(1); }
+    
+    if (r.importState) importState(r);      // <infile>.st
+    if (r.useEpitope)  importEpitope(r);    // <infile>.ep
+    fflush(stdout);
+    
+
+    if(r.useTwoSite) {
+
+            fprintf(popout,"gen\tV00\tV10\tV01\tV11\n");
+
+            for (unsigned int n=0;n<r.num_runs;n++) {
+
+                //fprintf(popout,"gen\tV00\tV10\tV01\tV11\n");
+            
+                TwoSiteHamiltonian H(r.h1,r.h2,r.J12);
+                H.set_temp(r.bh, r.bJ);
+                Population P(H, r.n, r.mu, r.initPop, r.initFrac);
+                
+                P.write_two_site_population(popout,H,0);
+
+                unsigned int i;
+                for (i=0; i<r.g; ++i) {
+                
+                    //printf("%d\t%d\n",n,i);
+                    P.next_generation(H, rnd, r.useRelative, r.useVerbose);
+
+                    if ((i+1) % r.write_mod == 0) {
+                        P.write_two_site_population(popout,H,i+1);
+                    }
+
+                    //if (r.runUntilEscape && P.escaped(H)) break;
+
+                }
+                
+            }
+
+    }
+
+
+    else {
+        
+        // Run (w/ targeted epitope)
+        
+        if (r.useEpitope) {
+        
+            for (unsigned int n=0; n<r.num_runs; n++) {
+            
+                EpitopeHamiltonian H(r.couplingsInfile);
+
+                double penalty = 0.0;
+                if (r.penaltyType == RunParameters::PenaltyEACH)
+                    penalty = r.penalty;
+                else if (r.penaltyType == RunParameters::PenaltyTOTAL)
+                    penalty = r.penalty / (double)r.numEpitopes;
+                for (unsigned ep=0; ep<r.numEpitopes; ++ep) {
+                    H.set_epitope(r.eWT[ep], r.eMut[ep], penalty);
+                }
+
+                H.set_temp(r.bh, r.bJ);
+
+                Population P(H, r.n, r.mu, r.initPop, r.initFrac);
+
+                // print epitopes
+                /*
+                std::cout << "-----------\n";
+                for (unsigned ep=0; ep<H.penalty.size(); ++ep) {
+                    for (unsigned i=0; i<H.epitopeWT[ep].size(); ++i) {
+                        std::cout << H.epitopeWT[ep][i] << " ";
+                    }
+                    std::cout << "| ";
+                    for (unsigned i=0; i<H.epitopeMut[ep].size(); ++i) {
+                        std::cout << H.epitopeMut[ep][i] << " ";
+                    }
+                    std::cout << "\n";
+                }
+                fflush(stdout);
+                */
+                
+                unsigned int i;
+                for (i=0; i<r.g; ++i) {
+            
+                    P.next_generation(H, rnd, r.useRelative, r.useVerbose);
+
+                    if ((i+1) % r.write_mod == 0) {
+                        P.write_population(popout,i+1);
+                    }
+                
+                    if (r.runUntilEscape && P.escaped(H)) break;
+
+                    if (r.runUntilEscape_all && P.escaped_all(H)) break;
+                
+                }
+                
+                std::set<unsigned int> esc_var;
+                unsigned int esc_num = 0;
+                if (!r.runUntilEscape && !r.runUntilEscape_all) esc_num = P.escape_variant(H, esc_var);  // default behavior for backwards compatibility
+                if (r.runUntilEscape_all) esc_num = P.escape_variant_all(H, esc_var);
+                if (r.runUntilEscape) esc_num = P.escape_variant(H, esc_var);
+                // should behave better if both are true
+                // but more likely to escape one than all, so that one is output
+                
+                fprintf(supout,"%d\t%d",i,esc_num);
+
+                //for (std::set<unsigned int>::iterator iter=esc_var.begin(); iter!=esc_var.end(); ++iter) fprintf(supout,"\t%d",*iter);
+
+                // print sequence as CSV (comma-separated)
+                fprintf(supout,"\t");
+                std::set<unsigned int>::iterator ms_iter=esc_var.begin();
+                if (ms_iter != esc_var.end()) {
+                    fprintf(supout,"%d",*ms_iter);
+                    ++ms_iter;
+                }
+                for (; ms_iter!=esc_var.end(); ++ms_iter)
+                    fprintf(supout,",%d",*ms_iter);
+
+                fprintf(supout,"\n");
+                
+                fflush(supout);
+            
+            }
+            
+        }
+        
+        
+        // Run (w/out targeted epitope)
+        
+        else {
+
+            //for (unsigned int n=0;n<r.num_runs;n++) {
+            
+                Hamiltonian H(r.couplingsInfile);
+                H.set_temp(r.bh, r.bJ);
+                Population P(H, r.n, r.mu);
+                
+                unsigned int i;
+                for (i=0; i<r.g; ++i) {
+                
+                    P.next_generation(H, rnd, r.useRelative, r.useVerbose);
+                    P.write_population(popout,i);
+
+                    if (r.runUntilEscape && P.escaped(H)) break;
+
+                }
+                
+            //}
+            
+        }
+
+    }
+    
+    gsl_rng_free(rnd);  //Free up memory from random number generator
+
+    fclose(popout);     // close file handles
+    if(!r.useTwoSite) fclose(supout);
+
+}
+
+
+// Import initial state from a state file
+
+void importState(RunParameters &r) {
+
+    FILE *input=fopen(r.stateInfile.c_str(),"r");   // <infile>.st
+    if (input == NULL) { perror((std::string("ERROR in importState: ") + r.stateInfile).c_str()); exit(1); }
+    
+    char o;
+    double frac;
+       unsigned int site;
+    
+    // Import initial state of the population
+    
+       while (fscanf(input,"%le",&frac)==1) {
+
+        std::cout << frac; // XXX
+        
+        r.initFrac.push_back(frac);
+        
+        std::set<unsigned int> tempSet;
+        
+               while (fscanf(input,"%c",&o)==1) {
+
+            std::cout << o; // XXX
+                       
+                       if (o=='\n' || o==';') break;
+                       
+                       else {
+                               
+                               fscanf(input,"%u",&site);
+                std::cout << site; // XXX
+                tempSet.insert(site);
+                
+            }
+                       
+               }
+        
+        r.initPop.push_back(tempSet);
+        
+        if (o==';') break;
+        
+       }
+
+    std::cout << "\n";  // XXX
+    
+}
+
+
+// load epitope definitions from file
+
+void importEpitope(RunParameters &r) {
+
+    std::ifstream input(r.epitopeInfile.c_str());   // <infile>.ep
+    if (!input) { perror((std::string("ERROR in importEpitope: ") + r.epitopeInfile).c_str()); exit(1); }
+
+    std::string readStr;
+    while (std::getline(input,readStr)) {
+
+        std::string word;
+        unsigned int site;
+        std::vector<unsigned int> tmpEp;
+        size_t pos0 = 0;
+        size_t posBar = readStr.find('|',pos0);
+        size_t posEnd = std::string::npos; //readStr.size();
+
+        // could use std::noskipws to keep tab ('\t') characters?
+
+        std::stringstream readStrStrm(std::string(readStr,pos0,posBar-pos0));
+        while (readStrStrm >> word) {
+            std::cout << word << " "; // XXX
+            std::istringstream i(word);
+            if (i >> site)
+                tmpEp.push_back(site);
+            else    // must have encountered '|'
+                break;
+        }
+        r.eWT.push_back(tmpEp);
+
+        tmpEp.clear();  // reset temp epitope
+        readStrStrm.str("");  readStrStrm.clear();  // reset stream
+
+        std::cout << "| "; // XXX
+
+        readStrStrm.str(std::string(readStr,posBar+1,posEnd/*-posBar-1*/));
+        while (readStrStrm >> word) {
+            std::cout << word << " "; // XXX
+            std::istringstream i(word);
+            if (i >> site)
+                tmpEp.push_back(site);
+            else
+                break;
+        }
+        r.eMut.push_back(tmpEp);
+
+        r.numEpitopes++;
+
+        std::cout << "\n"; // XXX
+
+    }
+
+}
+
+
+// insert into initial state of population
+
+void add_to_two_site_pop(RunParameters &r, bool s1, bool s2, double frac) {
+
+    r.initFrac.push_back(frac);
+
+    std::set<unsigned int> tempSet;
+    if (s1) tempSet.insert(0);
+    if (s2) tempSet.insert(1);
+    r.initPop.push_back(tempSet);
+
+}
+
+
+void usage()
+{
+    std::cout <<
+"Command line rules:\n"
+"\n"
+" -d  (string)        working directory\n"
+" -i  (string)        input couplings file\n"
+" -o  (string)        output file stem\n"
+" -s  (string)        input state file, containing initial population fraction\n"
+" -e  (string)        input file containing targeted epitope\n"
+" -n  (int/double)    population size\n"
+" -g  (int/double)    number of generations\n"
+" -mu (double)        mutation rate\n"
+" -b  (double)        \"inverse temperature\" multiplier\n"
+" -r                  flag to use relative energy condition for survival probability\n"
+" -esc                flag to run until escape observed (or max number of generations reached)\n"
+" -esc_all            flag to run until escape from *all* immune pressure (or max num gen)\n"
+" -v                  flag for verbose output\n"
+"\n"
+" -2site              flag for two-site two-allele system\n"
+" -h1  (double)       value of field at site 1\n"
+" -h2  (double)       value of field at site 2\n"
+" -J12 (double)       value of coupling between sites 1 and 2\n"
+"                     (note the sign convention on these is P ~ exp(-H),\n"
+"                      H = sum_i h_i s_i + sum_{i<j} J_{ij} s_i s_j\n"
+"                      although internal representation is opposite)\n"
+" -write_mod  (int)   write out state every 'write_mod' generations\n"
+" -penaltyEach  (double)       bonus (per epitope) for mutation in epitope\n"
+" -penaltyTotal (double)       maximum total bonus if all epitopes contain mutations;\n"
+"                              bonus per epitope = this / number of epitopes\n"
+;   std::cout.flush();
+}
+
+/*
+ * Command line rules:
+ *
+ *  -d  (string)        working directory
+ *  -i  (string)        input couplings file
+ *  -o  (string)        output file stem
+ *  -s  (string)        input state file, containing initial population fraction and targeted epitope
+ *  -e  (string)        input file containing targeted epitope
+ *  -n  (int/double)    population size
+ *  -g  (int/double)    number of generations
+ *  -mu (double)        mutation rate
+ *  -b  (double)        "inverse temperature" multiplier
+ *  -r                  flag to use relative energy condition for survival probability
+ *  -esc                flag to run until escape observed (or max number of generations reached)
+ *  -v                  flag for verbose output
+ *  -2site              flag for two-site two-allele system
+ */
+
+int main(int argc, char *argv[]) {
+    
+    RunParameters r;
+
+    unsigned seed = sim_random_seed();
+    
+    // Process command line input
+    
+    for (int i=1;i<argc;i++) {
+        
+        if      (strcmp(argv[i],"-d")==0) { if (++i==argc) break; else r.directory=argv[i];                         }
+        else if (strcmp(argv[i],"-i")==0) { if (++i==argc) break; else r.infile=argv[i];                            }
+        else if (strcmp(argv[i],"-o")==0) { if (++i==argc) break; else r.outfile=argv[i];                           }
+        else if (strcmp(argv[i],"-s")==0) { if (++i==argc) break; else { r.statefile=argv[i]; r.importState=true; } }
+        else if (strcmp(argv[i],"-e")==0) { if (++i==argc) break; else { r.epitopefile=argv[i]; r.useEpitope=true; } }
+        
+        else if (strcmp(argv[i],"-n")==0)  { if (++i==argc) break; else r.n=(unsigned int)strtodouble(argv[i]); }
+        else if (strcmp(argv[i],"-g")==0)  { if (++i==argc) break; else r.g=(unsigned int)strtodouble(argv[i]); }
+        else if (strcmp(argv[i],"-mu")==0) { if (++i==argc) break; else r.mu=strtodouble(argv[i]);              }
+        else if (strcmp(argv[i],"-b")==0)  { if (++i==argc) break; else { r.bh=strtodouble(argv[i]); r.bJ=r.bh; } }
+        else if (strcmp(argv[i],"-bh")==0) { if (++i==argc) break; else r.bh=strtodouble(argv[i]);              }
+        else if (strcmp(argv[i],"-bJ")==0) { if (++i==argc) break; else r.bJ=strtodouble(argv[i]);              }
+
+        else if (strcmp(argv[i],"-penaltyEach")==0) { if (++i==argc) break; else { r.penalty=strtodouble(argv[i]); r.penaltyType=RunParameters::PenaltyEACH; } }
+        else if (strcmp(argv[i],"-penaltyTotal")==0) { if (++i==argc) break; else { r.penalty=strtodouble(argv[i]); r.penaltyType=RunParameters::PenaltyTOTAL; } }
+        
+        else if (strcmp(argv[i],"-r")==0)   { r.useRelative=true;    }
+        else if (strcmp(argv[i],"-esc")==0) { r.runUntilEscape=true; }
+        else if (strcmp(argv[i],"-esc_all")==0) { r.runUntilEscape_all=true; }
+        else if (strcmp(argv[i],"-v")==0)   { r.useVerbose=true;     }
+        
+        else if (strcmp(argv[i],"-numruns")==0)  { if (++i==argc) break; else r.num_runs=(unsigned int)strtodouble(argv[i]); }
+
+        else if (strcmp(argv[i],"-2site")==0)   { r.useTwoSite=true;     }
+        else if (strcmp(argv[i],"-h1")==0)  { if (++i==argc) break; else r.h1=-strtodouble(argv[i]);  }
+        else if (strcmp(argv[i],"-h2")==0)  { if (++i==argc) break; else r.h2=-strtodouble(argv[i]);  }
+        else if (strcmp(argv[i],"-J12")==0) { if (++i==argc) break; else r.J12=-strtodouble(argv[i]); }
+
+        else if (strcmp(argv[i],"-p00")==0) { if (++i==argc) break; else add_to_two_site_pop(r,false,false,strtodouble(argv[i])); }
+        else if (strcmp(argv[i],"-p10")==0) { if (++i==argc) break; else add_to_two_site_pop(r,true ,false,strtodouble(argv[i])); }
+        else if (strcmp(argv[i],"-p01")==0) { if (++i==argc) break; else add_to_two_site_pop(r,false,true ,strtodouble(argv[i])); }
+        else if (strcmp(argv[i],"-p11")==0) { if (++i==argc) break; else add_to_two_site_pop(r,true ,true ,strtodouble(argv[i])); }
+
+        else if (strcmp(argv[i],"-seed")==0) { if (++i==argc) break; else seed=(unsigned)strtodouble(argv[i]); }
+
+        else if (strcmp(argv[i],"-write_mod")==0) { if (++i==argc) break; else r.write_mod=(unsigned int)strtodouble(argv[i]); }
+
+        else if (strcmp(argv[i],"-h")==0 || strcmp(argv[i],"--help")==0)   { usage(); return 0;    }
+        
+        else printf("Unrecognized command! '%s'\n",argv[i]);
+                
+    }
+    
+    std::cout << "seed = " << seed << "\n";
+    run(r,seed);
+    
+    return 0;
+    
+}      
+       
diff --git a/ss.h b/ss.h
new file mode 100644 (file)
index 0000000..a2192d7
--- /dev/null
+++ b/ss.h
@@ -0,0 +1,192 @@
+#ifndef WF_H
+#define WF_H
+
+#include <vector>
+#include <set>
+#include <math.h>
+#include <string>
+#include <sstream>
+#include <cstring>
+#include <iostream>
+#include <stdio.h>
+
+
+// STRING MANIPULATION
+
+// Converts generic to string
+
+template <class T>
+
+inline std::string tostring (const T& t) {
+    
+    std::stringstream ss;
+    ss << t;
+    return ss.str();
+    
+}
+
+
+// Converts a string to an integer
+
+inline int strtoint(const std::string &s) {
+    
+    std::istringstream i(s);
+    int x;
+    
+    if (!(i >> x)) printf("String to integer conversion failed!");
+    return x;
+    
+}
+
+
+// Converts a string to an unsigned integer
+
+inline int strtouint(const std::string &s) {
+    
+    std::istringstream i(s);
+    unsigned int x;
+    
+    if (!(i >> x)) printf("String to unsigned integer conversion failed!");
+    return x;
+    
+}
+
+
+// Converts a string to a double
+
+inline double strtodouble(const std::string &s) {
+    
+    std::istringstream i(s);
+    double x;
+    
+    if (!(i >> x)) printf("String to double conversion failed!");
+    return x;
+
+}
+
+
+// PROGRAM SETTINGS
+
+// This class holds the parameters needed for running the simulation
+
+class RunParameters {
+    
+public:
+    
+    std::string directory;      // Path to the directory where the inut file is located
+                                // Output will also be sent to this directory
+    std::string infile;         // Input file from which couplings are to be read
+    std::string outfile;        // Output file (prefix) where data is to be written
+    std::string statefile;      // Input state file which contains initial population fractions
+    std::string epitopefile;    // Input file name for targeted epitopes
+    
+    unsigned int n;             // Population size
+    unsigned int g;             // Number of generations
+    unsigned int num_runs;      // Number of times to run the simulation for gathering overall statistics
+    double mu;                  // Mutation rate
+    double bh;                  // Field "inverse temperature" multiplier
+    double bJ;                  // Coupling "inverse temperature" multiplier
+    int estart;                 // Epitope start position
+    int eend;                   // Epitope end position
+    bool runUntilEscape;        // If true, run the simulation until the population escapes
+    bool runUntilEscape_all;    // If true, run the simulation until the population escapes *all* immune pressure
+    bool importState;           // If true, import state from a state file
+    bool useEpitope;            // Include selective pressure on an epitope 
+    bool useRelative;           // If true, use relative energy for survival probability
+    bool useVerbose;            // If true, print extra information while program is running
+
+    bool useTwoSite;            // If true, use two-site two-allele model
+    double h1;
+    double h2;
+    double J12;
+
+    unsigned int write_mod;     // Write state of the population every __ generations
+    
+    std::vector<std::set<unsigned int> > initPop;   // Initial population sequences
+    std::vector<double>                  initFrac;  // Initial population fraction
+    std::vector<std::vector<unsigned int> >           eWT;       // Sites that are consensus (WT) in the targeted epitope
+    std::vector<std::vector<unsigned int> >           eMut;      // Sites that are mutant in the targeted epitope
+    double                               penalty;   // Energy penalty if sequence contains the targeted epitope
+    unsigned int numEpitopes;   // how many epitopes are targeted
+    
+    std::string couplingsInfile;
+    std::string trajectoryOutfile;
+    std::string supplementaryOutfile;
+    std::string stateInfile;
+    std::string epitopeInfile;
+
+    enum PenaltyTYPE { PenaltyEACH, PenaltyTOTAL };
+
+    PenaltyTYPE penaltyType;
+    
+    
+    RunParameters() {
+    
+        directory="";
+        
+        n  = 100000;
+        g  = 150;
+        num_runs = 1000;
+        mu = 6.0e-5;
+        bh = 1.0;
+        bJ = 1.0;
+        
+        estart = 0;
+        eend   = 0;
+        
+        penalty = 10.0;
+        penaltyType = PenaltyTOTAL;
+        
+        runUntilEscape=false;
+        runUntilEscape_all=false;
+        importState=false;
+        useEpitope=false;
+        useRelative=false;
+        useVerbose=false;
+
+        useTwoSite = false;
+        h1  = 0.0;
+        h2  = 0.0;
+        J12 = 0.0;
+
+        write_mod = 1;
+
+        numEpitopes = 0;
+        
+    }
+    void setFiles() {
+        
+        if (strcmp(directory.c_str(),"")!=0) {
+        
+            couplingsInfile=directory+"/"+infile+".j";
+            trajectoryOutfile=directory+"/"+outfile+".dat";
+            supplementaryOutfile=directory+"/"+outfile+".sum";
+            stateInfile=directory+"/"+statefile+".st";
+            epitopeInfile=directory+"/"+epitopefile+".ep";
+            
+        }
+        
+        else {
+        
+            couplingsInfile=infile+".j";
+            trajectoryOutfile=outfile+".dat";
+            supplementaryOutfile=outfile+".sum";
+            stateInfile=statefile+".st";
+            epitopeInfile=epitopefile+".ep";
+        
+        }
+        
+        //printf("%s\n%s\n%s\n",couplingsInfile.c_str(),trajectoryOutfile.c_str(),stateInfile.c_str());
+    
+    }
+    ~RunParameters() {}
+    
+};
+
+
+void run(RunParameters &r);
+void importState(RunParameters &r);
+void importEpitope(RunParameters &r);
+
+
+#endif