Wright-Fisher code by Tom Butler (via John Barton).
authorDariusz Murakowski <murakdar@mit.edu>
Tue, 14 May 2013 21:01:52 +0000 (17:01 -0400)
committerDariusz Murakowski <murakdar@mit.edu>
Tue, 14 May 2013 21:01:52 +0000 (17:01 -0400)
Makefile
binomial.cpp [deleted file]
hamiltonian.cpp [new file with mode: 0644]
hamiltonian.h [new file with mode: 0644]
mt19937ar.cpp [deleted file]
population.cpp [new file with mode: 0644]
population.h [new file with mode: 0644]
virus.cpp [new file with mode: 0644]
virus.h [new file with mode: 0644]
wf.cpp
wf.h [new file with mode: 0644]

index 73f864f..231fde7 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -1,15 +1,13 @@
 
-#SRCS = wf.cpp mt19937ar.cpp # binomial.cpp
-SRCS = wf.cpp mt19937ar.cpp
+SRCS = wf.cpp hamiltonian.cpp population.cpp virus.cpp
 EXECNAME = wf
-OBJS = $(SRCS:.cpp=.o)
 
 CXX = c++
 CFLAGS = $(DBGFLAG) -Wall -Wextra -pedantic
-INCLUDES = -I/usr/local/pkg/gsl/gsl-1.15/include
+INCLUDEDIR = -I/usr/local/pkg/gsl/gsl-1.15/include
 # = `gsl-config --cflags`
-LFLAGS = $(DBGFLAG)
-LIBS = /usr/local/pkg/gsl/gsl-1.15/lib/libgsl.a /usr/local/pkg/gsl/gsl-1.15/lib/libgslcblas.a -lm
+LIBDIR = -L/usr/local/pkg/gsl/gsl-1.15/lib
+LIBS = -lgsl -lgslcblas -lm
 # = -L/usr/local/pkg/gsl/gsl-1.15/lib -lgsl -lgslcblas -lm
 # = `gsl-config --libs`
 
@@ -27,7 +25,8 @@ all: $(EXECNAME)
 #      @echo done making $(EXECNAME)
 
 $(EXECNAME): $(SRCS)
-       $(CXX) $(SRCS) $(CFLAGS) $(LFLAGS) $(INCLUDES) $(LIBS) -o $(EXECNAME)
+       $(CXX) $(SRCS) $(CFLAGS) $(LIBDIR) $(INCLUDEDIR) -o $(EXECNAME) $(LIBS)
+#      $(CXX) $(SRCS) $(CFLAGS) $(LFLAGS) $(INCLUDES) $(LIBS) -o $(EXECNAME)
 #      $(CXX) $(CFLAGS) $(INCLUDES) -o $(EXECNAME) $(OBJS) $(LFLAGS) $(LIBS)
 
 # equivalent way using 'old-fashioned suffix rules' would be .c.o:
diff --git a/binomial.cpp b/binomial.cpp
deleted file mode 100644 (file)
index e092602..0000000
+++ /dev/null
@@ -1,416 +0,0 @@
-/*
- * borrowed from GSL (1.15) source code:
- * randist/binomial_tpe.c
- * randist/binomial.c
- * cdf/binomial.c
- */
-
-#include <math.h>
-#include <stdlib.h>
-
-/*
- * forward declarations
- */
-double genrand_real2(void);
-
-typedef struct
-  {
-    //const gsl_rng_type * type;
-    void *state;
-  }
-gsl_rng;
-
-// call my Mersenne Twister
-// note: not thread-safe!!! uses static variable....
-double gsl_rng_uniform(const gsl_rng *r)
-{
-    gsl_rng blah = *r; // solve warning: unused parameter
-    return genrand_real2();
-}
-
-/* {
- * start of: sys/pow_int.c
- */
-double gsl_pow_uint(double x, unsigned int n)
-{
-  double value = 1.0;
-
-  /* repeated squaring method 
-   * returns 0.0^0 = 1.0, so continuous in x
-   */
-  do {
-     if(n & 1) value *= x;  /* for n odd */
-     n >>= 1;
-     x *= x;
-  } while (n);
-
-  return value;
-}
-/* end of: sys/pow_int.c
- * }
- */
-
-/* {
- * start of: randist/binomial_tpe.c
- */
-
-/* The binomial distribution has the form,
-
-   f(x) =  n!/(x!(n-x)!) * p^x (1-p)^(n-x) for integer 0 <= x <= n
-        =  0                               otherwise
-
-   This implementation follows the public domain ranlib function
-   "ignbin", the bulk of which is the BTPE (Binomial Triangle
-   Parallelogram Exponential) algorithm introduced in
-   Kachitvichyanukul and Schmeiser[1].  It has been translated to use
-   modern C coding standards.
-
-   If n is small and/or p is near 0 or near 1 (specifically, if
-   n*min(p,1-p) < SMALL_MEAN), then a different algorithm, called
-   BINV, is used which has an average runtime that scales linearly
-   with n*min(p,1-p).
-
-   But for larger problems, the BTPE algorithm takes the form of two
-   functions b(x) and t(x) -- "bottom" and "top" -- for which b(x) <
-   f(x)/f(M) < t(x), with M = floor(n*p+p).  b(x) defines a triangular
-   region, and t(x) includes a parallelogram and two tails.  Details
-   (including a nice drawing) are in the paper.
-
-   [1] Kachitvichyanukul, V. and Schmeiser, B. W.  Binomial Random
-   Variate Generation.  Communications of the ACM, 31, 2 (February,
-   1988) 216.
-
-   Note, Bruce Schmeiser (personal communication) points out that if
-   you want very fast binomial deviates, and you are happy with
-   approximate results, and/or n and n*p are both large, then you can
-   just use gaussian estimates: mean=n*p, variance=n*p*(1-p).
-
-   This implementation by James Theiler, April 2003, after obtaining
-   permission -- and some good advice -- from Drs. Kachitvichyanukul
-   and Schmeiser to use their code as a starting point, and then doing
-   a little bit of tweaking.
-
-   Additional polishing for GSL coding standards by Brian Gough.  */
-
-#define SMALL_MEAN 14           /* If n*p < SMALL_MEAN then use BINV
-                                   algorithm. The ranlib
-                                   implementation used cutoff=30; but
-                                   on my computer 14 works better */
-
-#define BINV_CUTOFF 110         /* In BINV, do not permit ix too large */
-
-#define FAR_FROM_MEAN 20        /* If ix-n*p is larger than this, then
-                                   use the "squeeze" algorithm.
-                                   Ranlib used 20, and this seems to
-                                   be the best choice on my machine as
-                                   well */
-
-#define LNFACT(x) gsl_sf_lnfact(x)
-
-inline static double
-Stirling (double y1)
-{
-  double y2 = y1 * y1;
-  double s =
-    (13860.0 -
-     (462.0 - (132.0 - (99.0 - 140.0 / y2) / y2) / y2) / y2) / y1 / 166320.0;
-  return s;
-}
-
-unsigned int
-gsl_ran_binomial (const gsl_rng * rng, double p, unsigned int n)
-{
-  int ix;                       /* return value */
-  int flipped = 0;
-  double q, s, np;
-
-  if (n == 0)
-    return 0;
-
-  if (p > 0.5)
-    {
-      p = 1.0 - p;              /* work with small p */
-      flipped = 1;
-    }
-
-  q = 1 - p;
-  s = p / q;
-  np = n * p;
-
-  /* Inverse cdf logic for small mean (BINV in K+S) */
-
-  if (np < SMALL_MEAN)
-    {
-      double f0 = gsl_pow_uint (q, n);   /* f(x), starting with x=0 */
-
-      while (1)
-        {
-          /* This while(1) loop will almost certainly only loop once; but
-           * if u=1 to within a few epsilons of machine precision, then it
-           * is possible for roundoff to prevent the main loop over ix to
-           * achieve its proper value.  following the ranlib implementation,
-           * we introduce a check for that situation, and when it occurs,
-           * we just try again.
-           */
-
-          double f = f0;
-          double u = gsl_rng_uniform (rng);
-
-          for (ix = 0; ix <= BINV_CUTOFF; ++ix)
-            {
-              if (u < f)
-                goto Finish;
-              u -= f;
-              /* Use recursion f(x+1) = f(x)*[(n-x)/(x+1)]*[p/(1-p)] */
-              f *= s * (n - ix) / (ix + 1);
-            }
-
-          /* It should be the case that the 'goto Finish' was encountered
-           * before this point was ever reached.  But if we have reached
-           * this point, then roundoff has prevented u from decreasing
-           * all the way to zero.  This can happen only if the initial u
-           * was very nearly equal to 1, which is a rare situation.  In
-           * that rare situation, we just try again.
-           *
-           * Note, following the ranlib implementation, we loop ix only to
-           * a hardcoded value of SMALL_MEAN_LARGE_N=110; we could have
-           * looped to n, and 99.99...% of the time it won't matter.  This
-           * choice, I think is a little more robust against the rare
-           * roundoff error.  If n>LARGE_N, then it is technically
-           * possible for ix>LARGE_N, but it is astronomically rare, and
-           * if ix is that large, it is more likely due to roundoff than
-           * probability, so better to nip it at LARGE_N than to take a
-           * chance that roundoff will somehow conspire to produce an even
-           * larger (and more improbable) ix.  If n<LARGE_N, then once
-           * ix=n, f=0, and the loop will continue until ix=LARGE_N.
-           */
-        }
-    }
-  else
-    {
-      /* For n >= SMALL_MEAN, we invoke the BTPE algorithm */
-
-      int k;
-
-      double ffm = np + p;      /* ffm = n*p+p             */
-      int m = (int) ffm;        /* m = int floor[n*p+p]    */
-      double fm = m;            /* fm = double m;          */
-      double xm = fm + 0.5;     /* xm = half integer mean (tip of triangle)  */
-      double npq = np * q;      /* npq = n*p*q            */
-
-      /* Compute cumulative area of tri, para, exp tails */
-
-      /* p1: radius of triangle region; since height=1, also: area of region */
-      /* p2: p1 + area of parallelogram region */
-      /* p3: p2 + area of left tail */
-      /* p4: p3 + area of right tail */
-      /* pi/p4: probability of i'th area (i=1,2,3,4) */
-
-      /* Note: magic numbers 2.195, 4.6, 0.134, 20.5, 15.3 */
-      /* These magic numbers are not adjustable...at least not easily! */
-
-      double p1 = floor (2.195 * sqrt (npq) - 4.6 * q) + 0.5;
-
-      /* xl, xr: left and right edges of triangle */
-      double xl = xm - p1;
-      double xr = xm + p1;
-
-      /* Parameter of exponential tails */
-      /* Left tail:  t(x) = c*exp(-lambda_l*[xl - (x+0.5)]) */
-      /* Right tail: t(x) = c*exp(-lambda_r*[(x+0.5) - xr]) */
-
-      double c = 0.134 + 20.5 / (15.3 + fm);
-      double p2 = p1 * (1.0 + c + c);
-
-      double al = (ffm - xl) / (ffm - xl * p);
-      double lambda_l = al * (1.0 + 0.5 * al);
-      double ar = (xr - ffm) / (xr * q);
-      double lambda_r = ar * (1.0 + 0.5 * ar);
-      double p3 = p2 + c / lambda_l;
-      double p4 = p3 + c / lambda_r;
-
-      double var, accept;
-      double u, v;              /* random variates */
-
-    TryAgain:
-
-      /* generate random variates, u specifies which region: Tri, Par, Tail */
-      u = gsl_rng_uniform (rng) * p4;
-      v = gsl_rng_uniform (rng);
-
-      if (u <= p1)
-        {
-          /* Triangular region */
-          ix = (int) (xm - p1 * v + u);
-          goto Finish;
-        }
-      else if (u <= p2)
-        {
-          /* Parallelogram region */
-          double x = xl + (u - p1) / c;
-          v = v * c + 1.0 - fabs (x - xm) / p1;
-          if (v > 1.0 || v <= 0.0)
-            goto TryAgain;
-          ix = (int) x;
-        }
-      else if (u <= p3)
-        {
-          /* Left tail */
-          ix = (int) (xl + log (v) / lambda_l);
-          if (ix < 0)
-            goto TryAgain;
-          v *= ((u - p2) * lambda_l);
-        }
-      else
-        {
-          /* Right tail */
-          ix = (int) (xr - log (v) / lambda_r);
-          if (ix > (double) n)
-            goto TryAgain;
-          v *= ((u - p3) * lambda_r);
-        }
-
-      /* At this point, the goal is to test whether v <= f(x)/f(m) 
-       *
-       *  v <= f(x)/f(m) = (m!(n-m)! / (x!(n-x)!)) * (p/q)^{x-m}
-       *
-       */
-
-      /* Here is a direct test using logarithms.  It is a little
-       * slower than the various "squeezing" computations below, but
-       * if things are working, it should give exactly the same answer
-       * (given the same random number seed).  */
-
-#ifdef DIRECT
-      var = log (v);
-
-      accept =
-        LNFACT (m) + LNFACT (n - m) - LNFACT (ix) - LNFACT (n - ix)
-        + (ix - m) * log (p / q);
-
-#else /* SQUEEZE METHOD */
-
-      /* More efficient determination of whether v < f(x)/f(M) */
-
-      k = abs (ix - m);
-
-      if (k <= FAR_FROM_MEAN)
-        {
-          /* 
-           * If ix near m (ie, |ix-m|<FAR_FROM_MEAN), then do
-           * explicit evaluation using recursion relation for f(x)
-           */
-          double g = (n + 1) * s;
-          double f = 1.0;
-
-          var = v;
-
-          if (m < ix)
-            {
-              int i;
-              for (i = m + 1; i <= ix; i++)
-                {
-                  f *= (g / i - s);
-                }
-            }
-          else if (m > ix)
-            {
-              int i;
-              for (i = ix + 1; i <= m; i++)
-                {
-                  f /= (g / i - s);
-                }
-            }
-
-          accept = f;
-        }
-      else
-        {
-          /* If ix is far from the mean m: k=ABS(ix-m) large */
-
-          var = log (v);
-
-          if (k < npq / 2 - 1)
-            {
-              /* "Squeeze" using upper and lower bounds on
-               * log(f(x)) The squeeze condition was derived
-               * under the condition k < npq/2-1 */
-              double amaxp =
-                k / npq * ((k * (k / 3.0 + 0.625) + (1.0 / 6.0)) / npq + 0.5);
-              double ynorm = -(k * k / (2.0 * npq));
-              if (var < ynorm - amaxp)
-                goto Finish;
-              if (var > ynorm + amaxp)
-                goto TryAgain;
-            }
-
-          /* Now, again: do the test log(v) vs. log f(x)/f(M) */
-
-#if USE_EXACT
-          /* This is equivalent to the above, but is a little (~20%) slower */
-          /* There are five log's vs three above, maybe that's it? */
-
-          accept = LNFACT (m) + LNFACT (n - m)
-            - LNFACT (ix) - LNFACT (n - ix) + (ix - m) * log (p / q);
-
-#else /* USE STIRLING */
-          /* The "#define Stirling" above corresponds to the first five
-           * terms in asymptoic formula for
-           * log Gamma (y) - (y-0.5)log(y) + y - 0.5 log(2*pi);
-           * See Abramowitz and Stegun, eq 6.1.40
-           */
-
-          /* Note below: two Stirling's are added, and two are
-           * subtracted.  In both K+S, and in the ranlib
-           * implementation, all four are added.  I (jt) believe that
-           * is a mistake -- this has been confirmed by personal
-           * correspondence w/ Dr. Kachitvichyanukul.  Note, however,
-           * the corrections are so small, that I couldn't find an
-           * example where it made a difference that could be
-           * observed, let alone tested.  In fact, define'ing Stirling
-           * to be zero gave identical results!!  In practice, alv is
-           * O(1), ranging 0 to -10 or so, while the Stirling
-           * correction is typically O(10^{-5}) ...setting the
-           * correction to zero gives about a 2% performance boost;
-           * might as well keep it just to be pendantic.  */
-
-          {
-            double x1 = ix + 1.0;
-            double w1 = n - ix + 1.0;
-            double f1 = fm + 1.0;
-            double z1 = n + 1.0 - fm;
-
-            accept = xm * log (f1 / x1) + (n - m + 0.5) * log (z1 / w1)
-              + (ix - m) * log (w1 * p / (x1 * q))
-              + Stirling (f1) + Stirling (z1) - Stirling (x1) - Stirling (w1);
-          }
-#endif
-#endif
-        }
-
-
-      if (var <= accept)
-        {
-          goto Finish;
-        }
-      else
-        {
-          goto TryAgain;
-        }
-    }
-
-Finish:
-
-  return (flipped) ? (n - ix) : (unsigned int)ix;
-}
-
-unsigned int
-gsl_ran_binomial_tpe (const gsl_rng * rng, double p, unsigned int n)
-{
-  return gsl_ran_binomial (rng, p, n);
-}
-
-/* end of: randist/binomial_tpe.c
- * }
- */
-
diff --git a/hamiltonian.cpp b/hamiltonian.cpp
new file mode 100644 (file)
index 0000000..b5b94ba
--- /dev/null
@@ -0,0 +1,291 @@
+#include <iostream>
+#include <string>
+#include <fstream>
+#include <cstdio>
+#include <cstdlib>
+#include <map>
+#include <vector>
+
+#include "hamiltonian.h"
+
+
+//Constructor for loading couplings from John Barton's Ising Inversion code
+//Code for this function derived from John Barton's code       
+
+Hamiltonian::Hamiltonian(std::string &FILENAME) {
+       
+    this->bh=1.0;
+    this->bJ=1.0;
+    
+       FILE* input;
+       input = fopen(FILENAME.c_str(),"r");
+       
+       typedef std::vector<int> Key;
+       std::map<Key,double> JIndex;
+    char o;
+       int site=0;
+    
+       fscanf(input,"%d",&size);
+       
+       while (fscanf(input,"%d",&site)==1) {
+        
+        Key inputKey(2,site);        
+               while (fscanf(input,"%c",&o)==1) {
+                       
+                       if (o==';') break;
+                       
+                       else {
+                               
+                               int neighborSite=0;
+                               double neighborJ=0;
+                               fscanf(input,"%d %c %lf",&neighborSite,&o,&neighborJ);
+                               
+                               if (neighborSite==site) {
+                    
+                                       inputKey[1]=site;
+                    JIndex[inputKey]=neighborJ;
+                                       
+                               }
+                               else {
+                
+                        inputKey[1]=neighborSite;
+                        JIndex[inputKey]=neighborJ;
+                        
+                }
+                
+                       }
+            
+               }
+        
+       }
+               
+       fclose(input);
+       Key inputKey(2,0);
+       J.resize(size);
+    
+       for(int i=0;i<size;i++) J[i].resize(size,0.0);
+    Graph.resize(size);
+    
+       for (int i=0;i<size;i++) {
+               
+        J[i].resize(size,0.0);
+        
+               for(int j=i;j<size;j++){
+        
+                       inputKey[0]=i;
+                       inputKey[1]=j;
+            
+                       if (JIndex[inputKey]!=0.0) {
+            
+                               J[i][j]+=JIndex[inputKey];
+                               J[j][i]=J[i][j];
+                
+                               if (J[i][j]!=0.0) {
+                                       
+                    Graph[i].push_back(j);     //Populate the adjacency list
+                                       if(j!=i) Graph[j].push_back(i);
+                    
+                               }
+                
+                       }
+            
+               }
+        
+       }
+    
+}
+
+
+//Constructor for loading couplings from John Barton's Ising Inversion code
+//Code for this function derived from John Barton's code       
+
+EpitopeHamiltonian::EpitopeHamiltonian(std::string &FILENAME) {
+       
+    this->bh=1.0;
+    this->bJ=1.0;
+    
+       FILE* input;
+       input = fopen(FILENAME.c_str(),"r");
+       
+       typedef std::vector<int> Key;
+       std::map<Key,double> JIndex;
+    char o;
+       int site=0;
+    
+       fscanf(input,"%d",&size);
+       
+       while (fscanf(input,"%d",&site)==1) {
+        
+        Key inputKey(2,site);        
+               while (fscanf(input,"%c",&o)==1) {
+                       
+                       if (o==';') break;
+                       
+                       else {
+                               
+                               int neighborSite=0;
+                               double neighborJ=0;
+                               fscanf(input,"%d %c %lf",&neighborSite,&o,&neighborJ);
+                               
+                               if (neighborSite==site) {
+                    
+                                       inputKey[1]=site;
+                    JIndex[inputKey]=neighborJ;
+                                       
+                               }
+                               else {
+                
+                        inputKey[1]=neighborSite;
+                        JIndex[inputKey]=neighborJ;
+                        
+                }
+                
+                       }
+            
+               }
+        
+       }
+               
+       fclose(input);
+       Key inputKey(2,0);
+       J.resize(size);
+    
+       for(int i=0;i<size;i++) J[i].resize(size,0.0);
+    Graph.resize(size);
+    
+       for (int i=0;i<size;i++) {
+               
+        J[i].resize(size,0.0);
+        
+               for(int j=i;j<size;j++){
+        
+                       inputKey[0]=i;
+                       inputKey[1]=j;
+            
+                       if (JIndex[inputKey]!=0.0) {
+            
+                               J[i][j]+=JIndex[inputKey];
+                               J[j][i]=J[i][j];
+                
+                               if (J[i][j]!=0.0) {
+                                       
+                    Graph[i].push_back(j);     //Populate the adjacency list
+                                       if(j!=i) Graph[j].push_back(i);
+                    
+                               }
+                
+                       }
+            
+               }
+        
+       }
+    
+}
+
+
+
+// Return the energy for a given set of mutated sites
+
+double Hamiltonian::get_energy(const std::set<unsigned int> &mutated_sites) const {
+
+    double efield = 0.0;
+    double ecoupling = 0.0;
+       std::set<unsigned int>::iterator iter1;
+       std::set<unsigned int>::iterator iter2;
+    
+       for (iter1=mutated_sites.begin();iter1!=mutated_sites.end();++iter1) {
+    
+               efield -= J[*iter1][*iter1];
+               
+        iter2 = iter1;
+        ++iter2;
+               
+        for (;iter2!=mutated_sites.end();iter2++) ecoupling -= J[*iter1][*iter2];
+                
+    }
+    
+    // Check for mismatch between targeted epitope and current sequence
+    
+    return ((efield * bh) + (ecoupling * bJ));
+    
+}
+
+
+// Set the targeted epitope
+
+void EpitopeHamiltonian::set_epitope(const std::vector<unsigned int> &eWT, const std::vector<unsigned int> &eMut, double d) {
+
+    penalty=d;
+    epitopeWT=eWT;
+    epitopeMut=eMut;
+
+}
+
+
+// Return true if the virus has escaped from immune pressure
+
+bool EpitopeHamiltonian::escaped(const Virus &v) {
+
+    return EpitopeHamiltonian::escaped(v.mutated_sites);
+
+}
+
+
+// Return true if the sequence has escaped from immune pressure
+
+bool EpitopeHamiltonian::escaped(const std::set<unsigned int> &mutated_sites) const {
+
+    bool escape=false;
+    
+    for (int i=0;i<epitopeWT.size();i++) {
+    
+        if (mutated_sites.count(epitopeWT[i])==1) { escape=true; break; }
+        
+    }
+    
+    if (!escape) {
+    
+        for (int i=0;i<epitopeMut.size();i++) {
+        
+            if (mutated_sites.count(epitopeMut[i])==0) { escape=true; break; }
+            
+        }
+        
+    }
+    
+    return escape;
+
+}
+
+
+// Return the energy for a given set of mutated sites
+
+double EpitopeHamiltonian::get_energy(const std::set<unsigned int> &mutated_sites) const {
+
+    double efield = 0.0;
+    double ecoupling = 0.0;
+       std::set<unsigned int>::iterator iter1;
+       std::set<unsigned int>::iterator iter2;
+    
+       for (iter1=mutated_sites.begin();iter1!=mutated_sites.end();++iter1) {
+    
+               efield -= J[*iter1][*iter1];
+               
+        iter2 = iter1;
+        ++iter2;
+               
+        for (;iter2!=mutated_sites.end();iter2++) ecoupling -= J[*iter1][*iter2];
+                
+    }
+    
+    // Check for mismatch between targeted epitope and current sequence
+    
+    if (escaped(mutated_sites)) return ((efield * bh) + (ecoupling * bJ));
+    else                        return ((efield * bh) + (ecoupling * bJ) + (penalty * bh));
+    
+}
+
+
+
+
+
diff --git a/hamiltonian.h b/hamiltonian.h
new file mode 100644 (file)
index 0000000..90e2978
--- /dev/null
@@ -0,0 +1,61 @@
+#ifndef HAMILTONIAN_H
+#define HAMILTONIAN_H
+
+#include <string>
+#include <vector>
+#include <set>
+
+#include "virus.h"
+
+
+typedef std::vector<std::vector<int> > AdjacencyList;
+typedef std::vector<std::vector<double> > Coupling;
+
+
+class Virus;
+
+
+class Hamiltonian {
+
+public:
+    
+    int size;
+    double bh;      // fields "inverse temperature" multiplier
+    double bJ;      // couplings "inverse temperature" multiplier
+    Coupling J;
+    AdjacencyList Graph;
+    
+    Hamiltonian(std::string &FILENAME);
+    Hamiltonian() { }
+    virtual ~Hamiltonian() { }
+
+    virtual bool escaped(const Virus &v) { return false; }
+    virtual bool escaped(const std::set<unsigned int> &mutated_sites) const { return false; }
+    virtual double get_energy(const std::set<unsigned int> &mutated_sites) const;
+    
+    void set_temp(double x)           { bh=x; bJ=x; }
+    void set_temp(double x, double y) { bh=x; bJ=y; }
+
+};
+
+
+class EpitopeHamiltonian : public Hamiltonian {
+
+public:
+    
+    double penalty;
+    std::vector<unsigned int> epitopeWT;
+    std::vector<unsigned int> epitopeMut;
+    
+    EpitopeHamiltonian(std::string &FILENAME);
+    
+    void set_epitope(const std::vector<unsigned int> &eWT, const std::vector<unsigned int> &eMut, double d);
+    
+    bool escaped(const Virus &v);
+    bool escaped(const std::set<unsigned int> &mutated_sites) const;
+    double get_energy(const std::set<unsigned int> &mutated_sites) const;
+
+};
+
+
+#endif
diff --git a/mt19937ar.cpp b/mt19937ar.cpp
deleted file mode 100644 (file)
index c05cf87..0000000
+++ /dev/null
@@ -1,250 +0,0 @@
-// SOURCE DOWNLOADED FROM: http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/MT2002/CODES/mt19937ar.c
-// MORE INFO: http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/MT2002/emt19937ar.html
-
-/* 
-   A C-program for MT19937, with initialization improved 2002/1/26.
-   Coded by Takuji Nishimura and Makoto Matsumoto.
-
-   Before using, initialize the state by using init_genrand(seed)  
-   or init_by_array(init_key, key_length).
-
-   Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura,
-   All rights reserved.                          
-
-   Redistribution and use in source and binary forms, with or without
-   modification, are permitted provided that the following conditions
-   are met:
-
-     1. Redistributions of source code must retain the above copyright
-        notice, this list of conditions and the following disclaimer.
-
-     2. Redistributions in binary form must reproduce the above copyright
-        notice, this list of conditions and the following disclaimer in the
-        documentation and/or other materials provided with the distribution.
-
-     3. The names of its contributors may not be used to endorse or promote 
-        products derived from this software without specific prior written 
-        permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
-   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-
-   Any feedback is very welcome.
-   http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/emt.html
-   email: m-mat @ math.sci.hiroshima-u.ac.jp (remove space)
-*/
-
-#include <stdlib.h>
-#include <stdio.h>
-
-/* Period parameters */  
-#define N 624
-#define M 397
-#define MATRIX_A 0x9908b0dfUL   /* constant vector a */
-#define UPPER_MASK 0x80000000UL /* most significant w-r bits */
-#define LOWER_MASK 0x7fffffffUL /* least significant r bits */
-
-static unsigned long mt[N]; /* the array for the state vector  */
-static int mti=N+1; /* mti==N+1 means mt[N] is not initialized */
-
-/* initializes mt[N] with a seed */
-void init_genrand(unsigned long s)
-{
-    mt[0]= s & 0xffffffffUL;
-    for (mti=1; mti<N; mti++) {
-        mt[mti] = 
-           (1812433253UL * (mt[mti-1] ^ (mt[mti-1] >> 30)) + mti); 
-        /* See Knuth TAOCP Vol2. 3rd Ed. P.106 for multiplier. */
-        /* In the previous versions, MSBs of the seed affect   */
-        /* only MSBs of the array mt[].                        */
-        /* 2002/01/09 modified by Makoto Matsumoto             */
-        mt[mti] &= 0xffffffffUL;
-        /* for >32 bit machines */
-    }
-}
-
-/* initialize by an array with array-length */
-/* init_key is the array for initializing keys */
-/* key_length is its length */
-/* slight change for C++, 2004/2/26 */
-void init_by_array(unsigned long init_key[], int key_length)
-{
-    int i, j, k;
-    init_genrand(19650218UL);
-    i=1; j=0;
-    k = (N>key_length ? N : key_length);
-    for (; k; k--) {
-        mt[i] = (mt[i] ^ ((mt[i-1] ^ (mt[i-1] >> 30)) * 1664525UL))
-          + init_key[j] + j; /* non linear */
-        mt[i] &= 0xffffffffUL; /* for WORDSIZE > 32 machines */
-        i++; j++;
-        if (i>=N) { mt[0] = mt[N-1]; i=1; }
-        if (j>=key_length) j=0;
-    }
-    for (k=N-1; k; k--) {
-        mt[i] = (mt[i] ^ ((mt[i-1] ^ (mt[i-1] >> 30)) * 1566083941UL))
-          - i; /* non linear */
-        mt[i] &= 0xffffffffUL; /* for WORDSIZE > 32 machines */
-        i++;
-        if (i>=N) { mt[0] = mt[N-1]; i=1; }
-    }
-
-    mt[0] = 0x80000000UL; /* MSB is 1; assuring non-zero initial array */ 
-}
-
-/* generates a random number on [0,0xffffffff]-interval */
-unsigned long genrand_int32(void)
-{
-    unsigned long y;
-    static unsigned long mag01[2]={0x0UL, MATRIX_A};
-    /* mag01[x] = x * MATRIX_A  for x=0,1 */
-
-    if (mti >= N) { /* generate N words at one time */
-        int kk;
-
-        if (mti == N+1)   /* if init_genrand() has not been called, */
-            init_genrand(5489UL); /* a default initial seed is used */
-
-        for (kk=0;kk<N-M;kk++) {
-            y = (mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK);
-            mt[kk] = mt[kk+M] ^ (y >> 1) ^ mag01[y & 0x1UL];
-        }
-        for (;kk<N-1;kk++) {
-            y = (mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK);
-            mt[kk] = mt[kk+(M-N)] ^ (y >> 1) ^ mag01[y & 0x1UL];
-        }
-        y = (mt[N-1]&UPPER_MASK)|(mt[0]&LOWER_MASK);
-        mt[N-1] = mt[M-1] ^ (y >> 1) ^ mag01[y & 0x1UL];
-
-        mti = 0;
-    }
-  
-    y = mt[mti++];
-
-    /* Tempering */
-    y ^= (y >> 11);
-    y ^= (y << 7) & 0x9d2c5680UL;
-    y ^= (y << 15) & 0xefc60000UL;
-    y ^= (y >> 18);
-
-    return y;
-}
-
-/* generates a random number on [0,0x7fffffff]-interval */
-long genrand_int31(void)
-{
-    return (long)(genrand_int32()>>1);
-}
-
-/* generates a random number on [0,1]-real-interval */
-double genrand_real1(void)
-{
-    return genrand_int32()*(1.0/4294967295.0); 
-    /* divided by 2^32-1 */ 
-}
-
-/* generates a random number on [0,1)-real-interval */
-double genrand_real2(void)
-{
-    return genrand_int32()*(1.0/4294967296.0); 
-    /* divided by 2^32 */
-}
-
-/* generates a random number on (0,1)-real-interval */
-double genrand_real3(void)
-{
-    return (((double)genrand_int32()) + 0.5)*(1.0/4294967296.0); 
-    /* divided by 2^32 */
-}
-
-/* generates a random number on [0,1) with 53-bit resolution*/
-double genrand_res53(void) 
-{ 
-    unsigned long a=genrand_int32()>>5, b=genrand_int32()>>6; 
-    return(a*67108864.0+b)*(1.0/9007199254740992.0); 
-} 
-/* These real versions are due to Isaku Wada, 2002/01/09 added */
-
-/*
-int main(void)
-{
-    int i;
-    unsigned long init[4]={0x123, 0x234, 0x345, 0x456}, length=4;
-    init_by_array(init, length);
-    printf("1000 outputs of genrand_int32()\n");
-    for (i=0; i<1000; i++) {
-      printf("%10lu ", genrand_int32());
-      if (i%5==4) printf("\n");
-    }
-    printf("\n1000 outputs of genrand_real2()\n");
-    for (i=0; i<1000; i++) {
-      printf("%10.8f ", genrand_real2());
-      if (i%5==4) printf("\n");
-    }
-    return 0;
-}
-*/
-#undef N
-#undef M
-#undef MATRIX_A
-#undef UPPER_MASK
-#undef LOWER_MASK
-
-
-
-#define MT_RAND_MAX 0xffffffffUL  /* 2**32 - 1 */
-
-/*
- * generate uniform 0 <= x < N, i.e. interval [0,N)
- * int x = (int)((double)rand() / (((double)RAND_MAX + (double)1.0) / N))
- */
-
-/*
- * Returns a random integer in the range [0..limit)
- * see http://stackoverflow.com/questions/2999075/generate-a-random-number-within-range/2999130#2999130
- * and http://stackoverflow.com/questions/10219355/does-n-rand-rand-max-make-a-skewed-random-number-distribution/10219422#10219422
- * alternate at http://stackoverflow.com/questions/11641629/generating-a-uniform-distribution-of-integers-in-c/11658168#11658168
- * also http://c-faq.com/lib/randrange.html
- * and GSL source: http://bzr.savannah.gnu.org/lh/gsl/trunk/annotate/head:/rng/gsl_rng.h
- */
-unsigned int rand_lim(unsigned int limit) {
-    unsigned int divisor = MT_RAND_MAX/limit;
-    unsigned int retval;
-    do {
-        retval = genrand_int32() / divisor;
-    } while (retval >= limit);
-    return retval;
-}
-
-#undef MT_RAND_MAX
-
-/* generate a random number generator seed from /dev/urandom */
-// borrowed from SSC src/runtime/ssc_rts.c
-unsigned ising_sim_random_seed() {
-    unsigned random_seed;
-    FILE *rng = fopen("/dev/urandom", "r");
-    if (rng == NULL) {
-        fprintf(stderr,
-                "error: cannot read /dev/urandom randomness generator\n");
-        exit(1);
-    }
-    size_t num_random_read = fread(&random_seed, sizeof(random_seed), 1, rng);
-    if (num_random_read != 1) {
-        fprintf(stderr,
-                "error: cannot read /dev/urandom randomness generator\n");
-        exit(1);
-    }
-    fclose(rng);
-    return random_seed;
-}
-
diff --git a/population.cpp b/population.cpp
new file mode 100644 (file)
index 0000000..a222a87
--- /dev/null
@@ -0,0 +1,281 @@
+#include <vector>
+#include <iostream>
+#include <string>
+#include <fstream>
+#include <gsl/gsl_rng.h>
+#include <gsl/gsl_randist.h>
+#include <cmath>
+
+#include "population.h"
+
+
+//Constructor that assembles a population of N viruses of wild type
+
+Population::Population(const Hamiltonian &H, unsigned int N, double mu) {
+       
+       Virus V(H, mu);
+       pop[V] = N;
+       this->N = N;
+    this->mu = mu;
+    
+       p    = 1.0-pow((1.0-V.mu),H.size);
+    Eavg = V.energy;
+       
+}
+
+
+//Constructor that assembles a population of N viruses given starting population fractions
+
+Population::Population(const Hamiltonian &H, unsigned int N, double mu, const std::vector<std::set<unsigned int> > &initPop, const std::vector<double> &initFrac) {
+       
+    for (int i=0;i<initPop.size();i++) {
+    
+        Virus V(H, mu, initPop[i]);
+        pop[V] = (unsigned int) N * initFrac[i];
+        Eavg += V.energy * pop[V];
+        
+    }
+       
+    this->N = N;
+    this->mu = mu;
+    
+       p     = 1.0-pow((1.0-mu),H.size);
+    Eavg /= (double) N;
+       
+}
+
+
+//Step population forward a generation.
+
+void Population::next_generation(const Hamiltonian &H, gsl_rng* r, bool useRelative, bool useVerbose) {
+       
+       mutate_population(H,r);
+    
+       unsigned int total_deaths = 0;
+    unsigned int num_survive  = 0;
+    double new_Eavg = 0.0;
+    
+       virus_map::iterator iter=pop.begin();
+       
+       for(;iter!=pop.end();++iter){
+    
+               if (useRelative) num_survive = gsl_ran_binomial(r,iter->first.survival(Eavg),iter->second);
+        else             num_survive = gsl_ran_binomial(r,iter->first.survival(),iter->second);
+        
+        total_deaths += iter->second - num_survive;
+               iter->second  = num_survive;
+        
+        new_Eavg += iter->first.energy * num_survive;
+               
+        // Report survival
+        
+        if (useVerbose) {
+            
+            std::cout << "survival probability " << ((useRelative) ? iter->first.survival(Eavg) : iter->first.survival())
+            << " number survived " << num_survive << " total number "
+            <<  iter->second << std::endl;
+            
+        }
+               
+       }
+    
+       if (useVerbose) { std::cout << "checkpoint, " << "total_deaths = " << total_deaths << std::endl; }
+    
+       //delete zero population strains
+    
+       iter=pop.begin();
+    
+       while (iter!=pop.end()) {
+    
+               if (iter->second==0) pop.erase(iter++);
+               else ++iter;
+        
+       }
+    
+       unsigned int ran;
+       unsigned int selector;
+       unsigned int pop_size=compute_population_size();
+    
+       if (useVerbose) print_population_size();
+    
+       virus_map prev_gen = pop;
+    
+       for (unsigned int i=0; i<total_deaths; ++i) {
+    
+               ran = gsl_rng_uniform_int(r,pop_size+1);
+               virus_map::iterator iter = prev_gen.begin();
+               selector = iter->second;
+        
+               while (selector<ran) {
+                       
+            ++iter;
+                       selector += iter->second;
+               
+        }
+        
+               pop[iter->first]++;
+        
+        new_Eavg += iter->first.energy;
+        
+       }
+    
+    Eavg = new_Eavg/((double) N);
+    
+}
+
+
+// Output population to file
+
+void Population::write_population(std::string filename) {
+
+       std::ofstream fout;
+       fout.open(filename.c_str());
+    
+       std::set<unsigned int> ms;
+    
+       for (virus_map::iterator iter=pop.begin(); iter!=pop.end(); ++iter) {
+               
+        fout << iter->second << '\t';
+               fout << iter->first.energy << '\t';
+        
+               ms = iter->first.mutated_sites;
+        
+               for (std::set<unsigned int>::iterator ms_iter=ms.begin(); ms_iter!=ms.end(); ++ms_iter) {
+        
+                       fout << *ms_iter << '\t';
+            
+               }
+        
+               fout << std::endl;
+        
+       }
+    
+       fout.close();
+    
+}
+
+
+// Output population to file
+
+void Population::write_population(FILE *output, unsigned int generation) {
+
+       fprintf(output,"%d\n",generation);
+    
+    std::set<unsigned int> ms;
+    
+       for (virus_map::iterator iter=pop.begin(); iter!=pop.end(); ++iter) {
+               
+        fprintf(output,"%d\t%.6e",iter->second,iter->first.energy);
+        
+        ms = iter->first.mutated_sites;
+        
+               for (std::set<unsigned int>::iterator ms_iter=ms.begin(); ms_iter!=ms.end(); ++ms_iter) fprintf(output,"\t%d",*ms_iter);
+        fprintf(output,"\n");
+        
+       }
+    
+       fflush(output);
+    
+}
+
+
+// Compute the total population size
+
+unsigned int Population::compute_population_size() {
+
+       unsigned int i=0;
+  
+       for (virus_map::iterator iter=pop.begin(); iter!=pop.end(); ++iter) i+=iter->second;
+       
+       return i;
+    
+}
+
+
+// Print population size to standard out
+
+void Population::print_population_size() {
+
+       std::cout << "The total population size is " << compute_population_size() << std::endl;
+    
+}
+
+
+//Mutate every virus in the population
+
+void Population::mutate_population(const Hamiltonian &H, gsl_rng* r) {
+       
+       virus_map prev_generation = pop;
+    unsigned int num_mutate;
+    
+       for(virus_map::iterator iter=prev_generation.begin(); iter!=prev_generation.end(); ++iter) {
+               
+        num_mutate=gsl_ran_binomial(r,p,iter->second);
+        
+               pop[iter->first]=pop[iter->first]-num_mutate;
+        
+               if (pop[iter->first]<=0) pop.erase(iter->first);
+        
+               for (unsigned int i=0; i<num_mutate; ++i) {
+        
+                       Virus V(H,mu,iter->first.mutated_sites);
+                       V.mutate(H,r);
+                       
+            if (pop.count(V)==0) pop[V]  = 1;
+            else                 pop[V] += 1;
+            
+               }
+        
+       }
+    
+}              
+       
+
+// Compute the number of escaped viruses in the population
+
+unsigned int Population::compute_num_escaped(Hamiltonian &H) {
+
+       unsigned int i=0;
+  
+       for (virus_map::iterator iter=pop.begin(); iter!=pop.end(); ++iter) { if (H.escaped(iter->first)) i+=iter->second; }
+       
+       return i;
+    
+}
+
+
+// Check whether most of the population has escaped from immune pressure
+
+bool Population::escaped(Hamiltonian &H) {
+
+       if (2*compute_num_escaped(H)>N) return true;
+    else                            return false;
+    
+}
+
+
+// Compute the number of escaped viruses in the population
+
+unsigned int Population::escape_variant(Hamiltonian &H, std::set<unsigned int> &mutant) {
+
+       unsigned int i=0;
+  
+       for (virus_map::iterator iter=pop.begin(); iter!=pop.end(); ++iter) {
+    
+        if (H.escaped(iter->first) && (iter->second)>i) {
+        
+            i=iter->second;
+            mutant=iter->first.mutated_sites;
+            
+        }
+        
+    }
+    
+    return i;
+    
+}
+
+
+
+
+
diff --git a/population.h b/population.h
new file mode 100644 (file)
index 0000000..7e7edd7
--- /dev/null
@@ -0,0 +1,41 @@
+#ifndef POPULATION_H
+#define POPULATION_H
+
+#include <vector>
+#include <map>
+#include <string>
+#include <gsl/gsl_rng.h>
+
+#include "virus.h"
+
+
+typedef std::map<Virus, unsigned int> virus_map;
+
+
+class Population{
+       
+public:
+    
+    unsigned int N;    // Population size
+    double p;       // Probability that a sequence has one or more mutations
+    double Eavg;    // Average energy of the sequence population
+    double mu;      // Mutation rate
+    virus_map pop;
+       
+    Population(const Hamiltonian &H, unsigned int N, double mu);
+    Population(const Hamiltonian &H, unsigned int N, double mu, const std::vector<std::set<unsigned int> > &initPop, const std::vector<double> &initFrac);
+        
+    void next_generation(const Hamiltonian &H, gsl_rng* r, bool useRelative, bool useVerbose);
+    void write_population(std::string filepath);
+    void write_population(FILE *output, unsigned int generation);
+    unsigned int compute_population_size();
+    void print_population_size();
+    void mutate_population(const Hamiltonian &H, gsl_rng* r);
+    
+    unsigned int compute_num_escaped(Hamiltonian &H);
+    bool escaped(Hamiltonian &H);
+    unsigned int escape_variant(Hamiltonian &H, std::set<unsigned int> &mutant);
+        
+};
+
+#endif
diff --git a/virus.cpp b/virus.cpp
new file mode 100644 (file)
index 0000000..007a5d1
--- /dev/null
+++ b/virus.cpp
@@ -0,0 +1,107 @@
+#include <iostream>
+#include <cmath>
+#include <set>
+#include <vector>
+#include <gsl/gsl_rng.h>
+#include <gsl/gsl_randist.h>
+
+#include "virus.h"
+
+
+//Constuct a virus object of wildtype
+
+Virus::Virus(const Hamiltonian &H, double mu) {
+       
+       this->mutated_sites.clear();
+       this->mu=mu;
+       L=H.size;
+       energy=0;
+
+}
+
+
+//Construct a virus and compute its energy.
+
+Virus::Virus(const Hamiltonian &H, double mu, const std::set<unsigned int> &mutated_sites) {
+       
+       this->mutated_sites=mutated_sites;
+       this->mu=mu;
+       L=H.size;
+       update_energy(H);
+
+}              
+
+
+//Print key numerical parameters of the object to the terminal for diagnostics.
+
+void Virus::print_parameters() {
+       
+       std::cout << "mu = " << mu << std::endl;
+       std::cout << "energy = " << energy << std::endl;
+
+}
+
+
+//Mutate the entire virus.  Takes a Hamiltonian object and a pointer
+//to an instance of a gsl_rng random number generator
+
+void Virus::mutate(const Hamiltonian &H, gsl_rng* r) {
+       
+       unsigned int n=gsl_ran_binomial(r,mu,L);
+    
+       while (n<1) n=gsl_ran_binomial(r,mu,L);
+    
+    for (int i=0;i<n;i++) {
+    
+        unsigned int x=gsl_rng_uniform_int(r,H.size);
+        
+        if (mutated_sites.count(x)==0) mutated_sites.insert(x);
+        else                           mutated_sites.erase(x);
+        
+    }
+        
+    update_energy(H);
+    
+}
+
+
+//Computes the odds of survival
+
+double Virus::survival() const {
+
+       double z = exp(-energy);
+       return z/(1+z);
+    
+}
+
+
+//Computes the odds of survival
+
+double Virus::survival(double Eavg) const {
+
+       double z = exp(Eavg-energy);
+       return z/(1+z);
+    
+}
+
+
+//Update the energy of the virus object.  Takes as an argument the
+//Hamiltonian that determines the energy
+
+void Virus::update_energy(const Hamiltonian &H) {
+
+    energy=H.get_energy(mutated_sites);
+    
+}
+
+
+bool operator<(const Virus& lhs, const Virus& rhs) {
+       
+    return lhs.mutated_sites < rhs.mutated_sites;
+
+}
+
+
+
+       
+       
diff --git a/virus.h b/virus.h
new file mode 100644 (file)
index 0000000..b254d80
--- /dev/null
+++ b/virus.h
@@ -0,0 +1,44 @@
+#ifndef VIRUS_H
+#define VIRUS_H
+
+#include <vector>
+#include <set>
+#include <gsl/gsl_rng.h>
+
+#include "hamiltonian.h"
+
+
+class Hamiltonian;
+
+
+class Virus {
+
+public:
+    
+    double energy;
+    std::set<unsigned int> mutated_sites;
+       
+    Virus(const Hamiltonian &H, double mu);
+    Virus(const Hamiltonian &H, double mu, const std::set<unsigned int> &mutated_sites);
+    
+    void print_parameters();
+    void mutate(const Hamiltonian &H, gsl_rng* r);
+    double survival() const;
+    double survival(double Eavg) const;
+    
+    friend class Population;
+       
+private:
+    
+    double mu;
+    unsigned int L;
+       
+    void update_energy(const Hamiltonian &H);
+    
+};
+
+
+bool operator<(const Virus& lhs, const Virus& rhs);
+
+#endif
+
diff --git a/wf.cpp b/wf.cpp
index 6af0fe9..8c14ef8 100644 (file)
--- a/wf.cpp
+++ b/wf.cpp
-
 #include <iostream>
-#include <math.h>
-
-// STL (Standard Template Library)
-#include <set>
-#include <map>
-#include <algorithm>    // std::copy
-#include <iterator>     // std::ostream_iterator
+#include <string>
+#include <fstream>
+#include <cstdio>
+#include <cstdlib>
+#include <cmath>
 
-// GSL (GNU Scientific Library)
 #include <gsl/gsl_rng.h>
-#include <gsl/gsl_randist.h>
-
-// mt19937ar.cpp forward declarations
-void init_genrand(unsigned long);
-unsigned long genrand_int32(void);
-unsigned ising_sim_random_seed();
 
-/*
-// binomial.cpp forward declarations
-typedef struct { void *state; } gsl_rng;
-unsigned int gsl_ran_binomial(const gsl_rng*, double, unsigned int);
-*/
-
-typedef unsigned int spin_t;
-typedef std::set<spin_t> mutset_t;  // virus is defined by set of mutations
-typedef std::map<mutset_t,int> pop_t;   // number of each virus species
+#include "hamiltonian.h"
+#include "population.h"
+#include "virus.h"
+#include "wf.h"
+
+
+// Run the program
+
+void run(RunParameters &r) {
+    
+    printf("\n");
+    
+    // Initialize RNG and set initial state, if importing from file
+    
+    static gsl_rng *rnd = gsl_rng_alloc(gsl_rng_taus); //pointer to random number generator state
+    
+    srand((unsigned)time(0));
+    gsl_rng_set(rnd,rand());
+    
+    r.setFiles();
+    //FILE *popout=fopen(r.trajectoryOutfile.c_str(),"w");
+    FILE *supout=fopen(r.supplementaryOutfile.c_str(),"w");
+    
+    if (r.importState) importState(r);
+    
+    
+    // Run (w/ targeted epitope)
+    
+    if (r.useEpitope) {
+    
+        for (unsigned int n=0;n<r.num_runs;n++) {
+        
+            EpitopeHamiltonian H(r.couplingsInfile);
+            H.set_epitope(r.eWT, r.eMut, r.penalty);
+            H.set_temp(r.bh, r.bJ);
+            Population P(H, r.n, r.mu, r.initPop, r.initFrac);
+            
+            unsigned int i;
+            for (i=0; i<r.g; ++i) {
+        
+                P.next_generation(H, rnd, r.useRelative, r.useVerbose);
+                //P.write_population(popout,i);
+            
+                if (r.runUntilEscape && P.escaped(H)) break;
+            
+            }
+            
+            std::set<unsigned int> esc_var;
+            unsigned int esc_num=P.escape_variant(H, esc_var);
+            
+            fprintf(supout,"%d\t%d",i,esc_num);
+            for (std::set<unsigned int>::iterator iter=esc_var.begin(); iter!=esc_var.end(); ++iter) fprintf(supout,"\t%d",*iter);
+            fprintf(supout,"\n");
+            
+            fflush(supout);
+        
+        }
+        
+    }
+    
+    
+    // Run (w/out targeted epitope)
+    
+    else {
+    
+        Hamiltonian H(r.couplingsInfile);
+        H.set_temp(r.bh, r.bJ);
+        Population P(H, r.n, r.mu);
+        
+        for (unsigned int i=0; i<r.g; ++i) {
+        
+            P.next_generation(H, rnd, r.useRelative, r.useVerbose);
+            //P.write_population(popout,i);
+                        
+        }
+        
+    }
+    
+    gsl_rng_free(rnd);  //Free up memory from random number generator
 
-/*
- * print a set of mutations (or anything) to stream as a CSV-like string
- */
-template <typename T>
-std::ostream& operator<<(std::ostream& os, const std::set<T> &v)
-{
-    typename std::set<T>::iterator i = v.begin(), end = v.end();
-    os << *i;  // print first element
-    for (++i; i != end; ++i)  // then with comma as prefix
-        os << ',' << *i;
-    return os;
 }
 
 
-int main(int argc, char **argv)
-{
-    argc++; argv++; // avoid "warning: unused parameter"
-
-    gsl_rng *rng = gsl_rng_alloc(gsl_rng_mt19937);
-    gsl_rng_set(rng,ising_sim_random_seed());
-
-    unsigned int NUMSITES = 100;
-
-    pop_t Pop;
-
-    spin_t muts[] = {3,0,5};
-    mutset_t s(muts,muts+3);
-    /*
-    mutset_t s;
-    for (int i = 0; i < 3; ++i)
-        s.insert(muts[i]);
-    */
-    std::cout << gsl_ran_binomial(rng,0.1,NUMSITES) << std::endl;
+// Import initial state from a state file
+
+void importState(RunParameters &r) {
+
+    FILE *input=fopen(r.stateInfile.c_str(),"r");
+    
+    char o;
+    double frac;
+       unsigned int site;
+    
+    // Import initial state of the population
+    
+       while (fscanf(input,"%le",&frac)==1) {
+        
+        r.initFrac.push_back(frac);
+        
+        std::set<unsigned int> tempSet;
+        
+               while (fscanf(input,"%c",&o)==1) {
+                       
+                       if (o=='\n' || o==';') break;
+                       
+                       else {
+                               
+                               fscanf(input,"%u",&site);
+                tempSet.insert(site);
+                
+            }
+                       
+               }
+        
+        r.initPop.push_back(tempSet);
+        
+        if (o==';') break;
+        
+       }
+    
+    // Import targeted epitope
+    
+    while (fscanf(input,"%u",&site)==1) {
+    
+        r.eWT.push_back(site);
+        
+        fscanf(input,"%c",&o);
+        if (o=='\n') break;
+        
+    }
+    
+    while (fscanf(input,"%u",&site)==1) {
+    
+        r.eMut.push_back(site);
+        
+        fscanf(input,"%c",&o);
+        if (o=='\n') break;
+        
+    }
+    
+    r.useEpitope=true;
 
-    Pop[s] = 100;
-    Pop[s] += 50;
+}
 
-#if 0
-    for(pop_t::iterator vi = Pop.begin(); vi != Pop.end(); ++vi) {
-        mutset_t v = vi->first;
 
-        /*
-        // appends "," to each element
-        std::ostream_iterator<int> output(std::cout, ",");
-        std::copy(v.begin(),v.end(), output);
-        */
-        /*
-        // appends "," to each element
-        for(mutset_t::iterator i = v.begin(); i != v.end(); ++i) {
-            std::cout << (*i) << ",";
-        }
-        */
-        /*
-        // prefix every element (except the first) with ","
-        mutset_t::iterator i = v.begin(), end = v.end();
-        std::cout << *i;
-        for (++i; i != end; ++i)
-            std::cout << ',' << *i;
-        std::cout << ' ';
-        */
-        ///*
-        // proper printing function
-        std::cout << v << ' ';
-        //*/
+/*
+ * Command line rules:
+ *
+ *  -d  (string)        working directory
+ *  -i  (string)        input couplings file
+ *  -o  (string)        output file stem
+ *  -s  (string)        input state file, containing initial population fraction and targeted epitope
+ *  -n  (int/double)    population size
+ *  -g  (int/double)    number of generations
+ *  -mu (double)        mutation rate
+ *  -b  (double)        "inverse temperature" multiplier
+ *  -e  (int) (int)     targeted epitope spans sites [e#1, ..., e#2], assuming all WT
+ *  -r                  flag to use relative energy condition for survival probability
+ *  -esc                flag to run until escape observed (or max number of generations reached)
+ *  -v                  flag for verbose output
+ *
+ */
 
-        std::cout << vi->second << std::endl;
+int main(int argc, char *argv[]) {
+    
+    RunParameters r;
+    
+    // Process command line input
+    
+    for (int i=1;i<argc;i++) {
+        
+        if      (strcmp(argv[i],"-d")==0) { if (++i==argc) break; else r.directory=argv[i];                         }
+        else if (strcmp(argv[i],"-i")==0) { if (++i==argc) break; else r.infile=argv[i];                            }
+        else if (strcmp(argv[i],"-o")==0) { if (++i==argc) break; else r.outfile=argv[i];                           }
+        else if (strcmp(argv[i],"-s")==0) { if (++i==argc) break; else { r.statefile=argv[i]; r.importState=true; } }
+        
+        else if (strcmp(argv[i],"-n")==0)  { if (++i==argc) break; else r.n=(unsigned int)strtodouble(argv[i]); }
+        else if (strcmp(argv[i],"-g")==0)  { if (++i==argc) break; else r.g=(unsigned int)strtodouble(argv[i]); }
+        else if (strcmp(argv[i],"-mu")==0) { if (++i==argc) break; else r.mu=strtodouble(argv[i]);              }
+        else if (strcmp(argv[i],"-b")==0)  { if (++i==argc) break; else { r.bh=strtodouble(argv[i]); r.bJ=r.bh; } }
+        else if (strcmp(argv[i],"-bh")==0) { if (++i==argc) break; else r.bh=strtodouble(argv[i]);              }
+        else if (strcmp(argv[i],"-bJ")==0) { if (++i==argc) break; else r.bJ=strtodouble(argv[i]);              }
+        
+//        else if (strcmp(argv[i],"-e")==0) { if (argc-i<2) break; else { r.estart=strtoint(argv[++i]);
+//                                                                        r.eend=strtoint(argv[++i]);
+//                                                                        r.useEpitope=true;            } }
+        
+        else if (strcmp(argv[i],"-r")==0)   { r.useRelative=true;    }
+        else if (strcmp(argv[i],"-esc")==0) { r.runUntilEscape=true; }
+        else if (strcmp(argv[i],"-v")==0)   { r.useVerbose=true;     }
+        
+        else printf("Unrecognized command! '%s'\n",argv[i]);
+                
     }
-#endif
-
+    
+    run(r);
+    
     return 0;
-}
-
+    
+}      
+       
diff --git a/wf.h b/wf.h
new file mode 100644 (file)
index 0000000..f437a2e
--- /dev/null
+++ b/wf.h
@@ -0,0 +1,164 @@
+#ifndef WF_H
+#define WF_H
+
+#include <vector>
+#include <set>
+#include <math.h>
+#include <string>
+#include <sstream>
+#include <cstring>
+#include <iostream>
+#include <stdio.h>
+
+
+// STRING MANIPULATION
+
+// Converts generic to string
+
+template <class T>
+
+inline std::string tostring (const T& t) {
+    
+    std::stringstream ss;
+    ss << t;
+    return ss.str();
+    
+}
+
+
+// Converts a string to an integer
+
+inline int strtoint(const std::string &s) {
+    
+    std::istringstream i(s);
+    int x;
+    
+    if (!(i >> x)) printf("String to integer conversion failed!");
+    return x;
+    
+}
+
+
+// Converts a string to an unsigned integer
+
+inline int strtouint(const std::string &s) {
+    
+    std::istringstream i(s);
+    unsigned int x;
+    
+    if (!(i >> x)) printf("String to unsigned integer conversion failed!");
+    return x;
+    
+}
+
+
+// Converts a string to a double
+
+inline double strtodouble(const std::string &s) {
+    
+    std::istringstream i(s);
+    double x;
+    
+    if (!(i >> x)) printf("String to double conversion failed!");
+    return x;
+
+}
+
+
+// PROGRAM SETTINGS
+
+// This class holds the parameters needed for running the simulation
+
+class RunParameters {
+    
+public:
+    
+    std::string directory;      // Path to the directory where the inut file is located
+                                // Output will also be sent to this directory
+    std::string infile;         // Input file from which couplings are to be read
+    std::string outfile;        // Output file (prefix) where data is to be written
+    std::string statefile;      // Input state file which contains initial population fractions
+                                // and targeted epitope
+    
+    unsigned int n;             // Population size
+    unsigned int g;             // Number of generations
+    unsigned int num_runs;      // Number of times to run the simulation for gathering overall statistics
+    double mu;                  // Mutation rate
+    double bh;                  // Field "inverse temperature" multiplier
+    double bJ;                  // Coupling "inverse temperature" multiplier
+    int estart;                 // Epitope start position
+    int eend;                   // Epitope end position
+    bool runUntilEscape;        // If true, run the simulation until the population escapes
+    bool importState;           // If true, import state from a state file
+    bool useEpitope;            // Include selective pressure on an epitope 
+    bool useRelative;           // If true, use relative energy for survival probability
+    bool useVerbose;            // If true, print extra information while program is running
+    
+    std::vector<std::set<unsigned int> > initPop;   // Initial population sequences
+    std::vector<double>                  initFrac;  // Initial population fraction
+    std::vector<unsigned int>            eWT;       // Sites that are consensus (WT) in the targeted epitope
+    std::vector<unsigned int>            eMut;      // Sites that are mutant in the targeted epitope
+    double                               penalty;   // Energy penalty if sequence contains the targeted epitope
+    
+    std::string couplingsInfile;
+    std::string trajectoryOutfile;
+    std::string supplementaryOutfile;
+    std::string stateInfile;
+    
+    
+    RunParameters() {
+    
+        directory="";
+        
+        n  = 100000;
+        g  = 150;
+        num_runs = 1000;
+        mu = 6.0 * pow(10.0,-5.0);
+        bh = 1.0;
+        bJ = 1.0;
+        
+        estart = 0;
+        eend   = 0;
+        
+        penalty = 10.0;
+        
+        runUntilEscape=false;
+        importState=false;
+        useEpitope=false;
+        useRelative=false;
+        useVerbose=false;
+        
+    }
+    void setFiles() {
+        
+        if (strcmp(directory.c_str(),"")!=0) {
+        
+            couplingsInfile=directory+"/"+infile+".j";
+            trajectoryOutfile=directory+"/"+outfile+".dat";
+            supplementaryOutfile=directory+"/"+outfile+".sum";
+            stateInfile=directory+"/"+statefile+".st";
+            
+        }
+        
+        else {
+        
+            couplingsInfile=infile+".j";
+            trajectoryOutfile=outfile+".dat";
+            supplementaryOutfile=outfile+".sum";
+            stateInfile=statefile+".st";
+        
+        }
+        
+        //printf("%s\n%s\n%s\n",couplingsInfile.c_str(),trajectoryOutfile.c_str(),stateInfile.c_str());
+    
+    }
+    ~RunParameters() {}
+    
+};
+
+
+void run(RunParameters &r);
+void importState(RunParameters &r);
+
+
+#endif