From 40d1c174b1160c2d06c8f6e54729f0a1d17e038a Mon Sep 17 00:00:00 2001 From: Dariusz Murakowski Date: Tue, 14 May 2013 17:01:52 -0400 Subject: [PATCH] Wright-Fisher code by Tom Butler (via John Barton). --- Makefile | 13 +- binomial.cpp | 416 -------------------------------------------------------- hamiltonian.cpp | 291 +++++++++++++++++++++++++++++++++++++++ hamiltonian.h | 61 +++++++++ mt19937ar.cpp | 250 ---------------------------------- population.cpp | 281 ++++++++++++++++++++++++++++++++++++++ population.h | 41 ++++++ virus.cpp | 107 +++++++++++++++ virus.h | 44 ++++++ wf.cpp | 283 ++++++++++++++++++++++++++------------ wf.h | 164 ++++++++++++++++++++++ 11 files changed, 1191 insertions(+), 760 deletions(-) delete mode 100644 binomial.cpp create mode 100644 hamiltonian.cpp create mode 100644 hamiltonian.h delete mode 100644 mt19937ar.cpp create mode 100644 population.cpp create mode 100644 population.h create mode 100644 virus.cpp create mode 100644 virus.h create mode 100644 wf.h diff --git a/Makefile b/Makefile index 73f864f..231fde7 100644 --- a/Makefile +++ b/Makefile @@ -1,15 +1,13 @@ -#SRCS = wf.cpp mt19937ar.cpp # binomial.cpp -SRCS = wf.cpp mt19937ar.cpp +SRCS = wf.cpp hamiltonian.cpp population.cpp virus.cpp EXECNAME = wf -OBJS = $(SRCS:.cpp=.o) CXX = c++ CFLAGS = $(DBGFLAG) -Wall -Wextra -pedantic -INCLUDES = -I/usr/local/pkg/gsl/gsl-1.15/include +INCLUDEDIR = -I/usr/local/pkg/gsl/gsl-1.15/include # = `gsl-config --cflags` -LFLAGS = $(DBGFLAG) -LIBS = /usr/local/pkg/gsl/gsl-1.15/lib/libgsl.a /usr/local/pkg/gsl/gsl-1.15/lib/libgslcblas.a -lm +LIBDIR = -L/usr/local/pkg/gsl/gsl-1.15/lib +LIBS = -lgsl -lgslcblas -lm # = -L/usr/local/pkg/gsl/gsl-1.15/lib -lgsl -lgslcblas -lm # = `gsl-config --libs` @@ -27,7 +25,8 @@ all: $(EXECNAME) # @echo done making $(EXECNAME) $(EXECNAME): $(SRCS) - $(CXX) $(SRCS) $(CFLAGS) $(LFLAGS) $(INCLUDES) $(LIBS) -o $(EXECNAME) + $(CXX) $(SRCS) $(CFLAGS) $(LIBDIR) $(INCLUDEDIR) -o $(EXECNAME) $(LIBS) +# $(CXX) $(SRCS) $(CFLAGS) $(LFLAGS) $(INCLUDES) $(LIBS) -o $(EXECNAME) # $(CXX) $(CFLAGS) $(INCLUDES) -o $(EXECNAME) $(OBJS) $(LFLAGS) $(LIBS) # equivalent way using 'old-fashioned suffix rules' would be .c.o: diff --git a/binomial.cpp b/binomial.cpp deleted file mode 100644 index e092602..0000000 --- a/binomial.cpp +++ /dev/null @@ -1,416 +0,0 @@ -/* - * borrowed from GSL (1.15) source code: - * randist/binomial_tpe.c - * randist/binomial.c - * cdf/binomial.c - */ - -#include -#include - -/* - * forward declarations - */ -double genrand_real2(void); - -typedef struct - { - //const gsl_rng_type * type; - void *state; - } -gsl_rng; - -// call my Mersenne Twister -// note: not thread-safe!!! uses static variable.... -double gsl_rng_uniform(const gsl_rng *r) -{ - gsl_rng blah = *r; // solve warning: unused parameter - return genrand_real2(); -} - -/* { - * start of: sys/pow_int.c - */ -double gsl_pow_uint(double x, unsigned int n) -{ - double value = 1.0; - - /* repeated squaring method - * returns 0.0^0 = 1.0, so continuous in x - */ - do { - if(n & 1) value *= x; /* for n odd */ - n >>= 1; - x *= x; - } while (n); - - return value; -} -/* end of: sys/pow_int.c - * } - */ - -/* { - * start of: randist/binomial_tpe.c - */ - -/* The binomial distribution has the form, - - f(x) = n!/(x!(n-x)!) * p^x (1-p)^(n-x) for integer 0 <= x <= n - = 0 otherwise - - This implementation follows the public domain ranlib function - "ignbin", the bulk of which is the BTPE (Binomial Triangle - Parallelogram Exponential) algorithm introduced in - Kachitvichyanukul and Schmeiser[1]. It has been translated to use - modern C coding standards. - - If n is small and/or p is near 0 or near 1 (specifically, if - n*min(p,1-p) < SMALL_MEAN), then a different algorithm, called - BINV, is used which has an average runtime that scales linearly - with n*min(p,1-p). - - But for larger problems, the BTPE algorithm takes the form of two - functions b(x) and t(x) -- "bottom" and "top" -- for which b(x) < - f(x)/f(M) < t(x), with M = floor(n*p+p). b(x) defines a triangular - region, and t(x) includes a parallelogram and two tails. Details - (including a nice drawing) are in the paper. - - [1] Kachitvichyanukul, V. and Schmeiser, B. W. Binomial Random - Variate Generation. Communications of the ACM, 31, 2 (February, - 1988) 216. - - Note, Bruce Schmeiser (personal communication) points out that if - you want very fast binomial deviates, and you are happy with - approximate results, and/or n and n*p are both large, then you can - just use gaussian estimates: mean=n*p, variance=n*p*(1-p). - - This implementation by James Theiler, April 2003, after obtaining - permission -- and some good advice -- from Drs. Kachitvichyanukul - and Schmeiser to use their code as a starting point, and then doing - a little bit of tweaking. - - Additional polishing for GSL coding standards by Brian Gough. */ - -#define SMALL_MEAN 14 /* If n*p < SMALL_MEAN then use BINV - algorithm. The ranlib - implementation used cutoff=30; but - on my computer 14 works better */ - -#define BINV_CUTOFF 110 /* In BINV, do not permit ix too large */ - -#define FAR_FROM_MEAN 20 /* If ix-n*p is larger than this, then - use the "squeeze" algorithm. - Ranlib used 20, and this seems to - be the best choice on my machine as - well */ - -#define LNFACT(x) gsl_sf_lnfact(x) - -inline static double -Stirling (double y1) -{ - double y2 = y1 * y1; - double s = - (13860.0 - - (462.0 - (132.0 - (99.0 - 140.0 / y2) / y2) / y2) / y2) / y1 / 166320.0; - return s; -} - -unsigned int -gsl_ran_binomial (const gsl_rng * rng, double p, unsigned int n) -{ - int ix; /* return value */ - int flipped = 0; - double q, s, np; - - if (n == 0) - return 0; - - if (p > 0.5) - { - p = 1.0 - p; /* work with small p */ - flipped = 1; - } - - q = 1 - p; - s = p / q; - np = n * p; - - /* Inverse cdf logic for small mean (BINV in K+S) */ - - if (np < SMALL_MEAN) - { - double f0 = gsl_pow_uint (q, n); /* f(x), starting with x=0 */ - - while (1) - { - /* This while(1) loop will almost certainly only loop once; but - * if u=1 to within a few epsilons of machine precision, then it - * is possible for roundoff to prevent the main loop over ix to - * achieve its proper value. following the ranlib implementation, - * we introduce a check for that situation, and when it occurs, - * we just try again. - */ - - double f = f0; - double u = gsl_rng_uniform (rng); - - for (ix = 0; ix <= BINV_CUTOFF; ++ix) - { - if (u < f) - goto Finish; - u -= f; - /* Use recursion f(x+1) = f(x)*[(n-x)/(x+1)]*[p/(1-p)] */ - f *= s * (n - ix) / (ix + 1); - } - - /* It should be the case that the 'goto Finish' was encountered - * before this point was ever reached. But if we have reached - * this point, then roundoff has prevented u from decreasing - * all the way to zero. This can happen only if the initial u - * was very nearly equal to 1, which is a rare situation. In - * that rare situation, we just try again. - * - * Note, following the ranlib implementation, we loop ix only to - * a hardcoded value of SMALL_MEAN_LARGE_N=110; we could have - * looped to n, and 99.99...% of the time it won't matter. This - * choice, I think is a little more robust against the rare - * roundoff error. If n>LARGE_N, then it is technically - * possible for ix>LARGE_N, but it is astronomically rare, and - * if ix is that large, it is more likely due to roundoff than - * probability, so better to nip it at LARGE_N than to take a - * chance that roundoff will somehow conspire to produce an even - * larger (and more improbable) ix. If n= SMALL_MEAN, we invoke the BTPE algorithm */ - - int k; - - double ffm = np + p; /* ffm = n*p+p */ - int m = (int) ffm; /* m = int floor[n*p+p] */ - double fm = m; /* fm = double m; */ - double xm = fm + 0.5; /* xm = half integer mean (tip of triangle) */ - double npq = np * q; /* npq = n*p*q */ - - /* Compute cumulative area of tri, para, exp tails */ - - /* p1: radius of triangle region; since height=1, also: area of region */ - /* p2: p1 + area of parallelogram region */ - /* p3: p2 + area of left tail */ - /* p4: p3 + area of right tail */ - /* pi/p4: probability of i'th area (i=1,2,3,4) */ - - /* Note: magic numbers 2.195, 4.6, 0.134, 20.5, 15.3 */ - /* These magic numbers are not adjustable...at least not easily! */ - - double p1 = floor (2.195 * sqrt (npq) - 4.6 * q) + 0.5; - - /* xl, xr: left and right edges of triangle */ - double xl = xm - p1; - double xr = xm + p1; - - /* Parameter of exponential tails */ - /* Left tail: t(x) = c*exp(-lambda_l*[xl - (x+0.5)]) */ - /* Right tail: t(x) = c*exp(-lambda_r*[(x+0.5) - xr]) */ - - double c = 0.134 + 20.5 / (15.3 + fm); - double p2 = p1 * (1.0 + c + c); - - double al = (ffm - xl) / (ffm - xl * p); - double lambda_l = al * (1.0 + 0.5 * al); - double ar = (xr - ffm) / (xr * q); - double lambda_r = ar * (1.0 + 0.5 * ar); - double p3 = p2 + c / lambda_l; - double p4 = p3 + c / lambda_r; - - double var, accept; - double u, v; /* random variates */ - - TryAgain: - - /* generate random variates, u specifies which region: Tri, Par, Tail */ - u = gsl_rng_uniform (rng) * p4; - v = gsl_rng_uniform (rng); - - if (u <= p1) - { - /* Triangular region */ - ix = (int) (xm - p1 * v + u); - goto Finish; - } - else if (u <= p2) - { - /* Parallelogram region */ - double x = xl + (u - p1) / c; - v = v * c + 1.0 - fabs (x - xm) / p1; - if (v > 1.0 || v <= 0.0) - goto TryAgain; - ix = (int) x; - } - else if (u <= p3) - { - /* Left tail */ - ix = (int) (xl + log (v) / lambda_l); - if (ix < 0) - goto TryAgain; - v *= ((u - p2) * lambda_l); - } - else - { - /* Right tail */ - ix = (int) (xr - log (v) / lambda_r); - if (ix > (double) n) - goto TryAgain; - v *= ((u - p3) * lambda_r); - } - - /* At this point, the goal is to test whether v <= f(x)/f(m) - * - * v <= f(x)/f(m) = (m!(n-m)! / (x!(n-x)!)) * (p/q)^{x-m} - * - */ - - /* Here is a direct test using logarithms. It is a little - * slower than the various "squeezing" computations below, but - * if things are working, it should give exactly the same answer - * (given the same random number seed). */ - -#ifdef DIRECT - var = log (v); - - accept = - LNFACT (m) + LNFACT (n - m) - LNFACT (ix) - LNFACT (n - ix) - + (ix - m) * log (p / q); - -#else /* SQUEEZE METHOD */ - - /* More efficient determination of whether v < f(x)/f(M) */ - - k = abs (ix - m); - - if (k <= FAR_FROM_MEAN) - { - /* - * If ix near m (ie, |ix-m| ix) - { - int i; - for (i = ix + 1; i <= m; i++) - { - f /= (g / i - s); - } - } - - accept = f; - } - else - { - /* If ix is far from the mean m: k=ABS(ix-m) large */ - - var = log (v); - - if (k < npq / 2 - 1) - { - /* "Squeeze" using upper and lower bounds on - * log(f(x)) The squeeze condition was derived - * under the condition k < npq/2-1 */ - double amaxp = - k / npq * ((k * (k / 3.0 + 0.625) + (1.0 / 6.0)) / npq + 0.5); - double ynorm = -(k * k / (2.0 * npq)); - if (var < ynorm - amaxp) - goto Finish; - if (var > ynorm + amaxp) - goto TryAgain; - } - - /* Now, again: do the test log(v) vs. log f(x)/f(M) */ - -#if USE_EXACT - /* This is equivalent to the above, but is a little (~20%) slower */ - /* There are five log's vs three above, maybe that's it? */ - - accept = LNFACT (m) + LNFACT (n - m) - - LNFACT (ix) - LNFACT (n - ix) + (ix - m) * log (p / q); - -#else /* USE STIRLING */ - /* The "#define Stirling" above corresponds to the first five - * terms in asymptoic formula for - * log Gamma (y) - (y-0.5)log(y) + y - 0.5 log(2*pi); - * See Abramowitz and Stegun, eq 6.1.40 - */ - - /* Note below: two Stirling's are added, and two are - * subtracted. In both K+S, and in the ranlib - * implementation, all four are added. I (jt) believe that - * is a mistake -- this has been confirmed by personal - * correspondence w/ Dr. Kachitvichyanukul. Note, however, - * the corrections are so small, that I couldn't find an - * example where it made a difference that could be - * observed, let alone tested. In fact, define'ing Stirling - * to be zero gave identical results!! In practice, alv is - * O(1), ranging 0 to -10 or so, while the Stirling - * correction is typically O(10^{-5}) ...setting the - * correction to zero gives about a 2% performance boost; - * might as well keep it just to be pendantic. */ - - { - double x1 = ix + 1.0; - double w1 = n - ix + 1.0; - double f1 = fm + 1.0; - double z1 = n + 1.0 - fm; - - accept = xm * log (f1 / x1) + (n - m + 0.5) * log (z1 / w1) - + (ix - m) * log (w1 * p / (x1 * q)) - + Stirling (f1) + Stirling (z1) - Stirling (x1) - Stirling (w1); - } -#endif -#endif - } - - - if (var <= accept) - { - goto Finish; - } - else - { - goto TryAgain; - } - } - -Finish: - - return (flipped) ? (n - ix) : (unsigned int)ix; -} - -unsigned int -gsl_ran_binomial_tpe (const gsl_rng * rng, double p, unsigned int n) -{ - return gsl_ran_binomial (rng, p, n); -} - -/* end of: randist/binomial_tpe.c - * } - */ - diff --git a/hamiltonian.cpp b/hamiltonian.cpp new file mode 100644 index 0000000..b5b94ba --- /dev/null +++ b/hamiltonian.cpp @@ -0,0 +1,291 @@ +#include +#include +#include +#include +#include +#include +#include + +#include "hamiltonian.h" + + +//Constructor for loading couplings from John Barton's Ising Inversion code +//Code for this function derived from John Barton's code + +Hamiltonian::Hamiltonian(std::string &FILENAME) { + + this->bh=1.0; + this->bJ=1.0; + + FILE* input; + input = fopen(FILENAME.c_str(),"r"); + + typedef std::vector Key; + std::map JIndex; + char o; + int site=0; + + fscanf(input,"%d",&size); + + while (fscanf(input,"%d",&site)==1) { + + Key inputKey(2,site); + while (fscanf(input,"%c",&o)==1) { + + if (o==';') break; + + else { + + int neighborSite=0; + double neighborJ=0; + fscanf(input,"%d %c %lf",&neighborSite,&o,&neighborJ); + + if (neighborSite==site) { + + inputKey[1]=site; + JIndex[inputKey]=neighborJ; + + } + else { + + inputKey[1]=neighborSite; + JIndex[inputKey]=neighborJ; + + } + + } + + } + + } + + fclose(input); + Key inputKey(2,0); + J.resize(size); + + for(int i=0;ibh=1.0; + this->bJ=1.0; + + FILE* input; + input = fopen(FILENAME.c_str(),"r"); + + typedef std::vector Key; + std::map JIndex; + char o; + int site=0; + + fscanf(input,"%d",&size); + + while (fscanf(input,"%d",&site)==1) { + + Key inputKey(2,site); + while (fscanf(input,"%c",&o)==1) { + + if (o==';') break; + + else { + + int neighborSite=0; + double neighborJ=0; + fscanf(input,"%d %c %lf",&neighborSite,&o,&neighborJ); + + if (neighborSite==site) { + + inputKey[1]=site; + JIndex[inputKey]=neighborJ; + + } + else { + + inputKey[1]=neighborSite; + JIndex[inputKey]=neighborJ; + + } + + } + + } + + } + + fclose(input); + Key inputKey(2,0); + J.resize(size); + + for(int i=0;i &mutated_sites) const { + + double efield = 0.0; + double ecoupling = 0.0; + std::set::iterator iter1; + std::set::iterator iter2; + + for (iter1=mutated_sites.begin();iter1!=mutated_sites.end();++iter1) { + + efield -= J[*iter1][*iter1]; + + iter2 = iter1; + ++iter2; + + for (;iter2!=mutated_sites.end();iter2++) ecoupling -= J[*iter1][*iter2]; + + } + + // Check for mismatch between targeted epitope and current sequence + + return ((efield * bh) + (ecoupling * bJ)); + +} + + +// Set the targeted epitope + +void EpitopeHamiltonian::set_epitope(const std::vector &eWT, const std::vector &eMut, double d) { + + penalty=d; + epitopeWT=eWT; + epitopeMut=eMut; + +} + + +// Return true if the virus has escaped from immune pressure + +bool EpitopeHamiltonian::escaped(const Virus &v) { + + return EpitopeHamiltonian::escaped(v.mutated_sites); + +} + + +// Return true if the sequence has escaped from immune pressure + +bool EpitopeHamiltonian::escaped(const std::set &mutated_sites) const { + + bool escape=false; + + for (int i=0;i &mutated_sites) const { + + double efield = 0.0; + double ecoupling = 0.0; + std::set::iterator iter1; + std::set::iterator iter2; + + for (iter1=mutated_sites.begin();iter1!=mutated_sites.end();++iter1) { + + efield -= J[*iter1][*iter1]; + + iter2 = iter1; + ++iter2; + + for (;iter2!=mutated_sites.end();iter2++) ecoupling -= J[*iter1][*iter2]; + + } + + // Check for mismatch between targeted epitope and current sequence + + if (escaped(mutated_sites)) return ((efield * bh) + (ecoupling * bJ)); + else return ((efield * bh) + (ecoupling * bJ) + (penalty * bh)); + +} + + + + + diff --git a/hamiltonian.h b/hamiltonian.h new file mode 100644 index 0000000..90e2978 --- /dev/null +++ b/hamiltonian.h @@ -0,0 +1,61 @@ +#ifndef HAMILTONIAN_H +#define HAMILTONIAN_H + +#include +#include +#include + +#include "virus.h" + + +typedef std::vector > AdjacencyList; +typedef std::vector > Coupling; + + +class Virus; + + +class Hamiltonian { + +public: + + int size; + double bh; // fields "inverse temperature" multiplier + double bJ; // couplings "inverse temperature" multiplier + Coupling J; + AdjacencyList Graph; + + Hamiltonian(std::string &FILENAME); + Hamiltonian() { } + virtual ~Hamiltonian() { } + + virtual bool escaped(const Virus &v) { return false; } + virtual bool escaped(const std::set &mutated_sites) const { return false; } + virtual double get_energy(const std::set &mutated_sites) const; + + void set_temp(double x) { bh=x; bJ=x; } + void set_temp(double x, double y) { bh=x; bJ=y; } + +}; + + +class EpitopeHamiltonian : public Hamiltonian { + +public: + + double penalty; + std::vector epitopeWT; + std::vector epitopeMut; + + EpitopeHamiltonian(std::string &FILENAME); + + void set_epitope(const std::vector &eWT, const std::vector &eMut, double d); + + bool escaped(const Virus &v); + bool escaped(const std::set &mutated_sites) const; + double get_energy(const std::set &mutated_sites) const; + +}; + + +#endif diff --git a/mt19937ar.cpp b/mt19937ar.cpp deleted file mode 100644 index c05cf87..0000000 --- a/mt19937ar.cpp +++ /dev/null @@ -1,250 +0,0 @@ -// SOURCE DOWNLOADED FROM: http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/MT2002/CODES/mt19937ar.c -// MORE INFO: http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/MT2002/emt19937ar.html - -/* - A C-program for MT19937, with initialization improved 2002/1/26. - Coded by Takuji Nishimura and Makoto Matsumoto. - - Before using, initialize the state by using init_genrand(seed) - or init_by_array(init_key, key_length). - - Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura, - All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - 1. Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - 3. The names of its contributors may not be used to endorse or promote - products derived from this software without specific prior written - permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - - Any feedback is very welcome. - http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/emt.html - email: m-mat @ math.sci.hiroshima-u.ac.jp (remove space) -*/ - -#include -#include - -/* Period parameters */ -#define N 624 -#define M 397 -#define MATRIX_A 0x9908b0dfUL /* constant vector a */ -#define UPPER_MASK 0x80000000UL /* most significant w-r bits */ -#define LOWER_MASK 0x7fffffffUL /* least significant r bits */ - -static unsigned long mt[N]; /* the array for the state vector */ -static int mti=N+1; /* mti==N+1 means mt[N] is not initialized */ - -/* initializes mt[N] with a seed */ -void init_genrand(unsigned long s) -{ - mt[0]= s & 0xffffffffUL; - for (mti=1; mti> 30)) + mti); - /* See Knuth TAOCP Vol2. 3rd Ed. P.106 for multiplier. */ - /* In the previous versions, MSBs of the seed affect */ - /* only MSBs of the array mt[]. */ - /* 2002/01/09 modified by Makoto Matsumoto */ - mt[mti] &= 0xffffffffUL; - /* for >32 bit machines */ - } -} - -/* initialize by an array with array-length */ -/* init_key is the array for initializing keys */ -/* key_length is its length */ -/* slight change for C++, 2004/2/26 */ -void init_by_array(unsigned long init_key[], int key_length) -{ - int i, j, k; - init_genrand(19650218UL); - i=1; j=0; - k = (N>key_length ? N : key_length); - for (; k; k--) { - mt[i] = (mt[i] ^ ((mt[i-1] ^ (mt[i-1] >> 30)) * 1664525UL)) - + init_key[j] + j; /* non linear */ - mt[i] &= 0xffffffffUL; /* for WORDSIZE > 32 machines */ - i++; j++; - if (i>=N) { mt[0] = mt[N-1]; i=1; } - if (j>=key_length) j=0; - } - for (k=N-1; k; k--) { - mt[i] = (mt[i] ^ ((mt[i-1] ^ (mt[i-1] >> 30)) * 1566083941UL)) - - i; /* non linear */ - mt[i] &= 0xffffffffUL; /* for WORDSIZE > 32 machines */ - i++; - if (i>=N) { mt[0] = mt[N-1]; i=1; } - } - - mt[0] = 0x80000000UL; /* MSB is 1; assuring non-zero initial array */ -} - -/* generates a random number on [0,0xffffffff]-interval */ -unsigned long genrand_int32(void) -{ - unsigned long y; - static unsigned long mag01[2]={0x0UL, MATRIX_A}; - /* mag01[x] = x * MATRIX_A for x=0,1 */ - - if (mti >= N) { /* generate N words at one time */ - int kk; - - if (mti == N+1) /* if init_genrand() has not been called, */ - init_genrand(5489UL); /* a default initial seed is used */ - - for (kk=0;kk> 1) ^ mag01[y & 0x1UL]; - } - for (;kk> 1) ^ mag01[y & 0x1UL]; - } - y = (mt[N-1]&UPPER_MASK)|(mt[0]&LOWER_MASK); - mt[N-1] = mt[M-1] ^ (y >> 1) ^ mag01[y & 0x1UL]; - - mti = 0; - } - - y = mt[mti++]; - - /* Tempering */ - y ^= (y >> 11); - y ^= (y << 7) & 0x9d2c5680UL; - y ^= (y << 15) & 0xefc60000UL; - y ^= (y >> 18); - - return y; -} - -/* generates a random number on [0,0x7fffffff]-interval */ -long genrand_int31(void) -{ - return (long)(genrand_int32()>>1); -} - -/* generates a random number on [0,1]-real-interval */ -double genrand_real1(void) -{ - return genrand_int32()*(1.0/4294967295.0); - /* divided by 2^32-1 */ -} - -/* generates a random number on [0,1)-real-interval */ -double genrand_real2(void) -{ - return genrand_int32()*(1.0/4294967296.0); - /* divided by 2^32 */ -} - -/* generates a random number on (0,1)-real-interval */ -double genrand_real3(void) -{ - return (((double)genrand_int32()) + 0.5)*(1.0/4294967296.0); - /* divided by 2^32 */ -} - -/* generates a random number on [0,1) with 53-bit resolution*/ -double genrand_res53(void) -{ - unsigned long a=genrand_int32()>>5, b=genrand_int32()>>6; - return(a*67108864.0+b)*(1.0/9007199254740992.0); -} -/* These real versions are due to Isaku Wada, 2002/01/09 added */ - -/* -int main(void) -{ - int i; - unsigned long init[4]={0x123, 0x234, 0x345, 0x456}, length=4; - init_by_array(init, length); - printf("1000 outputs of genrand_int32()\n"); - for (i=0; i<1000; i++) { - printf("%10lu ", genrand_int32()); - if (i%5==4) printf("\n"); - } - printf("\n1000 outputs of genrand_real2()\n"); - for (i=0; i<1000; i++) { - printf("%10.8f ", genrand_real2()); - if (i%5==4) printf("\n"); - } - return 0; -} -*/ -#undef N -#undef M -#undef MATRIX_A -#undef UPPER_MASK -#undef LOWER_MASK - - - -#define MT_RAND_MAX 0xffffffffUL /* 2**32 - 1 */ - -/* - * generate uniform 0 <= x < N, i.e. interval [0,N) - * int x = (int)((double)rand() / (((double)RAND_MAX + (double)1.0) / N)) - */ - -/* - * Returns a random integer in the range [0..limit) - * see http://stackoverflow.com/questions/2999075/generate-a-random-number-within-range/2999130#2999130 - * and http://stackoverflow.com/questions/10219355/does-n-rand-rand-max-make-a-skewed-random-number-distribution/10219422#10219422 - * alternate at http://stackoverflow.com/questions/11641629/generating-a-uniform-distribution-of-integers-in-c/11658168#11658168 - * also http://c-faq.com/lib/randrange.html - * and GSL source: http://bzr.savannah.gnu.org/lh/gsl/trunk/annotate/head:/rng/gsl_rng.h - */ -unsigned int rand_lim(unsigned int limit) { - unsigned int divisor = MT_RAND_MAX/limit; - unsigned int retval; - do { - retval = genrand_int32() / divisor; - } while (retval >= limit); - return retval; -} - -#undef MT_RAND_MAX - -/* generate a random number generator seed from /dev/urandom */ -// borrowed from SSC src/runtime/ssc_rts.c -unsigned ising_sim_random_seed() { - unsigned random_seed; - FILE *rng = fopen("/dev/urandom", "r"); - if (rng == NULL) { - fprintf(stderr, - "error: cannot read /dev/urandom randomness generator\n"); - exit(1); - } - size_t num_random_read = fread(&random_seed, sizeof(random_seed), 1, rng); - if (num_random_read != 1) { - fprintf(stderr, - "error: cannot read /dev/urandom randomness generator\n"); - exit(1); - } - fclose(rng); - return random_seed; -} - diff --git a/population.cpp b/population.cpp new file mode 100644 index 0000000..a222a87 --- /dev/null +++ b/population.cpp @@ -0,0 +1,281 @@ +#include +#include +#include +#include +#include +#include +#include + +#include "population.h" + + +//Constructor that assembles a population of N viruses of wild type + +Population::Population(const Hamiltonian &H, unsigned int N, double mu) { + + Virus V(H, mu); + pop[V] = N; + this->N = N; + this->mu = mu; + + p = 1.0-pow((1.0-V.mu),H.size); + Eavg = V.energy; + +} + + +//Constructor that assembles a population of N viruses given starting population fractions + +Population::Population(const Hamiltonian &H, unsigned int N, double mu, const std::vector > &initPop, const std::vector &initFrac) { + + for (int i=0;iN = N; + this->mu = mu; + + p = 1.0-pow((1.0-mu),H.size); + Eavg /= (double) N; + +} + + +//Step population forward a generation. + +void Population::next_generation(const Hamiltonian &H, gsl_rng* r, bool useRelative, bool useVerbose) { + + mutate_population(H,r); + + unsigned int total_deaths = 0; + unsigned int num_survive = 0; + double new_Eavg = 0.0; + + virus_map::iterator iter=pop.begin(); + + for(;iter!=pop.end();++iter){ + + if (useRelative) num_survive = gsl_ran_binomial(r,iter->first.survival(Eavg),iter->second); + else num_survive = gsl_ran_binomial(r,iter->first.survival(),iter->second); + + total_deaths += iter->second - num_survive; + iter->second = num_survive; + + new_Eavg += iter->first.energy * num_survive; + + // Report survival + + if (useVerbose) { + + std::cout << "survival probability " << ((useRelative) ? iter->first.survival(Eavg) : iter->first.survival()) + << " number survived " << num_survive << " total number " + << iter->second << std::endl; + + } + + } + + if (useVerbose) { std::cout << "checkpoint, " << "total_deaths = " << total_deaths << std::endl; } + + //delete zero population strains + + iter=pop.begin(); + + while (iter!=pop.end()) { + + if (iter->second==0) pop.erase(iter++); + else ++iter; + + } + + unsigned int ran; + unsigned int selector; + unsigned int pop_size=compute_population_size(); + + if (useVerbose) print_population_size(); + + virus_map prev_gen = pop; + + for (unsigned int i=0; isecond; + + while (selectorsecond; + + } + + pop[iter->first]++; + + new_Eavg += iter->first.energy; + + } + + Eavg = new_Eavg/((double) N); + +} + + +// Output population to file + +void Population::write_population(std::string filename) { + + std::ofstream fout; + fout.open(filename.c_str()); + + std::set ms; + + for (virus_map::iterator iter=pop.begin(); iter!=pop.end(); ++iter) { + + fout << iter->second << '\t'; + fout << iter->first.energy << '\t'; + + ms = iter->first.mutated_sites; + + for (std::set::iterator ms_iter=ms.begin(); ms_iter!=ms.end(); ++ms_iter) { + + fout << *ms_iter << '\t'; + + } + + fout << std::endl; + + } + + fout.close(); + +} + + +// Output population to file + +void Population::write_population(FILE *output, unsigned int generation) { + + fprintf(output,"%d\n",generation); + + std::set ms; + + for (virus_map::iterator iter=pop.begin(); iter!=pop.end(); ++iter) { + + fprintf(output,"%d\t%.6e",iter->second,iter->first.energy); + + ms = iter->first.mutated_sites; + + for (std::set::iterator ms_iter=ms.begin(); ms_iter!=ms.end(); ++ms_iter) fprintf(output,"\t%d",*ms_iter); + fprintf(output,"\n"); + + } + + fflush(output); + +} + + +// Compute the total population size + +unsigned int Population::compute_population_size() { + + unsigned int i=0; + + for (virus_map::iterator iter=pop.begin(); iter!=pop.end(); ++iter) i+=iter->second; + + return i; + +} + + +// Print population size to standard out + +void Population::print_population_size() { + + std::cout << "The total population size is " << compute_population_size() << std::endl; + +} + + +//Mutate every virus in the population + +void Population::mutate_population(const Hamiltonian &H, gsl_rng* r) { + + virus_map prev_generation = pop; + unsigned int num_mutate; + + for(virus_map::iterator iter=prev_generation.begin(); iter!=prev_generation.end(); ++iter) { + + num_mutate=gsl_ran_binomial(r,p,iter->second); + + pop[iter->first]=pop[iter->first]-num_mutate; + + if (pop[iter->first]<=0) pop.erase(iter->first); + + for (unsigned int i=0; ifirst.mutated_sites); + V.mutate(H,r); + + if (pop.count(V)==0) pop[V] = 1; + else pop[V] += 1; + + } + + } + +} + + +// Compute the number of escaped viruses in the population + +unsigned int Population::compute_num_escaped(Hamiltonian &H) { + + unsigned int i=0; + + for (virus_map::iterator iter=pop.begin(); iter!=pop.end(); ++iter) { if (H.escaped(iter->first)) i+=iter->second; } + + return i; + +} + + +// Check whether most of the population has escaped from immune pressure + +bool Population::escaped(Hamiltonian &H) { + + if (2*compute_num_escaped(H)>N) return true; + else return false; + +} + + +// Compute the number of escaped viruses in the population + +unsigned int Population::escape_variant(Hamiltonian &H, std::set &mutant) { + + unsigned int i=0; + + for (virus_map::iterator iter=pop.begin(); iter!=pop.end(); ++iter) { + + if (H.escaped(iter->first) && (iter->second)>i) { + + i=iter->second; + mutant=iter->first.mutated_sites; + + } + + } + + return i; + +} + + + + + diff --git a/population.h b/population.h new file mode 100644 index 0000000..7e7edd7 --- /dev/null +++ b/population.h @@ -0,0 +1,41 @@ +#ifndef POPULATION_H +#define POPULATION_H + +#include +#include +#include +#include + +#include "virus.h" + + +typedef std::map virus_map; + + +class Population{ + +public: + + unsigned int N; // Population size + double p; // Probability that a sequence has one or more mutations + double Eavg; // Average energy of the sequence population + double mu; // Mutation rate + virus_map pop; + + Population(const Hamiltonian &H, unsigned int N, double mu); + Population(const Hamiltonian &H, unsigned int N, double mu, const std::vector > &initPop, const std::vector &initFrac); + + void next_generation(const Hamiltonian &H, gsl_rng* r, bool useRelative, bool useVerbose); + void write_population(std::string filepath); + void write_population(FILE *output, unsigned int generation); + unsigned int compute_population_size(); + void print_population_size(); + void mutate_population(const Hamiltonian &H, gsl_rng* r); + + unsigned int compute_num_escaped(Hamiltonian &H); + bool escaped(Hamiltonian &H); + unsigned int escape_variant(Hamiltonian &H, std::set &mutant); + +}; + +#endif diff --git a/virus.cpp b/virus.cpp new file mode 100644 index 0000000..007a5d1 --- /dev/null +++ b/virus.cpp @@ -0,0 +1,107 @@ +#include +#include +#include +#include +#include +#include + +#include "virus.h" + + +//Constuct a virus object of wildtype + +Virus::Virus(const Hamiltonian &H, double mu) { + + this->mutated_sites.clear(); + this->mu=mu; + L=H.size; + energy=0; + +} + + +//Construct a virus and compute its energy. + +Virus::Virus(const Hamiltonian &H, double mu, const std::set &mutated_sites) { + + this->mutated_sites=mutated_sites; + this->mu=mu; + L=H.size; + update_energy(H); + +} + + +//Print key numerical parameters of the object to the terminal for diagnostics. + +void Virus::print_parameters() { + + std::cout << "mu = " << mu << std::endl; + std::cout << "energy = " << energy << std::endl; + +} + + +//Mutate the entire virus. Takes a Hamiltonian object and a pointer +//to an instance of a gsl_rng random number generator + +void Virus::mutate(const Hamiltonian &H, gsl_rng* r) { + + unsigned int n=gsl_ran_binomial(r,mu,L); + + while (n<1) n=gsl_ran_binomial(r,mu,L); + + for (int i=0;i +#include +#include + +#include "hamiltonian.h" + + +class Hamiltonian; + + +class Virus { + +public: + + double energy; + std::set mutated_sites; + + Virus(const Hamiltonian &H, double mu); + Virus(const Hamiltonian &H, double mu, const std::set &mutated_sites); + + void print_parameters(); + void mutate(const Hamiltonian &H, gsl_rng* r); + double survival() const; + double survival(double Eavg) const; + + friend class Population; + +private: + + double mu; + unsigned int L; + + void update_energy(const Hamiltonian &H); + +}; + + +bool operator<(const Virus& lhs, const Virus& rhs); + +#endif + diff --git a/wf.cpp b/wf.cpp index 6af0fe9..8c14ef8 100644 --- a/wf.cpp +++ b/wf.cpp @@ -1,101 +1,210 @@ - #include -#include - -// STL (Standard Template Library) -#include -#include -#include // std::copy -#include // std::ostream_iterator +#include +#include +#include +#include +#include -// GSL (GNU Scientific Library) #include -#include - -// mt19937ar.cpp forward declarations -void init_genrand(unsigned long); -unsigned long genrand_int32(void); -unsigned ising_sim_random_seed(); -/* -// binomial.cpp forward declarations -typedef struct { void *state; } gsl_rng; -unsigned int gsl_ran_binomial(const gsl_rng*, double, unsigned int); -*/ - -typedef unsigned int spin_t; -typedef std::set mutset_t; // virus is defined by set of mutations -typedef std::map pop_t; // number of each virus species +#include "hamiltonian.h" +#include "population.h" +#include "virus.h" +#include "wf.h" + + +// Run the program + +void run(RunParameters &r) { + + printf("\n"); + + // Initialize RNG and set initial state, if importing from file + + static gsl_rng *rnd = gsl_rng_alloc(gsl_rng_taus); //pointer to random number generator state + + srand((unsigned)time(0)); + gsl_rng_set(rnd,rand()); + + r.setFiles(); + //FILE *popout=fopen(r.trajectoryOutfile.c_str(),"w"); + FILE *supout=fopen(r.supplementaryOutfile.c_str(),"w"); + + if (r.importState) importState(r); + + + // Run (w/ targeted epitope) + + if (r.useEpitope) { + + for (unsigned int n=0;n esc_var; + unsigned int esc_num=P.escape_variant(H, esc_var); + + fprintf(supout,"%d\t%d",i,esc_num); + for (std::set::iterator iter=esc_var.begin(); iter!=esc_var.end(); ++iter) fprintf(supout,"\t%d",*iter); + fprintf(supout,"\n"); + + fflush(supout); + + } + + } + + + // Run (w/out targeted epitope) + + else { + + Hamiltonian H(r.couplingsInfile); + H.set_temp(r.bh, r.bJ); + Population P(H, r.n, r.mu); + + for (unsigned int i=0; i -std::ostream& operator<<(std::ostream& os, const std::set &v) -{ - typename std::set::iterator i = v.begin(), end = v.end(); - os << *i; // print first element - for (++i; i != end; ++i) // then with comma as prefix - os << ',' << *i; - return os; } -int main(int argc, char **argv) -{ - argc++; argv++; // avoid "warning: unused parameter" - - gsl_rng *rng = gsl_rng_alloc(gsl_rng_mt19937); - gsl_rng_set(rng,ising_sim_random_seed()); - - unsigned int NUMSITES = 100; - - pop_t Pop; - - spin_t muts[] = {3,0,5}; - mutset_t s(muts,muts+3); - /* - mutset_t s; - for (int i = 0; i < 3; ++i) - s.insert(muts[i]); - */ - std::cout << gsl_ran_binomial(rng,0.1,NUMSITES) << std::endl; +// Import initial state from a state file + +void importState(RunParameters &r) { + + FILE *input=fopen(r.stateInfile.c_str(),"r"); + + char o; + double frac; + unsigned int site; + + // Import initial state of the population + + while (fscanf(input,"%le",&frac)==1) { + + r.initFrac.push_back(frac); + + std::set tempSet; + + while (fscanf(input,"%c",&o)==1) { + + if (o=='\n' || o==';') break; + + else { + + fscanf(input,"%u",&site); + tempSet.insert(site); + + } + + } + + r.initPop.push_back(tempSet); + + if (o==';') break; + + } + + // Import targeted epitope + + while (fscanf(input,"%u",&site)==1) { + + r.eWT.push_back(site); + + fscanf(input,"%c",&o); + if (o=='\n') break; + + } + + while (fscanf(input,"%u",&site)==1) { + + r.eMut.push_back(site); + + fscanf(input,"%c",&o); + if (o=='\n') break; + + } + + r.useEpitope=true; - Pop[s] = 100; - Pop[s] += 50; +} -#if 0 - for(pop_t::iterator vi = Pop.begin(); vi != Pop.end(); ++vi) { - mutset_t v = vi->first; - /* - // appends "," to each element - std::ostream_iterator output(std::cout, ","); - std::copy(v.begin(),v.end(), output); - */ - /* - // appends "," to each element - for(mutset_t::iterator i = v.begin(); i != v.end(); ++i) { - std::cout << (*i) << ","; - } - */ - /* - // prefix every element (except the first) with "," - mutset_t::iterator i = v.begin(), end = v.end(); - std::cout << *i; - for (++i; i != end; ++i) - std::cout << ',' << *i; - std::cout << ' '; - */ - ///* - // proper printing function - std::cout << v << ' '; - //*/ +/* + * Command line rules: + * + * -d (string) working directory + * -i (string) input couplings file + * -o (string) output file stem + * -s (string) input state file, containing initial population fraction and targeted epitope + * -n (int/double) population size + * -g (int/double) number of generations + * -mu (double) mutation rate + * -b (double) "inverse temperature" multiplier + * -e (int) (int) targeted epitope spans sites [e#1, ..., e#2], assuming all WT + * -r flag to use relative energy condition for survival probability + * -esc flag to run until escape observed (or max number of generations reached) + * -v flag for verbose output + * + */ - std::cout << vi->second << std::endl; +int main(int argc, char *argv[]) { + + RunParameters r; + + // Process command line input + + for (int i=1;i +#include +#include +#include +#include +#include +#include +#include + + +// STRING MANIPULATION + +// Converts generic to string + +template + +inline std::string tostring (const T& t) { + + std::stringstream ss; + ss << t; + return ss.str(); + +} + + +// Converts a string to an integer + +inline int strtoint(const std::string &s) { + + std::istringstream i(s); + int x; + + if (!(i >> x)) printf("String to integer conversion failed!"); + return x; + +} + + +// Converts a string to an unsigned integer + +inline int strtouint(const std::string &s) { + + std::istringstream i(s); + unsigned int x; + + if (!(i >> x)) printf("String to unsigned integer conversion failed!"); + return x; + +} + + +// Converts a string to a double + +inline double strtodouble(const std::string &s) { + + std::istringstream i(s); + double x; + + if (!(i >> x)) printf("String to double conversion failed!"); + return x; + +} + + +// PROGRAM SETTINGS + +// This class holds the parameters needed for running the simulation + +class RunParameters { + +public: + + std::string directory; // Path to the directory where the inut file is located + // Output will also be sent to this directory + std::string infile; // Input file from which couplings are to be read + std::string outfile; // Output file (prefix) where data is to be written + std::string statefile; // Input state file which contains initial population fractions + // and targeted epitope + + unsigned int n; // Population size + unsigned int g; // Number of generations + unsigned int num_runs; // Number of times to run the simulation for gathering overall statistics + double mu; // Mutation rate + double bh; // Field "inverse temperature" multiplier + double bJ; // Coupling "inverse temperature" multiplier + int estart; // Epitope start position + int eend; // Epitope end position + bool runUntilEscape; // If true, run the simulation until the population escapes + bool importState; // If true, import state from a state file + bool useEpitope; // Include selective pressure on an epitope + bool useRelative; // If true, use relative energy for survival probability + bool useVerbose; // If true, print extra information while program is running + + std::vector > initPop; // Initial population sequences + std::vector initFrac; // Initial population fraction + std::vector eWT; // Sites that are consensus (WT) in the targeted epitope + std::vector eMut; // Sites that are mutant in the targeted epitope + double penalty; // Energy penalty if sequence contains the targeted epitope + + std::string couplingsInfile; + std::string trajectoryOutfile; + std::string supplementaryOutfile; + std::string stateInfile; + + + RunParameters() { + + directory=""; + + n = 100000; + g = 150; + num_runs = 1000; + mu = 6.0 * pow(10.0,-5.0); + bh = 1.0; + bJ = 1.0; + + estart = 0; + eend = 0; + + penalty = 10.0; + + runUntilEscape=false; + importState=false; + useEpitope=false; + useRelative=false; + useVerbose=false; + + } + void setFiles() { + + if (strcmp(directory.c_str(),"")!=0) { + + couplingsInfile=directory+"/"+infile+".j"; + trajectoryOutfile=directory+"/"+outfile+".dat"; + supplementaryOutfile=directory+"/"+outfile+".sum"; + stateInfile=directory+"/"+statefile+".st"; + + } + + else { + + couplingsInfile=infile+".j"; + trajectoryOutfile=outfile+".dat"; + supplementaryOutfile=outfile+".sum"; + stateInfile=statefile+".st"; + + } + + //printf("%s\n%s\n%s\n",couplingsInfile.c_str(),trajectoryOutfile.c_str(),stateInfile.c_str()); + + } + ~RunParameters() {} + +}; + + +void run(RunParameters &r); +void importState(RunParameters &r); + + +#endif -- 2.7.4