From: Dariusz Murakowski Date: Tue, 14 May 2013 21:01:52 +0000 (-0400) Subject: Wright-Fisher code by Tom Butler (via John Barton). X-Git-Url: http://src.murakowski.org/?a=commitdiff_plain;h=40d1c174b1160c2d06c8f6e54729f0a1d17e038a;p=VirEvoDyn.git Wright-Fisher code by Tom Butler (via John Barton). --- diff --git a/Makefile b/Makefile index 73f864f..231fde7 100644 --- a/Makefile +++ b/Makefile @@ -1,15 +1,13 @@ -#SRCS = wf.cpp mt19937ar.cpp # binomial.cpp -SRCS = wf.cpp mt19937ar.cpp +SRCS = wf.cpp hamiltonian.cpp population.cpp virus.cpp EXECNAME = wf -OBJS = $(SRCS:.cpp=.o) CXX = c++ CFLAGS = $(DBGFLAG) -Wall -Wextra -pedantic -INCLUDES = -I/usr/local/pkg/gsl/gsl-1.15/include +INCLUDEDIR = -I/usr/local/pkg/gsl/gsl-1.15/include # = `gsl-config --cflags` -LFLAGS = $(DBGFLAG) -LIBS = /usr/local/pkg/gsl/gsl-1.15/lib/libgsl.a /usr/local/pkg/gsl/gsl-1.15/lib/libgslcblas.a -lm +LIBDIR = -L/usr/local/pkg/gsl/gsl-1.15/lib +LIBS = -lgsl -lgslcblas -lm # = -L/usr/local/pkg/gsl/gsl-1.15/lib -lgsl -lgslcblas -lm # = `gsl-config --libs` @@ -27,7 +25,8 @@ all: $(EXECNAME) # @echo done making $(EXECNAME) $(EXECNAME): $(SRCS) - $(CXX) $(SRCS) $(CFLAGS) $(LFLAGS) $(INCLUDES) $(LIBS) -o $(EXECNAME) + $(CXX) $(SRCS) $(CFLAGS) $(LIBDIR) $(INCLUDEDIR) -o $(EXECNAME) $(LIBS) +# $(CXX) $(SRCS) $(CFLAGS) $(LFLAGS) $(INCLUDES) $(LIBS) -o $(EXECNAME) # $(CXX) $(CFLAGS) $(INCLUDES) -o $(EXECNAME) $(OBJS) $(LFLAGS) $(LIBS) # equivalent way using 'old-fashioned suffix rules' would be .c.o: diff --git a/binomial.cpp b/binomial.cpp deleted file mode 100644 index e092602..0000000 --- a/binomial.cpp +++ /dev/null @@ -1,416 +0,0 @@ -/* - * borrowed from GSL (1.15) source code: - * randist/binomial_tpe.c - * randist/binomial.c - * cdf/binomial.c - */ - -#include -#include - -/* - * forward declarations - */ -double genrand_real2(void); - -typedef struct - { - //const gsl_rng_type * type; - void *state; - } -gsl_rng; - -// call my Mersenne Twister -// note: not thread-safe!!! uses static variable.... -double gsl_rng_uniform(const gsl_rng *r) -{ - gsl_rng blah = *r; // solve warning: unused parameter - return genrand_real2(); -} - -/* { - * start of: sys/pow_int.c - */ -double gsl_pow_uint(double x, unsigned int n) -{ - double value = 1.0; - - /* repeated squaring method - * returns 0.0^0 = 1.0, so continuous in x - */ - do { - if(n & 1) value *= x; /* for n odd */ - n >>= 1; - x *= x; - } while (n); - - return value; -} -/* end of: sys/pow_int.c - * } - */ - -/* { - * start of: randist/binomial_tpe.c - */ - -/* The binomial distribution has the form, - - f(x) = n!/(x!(n-x)!) * p^x (1-p)^(n-x) for integer 0 <= x <= n - = 0 otherwise - - This implementation follows the public domain ranlib function - "ignbin", the bulk of which is the BTPE (Binomial Triangle - Parallelogram Exponential) algorithm introduced in - Kachitvichyanukul and Schmeiser[1]. It has been translated to use - modern C coding standards. - - If n is small and/or p is near 0 or near 1 (specifically, if - n*min(p,1-p) < SMALL_MEAN), then a different algorithm, called - BINV, is used which has an average runtime that scales linearly - with n*min(p,1-p). - - But for larger problems, the BTPE algorithm takes the form of two - functions b(x) and t(x) -- "bottom" and "top" -- for which b(x) < - f(x)/f(M) < t(x), with M = floor(n*p+p). b(x) defines a triangular - region, and t(x) includes a parallelogram and two tails. Details - (including a nice drawing) are in the paper. - - [1] Kachitvichyanukul, V. and Schmeiser, B. W. Binomial Random - Variate Generation. Communications of the ACM, 31, 2 (February, - 1988) 216. - - Note, Bruce Schmeiser (personal communication) points out that if - you want very fast binomial deviates, and you are happy with - approximate results, and/or n and n*p are both large, then you can - just use gaussian estimates: mean=n*p, variance=n*p*(1-p). - - This implementation by James Theiler, April 2003, after obtaining - permission -- and some good advice -- from Drs. Kachitvichyanukul - and Schmeiser to use their code as a starting point, and then doing - a little bit of tweaking. - - Additional polishing for GSL coding standards by Brian Gough. */ - -#define SMALL_MEAN 14 /* If n*p < SMALL_MEAN then use BINV - algorithm. The ranlib - implementation used cutoff=30; but - on my computer 14 works better */ - -#define BINV_CUTOFF 110 /* In BINV, do not permit ix too large */ - -#define FAR_FROM_MEAN 20 /* If ix-n*p is larger than this, then - use the "squeeze" algorithm. - Ranlib used 20, and this seems to - be the best choice on my machine as - well */ - -#define LNFACT(x) gsl_sf_lnfact(x) - -inline static double -Stirling (double y1) -{ - double y2 = y1 * y1; - double s = - (13860.0 - - (462.0 - (132.0 - (99.0 - 140.0 / y2) / y2) / y2) / y2) / y1 / 166320.0; - return s; -} - -unsigned int -gsl_ran_binomial (const gsl_rng * rng, double p, unsigned int n) -{ - int ix; /* return value */ - int flipped = 0; - double q, s, np; - - if (n == 0) - return 0; - - if (p > 0.5) - { - p = 1.0 - p; /* work with small p */ - flipped = 1; - } - - q = 1 - p; - s = p / q; - np = n * p; - - /* Inverse cdf logic for small mean (BINV in K+S) */ - - if (np < SMALL_MEAN) - { - double f0 = gsl_pow_uint (q, n); /* f(x), starting with x=0 */ - - while (1) - { - /* This while(1) loop will almost certainly only loop once; but - * if u=1 to within a few epsilons of machine precision, then it - * is possible for roundoff to prevent the main loop over ix to - * achieve its proper value. following the ranlib implementation, - * we introduce a check for that situation, and when it occurs, - * we just try again. - */ - - double f = f0; - double u = gsl_rng_uniform (rng); - - for (ix = 0; ix <= BINV_CUTOFF; ++ix) - { - if (u < f) - goto Finish; - u -= f; - /* Use recursion f(x+1) = f(x)*[(n-x)/(x+1)]*[p/(1-p)] */ - f *= s * (n - ix) / (ix + 1); - } - - /* It should be the case that the 'goto Finish' was encountered - * before this point was ever reached. But if we have reached - * this point, then roundoff has prevented u from decreasing - * all the way to zero. This can happen only if the initial u - * was very nearly equal to 1, which is a rare situation. In - * that rare situation, we just try again. - * - * Note, following the ranlib implementation, we loop ix only to - * a hardcoded value of SMALL_MEAN_LARGE_N=110; we could have - * looped to n, and 99.99...% of the time it won't matter. This - * choice, I think is a little more robust against the rare - * roundoff error. If n>LARGE_N, then it is technically - * possible for ix>LARGE_N, but it is astronomically rare, and - * if ix is that large, it is more likely due to roundoff than - * probability, so better to nip it at LARGE_N than to take a - * chance that roundoff will somehow conspire to produce an even - * larger (and more improbable) ix. If n= SMALL_MEAN, we invoke the BTPE algorithm */ - - int k; - - double ffm = np + p; /* ffm = n*p+p */ - int m = (int) ffm; /* m = int floor[n*p+p] */ - double fm = m; /* fm = double m; */ - double xm = fm + 0.5; /* xm = half integer mean (tip of triangle) */ - double npq = np * q; /* npq = n*p*q */ - - /* Compute cumulative area of tri, para, exp tails */ - - /* p1: radius of triangle region; since height=1, also: area of region */ - /* p2: p1 + area of parallelogram region */ - /* p3: p2 + area of left tail */ - /* p4: p3 + area of right tail */ - /* pi/p4: probability of i'th area (i=1,2,3,4) */ - - /* Note: magic numbers 2.195, 4.6, 0.134, 20.5, 15.3 */ - /* These magic numbers are not adjustable...at least not easily! */ - - double p1 = floor (2.195 * sqrt (npq) - 4.6 * q) + 0.5; - - /* xl, xr: left and right edges of triangle */ - double xl = xm - p1; - double xr = xm + p1; - - /* Parameter of exponential tails */ - /* Left tail: t(x) = c*exp(-lambda_l*[xl - (x+0.5)]) */ - /* Right tail: t(x) = c*exp(-lambda_r*[(x+0.5) - xr]) */ - - double c = 0.134 + 20.5 / (15.3 + fm); - double p2 = p1 * (1.0 + c + c); - - double al = (ffm - xl) / (ffm - xl * p); - double lambda_l = al * (1.0 + 0.5 * al); - double ar = (xr - ffm) / (xr * q); - double lambda_r = ar * (1.0 + 0.5 * ar); - double p3 = p2 + c / lambda_l; - double p4 = p3 + c / lambda_r; - - double var, accept; - double u, v; /* random variates */ - - TryAgain: - - /* generate random variates, u specifies which region: Tri, Par, Tail */ - u = gsl_rng_uniform (rng) * p4; - v = gsl_rng_uniform (rng); - - if (u <= p1) - { - /* Triangular region */ - ix = (int) (xm - p1 * v + u); - goto Finish; - } - else if (u <= p2) - { - /* Parallelogram region */ - double x = xl + (u - p1) / c; - v = v * c + 1.0 - fabs (x - xm) / p1; - if (v > 1.0 || v <= 0.0) - goto TryAgain; - ix = (int) x; - } - else if (u <= p3) - { - /* Left tail */ - ix = (int) (xl + log (v) / lambda_l); - if (ix < 0) - goto TryAgain; - v *= ((u - p2) * lambda_l); - } - else - { - /* Right tail */ - ix = (int) (xr - log (v) / lambda_r); - if (ix > (double) n) - goto TryAgain; - v *= ((u - p3) * lambda_r); - } - - /* At this point, the goal is to test whether v <= f(x)/f(m) - * - * v <= f(x)/f(m) = (m!(n-m)! / (x!(n-x)!)) * (p/q)^{x-m} - * - */ - - /* Here is a direct test using logarithms. It is a little - * slower than the various "squeezing" computations below, but - * if things are working, it should give exactly the same answer - * (given the same random number seed). */ - -#ifdef DIRECT - var = log (v); - - accept = - LNFACT (m) + LNFACT (n - m) - LNFACT (ix) - LNFACT (n - ix) - + (ix - m) * log (p / q); - -#else /* SQUEEZE METHOD */ - - /* More efficient determination of whether v < f(x)/f(M) */ - - k = abs (ix - m); - - if (k <= FAR_FROM_MEAN) - { - /* - * If ix near m (ie, |ix-m| ix) - { - int i; - for (i = ix + 1; i <= m; i++) - { - f /= (g / i - s); - } - } - - accept = f; - } - else - { - /* If ix is far from the mean m: k=ABS(ix-m) large */ - - var = log (v); - - if (k < npq / 2 - 1) - { - /* "Squeeze" using upper and lower bounds on - * log(f(x)) The squeeze condition was derived - * under the condition k < npq/2-1 */ - double amaxp = - k / npq * ((k * (k / 3.0 + 0.625) + (1.0 / 6.0)) / npq + 0.5); - double ynorm = -(k * k / (2.0 * npq)); - if (var < ynorm - amaxp) - goto Finish; - if (var > ynorm + amaxp) - goto TryAgain; - } - - /* Now, again: do the test log(v) vs. log f(x)/f(M) */ - -#if USE_EXACT - /* This is equivalent to the above, but is a little (~20%) slower */ - /* There are five log's vs three above, maybe that's it? */ - - accept = LNFACT (m) + LNFACT (n - m) - - LNFACT (ix) - LNFACT (n - ix) + (ix - m) * log (p / q); - -#else /* USE STIRLING */ - /* The "#define Stirling" above corresponds to the first five - * terms in asymptoic formula for - * log Gamma (y) - (y-0.5)log(y) + y - 0.5 log(2*pi); - * See Abramowitz and Stegun, eq 6.1.40 - */ - - /* Note below: two Stirling's are added, and two are - * subtracted. In both K+S, and in the ranlib - * implementation, all four are added. I (jt) believe that - * is a mistake -- this has been confirmed by personal - * correspondence w/ Dr. Kachitvichyanukul. Note, however, - * the corrections are so small, that I couldn't find an - * example where it made a difference that could be - * observed, let alone tested. In fact, define'ing Stirling - * to be zero gave identical results!! In practice, alv is - * O(1), ranging 0 to -10 or so, while the Stirling - * correction is typically O(10^{-5}) ...setting the - * correction to zero gives about a 2% performance boost; - * might as well keep it just to be pendantic. */ - - { - double x1 = ix + 1.0; - double w1 = n - ix + 1.0; - double f1 = fm + 1.0; - double z1 = n + 1.0 - fm; - - accept = xm * log (f1 / x1) + (n - m + 0.5) * log (z1 / w1) - + (ix - m) * log (w1 * p / (x1 * q)) - + Stirling (f1) + Stirling (z1) - Stirling (x1) - Stirling (w1); - } -#endif -#endif - } - - - if (var <= accept) - { - goto Finish; - } - else - { - goto TryAgain; - } - } - -Finish: - - return (flipped) ? (n - ix) : (unsigned int)ix; -} - -unsigned int -gsl_ran_binomial_tpe (const gsl_rng * rng, double p, unsigned int n) -{ - return gsl_ran_binomial (rng, p, n); -} - -/* end of: randist/binomial_tpe.c - * } - */ - diff --git a/hamiltonian.cpp b/hamiltonian.cpp new file mode 100644 index 0000000..b5b94ba --- /dev/null +++ b/hamiltonian.cpp @@ -0,0 +1,291 @@ +#include +#include +#include +#include +#include +#include +#include + +#include "hamiltonian.h" + + +//Constructor for loading couplings from John Barton's Ising Inversion code +//Code for this function derived from John Barton's code + +Hamiltonian::Hamiltonian(std::string &FILENAME) { + + this->bh=1.0; + this->bJ=1.0; + + FILE* input; + input = fopen(FILENAME.c_str(),"r"); + + typedef std::vector Key; + std::map JIndex; + char o; + int site=0; + + fscanf(input,"%d",&size); + + while (fscanf(input,"%d",&site)==1) { + + Key inputKey(2,site); + while (fscanf(input,"%c",&o)==1) { + + if (o==';') break; + + else { + + int neighborSite=0; + double neighborJ=0; + fscanf(input,"%d %c %lf",&neighborSite,&o,&neighborJ); + + if (neighborSite==site) { + + inputKey[1]=site; + JIndex[inputKey]=neighborJ; + + } + else { + + inputKey[1]=neighborSite; + JIndex[inputKey]=neighborJ; + + } + + } + + } + + } + + fclose(input); + Key inputKey(2,0); + J.resize(size); + + for(int i=0;ibh=1.0; + this->bJ=1.0; + + FILE* input; + input = fopen(FILENAME.c_str(),"r"); + + typedef std::vector Key; + std::map JIndex; + char o; + int site=0; + + fscanf(input,"%d",&size); + + while (fscanf(input,"%d",&site)==1) { + + Key inputKey(2,site); + while (fscanf(input,"%c",&o)==1) { + + if (o==';') break; + + else { + + int neighborSite=0; + double neighborJ=0; + fscanf(input,"%d %c %lf",&neighborSite,&o,&neighborJ); + + if (neighborSite==site) { + + inputKey[1]=site; + JIndex[inputKey]=neighborJ; + + } + else { + + inputKey[1]=neighborSite; + JIndex[inputKey]=neighborJ; + + } + + } + + } + + } + + fclose(input); + Key inputKey(2,0); + J.resize(size); + + for(int i=0;i &mutated_sites) const { + + double efield = 0.0; + double ecoupling = 0.0; + std::set::iterator iter1; + std::set::iterator iter2; + + for (iter1=mutated_sites.begin();iter1!=mutated_sites.end();++iter1) { + + efield -= J[*iter1][*iter1]; + + iter2 = iter1; + ++iter2; + + for (;iter2!=mutated_sites.end();iter2++) ecoupling -= J[*iter1][*iter2]; + + } + + // Check for mismatch between targeted epitope and current sequence + + return ((efield * bh) + (ecoupling * bJ)); + +} + + +// Set the targeted epitope + +void EpitopeHamiltonian::set_epitope(const std::vector &eWT, const std::vector &eMut, double d) { + + penalty=d; + epitopeWT=eWT; + epitopeMut=eMut; + +} + + +// Return true if the virus has escaped from immune pressure + +bool EpitopeHamiltonian::escaped(const Virus &v) { + + return EpitopeHamiltonian::escaped(v.mutated_sites); + +} + + +// Return true if the sequence has escaped from immune pressure + +bool EpitopeHamiltonian::escaped(const std::set &mutated_sites) const { + + bool escape=false; + + for (int i=0;i &mutated_sites) const { + + double efield = 0.0; + double ecoupling = 0.0; + std::set::iterator iter1; + std::set::iterator iter2; + + for (iter1=mutated_sites.begin();iter1!=mutated_sites.end();++iter1) { + + efield -= J[*iter1][*iter1]; + + iter2 = iter1; + ++iter2; + + for (;iter2!=mutated_sites.end();iter2++) ecoupling -= J[*iter1][*iter2]; + + } + + // Check for mismatch between targeted epitope and current sequence + + if (escaped(mutated_sites)) return ((efield * bh) + (ecoupling * bJ)); + else return ((efield * bh) + (ecoupling * bJ) + (penalty * bh)); + +} + + + + + diff --git a/hamiltonian.h b/hamiltonian.h new file mode 100644 index 0000000..90e2978 --- /dev/null +++ b/hamiltonian.h @@ -0,0 +1,61 @@ +#ifndef HAMILTONIAN_H +#define HAMILTONIAN_H + +#include +#include +#include + +#include "virus.h" + + +typedef std::vector > AdjacencyList; +typedef std::vector > Coupling; + + +class Virus; + + +class Hamiltonian { + +public: + + int size; + double bh; // fields "inverse temperature" multiplier + double bJ; // couplings "inverse temperature" multiplier + Coupling J; + AdjacencyList Graph; + + Hamiltonian(std::string &FILENAME); + Hamiltonian() { } + virtual ~Hamiltonian() { } + + virtual bool escaped(const Virus &v) { return false; } + virtual bool escaped(const std::set &mutated_sites) const { return false; } + virtual double get_energy(const std::set &mutated_sites) const; + + void set_temp(double x) { bh=x; bJ=x; } + void set_temp(double x, double y) { bh=x; bJ=y; } + +}; + + +class EpitopeHamiltonian : public Hamiltonian { + +public: + + double penalty; + std::vector epitopeWT; + std::vector epitopeMut; + + EpitopeHamiltonian(std::string &FILENAME); + + void set_epitope(const std::vector &eWT, const std::vector &eMut, double d); + + bool escaped(const Virus &v); + bool escaped(const std::set &mutated_sites) const; + double get_energy(const std::set &mutated_sites) const; + +}; + + +#endif diff --git a/mt19937ar.cpp b/mt19937ar.cpp deleted file mode 100644 index c05cf87..0000000 --- a/mt19937ar.cpp +++ /dev/null @@ -1,250 +0,0 @@ -// SOURCE DOWNLOADED FROM: http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/MT2002/CODES/mt19937ar.c -// MORE INFO: http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/MT2002/emt19937ar.html - -/* - A C-program for MT19937, with initialization improved 2002/1/26. - Coded by Takuji Nishimura and Makoto Matsumoto. - - Before using, initialize the state by using init_genrand(seed) - or init_by_array(init_key, key_length). - - Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura, - All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - 1. Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - 3. The names of its contributors may not be used to endorse or promote - products derived from this software without specific prior written - permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - - Any feedback is very welcome. - http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/emt.html - email: m-mat @ math.sci.hiroshima-u.ac.jp (remove space) -*/ - -#include -#include - -/* Period parameters */ -#define N 624 -#define M 397 -#define MATRIX_A 0x9908b0dfUL /* constant vector a */ -#define UPPER_MASK 0x80000000UL /* most significant w-r bits */ -#define LOWER_MASK 0x7fffffffUL /* least significant r bits */ - -static unsigned long mt[N]; /* the array for the state vector */ -static int mti=N+1; /* mti==N+1 means mt[N] is not initialized */ - -/* initializes mt[N] with a seed */ -void init_genrand(unsigned long s) -{ - mt[0]= s & 0xffffffffUL; - for (mti=1; mti> 30)) + mti); - /* See Knuth TAOCP Vol2. 3rd Ed. P.106 for multiplier. */ - /* In the previous versions, MSBs of the seed affect */ - /* only MSBs of the array mt[]. */ - /* 2002/01/09 modified by Makoto Matsumoto */ - mt[mti] &= 0xffffffffUL; - /* for >32 bit machines */ - } -} - -/* initialize by an array with array-length */ -/* init_key is the array for initializing keys */ -/* key_length is its length */ -/* slight change for C++, 2004/2/26 */ -void init_by_array(unsigned long init_key[], int key_length) -{ - int i, j, k; - init_genrand(19650218UL); - i=1; j=0; - k = (N>key_length ? N : key_length); - for (; k; k--) { - mt[i] = (mt[i] ^ ((mt[i-1] ^ (mt[i-1] >> 30)) * 1664525UL)) - + init_key[j] + j; /* non linear */ - mt[i] &= 0xffffffffUL; /* for WORDSIZE > 32 machines */ - i++; j++; - if (i>=N) { mt[0] = mt[N-1]; i=1; } - if (j>=key_length) j=0; - } - for (k=N-1; k; k--) { - mt[i] = (mt[i] ^ ((mt[i-1] ^ (mt[i-1] >> 30)) * 1566083941UL)) - - i; /* non linear */ - mt[i] &= 0xffffffffUL; /* for WORDSIZE > 32 machines */ - i++; - if (i>=N) { mt[0] = mt[N-1]; i=1; } - } - - mt[0] = 0x80000000UL; /* MSB is 1; assuring non-zero initial array */ -} - -/* generates a random number on [0,0xffffffff]-interval */ -unsigned long genrand_int32(void) -{ - unsigned long y; - static unsigned long mag01[2]={0x0UL, MATRIX_A}; - /* mag01[x] = x * MATRIX_A for x=0,1 */ - - if (mti >= N) { /* generate N words at one time */ - int kk; - - if (mti == N+1) /* if init_genrand() has not been called, */ - init_genrand(5489UL); /* a default initial seed is used */ - - for (kk=0;kk> 1) ^ mag01[y & 0x1UL]; - } - for (;kk> 1) ^ mag01[y & 0x1UL]; - } - y = (mt[N-1]&UPPER_MASK)|(mt[0]&LOWER_MASK); - mt[N-1] = mt[M-1] ^ (y >> 1) ^ mag01[y & 0x1UL]; - - mti = 0; - } - - y = mt[mti++]; - - /* Tempering */ - y ^= (y >> 11); - y ^= (y << 7) & 0x9d2c5680UL; - y ^= (y << 15) & 0xefc60000UL; - y ^= (y >> 18); - - return y; -} - -/* generates a random number on [0,0x7fffffff]-interval */ -long genrand_int31(void) -{ - return (long)(genrand_int32()>>1); -} - -/* generates a random number on [0,1]-real-interval */ -double genrand_real1(void) -{ - return genrand_int32()*(1.0/4294967295.0); - /* divided by 2^32-1 */ -} - -/* generates a random number on [0,1)-real-interval */ -double genrand_real2(void) -{ - return genrand_int32()*(1.0/4294967296.0); - /* divided by 2^32 */ -} - -/* generates a random number on (0,1)-real-interval */ -double genrand_real3(void) -{ - return (((double)genrand_int32()) + 0.5)*(1.0/4294967296.0); - /* divided by 2^32 */ -} - -/* generates a random number on [0,1) with 53-bit resolution*/ -double genrand_res53(void) -{ - unsigned long a=genrand_int32()>>5, b=genrand_int32()>>6; - return(a*67108864.0+b)*(1.0/9007199254740992.0); -} -/* These real versions are due to Isaku Wada, 2002/01/09 added */ - -/* -int main(void) -{ - int i; - unsigned long init[4]={0x123, 0x234, 0x345, 0x456}, length=4; - init_by_array(init, length); - printf("1000 outputs of genrand_int32()\n"); - for (i=0; i<1000; i++) { - printf("%10lu ", genrand_int32()); - if (i%5==4) printf("\n"); - } - printf("\n1000 outputs of genrand_real2()\n"); - for (i=0; i<1000; i++) { - printf("%10.8f ", genrand_real2()); - if (i%5==4) printf("\n"); - } - return 0; -} -*/ -#undef N -#undef M -#undef MATRIX_A -#undef UPPER_MASK -#undef LOWER_MASK - - - -#define MT_RAND_MAX 0xffffffffUL /* 2**32 - 1 */ - -/* - * generate uniform 0 <= x < N, i.e. interval [0,N) - * int x = (int)((double)rand() / (((double)RAND_MAX + (double)1.0) / N)) - */ - -/* - * Returns a random integer in the range [0..limit) - * see http://stackoverflow.com/questions/2999075/generate-a-random-number-within-range/2999130#2999130 - * and http://stackoverflow.com/questions/10219355/does-n-rand-rand-max-make-a-skewed-random-number-distribution/10219422#10219422 - * alternate at http://stackoverflow.com/questions/11641629/generating-a-uniform-distribution-of-integers-in-c/11658168#11658168 - * also http://c-faq.com/lib/randrange.html - * and GSL source: http://bzr.savannah.gnu.org/lh/gsl/trunk/annotate/head:/rng/gsl_rng.h - */ -unsigned int rand_lim(unsigned int limit) { - unsigned int divisor = MT_RAND_MAX/limit; - unsigned int retval; - do { - retval = genrand_int32() / divisor; - } while (retval >= limit); - return retval; -} - -#undef MT_RAND_MAX - -/* generate a random number generator seed from /dev/urandom */ -// borrowed from SSC src/runtime/ssc_rts.c -unsigned ising_sim_random_seed() { - unsigned random_seed; - FILE *rng = fopen("/dev/urandom", "r"); - if (rng == NULL) { - fprintf(stderr, - "error: cannot read /dev/urandom randomness generator\n"); - exit(1); - } - size_t num_random_read = fread(&random_seed, sizeof(random_seed), 1, rng); - if (num_random_read != 1) { - fprintf(stderr, - "error: cannot read /dev/urandom randomness generator\n"); - exit(1); - } - fclose(rng); - return random_seed; -} - diff --git a/population.cpp b/population.cpp new file mode 100644 index 0000000..a222a87 --- /dev/null +++ b/population.cpp @@ -0,0 +1,281 @@ +#include +#include +#include +#include +#include +#include +#include + +#include "population.h" + + +//Constructor that assembles a population of N viruses of wild type + +Population::Population(const Hamiltonian &H, unsigned int N, double mu) { + + Virus V(H, mu); + pop[V] = N; + this->N = N; + this->mu = mu; + + p = 1.0-pow((1.0-V.mu),H.size); + Eavg = V.energy; + +} + + +//Constructor that assembles a population of N viruses given starting population fractions + +Population::Population(const Hamiltonian &H, unsigned int N, double mu, const std::vector > &initPop, const std::vector &initFrac) { + + for (int i=0;iN = N; + this->mu = mu; + + p = 1.0-pow((1.0-mu),H.size); + Eavg /= (double) N; + +} + + +//Step population forward a generation. + +void Population::next_generation(const Hamiltonian &H, gsl_rng* r, bool useRelative, bool useVerbose) { + + mutate_population(H,r); + + unsigned int total_deaths = 0; + unsigned int num_survive = 0; + double new_Eavg = 0.0; + + virus_map::iterator iter=pop.begin(); + + for(;iter!=pop.end();++iter){ + + if (useRelative) num_survive = gsl_ran_binomial(r,iter->first.survival(Eavg),iter->second); + else num_survive = gsl_ran_binomial(r,iter->first.survival(),iter->second); + + total_deaths += iter->second - num_survive; + iter->second = num_survive; + + new_Eavg += iter->first.energy * num_survive; + + // Report survival + + if (useVerbose) { + + std::cout << "survival probability " << ((useRelative) ? iter->first.survival(Eavg) : iter->first.survival()) + << " number survived " << num_survive << " total number " + << iter->second << std::endl; + + } + + } + + if (useVerbose) { std::cout << "checkpoint, " << "total_deaths = " << total_deaths << std::endl; } + + //delete zero population strains + + iter=pop.begin(); + + while (iter!=pop.end()) { + + if (iter->second==0) pop.erase(iter++); + else ++iter; + + } + + unsigned int ran; + unsigned int selector; + unsigned int pop_size=compute_population_size(); + + if (useVerbose) print_population_size(); + + virus_map prev_gen = pop; + + for (unsigned int i=0; isecond; + + while (selectorsecond; + + } + + pop[iter->first]++; + + new_Eavg += iter->first.energy; + + } + + Eavg = new_Eavg/((double) N); + +} + + +// Output population to file + +void Population::write_population(std::string filename) { + + std::ofstream fout; + fout.open(filename.c_str()); + + std::set ms; + + for (virus_map::iterator iter=pop.begin(); iter!=pop.end(); ++iter) { + + fout << iter->second << '\t'; + fout << iter->first.energy << '\t'; + + ms = iter->first.mutated_sites; + + for (std::set::iterator ms_iter=ms.begin(); ms_iter!=ms.end(); ++ms_iter) { + + fout << *ms_iter << '\t'; + + } + + fout << std::endl; + + } + + fout.close(); + +} + + +// Output population to file + +void Population::write_population(FILE *output, unsigned int generation) { + + fprintf(output,"%d\n",generation); + + std::set ms; + + for (virus_map::iterator iter=pop.begin(); iter!=pop.end(); ++iter) { + + fprintf(output,"%d\t%.6e",iter->second,iter->first.energy); + + ms = iter->first.mutated_sites; + + for (std::set::iterator ms_iter=ms.begin(); ms_iter!=ms.end(); ++ms_iter) fprintf(output,"\t%d",*ms_iter); + fprintf(output,"\n"); + + } + + fflush(output); + +} + + +// Compute the total population size + +unsigned int Population::compute_population_size() { + + unsigned int i=0; + + for (virus_map::iterator iter=pop.begin(); iter!=pop.end(); ++iter) i+=iter->second; + + return i; + +} + + +// Print population size to standard out + +void Population::print_population_size() { + + std::cout << "The total population size is " << compute_population_size() << std::endl; + +} + + +//Mutate every virus in the population + +void Population::mutate_population(const Hamiltonian &H, gsl_rng* r) { + + virus_map prev_generation = pop; + unsigned int num_mutate; + + for(virus_map::iterator iter=prev_generation.begin(); iter!=prev_generation.end(); ++iter) { + + num_mutate=gsl_ran_binomial(r,p,iter->second); + + pop[iter->first]=pop[iter->first]-num_mutate; + + if (pop[iter->first]<=0) pop.erase(iter->first); + + for (unsigned int i=0; ifirst.mutated_sites); + V.mutate(H,r); + + if (pop.count(V)==0) pop[V] = 1; + else pop[V] += 1; + + } + + } + +} + + +// Compute the number of escaped viruses in the population + +unsigned int Population::compute_num_escaped(Hamiltonian &H) { + + unsigned int i=0; + + for (virus_map::iterator iter=pop.begin(); iter!=pop.end(); ++iter) { if (H.escaped(iter->first)) i+=iter->second; } + + return i; + +} + + +// Check whether most of the population has escaped from immune pressure + +bool Population::escaped(Hamiltonian &H) { + + if (2*compute_num_escaped(H)>N) return true; + else return false; + +} + + +// Compute the number of escaped viruses in the population + +unsigned int Population::escape_variant(Hamiltonian &H, std::set &mutant) { + + unsigned int i=0; + + for (virus_map::iterator iter=pop.begin(); iter!=pop.end(); ++iter) { + + if (H.escaped(iter->first) && (iter->second)>i) { + + i=iter->second; + mutant=iter->first.mutated_sites; + + } + + } + + return i; + +} + + + + + diff --git a/population.h b/population.h new file mode 100644 index 0000000..7e7edd7 --- /dev/null +++ b/population.h @@ -0,0 +1,41 @@ +#ifndef POPULATION_H +#define POPULATION_H + +#include +#include +#include +#include + +#include "virus.h" + + +typedef std::map virus_map; + + +class Population{ + +public: + + unsigned int N; // Population size + double p; // Probability that a sequence has one or more mutations + double Eavg; // Average energy of the sequence population + double mu; // Mutation rate + virus_map pop; + + Population(const Hamiltonian &H, unsigned int N, double mu); + Population(const Hamiltonian &H, unsigned int N, double mu, const std::vector > &initPop, const std::vector &initFrac); + + void next_generation(const Hamiltonian &H, gsl_rng* r, bool useRelative, bool useVerbose); + void write_population(std::string filepath); + void write_population(FILE *output, unsigned int generation); + unsigned int compute_population_size(); + void print_population_size(); + void mutate_population(const Hamiltonian &H, gsl_rng* r); + + unsigned int compute_num_escaped(Hamiltonian &H); + bool escaped(Hamiltonian &H); + unsigned int escape_variant(Hamiltonian &H, std::set &mutant); + +}; + +#endif diff --git a/virus.cpp b/virus.cpp new file mode 100644 index 0000000..007a5d1 --- /dev/null +++ b/virus.cpp @@ -0,0 +1,107 @@ +#include +#include +#include +#include +#include +#include + +#include "virus.h" + + +//Constuct a virus object of wildtype + +Virus::Virus(const Hamiltonian &H, double mu) { + + this->mutated_sites.clear(); + this->mu=mu; + L=H.size; + energy=0; + +} + + +//Construct a virus and compute its energy. + +Virus::Virus(const Hamiltonian &H, double mu, const std::set &mutated_sites) { + + this->mutated_sites=mutated_sites; + this->mu=mu; + L=H.size; + update_energy(H); + +} + + +//Print key numerical parameters of the object to the terminal for diagnostics. + +void Virus::print_parameters() { + + std::cout << "mu = " << mu << std::endl; + std::cout << "energy = " << energy << std::endl; + +} + + +//Mutate the entire virus. Takes a Hamiltonian object and a pointer +//to an instance of a gsl_rng random number generator + +void Virus::mutate(const Hamiltonian &H, gsl_rng* r) { + + unsigned int n=gsl_ran_binomial(r,mu,L); + + while (n<1) n=gsl_ran_binomial(r,mu,L); + + for (int i=0;i +#include +#include + +#include "hamiltonian.h" + + +class Hamiltonian; + + +class Virus { + +public: + + double energy; + std::set mutated_sites; + + Virus(const Hamiltonian &H, double mu); + Virus(const Hamiltonian &H, double mu, const std::set &mutated_sites); + + void print_parameters(); + void mutate(const Hamiltonian &H, gsl_rng* r); + double survival() const; + double survival(double Eavg) const; + + friend class Population; + +private: + + double mu; + unsigned int L; + + void update_energy(const Hamiltonian &H); + +}; + + +bool operator<(const Virus& lhs, const Virus& rhs); + +#endif + diff --git a/wf.cpp b/wf.cpp index 6af0fe9..8c14ef8 100644 --- a/wf.cpp +++ b/wf.cpp @@ -1,101 +1,210 @@ - #include -#include - -// STL (Standard Template Library) -#include -#include -#include // std::copy -#include // std::ostream_iterator +#include +#include +#include +#include +#include -// GSL (GNU Scientific Library) #include -#include - -// mt19937ar.cpp forward declarations -void init_genrand(unsigned long); -unsigned long genrand_int32(void); -unsigned ising_sim_random_seed(); -/* -// binomial.cpp forward declarations -typedef struct { void *state; } gsl_rng; -unsigned int gsl_ran_binomial(const gsl_rng*, double, unsigned int); -*/ - -typedef unsigned int spin_t; -typedef std::set mutset_t; // virus is defined by set of mutations -typedef std::map pop_t; // number of each virus species +#include "hamiltonian.h" +#include "population.h" +#include "virus.h" +#include "wf.h" + + +// Run the program + +void run(RunParameters &r) { + + printf("\n"); + + // Initialize RNG and set initial state, if importing from file + + static gsl_rng *rnd = gsl_rng_alloc(gsl_rng_taus); //pointer to random number generator state + + srand((unsigned)time(0)); + gsl_rng_set(rnd,rand()); + + r.setFiles(); + //FILE *popout=fopen(r.trajectoryOutfile.c_str(),"w"); + FILE *supout=fopen(r.supplementaryOutfile.c_str(),"w"); + + if (r.importState) importState(r); + + + // Run (w/ targeted epitope) + + if (r.useEpitope) { + + for (unsigned int n=0;n esc_var; + unsigned int esc_num=P.escape_variant(H, esc_var); + + fprintf(supout,"%d\t%d",i,esc_num); + for (std::set::iterator iter=esc_var.begin(); iter!=esc_var.end(); ++iter) fprintf(supout,"\t%d",*iter); + fprintf(supout,"\n"); + + fflush(supout); + + } + + } + + + // Run (w/out targeted epitope) + + else { + + Hamiltonian H(r.couplingsInfile); + H.set_temp(r.bh, r.bJ); + Population P(H, r.n, r.mu); + + for (unsigned int i=0; i -std::ostream& operator<<(std::ostream& os, const std::set &v) -{ - typename std::set::iterator i = v.begin(), end = v.end(); - os << *i; // print first element - for (++i; i != end; ++i) // then with comma as prefix - os << ',' << *i; - return os; } -int main(int argc, char **argv) -{ - argc++; argv++; // avoid "warning: unused parameter" - - gsl_rng *rng = gsl_rng_alloc(gsl_rng_mt19937); - gsl_rng_set(rng,ising_sim_random_seed()); - - unsigned int NUMSITES = 100; - - pop_t Pop; - - spin_t muts[] = {3,0,5}; - mutset_t s(muts,muts+3); - /* - mutset_t s; - for (int i = 0; i < 3; ++i) - s.insert(muts[i]); - */ - std::cout << gsl_ran_binomial(rng,0.1,NUMSITES) << std::endl; +// Import initial state from a state file + +void importState(RunParameters &r) { + + FILE *input=fopen(r.stateInfile.c_str(),"r"); + + char o; + double frac; + unsigned int site; + + // Import initial state of the population + + while (fscanf(input,"%le",&frac)==1) { + + r.initFrac.push_back(frac); + + std::set tempSet; + + while (fscanf(input,"%c",&o)==1) { + + if (o=='\n' || o==';') break; + + else { + + fscanf(input,"%u",&site); + tempSet.insert(site); + + } + + } + + r.initPop.push_back(tempSet); + + if (o==';') break; + + } + + // Import targeted epitope + + while (fscanf(input,"%u",&site)==1) { + + r.eWT.push_back(site); + + fscanf(input,"%c",&o); + if (o=='\n') break; + + } + + while (fscanf(input,"%u",&site)==1) { + + r.eMut.push_back(site); + + fscanf(input,"%c",&o); + if (o=='\n') break; + + } + + r.useEpitope=true; - Pop[s] = 100; - Pop[s] += 50; +} -#if 0 - for(pop_t::iterator vi = Pop.begin(); vi != Pop.end(); ++vi) { - mutset_t v = vi->first; - /* - // appends "," to each element - std::ostream_iterator output(std::cout, ","); - std::copy(v.begin(),v.end(), output); - */ - /* - // appends "," to each element - for(mutset_t::iterator i = v.begin(); i != v.end(); ++i) { - std::cout << (*i) << ","; - } - */ - /* - // prefix every element (except the first) with "," - mutset_t::iterator i = v.begin(), end = v.end(); - std::cout << *i; - for (++i; i != end; ++i) - std::cout << ',' << *i; - std::cout << ' '; - */ - ///* - // proper printing function - std::cout << v << ' '; - //*/ +/* + * Command line rules: + * + * -d (string) working directory + * -i (string) input couplings file + * -o (string) output file stem + * -s (string) input state file, containing initial population fraction and targeted epitope + * -n (int/double) population size + * -g (int/double) number of generations + * -mu (double) mutation rate + * -b (double) "inverse temperature" multiplier + * -e (int) (int) targeted epitope spans sites [e#1, ..., e#2], assuming all WT + * -r flag to use relative energy condition for survival probability + * -esc flag to run until escape observed (or max number of generations reached) + * -v flag for verbose output + * + */ - std::cout << vi->second << std::endl; +int main(int argc, char *argv[]) { + + RunParameters r; + + // Process command line input + + for (int i=1;i +#include +#include +#include +#include +#include +#include +#include + + +// STRING MANIPULATION + +// Converts generic to string + +template + +inline std::string tostring (const T& t) { + + std::stringstream ss; + ss << t; + return ss.str(); + +} + + +// Converts a string to an integer + +inline int strtoint(const std::string &s) { + + std::istringstream i(s); + int x; + + if (!(i >> x)) printf("String to integer conversion failed!"); + return x; + +} + + +// Converts a string to an unsigned integer + +inline int strtouint(const std::string &s) { + + std::istringstream i(s); + unsigned int x; + + if (!(i >> x)) printf("String to unsigned integer conversion failed!"); + return x; + +} + + +// Converts a string to a double + +inline double strtodouble(const std::string &s) { + + std::istringstream i(s); + double x; + + if (!(i >> x)) printf("String to double conversion failed!"); + return x; + +} + + +// PROGRAM SETTINGS + +// This class holds the parameters needed for running the simulation + +class RunParameters { + +public: + + std::string directory; // Path to the directory where the inut file is located + // Output will also be sent to this directory + std::string infile; // Input file from which couplings are to be read + std::string outfile; // Output file (prefix) where data is to be written + std::string statefile; // Input state file which contains initial population fractions + // and targeted epitope + + unsigned int n; // Population size + unsigned int g; // Number of generations + unsigned int num_runs; // Number of times to run the simulation for gathering overall statistics + double mu; // Mutation rate + double bh; // Field "inverse temperature" multiplier + double bJ; // Coupling "inverse temperature" multiplier + int estart; // Epitope start position + int eend; // Epitope end position + bool runUntilEscape; // If true, run the simulation until the population escapes + bool importState; // If true, import state from a state file + bool useEpitope; // Include selective pressure on an epitope + bool useRelative; // If true, use relative energy for survival probability + bool useVerbose; // If true, print extra information while program is running + + std::vector > initPop; // Initial population sequences + std::vector initFrac; // Initial population fraction + std::vector eWT; // Sites that are consensus (WT) in the targeted epitope + std::vector eMut; // Sites that are mutant in the targeted epitope + double penalty; // Energy penalty if sequence contains the targeted epitope + + std::string couplingsInfile; + std::string trajectoryOutfile; + std::string supplementaryOutfile; + std::string stateInfile; + + + RunParameters() { + + directory=""; + + n = 100000; + g = 150; + num_runs = 1000; + mu = 6.0 * pow(10.0,-5.0); + bh = 1.0; + bJ = 1.0; + + estart = 0; + eend = 0; + + penalty = 10.0; + + runUntilEscape=false; + importState=false; + useEpitope=false; + useRelative=false; + useVerbose=false; + + } + void setFiles() { + + if (strcmp(directory.c_str(),"")!=0) { + + couplingsInfile=directory+"/"+infile+".j"; + trajectoryOutfile=directory+"/"+outfile+".dat"; + supplementaryOutfile=directory+"/"+outfile+".sum"; + stateInfile=directory+"/"+statefile+".st"; + + } + + else { + + couplingsInfile=infile+".j"; + trajectoryOutfile=outfile+".dat"; + supplementaryOutfile=outfile+".sum"; + stateInfile=statefile+".st"; + + } + + //printf("%s\n%s\n%s\n",couplingsInfile.c_str(),trajectoryOutfile.c_str(),stateInfile.c_str()); + + } + ~RunParameters() {} + +}; + + +void run(RunParameters &r); +void importState(RunParameters &r); + + +#endif