durationOfStateI <- function(popSize, state) { prob <- choose(state, 2) / (2 * popSize) # prob. of coalescence generations <- rgeom(1, prob) + 1 return(generations) } makeGenealogy <- function (popSize, sampleSize) { result <- numeric(0) for (i in 2:sampleSize) { ti <- durationOfStateI(popSize, i) result <- c(result, ti) } return(result) } ### The following is a better way of doing the same thing makeGenealogy <- function (popSize, sampleSize) { return(sapply(2:sampleSize, durationOfStateI, popSize=popSize)) } totalTreeLength <- function(popSize, sampleSize) { genealogy <- makeGenealogy(popSize, sampleSize) brlen <- sum(genealogy * 2:sampleSize) return(brlen) } numSegSites <- function(popSize, sampleSize, mu) { totBrLen <- totalTreeLength(popSize, sampleSize) numMut <- rpois(1, totBrLen * mu) # poisson distribution return(numMut) } simCoal <- function(popSize, sampleSize, mu, trials){ result <- numeric(0) for (i in 1:trials) { mut <- numSegSites(popSize, sampleSize, mu) result <- c(result, mut) } return(result) } > simDat <- simCoal(popSize=1000, sampleSize=10, mu=0.001, trials=2000) > 4 * 1000 * 0.0001 * wattersonCoef(10)
Remember
, where S is number of segregating
sites,
, and
is Watterson's coefficient?
### freq(A1) = p, freq(A2) = 1-p ### w11 = 1, w12 = 1-h * s, w22 = 1 - s ## population mean fitness wbar <- function (p,s,h) { return(1 - 2 * p * (1-p) * h * s - (1-p)^2 * s) } ## Changes in allele frequency after 1 generation delta.p <- function (p, s, h) { q <- 1-p mean.w <- wbar(p, s, h) result <- p * q * s * (p * h + q * (1-h))/mean.w return (result) } recursiveAlleleFreq <- function (init.p, s, h, generations) { result <- c(init.p) curFreq <-init.p for (i in 1:generations) { nextFreq <- curFreq + delta.p(curFreq, s, h) result <- c(result, nextFreq) curFreq <- nextFreq } return(result) } > inip <- 0.01 > sel <- 0.1 > num.gen <- 1200 > > domi <- recursiveAlleleFreq(inip, sel, 0, num.gen) # A1 domi > rec <- recursiveAlleleFreq(inip, sel, 1, num.gen) # A1 recessive > add <- recursiveAlleleFreq(inip, sel, 0.5, num.gen) # Additive > > plot(0:num.gen, domi, type="l", xlab="generations", ylab="p", cex.lab=1.4, cex.axis=1.3, main="s=0.1, init(p) = 0.01") > lines(0:num.gen, rec)
> source("gen.drift.r") > plot.gen.drift(popSize=10, initFreq=0.2, num.generations=100, num.trials=10)
This will start the simulation with N=10 and initial allele freq=0.2. It will repeat the simulation for 10 times (num.trials). In addition to the plot, it will print out number of trials that the allele went fixation and extinction, and calculate the fixation probability. Remember fixation probability = p (the initial allele frequency). Confirm this with various setting of popSize and initFreq. Notice that population become homozygous immediately with N=10, but it takes longer time when N is larger. So you may need to increase the number of generations (e.g. num.generations=1000).