;;; genetic-drift.ss -- simulate genetic drift in a population under ;;; selctive pressure ;;; John David Stone ;;; Department of Mathematics and Computer Science ;;; Grinnell College ;;; stone@cs.grinnell.edu ;;; created March 20, 2000 ;;; last revised April 3, 2000 ;;; This program traces changes in the frequencies of the dominant and ;;; recessive alleles of a gene in a population, under the assumption that ;;; the probability of an organism's surviving to adulthood is determined ;;; by its genotype. It constructs an initial population of organisms, ;;; randomly constructed to approximate the given initial frequencies of ;;; the dominant and recessive alleles and repeats a process of breeding ;;; and culling to construct each successive generation. ;;; ------------------ ;;; Utility procedures ;;; ------------------ ;;; We begin with several utility procedures that will subsequently appear ;;; in the definitions of others that are more closely connected with the ;;; problem at hand. ;;; The REPEAT procedure, introduced in the lab on folding, constructs a ;;; procedure that applies a transformer repeatedly, starting from a given ;;; base value, given the desired number of repetitions: (define repeat (lambda (base-case-value transformer) (letrec ((recurrer (lambda (number) (if (zero? number) base-case-value (transformer (recurrer (- number 1))))))) recurrer))) ;;; At several points in the following procedures, we'll need to select one ;;; of two values, not necessarily with equal frequency. The SELECTOR ;;; procedure takes three arguments -- two values, LEFT and RIGHT, and the ;;; probability PROB with which LEFT should be returned. It is a ;;; precondition of thie procedure that PROB be a real number in the range ;;; from 0 to 1 inclusive. SELECTOR returns LEFT with probability PROB and ;;; RIGHT with probability 1 - PROB. ;;; DrScheme's RANDOM procedure accepts only exact positive integers less ;;; than or equal to the value of RANDOM-MAX, as defined below. We ;;; use the largest possible argument to that procedure to obtain the ;;; closest possible approximation to the specified probability. (define selector (let ((random-max 2147483647)) (lambda (left right prob) (if (< (random random-max) (* prob random-max)) left right)))) ;;; The RANDOM-ELEMENT procedure selects and returns a randomly ;;; selected element from a given non-empty list. (define random-element (lambda (ls) (list-ref ls (random (length ls))))) ;;; The FOLD-LIST procedure, also introduced in the lab on folding, returns ;;; a procedure that enacts a list recursion over a given list. (define fold-list (lambda (base-case-value combiner) (letrec ((recurrer (lambda (ls) (if (null? ls) base-case-value (combiner (car ls) (recurrer (cdr ls))))))) recurrer))) ;;; The BUMP procedure takes two arguments, of which the second must be an ;;; association list in which the data are numbers. It returns an ;;; association list with the same keys and the same associated data, ;;; except that the datum associated with the value that is the first ;;; argument to BUMP has been increased by 1. If the first argument is not ;;; a key of the original association list, an entry for that value is ;;; added, with 1 as the associated value. (define bump (lambda (key als) (let kernel ((rest als)) (cond ((null? rest) (list (cons key 1))) ((eq? key (caar rest)) (cons (cons key (+ (cadr rest) 1)) (cdr rest))) (else (cons (car rest) (kernel (cdr rest)))))))) ;;; The PROMPT procedure prints out a prompt, suggesting that the user ;;; supply some datum, then reads in and returns the datum she supplies. (define prompt (lambda (tag) (display tag) (read))) ;;; The DISPLAY-LINE procedure displays each of its arguments in turn and ;;; then starts a new output line. (define display-line (lambda arguments (for-each display arguments) (newline))) ;;; The PERCENTAGE procedure displays its argument, which must be a real ;;; number, as a precentage (rounded to the nearest integer). (define percentage (lambda (number) (* 100 (round number)))) ;;; ------------------------- ;;; Organisms and populations ;;; ------------------------- ;;; Each of our organisms is represented by a three-element list. The ;;; first element indicates the organism's sex: It is either the symbol ;;; 'FEMALE or the symbol 'MALE. The remaining elements give the ;;; organism's genotype: Each is either the symbol 'DOMINANT or the symbol ;;; 'RECESSIVE. ;;; To construct an organism for the initial population, we choose the sex ;;; by tossing a coin (that is, choosing 'FEMALE or 'MALE with equal ;;; probability) and the genotype by using SELECTOR to pick an allele ;;; twice. It is a precondition of this procedure that the frequency of ;;; the dominant allele be a real number in the range from 0 to 1 ;;; inclusive. (define random-organism (lambda (dominant-frequency) (list (selector 'female 'male 1/2) (selector 'dominant 'recessive dominant-frequency) (selector 'dominant 'recessive dominant-frequency)))) ;;; We represent the whole population, in any given generation, by a list ;;; of organisms. The INITIAL-POPULATION procedure generates such a list, ;;; given the number of organisms in the population the desired frequency ;;; of the dominant allele. ;;; As preconditions of this procedure, the population size must be an ;;; exact integer greater than or equal to 2 and the frequency of the ;;; dominant allele a real number in the range from 0 to 1 inclusive. (define initial-population (lambda (population-size dominant-frequency) ((repeat null (lambda (ls) (cons (random-organism dominant-frequency) ls))) population-size))) ;;; The SEPARATE-SEXES procedure takes a population as argument and returns ;;; four values: a list of the females, the length of that list, a list of ;;; the males, and the length of _that_ list. (define separate-sexes (lambda (population) (let kernel ((rest population) (females null) (female-count 0) (males null) (male-count 0)) (cond ((null? rest) (values females female-count males male-count)) ((eq? (caar rest) 'female) (kernel (cdr rest) (cons (car rest) females) (+ female-count 1) males male-count)) (else (kernel (cdr rest) females female-count (cons (car rest) males) (+ male-count 1))))))) ;;; Given two organisms, one male and one female, the OFFSPRING procedure ;;; constructs and returns a new organism. Its sex is determined by a coin ;;; toss, and it receives one gene from its male parent and one from its ;;; female parent, in each case randomly selected from the two available. (define offspring (lambda (mother father) (list (selector 'female 'male 1/2) (selector (cadr mother) (caddr mother) 1/2) (selector (cadr father) (caddr father) 1/2)))) ;;; Typically, not all of the offspring survive to become adult members of ;;; the next generation. A culling procedure imposes selective pressure ;;; against some genotypes. This higher-order procedure, CULLER-MAKER, ;;; takes three arguments, each one of which must be a real number in the ;;; range from 0 to 1, respectively representing the survival rates of ;;; organisms having two dominant genes, organisms having one dominant and ;;; one recessive gene, and organism bearing two recessive genes. It ;;; constructs and returns a procedure that takes an organism as argument, ;;; examines its genotype, and decides whether the organism survives to ;;; adulthood (with the appropriate probability), returning #T if it does ;;; and #F if it does not. (define culler-maker (lambda (dd-rate dr-rate rr-rate) (lambda (organism) (cond ((not (eq? (cadr organism) (caddr organism))) (selector #t #f dr-rate)) ((eq? (cadr organism) 'dominant) (selector #t #f dd-rate)) (else (selector #t #f rr-rate)))))) ;;; Given a population and a culling procedure, this procedure attempts to ;;; construct a new population of the same size, made up of offspring of ;;; the current generation that survive to adulthood. ;;; If the current population contains no females or no males, the new ;;; population is empty. ;;; The number of offspring produced, before culling, is no greater than ;;; 256 times the size of the current population. If fewer than 1/256 of ;;; those offspring survive culling, the new population is smaller than the ;;; old. (define new-population (lambda (population culler) (let* ((population-size (length population)) (offspring-limit (* 256 population-size))) (call-with-values (lambda () (separate-sexes population)) (lambda (females female-count males male-count) (if (or (zero? female-count) (zero? male-count)) null (let kernel ((offspring-count 0) (new-population null) (new-population-size 0)) (if (or (= new-population-size population-size) (= offspring-count offspring-limit)) new-population (let ((child (offspring (random-element females) (random-element males)))) (if (culler child) (kernel (+ offspring-count 1) (cons child new-population) (+ new-population-size 1)) (kernel (+ offspring-count 1) new-population new-population-size))))))))))) ;;; The GENOTYPES procedure runs through a population and counts the number ;;; of organism of each of the three genotypes 'DD (dominant/dominant), 'DR ;;; (dominant/recessive), and 'RR (recessive/recessive), returning the ;;; tallies as the data of an association list in which the keys are the ;;; symbols distinguishing the three genotypes. (define genotypes (fold-list (list (cons 'dd null) (cons 'dr null) (cons 'rr null)) (lambda (organism recursive-result) (cond ((not (eq? (cadr organism) (caddr organism))) (bump 'dr recursive-result)) ((eq? (cadr organism) 'dominant) (bump 'dd recursive-result)) (else (bump 'rr recursive-result)))))) ;;; ------------------ ;;; The user interface ;;; ------------------ ;;; The GET-INITIAL-SIZE procedure prompts the user for the size of the ;;; initial population, which is required to be a natural number. (define get-initial-size (lambda () (let kernel ((proposed (prompt "Initial population size: "))) (if (and (integer? proposed) (exact? proposed) (not (negative? proposed))) proposed (begin (display-line "The initial population size must be " "a natural number.") (kernel (prompt "Initial population size: "))))))) ;;; The GET-DOMINANT-FREQUENCY procedure prompts the user for the frequency ;;; of the dominant allele in the initial population, which is required to ;;; be a real number in the range from 0 to 1 (inclusive). (define get-dominant-frequency (lambda () (let kernel ((proposed (prompt "Frequency of the dominant allele: "))) (if (and (real? proposed) (<= 0 proposed 1)) proposed (begin (display-line "The frequency of the dominant allele must be " "a real number in the range from 0 to 1 " "(inclusive)." ) (kernel (prompt "Frequency of the dominant allele: "))))))) ;;; The GET-SURVIVAL-RATE procedure prompts the user for the survival rate ;;; of an organism with a given genotype, as a real number in the range ;;; from 0 to 1 (inclusive). (define get-survival-rate (lambda (genotype) (let ((prompt-text (string-append "Survival rate for organisms having the " genotype " genotype: "))) (let kernel ((proposed (prompt prompt-text))) (if (and (real? proposed) (<= 0 proposed 1)) proposed (begin (display-line "The survival rate must be a real number in " "the range from 0 to 1 (inclusive)." ) (kernel (prompt prompt-text)))))))) ;;; The GET-GENERATIONS procedure prompts the user for the number of ;;; generations to be simulated, which is required to be a natural number. (define get-generations (lambda () (let kernel ((proposed (prompt "Number of generations: "))) (if (and (integer? proposed) (exact? proposed) (not (negative? proposed))) proposed (begin (display-line "The number of generations to be simulated " "must be a natural number.") (kernel (prompt "Number of generations: "))))))) ;;; The DESCRIBE-POPULATION procedure writes out a one-line summary of the ;;; genotypes of the members of a given population and the frequency of the ;;; dominant allele. (define describe-population (lambda (population) (let ((genotype-table (genotypes population))) (let ((population-size (apply + (map cdr genotype-table))) (dominant-count (+ (* (cdr (assq 'dd genotype-table)) 2) (cdr (assq 'dr genotype-table))))) (display-line "Dominant alleles: " dominant-count "/" population-size " (" (percentage (/ dominant-count population-size)) "%) " "Genotypes: " (cdr (assq 'dd genotype-table)) " dominant/dominant, " (cdr (assq 'dr genotype-table)) " dominant/recessive, " (cdr (assq 'rr genotype-table)) " recessive/recessive."))))) ;;; The RUN-EXPERIMENT procedure collects from the user the various ;;; parameters of the simulation (the size of the initial population, the ;;; initial frequency of the dominant allele, the survival rate of each ;;; genotype, and the number of generations to be simulated). It then ;;; constructs an initial population and proceeds to simulate the specified ;;; number of breeding-and-culling cycles, reporting various statistics ;;; about the population at each step. (define run-experiment (lambda () (let* ((initial-size (get-initial-size)) (dominant-frequency (get-dominant-frequency)) (dd-survival-rate (get-survival-rate "dominant/dominant")) (dr-survival-rate (get-survival-rate "dominant/recessive")) (rr-survival-rate (get-survival-rate "recessive/recessive")) (generations (get-generations)) (culler (culler-maker dd-survival-rate dr-survival-rate rr-survival-rate))) (newline) ((repeat (initial-population initial-size dominant-frequency) (lambda (old-population) (let ((children (new-population old-population culler))) (describe-population children) children))) generations)) (newline))) ;;; The following procedure call starts the program moving. (run-experiment)