;;; John David Stone ;;; Department of Computer Science ;;; Grinnell College ;;; stone@cs.grinnell.edu ;;; created December 5, 1999 ;;; last revised December 8, 2010 ;;; The genetic algorithm ;;; When no efficient algorithm for solving an optimization problem is ;;; available, the genetic algorithm heuristic can often be used to find a ;;; near-optimal solution. (However, it is not guaranteed to work; the ;;; justification for using it is entirely pragmatic.) ;;; The metaphor underlying the genetic algorithm is biological adaptation ;;; through selective breeding. Think of the problem to be solved as a ;;; constraint imposed by a natural environment and of potential solutions ;;; to that problem as organisms. As the genetic constitution of an ;;; organism partially determines whether it thrives or languishes in a ;;; particular environment, so the internal structure and contents of each ;;; potential solution determine how good or bad it is. The genetic ;;; algorithm simulates the natural selection of organisms that are well ;;; adapted to their environment. ;;; Initially, a ``population'' of possible solutions is constructed, ;;; either at random (as in this implementation) or by some method that ;;; tends to generate good but not necessarily optimal solutions. This ;;; initial population is used to breed a second generation of slightly ;;; better solutions, which is used to breed a third, and so on, until some ;;; specified number of generations have been simulated. At the end of the ;;; process, the best solution in the final generation is returned. ;;; The process by which each new generation replaces its predecessor comprises ;;; three main steps. First, a large number of new potential solutions are ;;; ``bred'' by combining elements of randomly selected individuals of the old ;;; generation. Secondly, ``mutations'' are applied to these new solutions, ;;; replacing some of their elements with randomly selected values. Finally, ;;; the brood of new solutions is culled: Most of the individuals are discarded ;;; as ``less fit,'' that is, farther from the desired optimal solution. The ;;; rest make up the new population. ;;; The algorithm takes seven arguments: ;;; RANDOM-ORGANISM is a procedure of zero arguments that, when invoked, ;;; constructs and returns a randomly constructed possible solution to ;;; whatever problem is to be solved. ;;; POPULATION-SIZE is the number of organisms in the breeding pool in each ;;; generation. Its value is a trade-off between the need for genetic ;;; variation in the population and the need for fast running time in the ;;; program. ;;; SPLICE is a procedure that takes two organisms as arguments and returns ;;; a new organism comprising some genetic material from each parent, ;;; randomly selected. ;;; MUTATE is a procedure that takes an organism and returns a similar ;;; organism, except that a random change may have been made in some ;;; element of its genetic material. ;;; BROOD-SIZE is the number of organisms generated from the breeding pool, ;;; before culling. It must be greater than or equal to POPULATION-SIZE ;;; and is ordinarily several times greater. ;;; FITNESS is a procedure that takes an organism as argument and returns ;;; an exact real number indicating its degree of fitness (i.e., its ;;; success as a solution to the problem). ;;; NUMBER-OF-GENERATIONS is the number of cycles of natural selection that ;;; the algorithm will simulate before delivering an answer. Again, its ;;; value is a trade-off between processing time and the likelihood of ;;; obtaining a near-optimal solution. (define genetic-algorithm (lambda (random-organism population-size splice mutate brood-size fitness number-of-generations) (let ((initial-population (make-vector population-size))) (do ((index 0 (+ index 1))) ((= index population-size)) (vector-set! initial-population index (random-organism))) (let generation-loop ((chronon 0) (population initial-population)) ; (report chronon (vector-ref population (- population-size 1))) (if (= chronon number-of-generations) (vector-ref population (- population-size 1)) (let offspring-loop ((count 0) (brood '())) (if (= count brood-size) (generation-loop (+ chronon 1) (cull fitness population-size brood)) (let ((alpha (random-element population)) (beta (random-element population))) (offspring-loop (+ count 1) (cons (mutate (splice alpha beta)) brood)))))))))) ;;; The RANDOM-ELEMENT procedure selects a random element from a non-empty ;;; vector. (define random-element (lambda (vec) (vector-ref vec (random (vector-length vec))))) ;;; The REPORT procedure gives a progress report, displaying a specified ;;; organism. (define report (lambda (chronon best-of-breed) (display "Generation ") (display chronon) (display ": ") (newline) (display best-of-breed) (newline) (newline))) ;;; To cull the brood, evaluate the fitness of each new organism and keep ;;; track of the n fittest, where n is the desired population size. (define cull (lambda (fitness population-size brood) (let loop ((rest brood) (fittest '()) (counter 0)) (if (null? rest) (list->vector (map car fittest)) (let ((score (fitness (car rest)))) (let ((updated (cond ((< counter population-size) (insert (cons (car rest) score) fittest)) ((< (cdar fittest) score) (insert (cons (car rest) score) (cdr fittest))) (else fittest)))) (loop (cdr rest) updated (+ counter 1)))))))) ;;; The INSERT procedure expects to receive (1) a pair consisting of an ;;; organism and its fitness score and (2) a list of such pairs, in ;;; ascending order of fitness. It returns a similarly ordered list, to ;;; which the new pair has been added. (define insert (lambda (new ls) (cond ((null? ls) (list new)) ((<= (cdr new) (cdar ls)) (cons new ls)) (else (cons (car ls) (insert new (cdr ls))))))) ;;; As a small example of the genetic algorithm, let's try to get a ;;; random-number generator to write Shakespeare -- specifically, Hamlet's ;;; reassurance to his friends Rosencrantz and Guildenstern that he is only ;;; occasionally insane: (define hamlet-speech "I am but mad north-north-west. When the wind is southerly I know a hawk from a handsaw.") (define length-of-speech (string-length hamlet-speech)) ;;; Our organisms will be strings of text characters, equal in length to ;;; HAMLET-SPEECH. (define random-text-char (let ((text-chars (vector #\newline #\space #\! #\( #\) #\- #\: #\; #\' #\" #\, #\. #\? #\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9 #\A #\B #\C #\D #\E #\F #\G #\H #\I #\J #\K #\L #\M #\N #\O #\P #\Q #\R #\S #\T #\U #\V #\W #\X #\Y #\Z #\a #\b #\c #\d #\e #\f #\g #\h #\i #\j #\k #\l #\m #\n #\o #\p #\q #\r #\s #\t #\u #\v #\w #\x #\y #\z))) (let ((number-of-text-chars (vector-length text-chars))) (lambda () (vector-ref text-chars (random number-of-text-chars)))))) (define random-string (lambda (len) (let ((result (make-string len))) (do ((index 0 (+ index 1))) ((= index len) result) (string-set! result index (random-text-char)))))) ;;; Our splicing procedure will choose a substring of the father (selecting ;;; the end-points randomly) and replace the corresponding substring of the ;;; mother with it. (define splicer (lambda (alpha beta) (let ((one-end (random (+ length-of-speech 1))) (other-end (random (+ length-of-speech 1)))) (let ((start (min one-end other-end)) (finish (max one-end other-end))) (string-append (substring beta 0 start) (substring alpha start finish) (substring beta finish length-of-speech)))))) ;;; Our mutation procedure passes over each character of the string, ;;; occasionally (with probability 1/64) replacing it with a randomly selected ;;; character. (define mutator (lambda (organism) (let ((result (make-string length-of-speech))) (do ((index 0 (+ index 1))) ((= index length-of-speech) result) (string-set! result index (if (zero? (random 64)) (random-text-char) (string-ref organism index))))))) ;;; The fitness evaluator determines the number of positions at which the ;;; organism correctly matches a character in the speech. (define speech-fitness (lambda (organism) (let loop ((index 0) (tally 0)) (if (= index length-of-speech) tally (loop (+ index 1) (if (char=? (string-ref organism index) (string-ref hamlet-speech index)) (+ tally 1) tally)))))) ;;; Filling in the rest of the parameters with plausible values (population ;;; size 200, brood size 1000, number of generations 100), let's see what a ;;; few hundred thousand monkeys can come up with: (display (genetic-algorithm (lambda () (random-string length-of-speech)) 200 splicer mutator 1000 speech-fitness 100)) (newline)