; XEmacs: This file contains -*-Scheme-*- source code. ;;; John David Stone ;;; Department of Mathematics and Computer Science ;;; Grinnell College ;;; stone@cs.grinnell.edu ;;; created November 29, 1999 ;;; last revised November 26, 2000 ;;; greatest-satisfying: Given a natural number UPPER-BOUND and a predicate ;;; that is satisfied by at least one natural number less than or equal to ;;; UPPER-BOUND, determine the greatest natural number less than or equal ;;; to UPPER-BOUND that satisfies the predicate. (define (greatest-satisfying upper-bound predicate) ((until predicate predecessor) upper-bound)) ;;; generate-list: Construct a list of a given size by applying a given ;;; procedure GENERATOR successively to each natural number up to, but ;;; not including, the size. (define (generate-list generator size) (unfold-list (next-section = size) generator successor 0)) ;;; generate-vector: Construct a vector of a given size by applying a given ;;; procedure GENERATOR successively to each natural number up to, but not ;;; including, the size. (define generate-vector (pipe generate-list list->vector)) ;;; prefix-matches-suffix?: Within an initial segment of a given string, ;;; determine whether a prefix of a given length matches the suffix of the ;;; same length. (define (prefix-matches-suffix? str segment-length len) (letrec ((matches-from? (lambda (so-far) (or (= so-far len) (and (char=? (string-ref str so-far) (string-ref str (+ so-far (- segment-length len)))) (matches-from? (successor so-far))))))) (matches-from? 0))) ;;; prefix-vector: Given a string, construct a vector of the same ;;; length as the string, in which the element at position i is the length ;;; of the longest prefix of the string that is a proper suffix of the ;;; prefix of length i + 1 of the string. (define (prefix-vector str) (generate-vector (lambda (index) (let ((segment-length (successor index))) (greatest-satisfying index (lambda (len) (prefix-matches-suffix? str segment-length len))))) (string-length str))) ;;; test-and-jump-maker: Given a text string and a pattern string, ;;; construct and return a procedure that checks whether a specified ;;; character of the text string matches the next unmatched character of ;;; the pattern string, adding the current shift to the set of valid shifts ;;; if the character matches and is the last character of the pattern, and ;;; in any case returning the number of characters of the pattern that are ;;; now known to match characters of the text. ;;; The internally defined JUMP procedure consults the pattern's prefix ;;; vector when a mismatch is detected in order to determine how far the ;;; shift can be advanced without missing a possible match. (define (test-and-jump-maker text pattern) (let ((jump (pipe predecessor (initial-section vector-ref (prefix-vector pattern)))) (pattern-length (string-length pattern))) (lambda (index valid-shifts matched) (letrec ((adjust (lambda (try) (if (char=? (string-ref pattern try) (string-ref text index)) (successor try) (if (zero? try) 0 (adjust (jump try))))))) (let ((adjusted (adjust matched))) (if (= pattern-length adjusted) (values (adjoin-to-set (- (successor index) adjusted) valid-shifts) (jump adjusted)) (values valid-shifts adjusted))))))) ;;; KNUTH-MORRIS-PRATT-MATCHER takes two strings, TEXT and PATTERN, and ;;; returns a set of ``valid shifts,'' that is, positions in TEXT at which ;;; copies of PATTERN begin. The initial shift of 0 aligns PATTERN with ;;; the beginning of TEXT; subsequent adjustments advance the alignment in ;;; jumps, as calculated by TEST-AND-JUMP. (define (Knuth-Morris-Pratt-matcher text pattern) (let ((test-and-jump (test-and-jump-maker text pattern)) (text-length (string-length text))) (if (string=? "" pattern) (lower-ply-natural (successor text-length) adjoin-to-set (set)) (receive (all-valid-shifts matched-at-end) (lower-ply-natural text-length test-and-jump (set) 0) all-valid-shifts))))