;;; mathematician.ss -- a program to sort and display names and dates
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; the problem specification ;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; The file mathematicians.data contains the names and dates of birth of an
;; indefinite number of mathematicians born in the nineteenth century.
;; Here's a typical line:
;;
;; 1815/11/02 Boole, George
;;
;; The birth date appears first on each line, in the format year/month/day;
;; both the month and the day are always given as two-digit numbers, as in
;; the example above. The birth date is always followed by exactly six
;; spaces. The mathematician's name is then given, last name first, with
;; the surname followed by a comma, a space, and the forename(s).
;;
;; This program reads in data from the mathematicians.data file and creates
;; two output files, mathematicians-by-date-of-birth.data and
;; mathematicians-by-surname.data, in the current working directory. The
;; mathematicians-by-date-of-birth.data file contains the same data as the
;; source file, in the same format, but with the lines arranged
;; chronologically by date, from 1802/08/05 at the beginning to 1894/11/26
;; at the end. The other file, mathematicians-by-surname.data, contains a
;; list of all the mathematicians named in the source file, arranged
;; alphabetically by surname, in the following format:
;;
;; George BOOLE (November 2, 1815)
;;
;; In other words, in this file, the forename(s) is placed before
;; the surname, the surname is capitalized, and the date of birth is
;; written in the more human-readable format month-name day, year, enclosed
;; in parentheses, and placed after the mathematician's name.
;; The component procedures of this program fall into three groups: those
;; related to input, those related to sorting, and those related to output.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; procedures relating to input ;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; The READ-LINES-INTO-LIST procedure takes two arguments -- the name of a
;; text file, and a predicate that tests whether a given line is correctly
;; formatted -- and returns a list of strings, each string being one
;; correctly formateed line of the text file (not including the #\NEWLINE
;; that terminates it). Lines that are incorrectly formatted are echoed
;; to standard output, although with warning messages, but are not added
;; to the list.
(define read-lines-into-list
(lambda (source-file-name correctly-formatted?)
;; Make sure that SOURCE-FILE-NAME is a string and that it denotes
;; a file that really exists.
(if (not (and (string? source-file-name)
(file-exists? source-file-name)))
(error 'read-lines-into-list
"The argument must be the name of an existing file"))
;; Open the source file and read it in, a line at a time, adding
;; each correctly formatted line to a list and issuing complaints about
;; incorrectly formatted lines; when the end-of-file object is
;; encountered, close the file and return the list (reversed).
(let ((source (open-input-file source-file-name)))
(do ((line (read-line source) (read-line source))
(line-list '() (if (correctly-formatted? line)
(cons line line-list)
(begin
(display "Warning: Line ")
(display line-number)
(display " of ")
(display source-file-name)
(display " is incorrectly formatted and ")
(display "will be discarded:")
(newline)
(display line)
(newline)
line-list))))
((eof-object? line) (close-input-port source)
(reverse line-list))))))
;; The READ-LINE procedure takes one argument, an open input port, and
;; recovers one line of text through that port. The line is considered to
;; be terminated when a #\NEWLINE character or the end-of-file object is
;; encountered. If the #\NEWLINE character terminates the line, READ-LINE
;; consumes it but does not add it to the string it returns.
(define read-line
(lambda (source)
;; Make sure that SOURCE is an input port.
(if (not (input-port? source))
(error 'read-line "The argument must be an input port"))
;; If we're already at the end of the file, return the end-of-file
;; object.
(let ((next (peek-char source)))
(if (eof-object? next)
next
;; Otherwise, collect in a list one character at a time from the
;; input port until either the end-of-file object or the end of
;; the line is encountered. At that time, reverse the list,
;; convert it into a string, and return it.
(do ((ch (read-char source) (read-char source))
(char-list '() (cons ch char-list)))
((or (eof-object? ch) (char=? ch #\newline))
(apply string (reverse char-list))))))))
;; In this application, a correctly formatted line comprises four digits,
;; a slash, two digits, a slash, two digits, six spaces, and some number
;; of additional characters somewhere including a comma. The leftmost
;; occurrence of a comma must be followed by a space. Moreover, the digits
;; in positions 5 and 6 must denote an integer in the range from 1 to 12 --
;; a month number. The following procedure determines whether a given
;; string is a correctly formatted line.
(define in-mathematician-format?
(lambda (str)
(and (string? str)
(let ((len (string-length str)))
(and (<= 18 len)
;; four-digit year
(char-numeric? (string-ref str 0))
(char-numeric? (string-ref str 1))
(char-numeric? (string-ref str 2))
(char-numeric? (string-ref str 3))
(char=? #\/ (string-ref str 4))
;; two-digit month, in the range from 1 to 12
(char-numeric? (string-ref str 5))
(char-numeric? (string-ref str 6))
(<= 1 (string->number (substring str 5 7)) 12)
(char=? #\/ (string-ref str 7))
;; two-digit day
(char-numeric? (string-ref str 8))
(char-numeric? (string-ref str 9))
(char=? #\space (string-ref str 10))
(char=? #\space (string-ref str 11))
(char=? #\space (string-ref str 12))
(char=? #\space (string-ref str 13))
(char=? #\space (string-ref str 14))
(char=? #\space (string-ref str 15))
;; any characters, so long as there is a comma and
;; the first comma is followed by a space
(let loop ((position 16))
(and (not (= position len))
(if (char=? #\, (string-ref str position))
(and (not (= (+ position 1) len))
(char=? #\space
(string-ref str (+ position 1))))
(loop (+ position 1))))))))))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; procedures relating to sorting ;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; The SORT-LIST procedure takes as argument a total ordering predicate (a
;; binary predicate that is irreflexive, asymmetric, and transitive on the
;; type that it relates) and returns a procedure that sorts a list of values
;; into the order implied by that predicate. It uses the insertion-sorting
;; algorithm.
(define sort-list
(lambda (precedes?)
;; Make sure that PRECEDES? is a procedure.
(if (not (procedure? precedes?))
(error 'sort-list "The argument must be a procedure"))
;; The INSERT procedure adds an item at the appropriate position in a
;; correctly sorted list.
(letrec ((insert (lambda (item ls)
;; If LS is empty, return a list in which ITEM
;; is the only element.
(cond ((null? ls) (list item))
;; If ITEM belongs at the beginning of the
;; list, put it there.
((precedes? item (car ls))
(cons item ls))
;; Otherwise, insert it in the cdr of LS
;; and attach the car of LS at the front
;; of the result.
(else
(cons (car ls) (insert item (cdr ls))))))))
;; Here's the sorting procedure that SORT-LIST returns.
(lambda (ls)
;; Make sure that LS is a list.
(if (not (list? ls))
(error '*sort-list "The argument must be a list"))
;; Insert each element of LS into an initially empty result list,
;; then return that list.
(do ((rest ls (cdr rest))
(result '() (insert (car rest) result)))
((null? rest) result))))))
;; The program specification calls for two different orderings of the
;; mathematicians mentioned in the source file -- one by date of birth, the
;; other by surname. To use SORT-LIST for both arrangements, we need to
;; write binary predicates to compare particular strings in these two ways.
;; It will be helpful to have procedures to break out the various data that
;; occur at various positions in the line. I've omitted precondition tests
;; here, because I'm expecting these procedures to be invoked only on
;; strings that have been found to be correctly formatted.
(define birth-year-field
(lambda (str)
(string->number (substring str 0 4))))
(define birth-month-field
(lambda (str)
(string->number (substring str 5 7))))
(define birth-day-field
(lambda (str)
(string->number (substring str 8 10))))
(define surname-field
(lambda (str)
(substring str 16 (find-char #\, str 16))))
(define forenames-field
(lambda (str)
(substring str (+ (find-char #\, str 16) 2) (string-length str))))
;; The FIND-CHAR procedure takes three arguments -- a character, a string,
;; and a natural number less than or equal to the length of the string.
;; If the specifiec character occurs in the given string at a position
;; greater than or equal to the one specified by the natural number,
;; FIND-CHAR returns the least such position; otherwise, FIND-CHAR returns
;; the length of the string.
(define find-char
(lambda (sought str start)
;; Make sure that SOUGHT is a character.
(if (not (char? sought))
(error 'find-char "The first argument must be a character"))
;; Make sure that STR is a string.
(if (not (string? str))
(error 'find-char "The second argument must be a string"))
;; Make sure that START is a natural number not exceeding the length
;; of STR.
(if (or (not (integer? start))
(negative? start)
(< (string-length str) start))
(error 'find-char
(string-append "The third argument must be a natural "
"number not exceeding the length of the "
"string")))
(let ((len (string-length str)))
;; Examine each position in turn, beginning with START. If you
;; reach the end of the string or find an occurrence of SOUGHT,
;; stop and return POSITION; otherwise, go on to the next position.
(let loop ((position start))
(if (or (= position len)
(char=? sought (string-ref str position)))
position
(loop (+ position 1)))))))
;; The BORN-EARLIER? predicate compares two correctly formatted strings
;; and determines whether the first of them describes a mathematician with
;; an earlier date of birth than the other.
;; Given the yyyy/mm/dd format of the first ten characters of each entry,
;; there's a short way to do this:
;;
;; (define born-earlier? string<)
;;
;; In case this seems like a kludge, however, I've actually implemented
;; the procedure the long way:
(define born-earlier?
(lambda (str-1 str-2)
(cond ((< (birth-year-field str-1) (birth-year-field str-2)) #t)
((< (birth-year-field str-2) (birth-year-field str-1)) #f)
((< (birth-month-field str-1) (birth-month-field str-2)) #t)
((< (birth-month-field str-2) (birth-month-field str-1)) #f)
(else (< (birth-day-field str-1) (birth-day-field str-2))))))
;; The PRECEDES-LEXICOGRAPHICALLY? predicate compares two correctly
;; formatted strings and determines whether the first of them describes a
;; mathematician that should precede the other in lexicographical order
;; (first by surname, then by forenames).
(define precedes-lexicographically?
(lambda (str-1 str-2)
(cond ((string<? (surname-field str-1) (surname-field str-2)) #t)
((string<? (surname-field str-2) (surname-field str-1)) #f)
(else (string<? (forenames-field str-1)
(forenames-field str-2))))))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; procedures relating to output ;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; The WRITE-LIST-OF-LINES procedure takes as its argument a
;; ``transformer'' procedure that takes strings as arguments and returns
;; strings as values, and returns a procedure that takes two arguments, a
;; list of strings and the name of a file to which output is to be written;
;; this latter procedure, when invoked, creates the specified output file
;; and writes to it the result of applying the transformer procedure to
;; each element of the list of strings.
(define write-list-of-lines
(lambda (transformer)
;; Make sure that TRANSFORMER is a procedure.
(if (not (procedure? transformer))
(error 'write-list-of-lines "The argument must be a procedure"))
(lambda (line-list target-file-name)
;; Open the file to obtain an output port.
(let ((target (open-output-file target-file-name)))
;; Process each element of LINE-LIST.
(do ((rest line-list (cdr rest)))
((null? rest) (close-output-port target))
(display (transformer (car rest)) target)
(newline target))))))
;; In the output file containing the list of mathematicians by date of
;; birth, the format of each line is supposed to be identical to that of
;; the input file, so the transformer procedure can simply be IDENTITY:
(define identity
(lambda (str)
str))
;; In the output file that is arranged alphabetically by surname, however,
;; each line is supposed to comprise first the mathematician's forenames,
;; then his or her surname (in capital letters), and then his or her date
;; of birth, in parentheses. Moreover, the date is to comprise the name
;; of the month, the day of the month, a comma, and the year. The
;; following transformer carries out this reconstruction:
(define rearrange-for-output
(lambda (str)
(string-append (forenames-field str)
" "
(string-upcase (surname-field str))
" ("
(month-name (birth-month-field str))
" "
(number->string (birth-day-field str))
", "
(number->string (birth-year-field str))
")")))
;; The STRING-UPCASE procedure takes a string as argument and returns a
;; similar string, but with each of the lower-case letters converted to
;; upper case.
(define string-upcase
(lambda (str)
(let* ((len (string-length str))
(result (make-string len)))
(do ((position 0 (+ position 1)))
((= position len) result)
(string-set! result position
(char-upcase (string-ref str position)))))))
;; The MONTH-NAME procedure takes the number of a month and returns its
;; name, as a string.
(define month-name
(lambda (month)
(case month
((1) "January")
((2) "February")
((3) "March")
((4) "April")
((5) "May")
((6) "June")
((7) "July")
((8) "August")
((9) "September")
((10) "October")
((11) "November")
((12) "December")
(else
(error 'month-name
"The argument must be an integer in the range from 1 to 12")))))
;;;;;;;;;;;;;;;;;;;;;;;;
;;; the main program ;;;
;;;;;;;;;;;;;;;;;;;;;;;;
;; Now we have all the resources needed to write the program concisely:
(let ((mathematicians
(read-lines-into-list "mathematicians.data"
in-mathematician-format?)))
((write-list-of-lines identity)
((sort-list born-earlier?) mathematicians)
"mathematicians-by-date-of-birth.data")
((write-list-of-lines rearrange-for-output)
((sort-list precedes-lexicographically?) mathematicians)
"mathematicians-by-surname.data"))
This document is available on the World Wide Web as
http://www.math.grin.edu/courses/Scheme/solutions-5.html