;;; File: ;;; textgen.ss ;;; Authors: ;;; Samuel A. Rebelsky ;;; Janet L. N. Davis ;;; YOUR NAME HERE ;;; Version: ;;; 1.0 of October 2006 ;;; Summary: ;;; A variety of procedures to support generate of English-like sentences. ;;; Contents: ;;; Configuration variables: ;;; root - a string that names the starting point for files ;;; parts-of-speech - a list of the valid parts of speech ;;; Punctuation: ;;; space - a space ;;; period - a period ;;; exclamation-point - same. ;;; Structural Procedures: ;;; (sentence) - generate a "random" sentence ;;; (noun-phrase) - generate a "random" noun phrase ;;; Word Procedures: ;;; (generate-part-of-speech part) - generate a random element ;;; of that part of speech, where part is 'adjective, 'article, ;;; 'noun, or 'tverb. ;;; Helper Procedures: ;;; (check-words-file fname) - Make sure that the given file matches ;;; the structure we expect. ;;; (random-word fname) - Select a word "randomly" from the named ;;; file. ;;; (part->filename part) - Convert a part of speech to a file ;;; name. ;;; (value->string) - Convert a Scheme value to a string. ; +------------------+------------------------------------------------ ; | Design Decisions | ; +------------------+ ; Each of the "parts of speech" files `contains a sequence of Scheme values ; of the form ; (ENTRY FREQUENCY OTHER-INFO) ; * ENTRY is a string, in which case it represents one example of the given ; part of speech. ; * FREQUENCY gives the number of times the word appears in 1000 words. ; * OTHER-INFO depends on the application. ; +---------------+--------------------------------------------------- ; | Configuration | ; +---------------+ ;;; Name: ;;; root ;;; Type: ;;; string ;;; Contents: ;;; The name of the root directory of the writer files. ;;; Sample Values: ;;; On a Mac: /users/USERNAME/ ;;; On a Linux System: /home/USERNAME/TextGeneration/ ;;; Note: ;;; Make sure to end the string with a slash. (define root "/home/rebelsky/Web/Courses/CS151/2006F/Examples/SampleTextGen/") ;;; Name: ;;; parts-of-speech ;;; Type: ;;; List of symbols ;;; Contents: ;;; The names of the parts of speech for which there are corresponding ;;; parts-of-speech files. For example, if 'noun is in parts-of-speech, ;;; then there should be a file "nouns" with the format described above. (define parts-of-speech (list 'adjective 'adjective1 'adjective2 'article 'exclamation 'five-syllable 'iverb 'name 'noun 'noun2 'noun-phrase 'sentence 'tverb)) ; +-----------+------------------------------------------------------- ; | Variables | ; +-----------+ ;;; Name: ;;; space ;;; Type: ;;; string ;;; Contents: ;;; A space. ;;; Notes: ;;; Included primarily for readability (define space " ") ;;; Name: ;;; period ;;; Type: ;;; string ;;; Contents: ;;; The period mark. ;;; Notes: ;;; Included primarily for readability (define period ".") ;;; Name: ;;; exclamation-point ;;; Type: ;; String ;;; Contents: ;;; The exclamation point punctuation mark. ;;; Notes: ;;; Included primarily for readability. (define exclamation-point "!") ; +--------------------+---------------------------------------------- ; | Primary Procedures | ; +--------------------+ ;;; Procedure: ;;; sentence ;;; Parameters: ;;; (none) ;;; Purpose: ;;; Generates a "random" sentence. ;;; Produces: ;;; sent, a string. ;;; Preconditions: ;;; The parts-of-speech files are available and in the correct format. (define sentence (lambda () (string-append (noun-phrase) space (generate-part-of-speech 'tverb) space (noun-phrase) period))) (define noun-phrase (lambda () (string-append (generate-part-of-speech 'article) space (generate-part-of-speech 'adjective) space (generate-part-of-speech 'noun)))) ;;; Procedure: ;;; generate-part-of-speech ;;; Parameters: ;;; part, a symbol ;;; Purpose: ;;; Generates a "random" instance of the given part of speech. ;;; Produces: ;;; sample, a string ;;; Preconditions: ;;; part is 'adjective, 'article, 'noun, or 'verb (define generate-part-of-speech (lambda (part) (if (member part parts-of-speech) (random-structure (part->filename part)) (string-append "ERROR[" (value->string part) "]")))) ;;; Procedure: ;;; part->filename ;;; Parameters: ;;; part, a symbol that represents a part of speech ;;; Purpose: ;;; Determine the name of the file that contains entries for the given ;;; part of speech. ;;; Produces: ;;; fname, a string ;;; Preconditions: ;;; part is 'adjective, 'article, 'noun, or 'tverb ;;; Postconditions: ;;; fname is a file of the appropriate format. (define part->filename (lambda (part) (string-append root (symbol->string part) "s"))) ; +--------------------+---------------------------------------------- ; | Utility Procedures | ; +--------------------+ ;;; Procedure: ;;; check-words-file ;;; Parameters: ;;; fname, the name of a Scheme file. ;;; Purpose: ;;; Scans through the named file and reports on any errors it ;;; finds. ;;; Produces: ;;; ok?, a Boolean ;;; Preconditions: ;;; fname names a valid file. ;;; Postconditions: ;;; If each line of the named file is of the appropriate form and ;;; the sum of the frequencies is 1000, returns #t. ;;; Otherwise, prints a report of errors and returns #f. (define check-words-file (letrec ((kernel (lambda (port status count) (let ((val (read port))) (if (eof-object? val) ; When we hit the end of the file, we close ; the port and check to see that the sum ; of frequencies is 1000. (begin (close-input-port port) (if (= count 1000) status (begin (display "ERROR: Sum of frequencies = ") (display count) (newline) #f))) ; For other values, we make sure that they ; have an appropriate form and then recurse. (cond ((not (list? val)) (display "ERROR: Invalid entry: ") (display val) (newline) (kernel port #f count)) ((or (null? val) (null? (cdr val)) ) (display "ERROR: Entries must have at least two elements: ") (display val) (newline) (kernel port #f count)) ((not (string? (car val))) (display "ERROR: First value not a string: ") (display val) (newline) (kernel port #f count)) ((not (integer? (cadr val))) (display "ERROR: Second value not an integer: ") (display val) (newline) (kernel port #f count)) (else (kernel port status (+ (cadr val) count))))))))) (lambda (fname) (if (not (file-exists? fname)) (begin (display (string-append "The file " fname " does not exist.")) (newline) #f) (kernel (open-input-file fname) #t 0))))) ;;; Procedure: ;;; value->string ;;; Parameters: ;;; value, a Scheme value. ;;; Purpose: ;;; Convert value to a string. ;;; Produces: ;;; valuestr, a string ;;; Preconditions: ;;; (none) ;;; Postconditions: ;;; valuestr, if displayed to a file, will then be read again as something ;;; equivalent to value. (define value->string (let ((tmpfile (string-append root "tmp"))) (lambda (value) ; Write to the file. (if (file-exists? tmpfile) (delete-file tmpfile)) (let ((port (open-output-file tmpfile))) (write value port) (newline) (close-output-port port)) ; Read back from the file. (let* ((port (open-input-file tmpfile)) (valuestr (read-line port))) (close-input-port port) valuestr)))) ;;; Procedure: ;;; read-line ;;; Parameters: ;;; source, an input port ;;; Purpose: ;;; Read one line of input from a source and return that line ;;; as a string. ;;; Produces: ;;; line, a string ;;; Preconditions: ;;; The source is open for reading. [Unverified] ;;; Postconditions: ;;; Has read characters from the source (thereby affecting ;;; future calls to read-char and peek-char). ;;; line represents the characters in the file from the ;;; "current" point at the time read-line was called ;;; until the first end-of-line or end-of-file character. ;;; line does not contain a newline. (define read-line (lambda (source) ; Read all the characters remaining on the line and ; then convert them to a string. (list->string (read-line-of-chars source)))) ;;; Procedure: ;;; read-line-of-chars ;;; Parameters: ;;; source, an input port ;;; Purpose: ;;; Read one line of input from a source and return that line ;;; as a list of characters. ;;; Produces: ;;; chars, a list of characters. ;;; Preconditions: ;;; The source is open for reading. [Unverified] ;;; Postconditions: ;;; Has read characters from the source (thereby affecting ;;; future calls to read-char and peek-char). ;;; chars represents the characters in the file from the ;;; "current" point at the time read-line was called ;;; until the first end-of-line or end-of-file character. ;;; chars does not contain a newline. (define read-line-of-chars (lambda (source) ; Get the next character. (let ((next (read-char source))) ; If we're at the end of the line or the end of the file, ; then there are no more characters, so return the empty list. (if (or (eof-object? next) (char=? next #\newline)) null ; Otherwise, read the remaining characters and shove this ; one on the front of the list. (cons next (read-line-of-chars source)))))) (define build (letrec ((build-all (lambda (structures) (if (null? structures) null (cons (build (car structures)) (build-all (cdr structures))))))) (lambda (structure) (cond ; Strings we accept verbatim ((string? structure) structure) ; Symbols we translate accordingly ((symbol? structure) (build-from-symbol structure)) ; Lists require us to build the arguments recursively and then ; apply the procedure (else (apply-procedure (car structure) (build-all (cdr structure)))))))) (define build-from-symbol (lambda (symbol) (cond ((eq? symbol 'adjective) (generate-part-of-speech 'adjective)) ((eq? symbol 'adjective1) (generate-part-of-speech 'adjective1)) ((eq? symbol 'adjective2) (generate-part-of-speech 'adjective2)) ((eq? symbol 'article) (generate-part-of-speech 'article)) ((eq? symbol 'cr) "\n") ((eq? symbol 'exclamation) (generate-part-of-speech 'exclamation)) ((eq? symbol 'five-syllable) (generate-part-of-speech 'five-syllable)) ((eq? symbol 'intransitive-verb) (generate-part-of-speech 'iverb)) ((eq? symbol 'name) (generate-part-of-speech 'name)) ((eq? symbol 'noun) (generate-part-of-speech 'noun)) ((eq? symbol 'noun2) (generate-part-of-speech 'noun2)) ((eq? symbol 'noun-phrase) (generate-part-of-speech 'noun-phrase)) ((eq? symbol 'sentence) (generate-part-of-speech 'sentence)) ((eq? symbol 'transitive-verb) (generate-part-of-speech 'tverb)) ((eq? symbol 'period) ".") ((eq? symbol 'space) " ") (else (symbol->string symbol))))) ;;; Procedure: ;;; structure? ;;; Parameters: ;;; val, a Scheme value ;;; Purpose: ;;; Determine whether lst is a valid structure. ;;; Produces: ;;; ok?, a boolean ;;; Preconditions: ;;; (none) ;;; Postconditions: ;;; If ok? is #t, then val is either a string, a symbol, or ;;; a list of structures. ;;; If ok? is #f, it is something else. (define structure? (lambda (val) (or (null? val) (string? val) (symbol? val) (and (list? val) (structure? (car val)) (structure? (cdr val)))))) (define apply-procedure (lambda (proc params) (cond ((eq? proc 'join) (join params)) ((eq? proc 'capitalize) (capitalize (car params))) ((eq? proc 'indefinite-article) (indefinite-article (car params))) (else (symbol->string proc))))) (define capitalize (lambda (str) (string-append (string (char-upcase (string-ref str 0))) (substring str 1 (string-length str))))) (define join (lambda (strings) (if (null? strings) "" (string-append (car strings) (join (cdr strings)))))) (define noun-phrase (lambda () (let ((choice (random 100))) (cond ; 50% of the time, we use the article adjective noun structure ((< choice 50) (build '(join article space adjective space noun))) ; 25% of the time, we use the article noun structure ((< choice 75) (build '(join article space noun))) ; 15% of the time, we use a name ((< choice 90) (build 'name)) ; 10% of the time, use a possessive (else (build '(join name "'s" space noun))))))) (define random-structure (lambda (fname) (let ((port (open-input-file fname)) (rnd (random 1000))) (letrec ((kernel (lambda (num) (let ((entry (read port))) (if (< num (cadr entry)) (begin (close-input-port port) (build (car entry))) (kernel (- num (cadr entry)))))))) (kernel rnd))))) ;;; Procedure: ;;; check-structure-file ;;; Parameters: ;;; fname, the name of a Scheme file. ;;; Purpose: ;;; Scans through the named file and reports on any errors it ;;; finds. ;;; Produces: ;;; ok?, a Boolean ;;; Preconditions: ;;; fname names a valid file. ;;; Postconditions: ;;; If each line of the named file is of the appropriate form and ;;; the sum of the frequencies is 1000, returns #t. ;;; Otherwise, prints a report of errors and returns #f. (define check-structure-file (letrec ((kernel (lambda (port status count) (let ((val (read port))) (if (eof-object? val) ; When we hit the end of the file, we close ; the port and check to see that the sum ; of frequencies is 1000. (begin (close-input-port port) (if (= count 1000) status (begin (display "ERROR: Sum of frequencies = ") (display count) (newline) #f))) ; For other values, we make sure that they ; have an appropriate form and then recurse. (cond ((not (list? val)) (display "ERROR: Invalid entry: ") (display val) (newline) (kernel port #f count)) ((or (null? val) (null? (cdr val)) ) (display "ERROR: Entries must have at least two elements: ") (display val) (newline) (kernel port #f count)) ((not (structure? (car val))) (display "ERROR: First value not a structure: ") (display val) (newline) (kernel port #f count)) ((not (integer? (cadr val))) (display "ERROR: Second value not an integer: ") (display val) (newline) (kernel port #f count)) (else (kernel port status (+ (cadr val) count))))))))) (lambda (fname) (if (not (file-exists? fname)) (begin (display (string-append "The file " fname " does not exist.")) (newline) #f) (kernel (open-input-file fname) #t 0))))) (define indefinite-article (lambda (str) (if (member (string-ref str 0) (list #\a #\e #\i #\o #\u)) (string-append "an " str) (string-append "a " str)))) ; The following lines were added to suggest to someone how they might select ; nouns with a particular number of syllables. It is, however, probably ; easier to create files like "noun1s" with one-syllable nouns and ; "noun2s" with two-syllable nouns. ;;; Procedure: ;;; noun ;;; Parameters: ;;; syllables, an integer ;;; Purpose: ;;; Select a "random" noun with the appropriate number of syllables ;;; Produces: ;;; anoun, a string ;;; Preconditions: ;;; The file nouns must contain at least one noun with the specified ;;; number of syllables. ;;; Postconditions: ;;; anoun is a word listed in nouns. ;;; It is difficult to predict which element anoun is. ;;; anoun has syllables syllables. (define noun (lambda (syllables) (let ((entry (random-entry "nouns"))) (display "Trying ") (display entry) (newline) (if (= (caddr entry) syllables) (car entry) (noun syllables))))) (define random-entry (lambda (fname) (let ((port (open-input-file fname)) (rnd (random 1000))) (letrec ((kernel (lambda (num) (let ((entry (read port))) (if (< num (cadr entry)) (begin (close-input-port port) entry) (kernel (- num (cadr entry)))))))) (kernel rnd))))) (define three-by-five (lambda () (display (build 'five-syllable)) (newline) (display (build 'five-syllable)) (newline) (display (build 'five-syllable)) (newline)))