;;; File: ;;; newtextgen.ss ;;; Authors: ;;; Samuel A. Rebelsky ;;; Janet L. N. Davis ;;; YOUR NAME HERE ;;; Version: ;;; 1.1.1 of March 2007 ;;; Summary: ;;; A variety of procedures to support generate of English-like sentences. ;;; Contents: ;;; Configuration variables: ;;; root - a string that names the starting point for files ;;; Punctuation: ;;; space - a space ;;; period - a period ;;; exclamation-point - same. ;;; Structural Procedures: ;;; (sentence) - generate a "random" sentence ;;; (noun-phrase) - generate a "random" noun phrase ;;; Word Procedures: ;;; (generate-part-of-speech part) - generate a random element ;;; of that part of speech, where part is 'adjective, 'article, ;;; 'noun, or 'tverb. ;;; Helper Procedures: ;;; (check-words-file fname) - Make sure that the given file matches ;;; the structure we expect. ;;; (random-word fname) - Select a word "randomly" from the named ;;; file. ;;; (part->filename part) - Convert a part of speech to a file ;;; name. ;;; (value->string) - Convert a Scheme value to a string. ; +------------------+------------------------------------------------ ; | Design Decisions | ; +------------------+ ; Each line of the "parts of speech" files contains a sequence of ; Scheme values of the form ; (ENTRY FREQUENCY OTHER-INFO) ; * ENTRY is a string, in which case it represents one example of the given ; part of speech. ; * FREQUENCY gives the number of times the word appears in 1000 words. ; * OTHER-INFO depends on the application. ; +---------------+--------------------------------------------------- ; | Configuration | ; +---------------+ ;;; Name: ;;; root ;;; Type: ;;; string ;;; Contents: ;;; The name of the root directory of the writer files. ;;; Sample Values: ;;; On a Mac: /users/USERNAME/ ;;; On a Linux System: /home/USERNAME/TextGeneration/ ;;; Note: ;;; Make sure to end the string with a slash. (define root "/home/rebelsky/Web/Courses/CS151/2007S/Examples/TextGeneration/") ; +-----------+------------------------------------------------------- ; | Variables | ; +-----------+ ;;; Name: ;;; space ;;; Type: ;;; string ;;; Contents: ;;; A space. ;;; Notes: ;;; Included primarily for readability (define space " ") ;;; Name: ;;; period ;;; Type: ;;; string ;;; Contents: ;;; The period mark. ;;; Notes: ;;; Included primarily for readability (define period ".") ;;; Name: ;;; exclamation-point ;;; Type: ;; String ;;; Contents: ;;; The exclamation point punctuation mark. ;;; Notes: ;;; Included primarily for readability. (define exclamation-point "!") ; +--------------------+---------------------------------------------- ; | Primary Procedures | ; +--------------------+ ;;; Procedure: ;;; sentence ;;; Parameters: ;;; (none) ;;; Purpose: ;;; Generates a "random" sentence. ;;; Produces: ;;; sent, a string. ;;; Preconditions: ;;; The partsoof-speech files are available and in the correct format. (define sentence (lambda () (string-append (noun-phrase) space (generate-part-of-speech 'tverb) space (noun-phrase) period))) (define noun-phrase (lambda () (string-append (generate-part-of-speech 'article) space (generate-part-of-speech 'adjective) space (generate-part-of-speech 'noun)))) ;;; Procedure: ;;; generate-part-of-speech ;;; Parameters: ;;; part, a symbol ;;; Purpose: ;;; Generates a "random" instance of the given part of speech. ;;; Produces: ;;; sample, a string ;;; Preconditions: ;;; part is 'adjective, 'article, 'noun, or 'verb (define generate-part-of-speech (lambda (part) (random-word (part->filename part)))) ;;; Procedure: ;;; part->filename ;;; Parameters: ;;; part, a symbol that represents a part of speech ;;; Purpose: ;;; Determine the name of the file that contains entries for the given ;;; part of speech. ;;; Produces: ;;; fname, a string ;;; Preconditions: ;;; part is 'adjective, 'article, 'noun, or 'tverb ;;; Postconditions: ;;; fname is a file of the appropriate format. (define part->filename (lambda (part) (let ((pos (if (string? part) part (symbol->string part)))) (string-append root pos "s.txt")))) ;;; Procedure: ;;; random-word ;;; Parameters: ;;; fname, a string ;;; Purpose: ;;; To select a word unpredictably from the file named by fname. ;;; Produces: ;;; word, a string ;;; Preconditions: ;;; fname contains a sequence of lines of the form ;;; (ENTRY FREQUENCY OTHERINFO) ;;; The sum of frequencies is 1000. ;;; Postconditions: ;;; word is an entry from fname. ;;; In a long sequence of calls to random-word, word appears about ;;; FREQUENCY/1000 percent of the time. (define random-word (lambda (fname) (let ((port (open-input-file fname)) (rnd (random 1000))) (letrec ((kernel (lambda (wordnum) (let ((entry (read port))) (if (< wordnum (cadr entry)) (begin (close-input-port port) (car entry)) (kernel (- wordnum (cadr entry)))))))) (kernel rnd))))) ; +--------------------+---------------------------------------------- ; | Utility Procedures | ; +--------------------+ ;;; Procedure: ;;; check-words-file ;;; Parameters: ;;; fname, the name of a Scheme file. ;;; Purpose: ;;; Scans through the named file and reports on any errors it ;;; finds. ;;; Produces: ;;; ok?, a Boolean ;;; Preconditions: ;;; fname names a valid file. ;;; Postconditions: ;;; If each line of the named file is of the appropriate form and ;;; the sum of the frequencies is 1000, returns #t. ;;; Otherwise, prints a report of errors and returns #f. (define check-words-file (letrec ((kernel (lambda (port status count) (let ((val (read port))) (if (eof-object? val) ; When we hit the end of the file, we close ; the port and check to see that the sum ; of frequencies is 1000. (begin (close-input-port port) (if (= count 1000) status (begin (display "ERROR: Sum of frequencies = ") (display count) (newline) #f))) ; For other values, we make sure that they ; have an appropriate form and then recurse. (cond ((not (list? val)) (display "ERROR: Invalid entry: ") (display val) (newline) (kernel port #f count)) ((or (null? val) (null? (cdr val)) ) (display "ERROR: Entries must have at least two elements: ") (display val) (newline) (kernel port #f count)) ((not (string? (car val))) (display "ERROR: First value not a string: ") (display val) (newline) (kernel port #f count)) ((not (integer? (cadr val))) (display "ERROR: Second value not an integer: ") (display val) (newline) (kernel port #f count)) (else (kernel port status (+ (cadr val) count))))))))) (lambda (fname) (if (not (file-exists? fname)) (begin (display (string-append "The file " fname " does not exist.")) (newline) #f) (kernel (open-input-file fname) #t 0))))) ;;; Procedure: ;;; value->string ;;; Parameters: ;;; value, a Scheme value. ;;; Purpose: ;;; Convert value to a string. ;;; Produces: ;;; valuestr, a string ;;; Preconditions: ;;; (none) ;;; Postconditions: ;;; valuestr, if displayed to a file, will then be read again as something ;;; equivalent to value. (define value->string (let ((tmpfile (string-append root "tmp"))) (lambda (value) ; Write to the file. (if (file-exists? tmpfile) (delete-file tmpfile)) (let ((port (open-output-file tmpfile))) (write value port) (newline) (close-output-port port)) ; Read back from the file. (let* ((port (open-input-file tmpfile)) (valuestr (read-line port))) (close-input-port port) valuestr)))) ;;; Procedure: ;;; read-line ;;; Parameters: ;;; source, an input port ;;; Purpose: ;;; Read one line of input from a source and return that line ;;; as a string. ;;; Produces: ;;; line, a string ;;; Preconditions: ;;; The source is open for reading. [Unverified] ;;; Postconditions: ;;; Has read characters from the source (thereby affecting ;;; future calls to read-char and peek-char). ;;; line represents the characters in the file from the ;;; "current" point at the time read-line was called ;;; until the first end-of-line or end-of-file character. ;;; line does not contain a newline. (define read-line (lambda (source) ; Read all the characters remaining on the line and ; then convert them to a string. (list->string (read-line-of-chars source)))) ;;; Procedure: ;;; read-line-of-chars ;;; Parameters: ;;; source, an input port ;;; Purpose: ;;; Read one line of input from a source and return that line ;;; as a list of characters. ;;; Produces: ;;; chars, a list of characters. ;;; Preconditions: ;;; The source is open for reading. [Unverified] ;;; Postconditions: ;;; Has read characters from the source (thereby affecting ;;; future calls to read-char and peek-char). ;;; chars represents the characters in the file from the ;;; "current" point at the time read-line was called ;;; until the first end-of-line or end-of-file character. ;;; chars does not contain a newline. (define read-line-of-chars (lambda (source) ; Get the next character. (let ((next (read-char source))) ; If we're at the end of the line or the end of the file, ; then there are no more characters, so return the empty list. (if (or (eof-object? next) (char=? next #\newline)) null ; Otherwise, read the remaining characters and shove this ; one on the front of the list. (cons next (read-line-of-chars source)))))) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (define build (letrec ((build-all (lambda (structures) (if (null? structures) null (cons (build (car structures)) (build-all (cdr structures))))))) (lambda (structure) (cond ; Strings we accept verbatim ((string? structure) structure) ; Symbols we translate accordingly ((symbol? structure) (build-from-symbol structure)) ; Lists require us to build the arguments recursively and then ; apply the procedure (else (apply-procedure (car structure) (build-all (cdr structure)))))))) (define parts (list 'adjective 'article 'exclamation 'iverb 'name 'noun 'tverb)) (define build-from-symbol (lambda (symbol) (cond ((member symbol parts) (generate-part-of-speech symbol)) ((eq? symbol 'period) ".") ((eq? symbol 'space) " ") (else (symbol->string symbol))))) (define apply-procedure (lambda (proc params) (cond ((eq? proc 'join) (join params)) ((eq? proc 'capitalize) (capitalize (car params))) (else (symbol->string proc))))) (define join (lambda (strings) (if (null? strings) "" (string-append (car strings) (join (cdr strings))))))