package username.textanalysis;

import java.io.BufferedReader;
import java.io.IOException;
import java.util.Collections;
import java.util.Comparator;
import java.util.Hashtable;
import java.util.Vector;

/**
 * Something that performs simple analyses on collections of words
 * read from a BufferedReader.
 *
 * @author Your Name Here
 * @author Andy Analyst
 * @author Anna Analyst
 * @author Samuel A. Rebelsky
 * @version 1.0 of April 2006
 */
public class Analyst {
  /** The maximum number of words reported by an analysis. */
  private static final int TOP_COUNT = 20;

  /**
   * Compute the twenty most frequently appearing words in the
   * sequence of words represented by 'words'.
   *
   * @param words
   *   A reader that supplies one word per line.  It is read until
   *   end of input (or until reading fails) and is not closed here.
   * @return frequent
   *   An array of frequently occurring words and their
   *   frequency of occurrence.
   * @pre
   *   Each line of words contains a single word.
   *   At least twenty different words appear in words.
   * @post
   *   frequent[0].word is the most frequently occurring word
   *   frequent[1].word is the second most frequently occurring word
   *   ...
   *   frequent[19].word is the 20th most frequently occurring word
   *   frequent[i].frequency =
   *     (# of occurrences of frequent[i].word / total-number-of-words)
   *   If fewer than twenty different words were read, frequent has
   *   one entry per different word.
   *   If reading fails with an IOException, the result describes
   *   only the words read before the failure.
   */
  public static WordFrequency[] analyze(BufferedReader words) {
    // The dictionary that stores our word/count pairs.
    Hashtable<String, Counter> dict = new Hashtable<String, Counter>();
    // The counter that keeps track of the total number of words.
    Counter total = new Counter();
    // A vector to keep track of unique words, in order of first
    // appearance (so that we can easily iterate these words).
    // (Yes, the built-in Hashtable class provides a way to get this
    // list, but it's helpful to support other kinds of hash tables.)
    Vector<String> unique = new Vector<String>();
    // A word read from the input.
    String word;

    try {
      while ((word = words.readLine()) != null) {
        total.increment();
        // Hashtable.get returns null for a key it has not seen, so
        // test for that directly rather than catching the resulting
        // exception, which would also hide unrelated failures.
        Counter count = dict.get(word);
        if (count == null) {
          dict.put(word, new Counter(1));
          unique.add(word);
        } else {
          count.increment();
        } // if/else
      } // while
    } catch (IOException ignored) {
      // Deliberate best effort: if we get an IOException, give up on
      // reading and analyze whatever was read so far.
    } // try/catch

    return mostFrequent(unique, dict, (double) total.get());
  } // analyze(BufferedReader)

  /**
   * Compute the twenty most frequently appearing words, given
   * a list of words, a dictionary of word/frequency counts,
   * and the total number of words.
   *
   * @param unique
   *   The different words to rank.  This vector is not modified.
   * @param dict
   *   A table that maps each word to the counter of its occurrences.
   * @param total
   *   The total number of words; each count is divided by this value.
   * @return
   *   The words with the highest counts, in order of decreasing
   *   count, each paired with (count / total).  Words with equal
   *   counts keep their relative order from unique.  The array has
   *   twenty entries, or unique.size() entries if that is smaller.
   * @pre
   *   Every word in unique has an entry in dict.
   */
  public static WordFrequency[] mostFrequent(Vector<String> unique,
      Hashtable<String, Counter> dict, double total) {
    // Rank a copy so that the caller's vector keeps its order.
    Vector<String> ranked = new Vector<String>(unique);
    final Hashtable<String, Counter> counts = dict;
    // Collections.sort is stable, so ties stay in their original order.
    Collections.sort(ranked, new Comparator<String>() {
      @Override
      public int compare(String left, String right) {
        // Arguments are swapped to get decreasing order of count.
        return Double.compare(counts.get(right).get(),
                              counts.get(left).get());
      } // compare(String, String)
    });

    int size = Math.min(TOP_COUNT, ranked.size());
    WordFrequency[] frequent = new WordFrequency[size];
    for (int i = 0; i < size; i++) {
      String word = ranked.get(i);
      frequent[i] = new WordFrequency(word, dict.get(word).get() / total);
    } // for
    return frequent;
  } // mostFrequent
} // class Analyst