edu.iastate.jtm.util
Class VocabularyCounter

java.lang.Object
  extended by edu.iastate.jtm.util.VocabularyCounter

public abstract class VocabularyCounter
extends java.lang.Object

Author:
Jing Ding edu.iastate.jtm.util jtm

Nested Class Summary
static class VocabularyCounter.TfDf
           
 
Field Summary
protected static int DF
           
protected  int grandDocumentCount
           
protected  int grandTermCount
           
protected static int TDF
           
protected static int TF
           
protected  java.util.Map vocabulary
           
 
Constructor Summary
VocabularyCounter()
           
 
Method Summary
abstract  void count(java.io.File input)
          Count word frequency in input.
 int getDocumentCount(java.lang.Object term)
           
 double getDocumentFrequency(java.lang.Object term)
           
 int getGrandDocumentCount()
           
 int getGrandTermCount()
           
 double getIdf(java.lang.Object term)
           
 VocabularyCounter.TfDf[] getSortedFrequency()
           
 java.lang.Object[] getSortedVocabulary()
           
 int getTermCount(java.lang.Object term)
           
 double getTermFrequency(java.lang.Object term)
           
 VocabularyCounter.TfDf getTfDf(java.lang.Object term)
           
 java.util.Set getVocabulary()
           
 int getVocabularySize()
           
 void register(java.lang.Object term, int which)
          Add a new word to the vocabulary, or update count of a registered word.
 void remove(java.lang.Object term)
           
 void save(java.io.File output)
          Save word count to output.
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Field Detail

vocabulary

protected java.util.Map vocabulary

grandTermCount

protected int grandTermCount

grandDocumentCount

protected int grandDocumentCount

TF

protected static final int TF
See Also:
Constant Field Values

DF

protected static final int DF
See Also:
Constant Field Values

TDF

protected static final int TDF
See Also:
Constant Field Values
Constructor Detail

VocabularyCounter

public VocabularyCounter()
Method Detail

register

public void register(java.lang.Object term,
                     int which)
Add a new word to the vocabulary, or update count of a registered word.

Parameters:
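term - the word to add to the vocabulary, or the already-registered word whose count is to be updated
which - selects which count to update; presumably one of the TF, DF, or TDF constants (not stated in the original documentation)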

save

public void save(java.io.File output)
          throws java.io.IOException
Save word count to output.

Parameters:
output - the file to which the word counts are saved
Throws:
java.io.IOException

getGrandTermCount

public int getGrandTermCount()

getGrandDocumentCount

public int getGrandDocumentCount()

getTermCount

public int getTermCount(java.lang.Object term)

getDocumentCount

public int getDocumentCount(java.lang.Object term)

getTermFrequency

public double getTermFrequency(java.lang.Object term)

getDocumentFrequency

public double getDocumentFrequency(java.lang.Object term)

getIdf

public double getIdf(java.lang.Object term)

getTfDf

public VocabularyCounter.TfDf getTfDf(java.lang.Object term)

remove

public void remove(java.lang.Object term)

getVocabulary

public java.util.Set getVocabulary()

getVocabularySize

public int getVocabularySize()

getSortedVocabulary

public java.lang.Object[] getSortedVocabulary()

getSortedFrequency

public VocabularyCounter.TfDf[] getSortedFrequency()

count

public abstract void count(java.io.File input)
                    throws java.io.IOException
Count word frequency in input.

Parameters:
input - the file whose word frequencies are counted
Throws:
java.io.FileNotFoundException
java.io.IOException