|
||||||||||
| PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
| SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD | |||||||||
java.lang.Object
  Util.VectorManager
public class VectorManager
Class that keeps track of how often a word occurs in a document or globally.
Used during SVM to create vectors from the words.
| Constructor Summary | |
|---|---|
VectorManager()
|
|
| Method Summary | |
|---|---|
boolean |
containsWord(java.lang.String s)
|
void |
deleteWbid(java.lang.String wbid)
Delete all references to this wbid's document. A way to save memory. |
void |
emptyCounts()
|
static java.lang.String |
fix(java.lang.String s)
Mainly keeps letters, stems the word via the Porter stemming algorithm, and lower-cases it. Also applies some tricks when dealing with hyphens. |
int |
getDocumentCount(int FeatureIdx)
|
int |
getFeatureCount(java.lang.String wbid,
int idx)
|
int |
getFeatureLength()
|
java.lang.String |
getFeatureString(int idx)
|
int |
getIdxFromFeatureToUse(int k)
|
int |
getMaxFeatureCount(java.lang.String wbid)
|
int |
getTotalFeatureCount(int idx)
|
int |
getVocabularySize()
|
boolean |
isVocabularyLocked()
|
boolean |
isWord(java.lang.String s)
|
void |
registerLocally(int wordIndex,
java.lang.String wbid)
|
void |
registerWord(java.lang.String s,
java.lang.String wbid)
Adds this word to VectorManager's count of used words and this document's count |
void |
setVocabularyLocked()
|
int |
wordIndex(java.lang.String origS)
Gives the index for this word |
int |
wordIndexWithoutFix(java.lang.String s)
|
int |
wordIndexWithoutFix(java.lang.String s,
java.lang.String origS)
|
| Methods inherited from class java.lang.Object |
|---|
equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
| Constructor Detail |
|---|
public VectorManager()
| Method Detail |
|---|
public static java.lang.String fix(java.lang.String s)
s -
public boolean isWord(java.lang.String s)
public int wordIndex(java.lang.String origS)
origS -
public int wordIndexWithoutFix(java.lang.String s)
public int wordIndexWithoutFix(java.lang.String s,
java.lang.String origS)
public java.lang.String getFeatureString(int idx)
idx -
public int getFeatureLength()
public boolean containsWord(java.lang.String s)
s - Will not be fixed in this function
public int getDocumentCount(int FeatureIdx)
public int getIdxFromFeatureToUse(int k)
public int getFeatureCount(java.lang.String wbid,
int idx)
public void registerWord(java.lang.String s,
java.lang.String wbid)
public void registerLocally(int wordIndex,
java.lang.String wbid)
public void deleteWbid(java.lang.String wbid)
wbid -
public int getMaxFeatureCount(java.lang.String wbid)
public int getTotalFeatureCount(int idx)
public void emptyCounts()
public int getVocabularySize()
public boolean isVocabularyLocked()
public void setVocabularyLocked()
|
||||||||||
| PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
| SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD | |||||||||