|
||||||||||
| PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
| SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD | |||||||||
java.lang.Object
  benchmark.ClusterableReuters
public class ClusterableReuters
| Constructor Summary | |
|---|---|
ClusterableReuters(int id,
java.lang.String text,
VectorManager vm)
|
|
| Method Summary | |
|---|---|
void |
addTermSetCount(Phrase termSet,
int n)
This document should record how frequently this termSet occurred |
void |
checkSourceExists()
Throw an exception if this file won't be cluster-able |
static java.lang.String |
clean(java.lang.String s)
|
int |
compareTo(ClusterDoc arg0)
|
void |
destroyLocalDoc()
After finding how often all the phrases are in this doc, this method should allow the supporting document to be released to free up memory. |
java.lang.String[][] |
getFixedWordSentences()
Each String should be fixed by VectorManager before being returned |
int |
getId()
|
int[][] |
getIdxSentences(VectorManager vm)
Each entry represents the integer. |
int |
getNumInstancesOfTermSet(Phrase s)
Each document should get a unique id |
java.lang.String[][] |
getSentences()
|
double |
getTermSetsSupported()
|
java.lang.String |
getText()
|
java.lang.String |
getTopic()
|
boolean |
isJunkPhrase(java.lang.String phrase)
Added to allow differentiation between phrases of scientific articles and general search results |
void |
loadWindowedDoc()
Initially, the idea was to support proximity windows (eg. |
void |
setTopic(java.lang.String topic)
|
| Methods inherited from class java.lang.Object |
|---|
equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
| Constructor Detail |
|---|
public ClusterableReuters(int id,
java.lang.String text,
VectorManager vm)
| Method Detail |
|---|
public void addTermSetCount(Phrase termSet,
int n)
ClusterDoc
addTermSetCount in interface ClusterDoc
public int getNumInstancesOfTermSet(Phrase s)
ClusterDoc
getNumInstancesOfTermSet in interface ClusterDoc
public double getTermSetsSupported()
getTermSetsSupported in interface ClusterDoc
public boolean isJunkPhrase(java.lang.String phrase)
ClusterDoc
isJunkPhrase in interface ClusterDoc
phrase - Space-separated words
public void checkSourceExists()
throws java.io.FileNotFoundException
ClusterDoc
checkSourceExists in interface ClusterDoc
java.io.FileNotFoundException
public java.lang.String[][] getFixedWordSentences()
throws java.io.FileNotFoundException,
java.io.IOException
ClusterDoc
getFixedWordSentences in interface ClusterDoc
java.io.FileNotFoundException
java.io.IOException
public int[][] getIdxSentences(VectorManager vm)
ClusterDoc
getIdxSentences in interface ClusterDoc
public java.lang.String[][] getSentences()
throws java.io.FileNotFoundException,
java.io.IOException
getSentences in interface ClusterDoc
java.io.FileNotFoundException
java.io.IOException
public void loadWindowedDoc()
ClusterDoc
loadWindowedDoc in interface ClusterDoc
public void destroyLocalDoc()
ClusterDoc
destroyLocalDoc in interface ClusterDoc
public int compareTo(ClusterDoc arg0)
compareTo in interface java.lang.Comparable<ClusterDoc>
public java.lang.String getText()
public static java.lang.String clean(java.lang.String s)
public int getId()
public java.lang.String getTopic()
public void setTopic(java.lang.String topic)
|
||||||||||
| PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
| SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD | |||||||||