|
||||||||||
| PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
| SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD | |||||||||
java.lang.Object
  cluster.TreeHelper
public class TreeHelper
Provides many functions for analyzing the tree, creating the hierarchy, pruning the tree, etc.
| Constructor Summary | |
|---|---|
TreeHelper()
|
|
| Method Summary | |
|---|---|
static void |
calculateMemberships(java.util.List<Phrase> clusters,
java.util.List<? extends ClusterDoc> files)
Assigns all documents into the hierarchy, following certain limitations and goals: the number of clusters per doc is at most 4; try to place documents in smaller clusters. |
static java.util.List<Phrase> |
createClusterTemplate(java.util.List<? extends ClusterDoc> files,
java.util.List<Phrase> terms)
|
static java.util.List<Phrase> |
createClusterTemplateWithCatBoost(java.util.List<? extends ClusterDoc> files,
java.util.List<Phrase> terms,
int cat)
This makes the hierarchy and defines the terms for each node. If using snippets or abstracts, it may be possible to just use this. |
static void |
createHierarchyByCrossSimilarity(java.util.List<Phrase> allClusters)
allClusters will be reduced to only the root clusters, but the tree is traversable by looking at their child nodes |
static double[] |
genFreqList(int idx,
VectorManager vm,
java.util.List<TestDoc> docs)
|
static int |
getCoverSize(java.util.List<Phrase> clusters)
Returns the total number of unique documents that are present somewhere in the tree |
static java.util.List<Phrase> |
getFlatView(java.util.List<Phrase> clusters)
Uses a queue to traverse the tree and return everything in one list. The parent-child relationships are preserved in the returned list. |
static void |
mergeClusters(java.util.List<Phrase> rootClusters)
Merges the clusters together via different methods |
static void |
printClusters(java.util.List<Phrase> clusters,
int depth)
|
static void |
printTotalClusterSize(java.util.List<Phrase> clusters)
|
static void |
pruneTree(java.util.List<Phrase> rootClusters,
java.util.List<? extends ClusterDoc> files)
|
static void |
removeNonDescriptiveClusters(java.util.List<Phrase> clusters)
|
static void |
removeSingleRoots(java.util.List<Phrase> rootClusters,
int necessaryCover,
int sufficientCover)
Remove roots that either have no children or whose cover is less than necessaryCover. |
static void |
removeUselessLeaves(java.util.List<Phrase> clusters)
Example: given ":C elegans using(341)" and ":against C elegans(25)", remove the latter, because "against" is a stopword and "C elegans" was already found in the parent. |
static void |
restrictDepth(java.util.List<Phrase> rootLayer,
int maxd)
This method will reduce the depth of the tree by bringing up children as needed |
static void |
showPhrases(java.util.List<Phrase> clusters)
Prints the tree out to screen |
static void |
sortByNumChildren(java.util.List<Phrase> clusters)
|
static double |
stdDev(double[] vals)
Calculates standard deviation |
| Methods inherited from class java.lang.Object |
|---|
equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
| Constructor Detail |
|---|
public TreeHelper()
| Method Detail |
|---|
public static java.util.List<Phrase> createClusterTemplateWithCatBoost(java.util.List<? extends ClusterDoc> files,
java.util.List<Phrase> terms,
int cat)
throws java.io.IOException
files - terms - cat -
java.io.IOException
public static java.util.List<Phrase> createClusterTemplate(java.util.List<? extends ClusterDoc> files,
java.util.List<Phrase> terms)
throws java.io.IOException
java.io.IOException
public static void showPhrases(java.util.List<Phrase> clusters)
clusters -
public static void createHierarchyByCrossSimilarity(java.util.List<Phrase> allClusters)
allClusters -
public static void calculateMemberships(java.util.List<Phrase> clusters,
java.util.List<? extends ClusterDoc> files)
throws java.io.IOException
clusters - this is the output of createHierarchy
files - all files that should be fit onto this hierarchy
java.io.IOException
public static java.util.List<Phrase> getFlatView(java.util.List<Phrase> clusters)
clusters -
public static void pruneTree(java.util.List<Phrase> rootClusters,
java.util.List<? extends ClusterDoc> files)
public static void printClusters(java.util.List<Phrase> clusters,
int depth)
public static void printTotalClusterSize(java.util.List<Phrase> clusters)
public static int getCoverSize(java.util.List<Phrase> clusters)
clusters -
public static void mergeClusters(java.util.List<Phrase> rootClusters)
rootClusters -
public static void removeUselessLeaves(java.util.List<Phrase> clusters)
clusters -
public static void removeNonDescriptiveClusters(java.util.List<Phrase> clusters)
public static void removeSingleRoots(java.util.List<Phrase> rootClusters,
int necessaryCover,
int sufficientCover)
rootClusters -
necessaryCover - set to 0 to ignore this parameter
sufficientCover - set to 0 to ignore this parameter
public static double[] genFreqList(int idx,
VectorManager vm,
java.util.List<TestDoc> docs)
public static double stdDev(double[] vals)
vals -
public static void restrictDepth(java.util.List<Phrase> rootLayer,
int maxd)
maxd - Start maxd at 1 (i.e. 1 means no nesting at all)
public static void sortByNumChildren(java.util.List<Phrase> clusters)
|
||||||||||
| PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
| SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD | |||||||||