diff --git a/htroot/api/ymarks/get_treeview.java b/htroot/api/ymarks/get_treeview.java index e7bf2f8a3..b3558b834 100644 --- a/htroot/api/ymarks/get_treeview.java +++ b/htroot/api/ymarks/get_treeview.java @@ -3,12 +3,14 @@ import java.net.MalformedURLException; import java.util.Date; import java.util.EnumMap; import java.util.Iterator; -import java.util.Map; +import java.util.List; + import net.yacy.cora.protocol.RequestHeader; import net.yacy.kelondro.blob.Tables; import net.yacy.kelondro.index.RowSpaceExceededException; import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.util.DateFormatter; +import de.anomic.data.YMarkKeyValueEntry; import de.anomic.data.YMarkTables; import de.anomic.data.userDB; import de.anomic.data.YMarkTables.METADATA; @@ -176,16 +178,13 @@ public class get_treeview { } } else if (isWordCount) { try { - final Map words = YMarkTables.getWordFrequencies(post.get(ROOT).substring(2), sb.loader); - final Iterator iter = words.keySet().iterator(); + final List> list = YMarkTables.getWordFrequencies(post.get(ROOT).substring(2), sb.loader, 10); + final Iterator> iter = list.iterator(); while (iter.hasNext()) { - String key = iter.next(); - int value = words.get(key); - if(value > 5 && value < 15) { - prop.put("folders_"+count+"_foldername",""+key+": [" + value + "]"); - putProp(count, "meta"); - count++; - } + YMarkKeyValueEntry e = iter.next(); + prop.put("folders_"+count+"_foldername",""+e.getKey()+": [" + e.getValue() + "]"); + putProp(count, "meta"); + count++; } count--; prop.put("folders_"+count+"_comma", ""); diff --git a/source/de/anomic/data/YMarkKeyValueEntry.java b/source/de/anomic/data/YMarkKeyValueEntry.java new file mode 100644 index 000000000..7c70caa4d --- /dev/null +++ b/source/de/anomic/data/YMarkKeyValueEntry.java @@ -0,0 +1,66 @@ +package de.anomic.data; + +/** Mutable key/value pair whose natural order follows the value and whose equals() looks only at the key. + * @author apfelmaennchen + * + * @param <K> type of the key + * @param <V> type of the value + */ +public class YMarkKeyValueEntry,V extends Comparable> extends Object implements Comparable> { + + 
private K key; + private V value; + + + public YMarkKeyValueEntry() { + this.key = null; + this.value = null; + } + + public YMarkKeyValueEntry(K key, V value) { + this.key = key; + this.value = value; + } + + /** + * The natural order of objects in this class is determined by their value components
+ * Note: this class has a natural ordering that is inconsistent with equals. + * @see java.lang.Comparable#compareTo(java.lang.Object) + */ + public int compareTo(YMarkKeyValueEntry e) { + return this.value.compareTo(e.value); + } + + /** + * Two objects of this class are considered to be equal, if their keys are equal.
+ * Note: this class has a natural ordering that is inconsistent with equals. NOTE(review): obj is dereferenced without a null check, and no matching hashCode() appears in this class. + */ + @SuppressWarnings("unchecked") + @Override + public boolean equals(Object obj) { + if(this.getClass() == obj.getClass()) + return this.key.equals(((YMarkKeyValueEntry)obj).getKey()); + else return false; + } + + public K getKey() { + return this.key; + } + + public V getValue() { + return this.value; + } + + public void setValue(V value) { + this.value = value; + } + + public void setKey(K key) { + this.key = key; + } + + public void set(K key, V value) { + this.key = key; + this.value = value; + } +} diff --git a/source/de/anomic/data/YMarkTables.java b/source/de/anomic/data/YMarkTables.java index 9f50ea6e3..ff8e5b775 100644 --- a/source/de/anomic/data/YMarkTables.java +++ b/source/de/anomic/data/YMarkTables.java @@ -4,7 +4,9 @@ import java.io.IOException; import java.net.MalformedURLException; import java.text.ParseException; import java.text.SimpleDateFormat; +import java.util.ArrayList; import java.util.Collection; +import java.util.Collections; import java.util.Date; import java.util.EnumMap; import java.util.EnumSet; @@ -12,6 +14,7 @@ import java.util.Enumeration; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; +import java.util.List; import java.util.Map; import net.yacy.document.Condenser; import net.yacy.document.Document; @@ -452,7 +455,7 @@ public class YMarkTables { metadata.put(METADATA.MIMETYPE, document.dc_format()); metadata.put(METADATA.LANGUAGE, document.dc_language()); metadata.put(METADATA.CHARSET, document.getCharset()); - metadata.put(METADATA.SIZE, String.valueOf(document.getTextLength())); + // metadata.put(METADATA.SIZE, String.valueOf(document.getTextLength())); } } catch (IOException e) { Log.logException(e); @@ -462,12 +465,13 @@ public class YMarkTables { return metadata; } - public static Map getWordFrequencies(final String url, final LoaderDispatcher loader) throws MalformedURLException { - final Map words = new 
HashMap(); - final DigestURI u = new DigestURI(url); + public static List> getWordFrequencies(final String url, final LoaderDispatcher loader, final int top) throws MalformedURLException { + final List> list = new ArrayList>(); + final DigestURI u = new DigestURI(url); Response response = null; int wordcount = 0; String sentence, token; + final YMarkKeyValueEntry entry = new YMarkKeyValueEntry(); try { response = loader.load(loader.request(u, true, false), CrawlProfile.CacheStrategy.IFEXIST, Long.MAX_VALUE); final Document document = Document.mergeDocuments(response.url(), response.getMimeType(), response.parse()); @@ -481,12 +485,12 @@ public class YMarkTables { token = tokens.nextElement(); if (token.length() > 2) { wordcount++; - if(words.containsKey(token)) { - int count = words.get(token); - count++; - words.put(token, count); + entry.set(token.toLowerCase(), 1); + if(list.contains(entry)) { + int v = list.get(list.indexOf(entry)).getValue() + 1; + list.get(list.indexOf(entry)).setValue(v); } else { - words.put(token, 1); + list.add(new YMarkKeyValueEntry(token.toLowerCase(), 1)); } } } @@ -499,6 +503,14 @@ public class YMarkTables { } catch (Failure e) { Log.logException(e); } - return words; + Collections.sort(list); + float c = list.size(); + Log.logInfo(YMarkTables.BOOKMARKS_LOG, "size: "+c); + int end = (int) (c*0.9); + int start = end - top; + if (start < 0) + start = 0; + Log.logInfo(YMarkTables.BOOKMARKS_LOG, "start: "+start+" end: "+end); + return list.subList(start,end); } }