diff --git a/htroot/api/ymarks/get_treeview.java b/htroot/api/ymarks/get_treeview.java
index 33411844a..510f8fcb1 100644
--- a/htroot/api/ymarks/get_treeview.java
+++ b/htroot/api/ymarks/get_treeview.java
@@ -1,18 +1,19 @@
import java.io.IOException;
import java.net.MalformedURLException;
+import java.util.ArrayList;
import java.util.Date;
import java.util.EnumMap;
import java.util.Iterator;
-import java.util.List;
+import java.util.TreeMap;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.kelondro.blob.Tables;
+import net.yacy.kelondro.data.word.Word;
import net.yacy.kelondro.index.RowSpaceExceededException;
import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.util.DateFormatter;
-import de.anomic.data.YMarkKeyValueEntry;
-import de.anomic.data.YMarkTables;
import de.anomic.data.UserDB;
+import de.anomic.data.YMarkTables;
import de.anomic.data.YMarkTables.METADATA;
import de.anomic.search.Segments;
import de.anomic.search.Switchboard;
@@ -163,7 +164,7 @@ public class get_treeview {
prop.put("folders_"+count+"_hash", "m:"+url);
prop.put("folders_"+count+"_hasChildren", "true");
count++;
- prop.put("folders_"+count+"_foldername","WordCount");
+ prop.put("folders_"+count+"_foldername","WordCounts");
putProp(count, "meta");
prop.put("folders_"+count+"_hash", "w:"+url);
prop.put("folders_"+count+"_hasChildren", "true");
@@ -178,11 +179,12 @@ public class get_treeview {
}
} else if (isWordCount) {
try {
- final List> list = YMarkTables.getWordFrequencies(post.get(ROOT).substring(2), sb.loader, 10);
- final Iterator> iter = list.iterator();
- while (iter.hasNext()) {
- YMarkKeyValueEntry e = iter.next();
- prop.put("folders_"+count+"_foldername",""+e.getKey()+": [" + e.getValue() + "]");
+ final TreeMap words = YMarkTables.getWordCounts(post.get(ROOT).substring(2), sb.loader);
+ final ArrayList topwords = new ArrayList(words.descendingKeySet());
+ for(int i = 0; i < 20 && i < topwords.size(); i++) {
+ String word = topwords.get(i);
+ int occur = words.get(word).occurrences();
+ prop.put("folders_"+count+"_foldername",""+word+": [" + occur + "]");
putProp(count, "meta");
count++;
}
@@ -208,8 +210,8 @@ public class get_treeview {
putProp(count, "meta");
count++;
}
- count--;
- prop.put("folders_"+count+"_comma", "");
+ prop.put("folders_"+count+"_foldername","autotag: " + sb.tables.bookmarks.autoTag(post.get(ROOT).substring(2), sb.loader, bmk_user, 5) + "");
+ putProp(count, "meta");
count++;
prop.put("folders", count);
} catch (MalformedURLException e) {
diff --git a/source/de/anomic/data/YMarkKeyValueEntry.java b/source/de/anomic/data/YMarkKeyValueEntry.java
deleted file mode 100644
index 7c70caa4d..000000000
--- a/source/de/anomic/data/YMarkKeyValueEntry.java
+++ /dev/null
@@ -1,66 +0,0 @@
-package de.anomic.data;
-
-/**
- * @author apfelmaennchen
- *
- * @param
- * @param
- */
-public class YMarkKeyValueEntry,V extends Comparable> extends Object implements Comparable> {
-
- private K key;
- private V value;
-
-
- public YMarkKeyValueEntry() {
- this.key = null;
- this.value = null;
- }
-
- public YMarkKeyValueEntry(K key, V value) {
- this.key = key;
- this.value = value;
- }
-
- /**
- * The natural order of objects in this class is determind by their value components
- * Note: this class has a natural ordering that is inconsistent with equals.
- * @see java.lang.Comparable#compareTo(java.lang.Object)
- */
- public int compareTo(YMarkKeyValueEntry e) {
- return this.value.compareTo(e.value);
- }
-
- /**
- * Two objects of this class are considered to be equal, if their keys are equal.
- * Note: this class has a natural ordering that is inconsistent with equals.
- */
- @SuppressWarnings("unchecked")
- @Override
- public boolean equals(Object obj) {
- if(this.getClass() == obj.getClass())
- return this.key.equals(((YMarkKeyValueEntry)obj).getKey());
- else return false;
- }
-
- public K getKey() {
- return this.key;
- }
-
- public V getValue() {
- return this.value;
- }
-
- public void setValue(V value) {
- this.value = value;
- }
-
- public void setKey(K key) {
- this.key = key;
- }
-
- public void set(K key, V value) {
- this.key = key;
- this.value = value;
- }
-}
diff --git a/source/de/anomic/data/YMarkTables.java b/source/de/anomic/data/YMarkTables.java
index 9057f3baa..fab6fcf56 100644
--- a/source/de/anomic/data/YMarkTables.java
+++ b/source/de/anomic/data/YMarkTables.java
@@ -1,12 +1,11 @@
package de.anomic.data;
import java.io.IOException;
+import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
-import java.util.Collection;
-import java.util.Collections;
import java.util.Date;
import java.util.EnumMap;
import java.util.EnumSet;
@@ -14,8 +13,9 @@ import java.util.Enumeration;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
-import java.util.List;
import java.util.Map;
+import java.util.TreeMap;
+
import net.yacy.document.Condenser;
import net.yacy.document.Document;
import net.yacy.document.Parser.Failure;
@@ -31,6 +31,7 @@ import net.yacy.repository.LoaderDispatcher;
import de.anomic.crawler.CrawlProfile;
import de.anomic.crawler.retrieval.Response;
import de.anomic.search.Segment;
+import de.anomic.data.YMarkWordCountComparator;
public class YMarkTables {
@@ -449,7 +450,7 @@ public class YMarkTables {
if(document != null) {
metadata.put(METADATA.TITLE, document.dc_title());
metadata.put(METADATA.CREATOR, document.dc_creator());
- metadata.put(METADATA.KEYWORDS, document.dc_subject(','));
+ metadata.put(METADATA.KEYWORDS, document.dc_subject(' '));
metadata.put(METADATA.PUBLISHER, document.dc_publisher());
metadata.put(METADATA.DESCRIPTION, document.dc_description());
metadata.put(METADATA.MIMETYPE, document.dc_format());
@@ -465,52 +466,88 @@ public class YMarkTables {
return metadata;
}
- public static List> getWordFrequencies(final String url, final LoaderDispatcher loader, final int top) throws MalformedURLException {
- final List> list = new ArrayList>();
- final DigestURI u = new DigestURI(url);
+ public String autoTag(final String url, final LoaderDispatcher loader, final String bmk_user, final int count) throws MalformedURLException {
+ final StringBuilder buffer = new StringBuilder();
+ final Map words;
+ final DigestURI u = new DigestURI(url);
Response response = null;
- int wordcount = 0;
- String sentence, token;
- final YMarkKeyValueEntry entry = new YMarkKeyValueEntry();
try {
response = loader.load(loader.request(u, true, false), CrawlProfile.CacheStrategy.IFEXIST, Long.MAX_VALUE);
final Document document = Document.mergeDocuments(response.url(), response.getMimeType(), response.parse());
if(document != null) {
- final Collection sentences = document.getSentences(false);
- if (sentences != null) {
- for (StringBuilder s: sentences) {
- sentence = s.toString();
- Enumeration tokens = Condenser.wordTokenizer(sentence, "UTF-8", LibraryProvider.dymLib);
- while (tokens.hasMoreElements()) {
- token = tokens.nextElement();
- if (token.length() > 2) {
- wordcount++;
- entry.set(token.toLowerCase(), 1);
- if(list.contains(entry)) {
- int v = list.get(list.indexOf(entry)).getValue() + 1;
- list.get(list.indexOf(entry)).setValue(v);
- } else {
- list.add(new YMarkKeyValueEntry(token.toLowerCase(), 1));
- }
- }
- }
- }
- }
- document.close();
+ try {
+ words = new Condenser(document, true, true, LibraryProvider.dymLib).words();
+ buffer.append(document.dc_title());
+ buffer.append(document.dc_description());
+ buffer.append(document.dc_subject(' '));
+ final Enumeration tokens = Condenser.wordTokenizer(buffer.toString(), "UTF-8", LibraryProvider.dymLib);
+ while(tokens.hasMoreElements()) {
+ int max = 1;
+ String token = tokens.nextElement();
+ Word word = words.get(token);
+ if (words.containsKey(token)) {
+ if (this.worktables.has(TABLES.TAGS.tablename(bmk_user), getKeyId(token))) {
+ max = word.occurrences() * 1000;
+ } else if (token.length()>3) {
+ max = word.occurrences() * 100;
+ }
+ for(int i=0; i topwords = new ArrayList(sortWordCounts(words).descendingKeySet());
+ for(int i=0; i 100) {
+ buffer.append(topwords.get(i));
+ /*
+ buffer.append('[');
+ buffer.append(words.get(topwords.get(i)).occurrences());
+ buffer.append(']');
+ */
+ buffer.append(',');
+ }
+ }
+ if(buffer.length() > 0) {
+ buffer.deleteCharAt(buffer.length()-1);
+ }
+
+ } catch (UnsupportedEncodingException e) {
+ Log.logException(e);
+ } catch (IOException e) {
+ Log.logException(e);
+ }
+ }
+ } catch (IOException e) {
+ Log.logException(e);
+ } catch (Failure e) {
+ Log.logException(e);
+ }
+ return buffer.toString();
+ }
+
+ public static TreeMap getWordCounts(final String url, final LoaderDispatcher loader) throws MalformedURLException {
+ final DigestURI u = new DigestURI(url);
+ Response response = null;
+ try {
+ response = loader.load(loader.request(u, true, false), CrawlProfile.CacheStrategy.IFEXIST, Long.MAX_VALUE);
+ final Document document = Document.mergeDocuments(response.url(), response.getMimeType(), response.parse());
+ if(document != null) {
+ return sortWordCounts(new Condenser(document, true, true, LibraryProvider.dymLib).words());
}
- } catch (IOException e) {
+ } catch (IOException e) {
Log.logException(e);
} catch (Failure e) {
Log.logException(e);
}
- Collections.sort(list);
- float c = list.size();
- Log.logInfo(YMarkTables.BOOKMARKS_LOG, "size: "+c);
- int end = (int) (c*0.9);
- int start = end - top;
- if (start < 0)
- start = 0;
- Log.logInfo(YMarkTables.BOOKMARKS_LOG, "start: "+start+" end: "+end);
- return list.subList(start,end);
+ return new TreeMap();
}
+
+ public static TreeMap sortWordCounts(final Map unsorted_words) {
+ final TreeMap sorted_words = new TreeMap(new YMarkWordCountComparator(unsorted_words));
+ sorted_words.putAll(unsorted_words);
+ return sorted_words;
+ }
+
}
diff --git a/source/de/anomic/data/YMarkWordCountComparator.java b/source/de/anomic/data/YMarkWordCountComparator.java
new file mode 100644
index 000000000..6505bd846
--- /dev/null
+++ b/source/de/anomic/data/YMarkWordCountComparator.java
@@ -0,0 +1,51 @@
+package de.anomic.data;
+
+import java.util.Comparator;
+import java.util.Map;
+
+import net.yacy.kelondro.data.word.Word;
+
+/**
+ * Orders words (map keys) by how often they occur, ascending.
+ * <p>
+ * Occurrence counts are looked up in the map handed to the constructor. Words
+ * with the same count are ordered by their natural String order, so the
+ * comparator only returns 0 for identical words. This matters when it backs a
+ * TreeMap: a comparator that returns 0 for distinct words with equal counts
+ * makes the map treat them as one key and silently drop all but one of them.
+ */
+public class YMarkWordCountComparator implements Comparator<String> {
+
+    private final Map<String, Word> words;
+
+    /**
+     * @param words map from word to its statistics; it is read on every
+     *              comparison and is not copied
+     */
+    public YMarkWordCountComparator(final Map<String, Word> words) {
+        this.words = words;
+    }
+
+    /**
+     * Compares two words by occurrence count, then by the words themselves.
+     *
+     * @param k1 first word
+     * @param k2 second word
+     * @return negative, zero or positive as k1 sorts before, equal to or after k2
+     */
+    public int compare(final String k1, final String k2) {
+        final int c1 = occurrences(k1);
+        final int c2 = occurrences(k2);
+        if (c1 != c2) {
+            return (c1 < c2) ? -1 : 1;
+        }
+        // Tie-break on the key so that distinct words never compare as equal.
+        return k1.compareTo(k2);
+    }
+
+    /** Returns the occurrence count of a word, or 0 if the map has no entry for it. */
+    private int occurrences(final String key) {
+        final Word w = this.words.get(key);
+        return (w == null) ? 0 : w.occurrences();
+    }
+}