refactoring for ymarks

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7648 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
apfelmaennchen 14 years ago
parent 399d7d6878
commit 78d6d6ca06

@ -11,7 +11,8 @@ import net.yacy.cora.protocol.RequestHeader;
import net.yacy.kelondro.blob.Tables;
import net.yacy.kelondro.index.RowSpaceExceededException;
import net.yacy.kelondro.logging.Log;
import de.anomic.data.YMarkTables;
import de.anomic.data.ymark.YMarkTables;
import de.anomic.data.ymark.YMarkUtil;
import de.anomic.search.Switchboard;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
@ -59,6 +60,7 @@ public class Table_YMark_p {
count = 0;
byte[] key;
String name;
/*
try {
Iterator<byte[]> iter = sb.tables.keys(YMarkTables.TABLES.TAGS.tablename(bmk_user));
while(iter.hasNext()) {
@ -86,6 +88,7 @@ public class Table_YMark_p {
} catch (RowSpaceExceededException e) {
Log.logException(e);
}
*/
final String counts = post.get("count", null);
int maxcount = (counts == null || counts.equals("all")) ? Integer.MAX_VALUE : post.getInt("count", 10);
@ -132,13 +135,16 @@ public class Table_YMark_p {
Log.logException(e);
}
// apply rebuildIndex request
/*
if (!post.get("rebuildindex", "").isEmpty()) try {
sb.tables.bookmarks.folders.rebuildIndex(bmk_user);
sb.tables.bookmarks.tags.rebuildIndex(bmk_user);
} catch (IOException e) {
Log.logException(e);
}
*/
if (!post.get("deleterows", "").isEmpty()) {
for (final Map.Entry<String, String> entry: post.entrySet()) {
@ -234,9 +240,12 @@ public class Table_YMark_p {
try {
Iterator<Tables.Row> mapIterator;
if (post.containsKey("folders") && !post.get("folders").isEmpty()) {
mapIterator = sb.tables.orderByPK(sb.tables.bookmarks.folders.getBookmarks(bmk_user, post.get("folders")), maxcount).iterator();
// mapIterator = sb.tables.orderByPK(sb.tables.bookmarks.folders.getBookmarks(bmk_user, post.get("folders")), maxcount).iterator();
mapIterator = sb.tables.bookmarks.getBookmarksByFolder(bmk_user, post.get("folders"));
} else if(post.containsKey("tags") && !post.get("tags").isEmpty()) {
mapIterator = sb.tables.orderByPK(sb.tables.bookmarks.tags.getBookmarks(bmk_user, post.get("tags")), maxcount).iterator();
// mapIterator = sb.tables.orderByPK(sb.tables.bookmarks.tags.getBookmarks(bmk_user, post.get("tags")), maxcount).iterator();
final String[] tagArray = YMarkUtil.cleanTagsString(post.get(YMarkTables.BOOKMARK.TAGS.key())).split(YMarkUtil.TAGS_SEPARATOR);
mapIterator = sb.tables.bookmarks.getBookmarksByTag(bmk_user, tagArray);
} else {
mapIterator = sb.tables.orderByPK(sb.tables.iterator(table, matcher), maxcount).iterator();
}
@ -261,9 +270,7 @@ public class Table_YMark_p {
}
} catch (IOException e) {
Log.logException(e);
} catch (RowSpaceExceededException e) {
Log.logException(e);
}
}
prop.put("showtable_list", count);
prop.put("showtable_num", count);
}

@ -4,8 +4,9 @@ import java.util.HashMap;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.kelondro.index.RowSpaceExceededException;
import net.yacy.kelondro.logging.Log;
import de.anomic.data.YMarkTables;
import de.anomic.data.UserDB;
import de.anomic.data.ymark.YMarkTables;
import de.anomic.data.ymark.YMarkUtil;
import de.anomic.search.Switchboard;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
@ -39,8 +40,8 @@ public class add_ymark {
data.put(YMarkTables.BOOKMARK.TITLE.key(), post.get(YMarkTables.BOOKMARK.TITLE.key(),YMarkTables.BOOKMARK.TITLE.deflt()));
data.put(YMarkTables.BOOKMARK.DESC.key(), post.get(YMarkTables.BOOKMARK.DESC.key(),YMarkTables.BOOKMARK.DESC.deflt()));
data.put(YMarkTables.BOOKMARK.PUBLIC.key(), post.get(YMarkTables.BOOKMARK.PUBLIC.key(),YMarkTables.BOOKMARK.PUBLIC.deflt()));
data.put(YMarkTables.BOOKMARK.TAGS.key(), YMarkTables.cleanTagsString(post.get(YMarkTables.BOOKMARK.TAGS.key(),YMarkTables.BOOKMARK.TAGS.deflt())));
data.put(YMarkTables.BOOKMARK.FOLDERS.key(), YMarkTables.cleanFoldersString(post.get(YMarkTables.BOOKMARK.FOLDERS.key(),YMarkTables.FOLDERS_UNSORTED)));
data.put(YMarkTables.BOOKMARK.TAGS.key(), YMarkUtil.cleanTagsString(post.get(YMarkTables.BOOKMARK.TAGS.key(),YMarkTables.BOOKMARK.TAGS.deflt())));
data.put(YMarkTables.BOOKMARK.FOLDERS.key(), YMarkUtil.cleanFoldersString(post.get(YMarkTables.BOOKMARK.FOLDERS.key(),YMarkTables.FOLDERS_UNSORTED)));
try {
sb.tables.bookmarks.addBookmark(bmk_user, data, false);

@ -3,8 +3,9 @@ import java.io.IOException;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.kelondro.index.RowSpaceExceededException;
import net.yacy.kelondro.logging.Log;
import de.anomic.data.YMarkTables;
import de.anomic.data.UserDB;
import de.anomic.data.ymark.YMarkTables;
import de.anomic.data.ymark.YMarkUtil;
import de.anomic.search.Switchboard;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
@ -29,7 +30,7 @@ public class delete_ymark {
if(post.containsKey(YMarkTables.BOOKMARKS_ID)) {
urlHash = post.get(YMarkTables.BOOKMARKS_ID).getBytes();
} else if(post.containsKey(YMarkTables.BOOKMARK.URL.key())) {
urlHash = YMarkTables.getBookmarkId(post.get(YMarkTables.BOOKMARK.URL.key()));
urlHash = YMarkUtil.getBookmarkId(post.get(YMarkTables.BOOKMARK.URL.key()));
} else {
prop.put("result", "0");
return prop;

@ -9,19 +9,17 @@ import java.util.TreeMap;
import net.yacy.cora.date.ISO8601Formatter;
import net.yacy.cora.document.UTF8;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.document.Document;
import net.yacy.document.Parser.Failure;
import net.yacy.kelondro.blob.Tables;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.data.word.Word;
import net.yacy.kelondro.index.RowSpaceExceededException;
import net.yacy.kelondro.logging.Log;
import de.anomic.crawler.CrawlProfile;
import de.anomic.crawler.retrieval.Response;
import de.anomic.data.UserDB;
import de.anomic.data.YMarkTables;
import de.anomic.data.YMarkTables.METADATA;
import de.anomic.search.Segments;
import de.anomic.data.ymark.YMarkCrawlStart;
import de.anomic.data.ymark.YMarkMetadata;
import de.anomic.data.ymark.YMarkTables;
import de.anomic.data.ymark.YMarkUtil;
import de.anomic.search.Switchboard;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
@ -49,6 +47,8 @@ public class get_treeview {
boolean isFolder = true;
boolean isBookmark = false;
boolean isMetadata = false;
boolean isURLdb = false;
boolean isCrawlStart = false;
boolean isWordCount = false;
if (post != null){
@ -63,29 +63,37 @@ public class get_treeview {
} else if (post.get(ROOT).startsWith("m:")) {
isMetadata = true;
isFolder = false;
} else if (post.get(ROOT).startsWith("u:")) {
isURLdb = true;
isFolder = false;
} else if (post.get(ROOT).startsWith("w:")) {
isWordCount = true;
isFolder = false;
} else if (post.get(ROOT).startsWith("c:")) {
isCrawlStart = true;
isFolder = false;
}
}
}
Iterator<String> it = null;
Iterator<Tables.Row> bit = null;
Tables.Row bmk_row = null;
int count = 0;
if(isFolder) {
// loop through folderList
try {
it = sb.tables.bookmarks.folders.getFolders(bmk_user, root);
try {
// it = sb.tables.bookmarks.folders.getFolders(bmk_user, root);
it = sb.tables.bookmarks.getFolders(bmk_user, root).iterator();
} catch (IOException e) {
Log.logException(e);
}
int n = root.split(YMarkTables.FOLDERS_SEPARATOR).length;
int n = root.split(YMarkUtil.FOLDERS_SEPARATOR).length;
if (n == 0) n = 1;
while (it.hasNext()) {
String folder = it.next();
foldername = folder.split(YMarkTables.FOLDERS_SEPARATOR);
foldername = folder.split(YMarkUtil.FOLDERS_SEPARATOR);
if (foldername.length == n+1) {
prop.put("folders_"+count+"_foldername", foldername[n]);
prop.put("folders_"+count+"_expanded", "false");
@ -99,42 +107,40 @@ public class get_treeview {
}
// loop through bookmarkList
try {
it = sb.tables.bookmarks.folders.getBookmarkIds(bmk_user, root).iterator();
while (it.hasNext()) {
final String urlHash = it.next();
bmk_row = sb.tables.select(YMarkTables.TABLES.BOOKMARKS.tablename(bmk_user), urlHash.getBytes());
if(bmk_row != null) {
final String url = UTF8.String(bmk_row.get(YMarkTables.BOOKMARK.URL.key()));
final String title = bmk_row.get(YMarkTables.BOOKMARK.TITLE.key(), YMarkTables.BOOKMARK.TITLE.deflt());
// TODO: get_treeview - get rid of bmtype
if (post.containsKey("bmtype")) {
if (post.get("bmtype").equals("title")) {
prop.put("folders_"+count+"_foldername", title);
} else if (post.get("bmtype").equals("href")) {
prop.put("folders_"+count+"_foldername",
"<a href='"+url+" 'target='_blank'>"+title+"</a>");
}
} else {
prop.put("folders_"+count+"_foldername", url);
}
prop.put("folders_"+count+"_expanded", "false");
prop.put("folders_"+count+"_url", url);
prop.put("folders_"+count+"_type", "file");
prop.put("folders_"+count+"_hash", "b:"+urlHash);
prop.put("folders_"+count+"_hasChildren", "true");
prop.put("folders_"+count+"_comma", ",");
count++;
}
}
if(!root.isEmpty()) {
bit = sb.tables.bookmarks.getBookmarksByFolder(bmk_user, root);
while (bit.hasNext()) {
bmk_row = bit.next();
if(bmk_row != null) {
final String url = UTF8.String(bmk_row.get(YMarkTables.BOOKMARK.URL.key()));
final String title = bmk_row.get(YMarkTables.BOOKMARK.TITLE.key(), YMarkTables.BOOKMARK.TITLE.deflt());
// TODO: get_treeview - get rid of bmtype
if (post.containsKey("bmtype")) {
if (post.get("bmtype").equals("title")) {
prop.putJSON("folders_"+count+"_foldername", title);
} else if (post.get("bmtype").equals("href")) {
prop.putJSON("folders_"+count+"_foldername", "<a href='"+url+"' target='_blank'>"+title+"</a>");
}
} else {
prop.putJSON("folders_"+count+"_foldername", url);
}
prop.put("folders_"+count+"_expanded", "false");
prop.put("folders_"+count+"_url", url);
prop.put("folders_"+count+"_type", "file");
prop.put("folders_"+count+"_hash", "b:"+new String(bmk_row.getPK()));
prop.put("folders_"+count+"_hasChildren", "true");
prop.put("folders_"+count+"_comma", ",");
count++;
}
}
}
count--;
prop.put("folders_"+count+"_comma", "");
count++;
prop.put("folders", count);
} catch (IOException e) {
Log.logException(e);
} catch (RowSpaceExceededException e) {
Log.logException(e);
}
} else if(isBookmark) {
try {
@ -170,6 +176,16 @@ public class get_treeview {
prop.put("folders_"+count+"_hash", "m:"+url);
prop.put("folders_"+count+"_hasChildren", "true");
count++;
prop.put("folders_"+count+"_foldername","<small><b>URLdb</b></small>");
putProp(count, "meta");
prop.put("folders_"+count+"_hash", "u:"+url);
prop.put("folders_"+count+"_hasChildren", "true");
count++;
prop.put("folders_"+count+"_foldername","<small><b>CrawlStart</b></small>");
putProp(count, "meta");
prop.put("folders_"+count+"_hash", "c:"+url);
prop.put("folders_"+count+"_hasChildren", "true");
count++;
prop.put("folders_"+count+"_foldername","<small><b>WordCounts</b></small>");
putProp(count, "meta");
prop.put("folders_"+count+"_hash", "w:"+url);
@ -183,46 +199,42 @@ public class get_treeview {
} catch (RowSpaceExceededException e) {
Log.logException(e);
}
} else if (isWordCount || isMetadata) {
} else if (isWordCount || isMetadata || isURLdb || isCrawlStart) {
try {
final DigestURI u = new DigestURI(post.get(ROOT).substring(2));
Response response = null;
response = sb.loader.load(sb.loader.request(u, true, false), CrawlProfile.CacheStrategy.IFEXIST, Long.MAX_VALUE, true);
final Document document = Document.mergeDocuments(response.url(), response.getMimeType(), response.parse());
if(document != null) {
if(isWordCount) {
final TreeMap<String,Word> words = YMarkTables.getWordCounts(document);
final ArrayList<String> topwords = new ArrayList<String>(words.descendingKeySet());
for(int i = 0; i < 20 && i < topwords.size(); i++) {
String word = topwords.get(i);
int occur = words.get(word).occurrences();
prop.put("folders_"+count+"_foldername","<small><b>"+word+":</b> [" + occur + "]</small>");
putProp(count, "meta");
count++;
}
count--;
prop.put("folders_"+count+"_comma", "");
final YMarkMetadata meta = new YMarkMetadata(new DigestURI(post.get(ROOT).substring(2)), sb.indexSegments);
meta.loadDocument(sb.loader);
if(isWordCount) {
final TreeMap<String,Word> words = meta.getWordCounts();
final ArrayList<String> topwords = new ArrayList<String>(words.descendingKeySet());
for(int i = 0; i < 20 && i < topwords.size(); i++) {
String word = topwords.get(i);
int occur = words.get(word).occurrences();
prop.put("folders_"+count+"_foldername","<small><b>"+word+":</b> [" + occur + "]</small>");
putProp(count, "meta");
count++;
prop.put("folders", count);
} else if(isMetadata) {
EnumMap<METADATA, String> metadata;
metadata = YMarkTables.getMetadata(YMarkTables.getBookmarkId(post.get(ROOT).substring(2)), sb.indexSegments.segment(Segments.Process.PUBLIC));
if (metadata.isEmpty())
metadata = YMarkTables.getMetadata(document);
final Iterator<METADATA> iter = metadata.keySet().iterator();
while (iter.hasNext()) {
final METADATA key = iter.next();
final String value = metadata.get(key);
prop.put("folders_"+count+"_foldername","<small><b>"+key.toString().toLowerCase()+":</b> " + value + "</small>");
putProp(count, "meta");
count++;
}
prop.put("folders_"+count+"_foldername","<small><b>autotag:</b> " + sb.tables.bookmarks.autoTag(document, bmk_user, 5) + "</small>");
putProp(count, "meta");
count++;
prop.put("folders", count);
}
}
}
count--;
prop.put("folders_"+count+"_comma", "");
count++;
prop.put("folders", count);
} else if(isMetadata) {
count = putMeta(count, meta.loadMetadata());
} else if(isURLdb) {
count = putMeta(count, meta.getMetadata());
} else if(isCrawlStart) {
Log.logInfo("YMark", "I am looking for CrawlStart: "+post.get(ROOT).substring(2));
final YMarkCrawlStart crawlStart = new YMarkCrawlStart(sb.tables, post.get(ROOT).substring(2));
final Iterator<String> iter = crawlStart.keySet().iterator();
String key;
while(iter.hasNext()) {
key = iter.next();
prop.put("folders_"+count+"_foldername","<small><b>"+key.toLowerCase()+":</b> " + crawlStart.get(key) + "</small>");
putProp(count, "meta");
count++;
}
prop.put("folders", count);
}
} catch (MalformedURLException e) {
Log.logException(e);
} catch (IOException e) {
@ -245,4 +257,16 @@ public class get_treeview {
prop.put("folders_"+count+"_hasChildren", "false");
prop.put("folders_"+count+"_comma", ",");
}
/**
 * Emits one "meta" node per metadata entry into the response properties,
 * starting at the given slot, and records the resulting node count under
 * the "folders" key.
 *
 * @param count index of the first "folders_" slot to fill
 * @param metadata metadata fields and their values to render
 * @return the slot index following the last node written
 */
public static int putMeta(int count, final EnumMap<YMarkMetadata.METADATA, String> metadata) {
    int slot = count;
    for (final YMarkMetadata.METADATA field : metadata.keySet()) {
        final String label = field.toString().toLowerCase();
        prop.put("folders_" + slot + "_foldername", "<small><b>" + label + ":</b> " + metadata.get(field) + "</small>");
        putProp(slot, "meta");
        slot++;
    }
    // the total is written even when the map is empty
    prop.put("folders", slot);
    return slot;
}
}

@ -6,11 +6,12 @@ import net.yacy.cora.document.UTF8;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.document.parser.html.CharacterCoding;
import net.yacy.kelondro.blob.Tables;
import net.yacy.kelondro.index.RowSpaceExceededException;
import net.yacy.kelondro.logging.Log;
import de.anomic.data.YMarkTables;
import de.anomic.data.YMarksXBELImporter;
import de.anomic.data.UserDB;
import de.anomic.data.ymark.YMarkDate;
import de.anomic.data.ymark.YMarkTables;
import de.anomic.data.ymark.YMarkUtil;
import de.anomic.data.ymark.YMarkXBELImporter;
import de.anomic.search.Switchboard;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
@ -50,14 +51,15 @@ public class get_xbel {
root = "";
}
final int root_depth = root.split(YMarkTables.FOLDERS_SEPARATOR).length;
final int root_depth = root.split(YMarkUtil.FOLDERS_SEPARATOR).length;
Iterator<String> fit = null;
Iterator<String> bit = null;
Iterator<Tables.Row> bit = null;
int count = 0;
int n = root_depth;
try {
fit = sb.tables.bookmarks.folders.getFolders(bmk_user, root);
// fit = sb.tables.bookmarks.folders.getFolders(bmk_user, root);
fit = sb.tables.bookmarks.getFolders(bmk_user, root).iterator();
} catch (IOException e) {
Log.logException(e);
}
@ -66,7 +68,7 @@ public class get_xbel {
while (fit.hasNext()) {
String folder = fit.next();
foldername = folder.split(YMarkTables.FOLDERS_SEPARATOR);
foldername = folder.split(YMarkUtil.FOLDERS_SEPARATOR);
if (n != root_depth && foldername.length <= n) {
prop.put("xbel_"+count+"_elements", "</folder>");
count++;
@ -74,90 +76,95 @@ public class get_xbel {
if (foldername.length >= n) {
n = foldername.length;
if(n != root_depth) {
prop.put("xbel_"+count+"_elements", "<folder id=\"f:"+UTF8.String(YMarkTables.getKeyId(foldername[n-1]))+"\">");
prop.put("xbel_"+count+"_elements", "<folder id=\"f:"+UTF8.String(YMarkUtil.getKeyId(foldername[n-1]))+"\">");
count++;
prop.put("xbel_"+count+"_elements", "<title>" + CharacterCoding.unicode2xml(foldername[n-1], true) + "</title>");
count++;
}
try {
bit = sb.tables.bookmarks.folders.getBookmarkIds(bmk_user, folder).iterator();
Tables.Row bmk_row = null;
String urlHash;
while(bit.hasNext()){
urlHash = bit.next();
if(alias.contains(urlHash)) {
buffer.setLength(0);
buffer.append(YMarksXBELImporter.XBEL.ALIAS.startTag(true));
buffer.append(" ref=\"b:");
buffer.append(urlHash);
buffer.append("\"/>");
prop.put("xbel_"+count+"_elements", buffer.toString());
count++;
} else {
alias.add(urlHash);
bmk_row = sb.tables.select(YMarkTables.TABLES.BOOKMARKS.tablename(bmk_user), urlHash.getBytes());
if(bmk_row != null) {
buffer.setLength(0);
buffer.append(YMarksXBELImporter.XBEL.BOOKMARK.startTag(true));
buffer.append(" id=\"b:");
buffer.append(urlHash);
buffer.append(YMarkTables.BOOKMARK.URL.xbel());
buffer.append(CharacterCoding.unicode2xml(bmk_row.get(YMarkTables.BOOKMARK.URL.key(), YMarkTables.BOOKMARK.URL.deflt()), true));
buffer.append(YMarkTables.BOOKMARK.DATE_ADDED.xbel());
buffer.append(CharacterCoding.unicode2xml(YMarkTables.getISO8601(bmk_row.get(YMarkTables.BOOKMARK.DATE_ADDED.key())), true));
buffer.append(YMarkTables.BOOKMARK.DATE_MODIFIED.xbel());
buffer.append(CharacterCoding.unicode2xml(YMarkTables.getISO8601(bmk_row.get(YMarkTables.BOOKMARK.DATE_MODIFIED.key())), true));
buffer.append(YMarkTables.BOOKMARK.DATE_VISITED.xbel());
buffer.append(CharacterCoding.unicode2xml(YMarkTables.getISO8601(bmk_row.get(YMarkTables.BOOKMARK.DATE_VISITED.key())), true));
buffer.append(YMarkTables.BOOKMARK.TAGS.xbel());
buffer.append(bmk_row.get(YMarkTables.BOOKMARK.TAGS.key(), YMarkTables.BOOKMARK.TAGS.deflt()));
buffer.append(YMarkTables.BOOKMARK.PUBLIC.xbel());
buffer.append(bmk_row.get(YMarkTables.BOOKMARK.PUBLIC.key(), YMarkTables.BOOKMARK.PUBLIC.deflt()));
buffer.append(YMarkTables.BOOKMARK.VISITS.xbel());
buffer.append(bmk_row.get(YMarkTables.BOOKMARK.VISITS.key(), YMarkTables.BOOKMARK.VISITS.deflt()));
buffer.append("\"\n>");
prop.put("xbel_"+count+"_elements", buffer.toString());
count++;
buffer.setLength(0);
buffer.append(YMarksXBELImporter.XBEL.TITLE.startTag(false));
buffer.append(CharacterCoding.unicode2xml(bmk_row.get(YMarkTables.BOOKMARK.TITLE.key(), YMarkTables.BOOKMARK.TITLE.deflt()), true));
buffer.append(YMarksXBELImporter.XBEL.TITLE.endTag(false));
prop.put("xbel_"+count+"_elements", buffer.toString());
count++;
buffer.setLength(0);
buffer.append(YMarksXBELImporter.XBEL.DESC.startTag(false));
buffer.append(CharacterCoding.unicode2xml(bmk_row.get(YMarkTables.BOOKMARK.DESC.key(), YMarkTables.BOOKMARK.DESC.deflt()), true));
buffer.append(YMarksXBELImporter.XBEL.DESC.endTag(false));
prop.put("xbel_"+count+"_elements", buffer.toString());
count++;
prop.put("xbel_"+count+"_elements", YMarksXBELImporter.XBEL.BOOKMARK.endTag(false));
count++;
}
}
}
// bit = sb.tables.bookmarks.folders.getBookmarkIds(bmk_user, folder).iterator();
try {
bit = sb.tables.bookmarks.getBookmarksByFolder(bmk_user, folder);
} catch (IOException e) {
Log.logException(e);
continue;
} catch (RowSpaceExceededException e) {
Log.logException(e);
continue;
// TODO: better error handling (avoid NPE)
bit = null;
}
Tables.Row bmk_row = null;
String urlHash;
final YMarkDate date = new YMarkDate();
while(bit.hasNext()){
// urlHash = bit.next();
bmk_row = bit.next();
urlHash = new String(bmk_row.getPK());
if(alias.contains(urlHash)) {
buffer.setLength(0);
buffer.append(YMarkXBELImporter.XBEL.ALIAS.startTag(true));
buffer.append(" ref=\"b:");
buffer.append(urlHash);
buffer.append("\"/>");
prop.put("xbel_"+count+"_elements", buffer.toString());
count++;
} else {
alias.add(urlHash);
// bmk_row = sb.tables.select(YMarkTables.TABLES.BOOKMARKS.tablename(bmk_user), urlHash.getBytes());
if(bmk_row != null) {
buffer.setLength(0);
buffer.append(YMarkXBELImporter.XBEL.BOOKMARK.startTag(true));
buffer.append(" id=\"b:");
buffer.append(urlHash);
buffer.append(YMarkTables.BOOKMARK.URL.xbel());
buffer.append(CharacterCoding.unicode2xml(bmk_row.get(YMarkTables.BOOKMARK.URL.key(), YMarkTables.BOOKMARK.URL.deflt()), true));
buffer.append(YMarkTables.BOOKMARK.DATE_ADDED.xbel());
date.set(bmk_row.get(YMarkTables.BOOKMARK.DATE_ADDED.key()));
buffer.append(CharacterCoding.unicode2xml(date.toISO8601(), true));
buffer.append(YMarkTables.BOOKMARK.DATE_MODIFIED.xbel());
date.set(bmk_row.get(YMarkTables.BOOKMARK.DATE_MODIFIED.key()));
buffer.append(CharacterCoding.unicode2xml(date.toISO8601(), true));
buffer.append(YMarkTables.BOOKMARK.DATE_VISITED.xbel());
date.set(bmk_row.get(YMarkTables.BOOKMARK.DATE_VISITED.key()));
buffer.append(CharacterCoding.unicode2xml(date.toISO8601(), true));
buffer.append(YMarkTables.BOOKMARK.TAGS.xbel());
buffer.append(bmk_row.get(YMarkTables.BOOKMARK.TAGS.key(), YMarkTables.BOOKMARK.TAGS.deflt()));
buffer.append(YMarkTables.BOOKMARK.PUBLIC.xbel());
buffer.append(bmk_row.get(YMarkTables.BOOKMARK.PUBLIC.key(), YMarkTables.BOOKMARK.PUBLIC.deflt()));
buffer.append(YMarkTables.BOOKMARK.VISITS.xbel());
buffer.append(bmk_row.get(YMarkTables.BOOKMARK.VISITS.key(), YMarkTables.BOOKMARK.VISITS.deflt()));
buffer.append("\"\n>");
prop.put("xbel_"+count+"_elements", buffer.toString());
count++;
buffer.setLength(0);
buffer.append(YMarkXBELImporter.XBEL.TITLE.startTag(false));
buffer.append(CharacterCoding.unicode2xml(bmk_row.get(YMarkTables.BOOKMARK.TITLE.key(), YMarkTables.BOOKMARK.TITLE.deflt()), true));
buffer.append(YMarkXBELImporter.XBEL.TITLE.endTag(false));
prop.put("xbel_"+count+"_elements", buffer.toString());
count++;
buffer.setLength(0);
buffer.append(YMarkXBELImporter.XBEL.DESC.startTag(false));
buffer.append(CharacterCoding.unicode2xml(bmk_row.get(YMarkTables.BOOKMARK.DESC.key(), YMarkTables.BOOKMARK.DESC.deflt()), true));
buffer.append(YMarkXBELImporter.XBEL.DESC.endTag(false));
prop.put("xbel_"+count+"_elements", buffer.toString());
count++;
prop.put("xbel_"+count+"_elements", YMarkXBELImporter.XBEL.BOOKMARK.endTag(false));
count++;
}
}
}
}
}
while(n > root_depth) {
prop.put("xbel_"+count+"_elements", YMarksXBELImporter.XBEL.FOLDER.endTag(false));
prop.put("xbel_"+count+"_elements", YMarkXBELImporter.XBEL.FOLDER.endTag(false));
count++;
n--;
}

@ -1,14 +1,13 @@
import java.io.IOException;
import java.util.Iterator;
import java.util.TreeSet;
import net.yacy.cora.document.UTF8;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.kelondro.blob.Tables;
import net.yacy.kelondro.index.RowSpaceExceededException;
import net.yacy.kelondro.logging.Log;
import de.anomic.data.YMarkTables;
import de.anomic.data.UserDB;
import de.anomic.data.ymark.YMarkTables;
import de.anomic.data.ymark.YMarkUtil;
import de.anomic.search.Switchboard;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
@ -28,22 +27,21 @@ public class get_ymark {
final UserDB.Entry user = sb.userDB.getUser(header);
final boolean isAdmin = (sb.verifyAuthentication(header, true));
final boolean isAuthUser = user!= null && user.hasRight(UserDB.AccessRight.BOOKMARK_RIGHT);
final TreeSet<String> bookmarks = new TreeSet<String>();
Iterator<Tables.Row> bookmarks = null;
if(isAdmin || isAuthUser) {
final String bmk_user = (isAuthUser ? user.getUserName() : YMarkTables.USER_ADMIN);
if(post.containsKey(YMarkTables.BOOKMARK.TAGS.key())) {
tags = true;
final String[] tagArray = YMarkTables.cleanTagsString(post.get(YMarkTables.BOOKMARK.TAGS.key())).split(YMarkTables.TAGS_SEPARATOR);
final String[] tagArray = YMarkUtil.cleanTagsString(post.get(YMarkTables.BOOKMARK.TAGS.key())).split(YMarkUtil.TAGS_SEPARATOR);
try {
bookmarks.addAll(sb.tables.bookmarks.tags.getBookmarkIds(bmk_user, tagArray));
bookmarks = sb.tables.bookmarks.getBookmarksByTag(bmk_user, tagArray);
} catch (IOException e) {
Log.logException(e);
} catch (RowSpaceExceededException e) {
Log.logException(e);
}
}
/*
if(post.containsKey(YMarkTables.BOOKMARK.FOLDERS.key())) {
final String[] folderArray = YMarkTables.cleanFoldersString(post.get(YMarkTables.BOOKMARK.FOLDERS.key())).split(YMarkTables.TAGS_SEPARATOR);
try {
@ -57,7 +55,8 @@ public class get_ymark {
Log.logException(e);
}
}
putBookmarks(bookmarks, YMarkTables.TABLES.BOOKMARKS.tablename(bmk_user));
*/
putBookmarks(bookmarks);
} else {
prop.put(YMarkTables.USER_AUTHENTICATE,YMarkTables.USER_AUTHENTICATE_MSG);
@ -66,25 +65,16 @@ public class get_ymark {
return prop;
}
private static void putBookmarks(final TreeSet<String> urlSet, final String bmk_table) {
final Iterator<String>urlIter = urlSet.iterator();
private static void putBookmarks(final Iterator<Tables.Row> bit) {
int count = 0;
while(urlIter.hasNext()) {
final byte[] urlHash = urlIter.next().getBytes();
Tables.Row bmk_row = null;
try {
bmk_row = sb.tables.select(bmk_table, urlHash);
if (bmk_row != null) {
prop.putXML("bookmarks_"+count+"_id", UTF8.String(urlHash));
for (YMarkTables.BOOKMARK bmk : YMarkTables.BOOKMARK.values()) {
prop.putXML("bookmarks_"+count+"_"+bmk.key(), bmk_row.get(bmk.key(),bmk.deflt()));
}
count++;
}
} catch (IOException e) {
Log.logException(e);
} catch (RowSpaceExceededException e) {
Log.logException(e);
while(bit.hasNext()) {
Tables.Row bmk_row = bit.next();
if (bmk_row != null) {
prop.putXML("bookmarks_"+count+"_id", UTF8.String(bmk_row.getPK()));
for (YMarkTables.BOOKMARK bmk : YMarkTables.BOOKMARK.values()) {
prop.putXML("bookmarks_"+count+"_"+bmk.key(), bmk_row.get(bmk.key(),bmk.deflt()));
}
count++;
}
}
prop.put("bookmarks", count);

@ -4,7 +4,6 @@ import java.util.HashMap;
import net.yacy.cora.document.UTF8;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.document.Document;
import net.yacy.document.Parser.Failure;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.index.RowSpaceExceededException;
@ -12,12 +11,11 @@ import net.yacy.kelondro.logging.Log;
import org.xml.sax.SAXException;
import de.anomic.crawler.CrawlProfile;
import de.anomic.crawler.retrieval.Response;
import de.anomic.data.UserDB;
import de.anomic.data.YMarkTables;
import de.anomic.data.YMarksHTMLImporter;
import de.anomic.data.YMarksXBELImporter;
import de.anomic.data.ymark.YMarkHTMLImporter;
import de.anomic.data.ymark.YMarkMetadata;
import de.anomic.data.ymark.YMarkTables;
import de.anomic.data.ymark.YMarkXBELImporter;
import de.anomic.search.Switchboard;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
@ -44,7 +42,7 @@ public class import_ymark {
if(post.containsKey("bmkfile") && post.containsKey("importer")){
byteIn = new ByteArrayInputStream(UTF8.getBytes(post.get("bmkfile$file")));
if(post.get("importer").equals("html") && byteIn != null) {
final YMarksHTMLImporter htmlImporter = new YMarksHTMLImporter(byteIn, 100);
final YMarkHTMLImporter htmlImporter = new YMarkHTMLImporter(byteIn, 10);
t = new Thread(htmlImporter, "YMarks - HTML Importer");
t.start();
while ((bmk = htmlImporter.take()) != YMarkTables.POISON) {
@ -52,10 +50,10 @@ public class import_ymark {
}
prop.put("result", "1");
} else if(post.get("importer").equals("xbel") && byteIn != null) {
final YMarksXBELImporter xbelImporter;
final YMarkXBELImporter xbelImporter;
try {
//TODO: make RootFold
xbelImporter = new YMarksXBELImporter(byteIn, 100, YMarkTables.FOLDERS_IMPORTED);
xbelImporter = new YMarkXBELImporter(byteIn, 100, YMarkTables.FOLDERS_IMPORTED);
} catch (SAXException e) {
//TODO: display an error message
Log.logException(e);
@ -84,12 +82,9 @@ public class import_ymark {
public static void putBookmark(final Switchboard sb, final String bmk_user, final HashMap<String, String> bmk) {
try {
if(!bmk.containsKey(YMarkTables.BOOKMARK.TAGS.key()) || bmk.get(YMarkTables.BOOKMARK.TAGS.key()).equals(YMarkTables.BOOKMARK.TAGS.deflt())) {
final DigestURI u = new DigestURI(bmk.get(YMarkTables.BOOKMARK.URL.key()));
Response response = sb.loader.load(sb.loader.request(u, true, false), CrawlProfile.CacheStrategy.IFEXIST, Long.MAX_VALUE, true);
final Document document = Document.mergeDocuments(response.url(), response.getMimeType(), response.parse());
if(document != null) {
bmk.put(YMarkTables.BOOKMARK.TAGS.key(), sb.tables.bookmarks.autoTag(document, bmk_user, 3));
}
final YMarkMetadata meta = new YMarkMetadata(new DigestURI(bmk.get(YMarkTables.BOOKMARK.URL.key())));
meta.loadDocument(sb.loader);
bmk.put(YMarkTables.BOOKMARK.TAGS.key(), meta.autoTag(3));
}
sb.tables.bookmarks.addBookmark(bmk_user, bmk, true);
} catch (IOException e) {

@ -1,4 +1,4 @@
// Work.java
// WorkTables.java
// (C) 2010 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
// first published 04.02.2010 on http://yacy.net
//
@ -47,6 +47,7 @@ import net.yacy.kelondro.index.RowSpaceExceededException;
import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.order.Base64Order;
import net.yacy.kelondro.rwi.IndexCell;
import de.anomic.data.ymark.YMarkTables;
import de.anomic.search.Switchboard;
import de.anomic.server.serverObjects;
@ -87,12 +88,6 @@ public class WorkTables extends Tables {
this.bookmarks = new YMarkTables(this);
}
/**
 * Clears the named table through the superclass and then asks the bookmark
 * tables to clear the index for the same table name.
 *
 * @param tablename name of the table to clear
 * @throws IOException if the superclass clear fails
 */
@Override
public void clear(final String tablename) throws IOException {
    super.clear(tablename);
    bookmarks.clearIndex(tablename);
}
/**
* recording of a api call. stores the call parameters into the API database table
* @param post the post arguments of the api call

@ -1,259 +0,0 @@
package de.anomic.data;
import java.io.IOException;
import java.util.HashSet;
import java.util.Iterator;
import java.util.TreeSet;
import java.util.regex.Pattern;
import net.yacy.cora.document.UTF8;
import net.yacy.cora.storage.ConcurrentARC;
import net.yacy.kelondro.blob.Tables;
import net.yacy.kelondro.blob.Tables.Data;
import net.yacy.kelondro.blob.Tables.Row;
import net.yacy.kelondro.index.RowSpaceExceededException;
import net.yacy.kelondro.logging.Log;
public class YMarkIndex {
/**
 * Column names of an index row, each paired with the default value used
 * when the column is absent.
 * <p>
 * A nested enum is implicitly static, so the modifier is omitted.
 */
public enum INDEX {
    ID   ("id", ""),
    NAME ("name", ""),
    DESC ("desc", ""),
    URLS ("urls", "");

    // Assigned once in the constructor; enum constants are shared, so they must not be mutable.
    private final String key;
    private final String dflt;

    INDEX(final String k, final String s) {
        this.key = k;
        this.dflt = s;
    }

    /**
     * @return the column name of this field
     */
    public String key() {
        return this.key;
    }

    /**
     * @return the default value for this field
     */
    public String deflt() {
        return this.dflt;
    }
}
/**
 * Kind of change applied to an index entry for a given URL hash.
 */
public enum INDEX_ACTION { ADD, REMOVE }
// Regex prefix: anchors at the start of the input and opens a \Q...\E literal-quoted section.
public final static String PATTERN_PREFIX = "^\\Q";
// Regex postfix: appends the folder separator, closes the quoted section and accepts any remainder.
public final static String PATTERN_POSTFIX = YMarkTables.FOLDERS_SEPARATOR+"\\E.*$";
// Table store the index methods of this class select from, iterate over and insert into.
private final WorkTables worktables;
// Suffix appended to a user name to form that user's index table name.
private final String table_basename;
// Cache of serialized URL sets, keyed by "<index table>:<key name>".
private final ConcurrentARC<String, byte[]> cache;
public YMarkIndex(final Tables wt, final String tb) {
this.worktables = (WorkTables)wt;
this.table_basename = tb;
this.cache = new ConcurrentARC<String, byte[]>(50,1);
}
/**
 * Looks up the name stored for an index key in the user's index table.
 *
 * @param user user name; combined with the table base name to form the table name
 * @param key primary key of the index row
 * @return the value of the NAME column, or its default if the row or column is missing
 * @throws IOException if the table lookup fails
 * @throws RowSpaceExceededException if the table lookup runs out of row space
 */
public String getKeyname(final String user, final byte[] key) throws IOException, RowSpaceExceededException {
    final String index_table = user + this.table_basename;
    final Tables.Row row = this.worktables.select(index_table, key);
    // NOTE(review): other callers of select() null-check its result, so a missing
    // row is presumably reported as null; fall back to the default instead of an NPE.
    if (row == null) {
        return INDEX.NAME.deflt();
    }
    return row.get(INDEX.NAME.key(), INDEX.NAME.deflt());
}
/**
 * Collects every folder path located below the given root in the user's
 * index table, including the intermediate paths between root and each
 * stored folder.
 *
 * @param user user name; combined with the table base name to form the table name
 * @param root folder path whose descendants are wanted
 * @return iterator over the folder paths in natural String order; root itself
 *         is included unless it equals YMarkTables.FOLDERS_ROOT
 * @throws IOException if iterating the index table fails
 */
public Iterator<String> getFolders(final String user, final String root) throws IOException {
    final String index_table = user + this.table_basename;
    final TreeSet<String> folders = new TreeSet<String>();
    // Pattern.quote also copes with a "\E" inside root, which a hand-built \Q...\E section would not.
    final Pattern r = Pattern.compile("^" + Pattern.quote(root + YMarkTables.FOLDERS_SEPARATOR) + ".*$");
    final Iterator<Row> it = this.worktables.iterator(index_table, INDEX.NAME.key(), r);
    final StringBuilder path = new StringBuilder(100);
    while (it.hasNext()) {
        path.setLength(0);
        path.append(it.next().get(INDEX.NAME.key(), INDEX.NAME.deflt()));
        // strip one trailing segment per round until only root (or nothing) is left
        while (path.length() > 0 && !root.contentEquals(path)) {
            folders.add(path.toString());
            final int cut = path.lastIndexOf(YMarkTables.FOLDERS_SEPARATOR);
            if (cut < 0) {
                // no separator left: setLength(-1) would throw, so stop here
                break;
            }
            path.setLength(cut);
        }
    }
    if (!root.equals(YMarkTables.FOLDERS_ROOT)) {
        folders.add(root);
    }
    return folders.iterator();
}
/**
 * Drops every cached bookmark id set; the index table itself is not touched.
 */
protected void clearCache() {
    cache.clear();
}
/**
 * Writes a new index row for the given key name and caches its bookmark id set.
 *
 * @param user the bookmark user owning the index table
 * @param keyname the tag or folder name to index
 * @param urlSet the bookmark ids belonging to the key name
 * @throws IOException
 */
protected void createIndexEntry(final String user, final String keyname, final HashSet<String> urlSet) throws IOException {
    final byte[] rowKey = YMarkTables.getKeyId(keyname);
    final String tableName = user + this.table_basename;
    final byte[] serializedIds = YMarkTables.keySetToBytes(urlSet);

    // the cache is refreshed before the row is written
    this.cache.insert(tableName + ":" + keyname, serializedIds);

    final Data entry = new Data();
    entry.put(INDEX.NAME.key, keyname);
    entry.put(INDEX.URLS.key, serializedIds);
    this.worktables.insert(tableName, rowKey, entry);
}
/**
 * Removes a bookmark id from the index entry of every key listed in keysString.
 *
 * @param user the bookmark user owning the index table
 * @param keysString key names joined by the tags separator
 * @param urlHash the bookmark id to remove
 */
protected void removeIndexEntry(final String user, String keysString, final byte[] urlHash) {
    final String[] names = keysString.split(YMarkTables.TAGS_SEPARATOR);
    for (int i = 0; i < names.length; i++) {
        updateIndexTable(user, names[i], urlHash, INDEX_ACTION.REMOVE);
    }
}
/**
 * Adds a bookmark id to the index entry of every key listed in keysString.
 *
 * @param user the bookmark user owning the index table
 * @param keysString key names joined by the tags separator
 * @param urlHash the bookmark id to add
 */
protected void insertIndexEntry(final String user, String keysString, final byte[] urlHash) {
    final String[] names = keysString.split(YMarkTables.TAGS_SEPARATOR);
    for (int i = 0; i < names.length; i++) {
        updateIndexTable(user, names[i], urlHash, INDEX_ACTION.ADD);
    }
}
/**
 * Brings the index in line with a changed key set of one bookmark:
 * keys only present in newSet get the bookmark added, keys only present
 * in oldSet get it removed. Neither argument is modified.
 *
 * @param user the bookmark user owning the index table
 * @param urlHash the bookmark id
 * @param oldSet the key names the bookmark had before the change
 * @param newSet the key names the bookmark has after the change
 */
protected void updateIndexEntry(final String user, final byte[] urlHash, final HashSet<String> oldSet, final HashSet<String> newSet) {
    // work on copies so that the caller's sets stay intact
    final HashSet<String> added = new HashSet<String>(newSet);
    added.removeAll(oldSet);
    for (final String keyname : added) {
        this.updateIndexTable(user, keyname, urlHash, INDEX_ACTION.ADD);
    }

    final HashSet<String> removed = new HashSet<String>(oldSet);
    removed.removeAll(newSet);
    for (final String keyname : removed) {
        this.updateIndexTable(user, keyname, urlHash, INDEX_ACTION.REMOVE);
    }
}
/**
 * Returns the ids of all bookmarks indexed under the given key name.
 * The cache is consulted first; on a miss the index table is read and
 * the result is cached.
 *
 * @param user the bookmark user owning the index table
 * @param keyname the tag or folder name
 * @return the bookmark ids, or an empty set if the key name is not indexed
 * @throws IOException
 * @throws RowSpaceExceededException
 */
public HashSet<String> getBookmarkIds(final String user, final String keyname) throws IOException, RowSpaceExceededException {
    final String index_table = user + this.table_basename;
    final String cacheKey = index_table+":"+keyname;
    // a single get() avoids the window between containsKey() and get() in which
    // the entry may be evicted (assumes get() returns null for a missing key)
    final byte[] cached = this.cache.get(cacheKey);
    if (cached != null) {
        return YMarkTables.keysStringToSet(UTF8.String(cached));
    }
    final Tables.Row idx_row = this.worktables.select(index_table, YMarkTables.getKeyId(keyname));
    if (idx_row != null) {
        final byte[] keys = idx_row.get(INDEX.URLS.key);
        if (keys != null) {
            this.cache.put(cacheKey, keys);
            return YMarkTables.keysStringToSet(UTF8.String(keys));
        }
    }
    return new HashSet<String>();
}
/**
 * Loads the bookmark rows indexed under the given key name.
 *
 * @param user the bookmark user owning the tables
 * @param keyname the tag or folder name
 * @return iterator over the existing bookmark rows; ids that no longer
 *         resolve to a row are skipped
 * @throws IOException
 * @throws RowSpaceExceededException
 */
public Iterator<Tables.Row> getBookmarks(final String user, final String keyname) throws IOException, RowSpaceExceededException {
    final String bmk_table = YMarkTables.TABLES.BOOKMARKS.tablename(user);
    final HashSet<Tables.Row> bookmarks = new HashSet<Tables.Row>();
    for (final String id : getBookmarkIds(user, keyname)) {
        // explicit UTF-8 instead of the platform default charset
        final Tables.Row row = this.worktables.select(bmk_table, UTF8.getBytes(id));
        if (row != null) {
            // a stale index entry must not surface as a null element
            bookmarks.add(row);
        }
    }
    return bookmarks.iterator();
}
/**
 * Returns the ids of the bookmarks that are indexed under ALL of the
 * given key names (set intersection).
 *
 * @param user the bookmark user owning the index table
 * @param keyArray the tag or folder names to intersect
 * @return the common bookmark ids; empty if keyArray is null or empty or
 *         if no bookmark carries all keys
 * @throws IOException
 * @throws RowSpaceExceededException
 */
public HashSet<String> getBookmarkIds(final String user, final String[] keyArray) throws IOException, RowSpaceExceededException {
    final HashSet<String> urlSet = new HashSet<String>();
    if (keyArray == null || keyArray.length == 0) {
        return urlSet;
    }
    urlSet.addAll(getBookmarkIds(user, keyArray[0]));
    // the first key already seeded the set, so intersect with the remaining keys only
    for (int i = 1; i < keyArray.length && !urlSet.isEmpty(); i++) {
        urlSet.retainAll(getBookmarkIds(user, keyArray[i]));
    }
    return urlSet;
}
/**
 * Discards the cache and the index table of a user and rebuilds the index
 * from the user's bookmark table.
 *
 * @param bmk_user the bookmark user whose index is rebuilt
 * @throws IOException
 */
public void rebuildIndex(final String bmk_user) throws IOException {
    final Iterator<Tables.Row> rows = this.worktables.iterator(YMarkTables.TABLES.BOOKMARKS.tablename(bmk_user));
    this.clearCache();
    this.worktables.clear(bmk_user + this.table_basename);
    while (rows.hasNext()) {
        final Tables.Row bookmark = rows.next();
        if (bookmark == null) {
            continue;
        }
        // the bookmark column has the name of the index table without its first character
        final String column = this.table_basename.substring(1);
        if (!bookmark.containsKey(column)) {
            continue;
        }
        final String url = UTF8.String(bookmark.get(YMarkTables.BOOKMARK.URL.key()));
        final String keysString = bookmark.get(column, YMarkTables.BOOKMARK.get(column).deflt());
        this.insertIndexEntry(bmk_user, keysString, YMarkTables.getBookmarkId(url));
    }
}
/**
 * YMark function that updates the tag/folder index: adds a bookmark id to,
 * or removes it from, the index entry of a single key name. An entry whose
 * id set becomes empty is deleted. I/O problems are logged, not thrown.
 * @param user the bookmark user owning the index table
 * @param keyname the tag or folder name whose entry is updated
 * @param url is the url hash as returned by DigestURI.hash()
 * @param action is either INDEX_ACTION.ADD or INDEX_ACTION.REMOVE
 */
protected void updateIndexTable(final String user, final String keyname, final byte[] url, final INDEX_ACTION action) {
    final String index_table = user + this.table_basename;
    final String cacheKey = index_table+":"+keyname;
    final byte[] key = YMarkTables.getKeyId(keyname);
    final String urlHash = UTF8.String(url);

    // try to load urlSet from cache; a single get() avoids a lookup race
    // (assumes get() returns null for a missing key)
    final byte[] cached = this.cache.get(cacheKey);
    HashSet<String> urlSet = (cached != null) ? YMarkTables.keysStringToSet(UTF8.String(cached)) : new HashSet<String>();

    try {
        final Tables.Row row = this.worktables.select(index_table, key);
        if (row == null) {
            // key has no index_table entry
            switch (action) {
                case ADD:
                    urlSet.add(urlHash);
                    createIndexEntry(user, keyname, urlSet);
                    break;
                case REMOVE:
                    // key has no index_table entry but a cache entry
                    // TODO: this shouldn't happen
                    if (!urlSet.isEmpty()) {
                        urlSet.remove(urlHash);
                        if (urlSet.isEmpty()) {
                            // nothing left to index: serializing an empty set would fail,
                            // so only the stale cache entry is dropped
                            this.cache.remove(cacheKey);
                        } else {
                            createIndexEntry(user, keyname, urlSet);
                        }
                    }
                    break;
                default:
                    break;
            }
        } else {
            // key has an existing index_table entry
            if (urlSet.isEmpty()) {
                // key has no cache entry: load urlSet from index_table
                urlSet = YMarkTables.keysStringToSet(UTF8.String(row.get(INDEX.URLS.key)));
            }
            switch (action) {
                case ADD:
                    urlSet.add(urlHash);
                    break;
                case REMOVE:
                    urlSet.remove(urlHash);
                    break;
                default:
                    break;
            }
            if (urlSet.isEmpty()) {
                this.cache.remove(cacheKey);
                this.worktables.delete(index_table, key);
            } else {
                final byte[] BurlSet = YMarkTables.keySetToBytes(urlSet);
                this.cache.insert(cacheKey, BurlSet);
                row.put(INDEX.URLS.key, BurlSet);
                this.worktables.update(index_table, row);
            }
        }
    } catch (IOException e) {
        Log.logException(e);
    } catch (RowSpaceExceededException e) {
        Log.logException(e);
    }
}
}

@ -1,517 +0,0 @@
package de.anomic.data;
import java.io.IOException;
import java.io.ByteArrayInputStream;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.EnumMap;
import java.util.EnumSet;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.TreeMap;
import net.yacy.cora.date.ISO8601Formatter;
import net.yacy.cora.document.UTF8;
import net.yacy.document.Condenser;
import net.yacy.document.Document;
import net.yacy.document.LibraryProvider;
import net.yacy.document.WordTokenizer;
import net.yacy.kelondro.blob.Tables;
import net.yacy.kelondro.blob.Tables.Data;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.data.meta.URIMetadataRow;
import net.yacy.kelondro.data.word.Word;
import net.yacy.kelondro.index.RowSpaceExceededException;
import net.yacy.kelondro.logging.Log;
import de.anomic.search.Segment;
public class YMarkTables {
/**
 * The per-user tables of the bookmark system. Every table name is the
 * user name followed by the basename of the table.
 */
public enum TABLES {
    BOOKMARKS ("_bookmarks"),
    TAGS ("_tags"),
    FOLDERS ("_folders");

    // enum constants are shared singletons, so their state must be immutable
    private final String basename;

    private TABLES(final String b) {
        this.basename = b;
    }

    /** @return the table name suffix */
    public String basename() {
        return this.basename;
    }

    /**
     * @param bmk_user the bookmark user
     * @return the full name of this table for the given user
     */
    public String tablename(final String bmk_user) {
        return bmk_user+this.basename;
    }
}
/**
 * URL scheme prefixes known to the bookmark system.
 */
public enum PROTOCOLS {
    HTTP ("http://"),
    HTTPS ("https://");

    // enum constants are shared singletons, so their state must be immutable
    private final String protocol;

    private PROTOCOLS(final String s) {
        this.protocol = s;
    }

    /** @return the scheme prefix including "://" */
    public String protocol() {
        return this.protocol;
    }

    /**
     * @param s the remainder of the url
     * @return the scheme prefix followed by s
     */
    public String protocol(final String s) {
        return this.protocol+s;
    }
}
/**
 * The columns of a bookmark row. Each column carries its table key, its
 * default value, the matching attribute names of the HTML and XBEL
 * bookmark formats and a type label.
 */
public enum BOOKMARK {
    //             key                 dflt            html_attrb          xbel_attrb      type
    URL            ("url",             "",             "href",             "href",         "link"),
    TITLE          ("title",           "",             "",                 "",             "meta"),
    DESC           ("desc",            "",             "",                 "",             "comment"),
    DATE_ADDED     ("date_added",      "",             "add_date",         "added",        "date"),
    DATE_MODIFIED  ("date_modified",   "",             "last_modified",    "modified",     "date"),
    DATE_VISITED   ("date_visited",    "",             "last_visited",     "visited",      "date"),
    // default was misspelled "flase"
    PUBLIC         ("public",          "false",        "",                 "yacy:public",  "lock"),
    TAGS           ("tags",            "unsorted",     "shortcuturl",      "yacy:tags",    "tag"),
    VISITS         ("visits",          "0",            "",                 "yacy:visits",  "stat"),
    FOLDERS        ("folders",         "/unsorted",    "",                 "",             "folder");

    private final String key;
    private final String dflt;
    private final String html_attrb;
    private final String xbel_attrb;
    private final String type;

    // reverse lookup from table key to column; filled once all constants exist
    private static final Map<String,BOOKMARK> lookup = new HashMap<String,BOOKMARK>();
    static {
        for (final BOOKMARK b : EnumSet.allOf(BOOKMARK.class)) {
            lookup.put(b.key(), b);
        }
    }

    private BOOKMARK(final String k, final String s, final String a, final String x, final String t) {
        this.key = k;
        this.dflt = s;
        this.html_attrb = a;
        this.xbel_attrb = x;
        this.type = t;
    }

    /**
     * @param key a table key
     * @return the column with that key, or null if there is none
     */
    public static BOOKMARK get(final String key) {
        return lookup.get(key);
    }

    /**
     * @param key a table key
     * @return true if a column with that key exists
     */
    public static boolean contains(final String key) {
        return lookup.containsKey(key);
    }

    public String key() {
        return this.key;
    }

    public String deflt() {
        return this.dflt;
    }

    public String html_attrb() {
        return this.html_attrb;
    }

    public String xbel_attrb() {
        return this.xbel_attrb;
    }

    /**
     * @return the text that closes the previous XBEL attribute value and
     *         opens this column's attribute: quote, newline, space, name, '=', quote
     */
    public String xbel() {
        // built locally: a shared static buffer would be corrupted by concurrent callers
        final StringBuilder buffer = new StringBuilder(25);
        buffer.append('"');
        buffer.append('\n');
        buffer.append(' ');
        buffer.append(this.xbel_attrb);
        buffer.append('=');
        buffer.append('"');
        return buffer.toString();
    }

    public String type() {
        return this.type;
    }
}
/**
 * Keys of the metadata map produced by the getMetadata() methods.
 */
public enum METADATA {
    // descriptive properties
    TITLE, DESCRIPTION, FAVICON, KEYWORDS, LANGUAGE, CREATOR, PUBLISHER,
    // technical properties
    CHARSET, MIMETYPE, SIZE, WORDCOUNT,
    // url database properties
    IN_URLDB, FRESHDATE, LOADDATE, MODDATE, SNIPPET
}
// NOTE(review): presumably a sentinel marking the end of a bookmark queue - not used in this class, confirm against the importers
public final static HashMap<String,String> POISON = new HashMap<String,String>();
// separator between the entries of a serialized key list (tags, folders, bookmark ids)
public final static String TAGS_SEPARATOR = ",";
// separator between the levels of a folder path
public final static String FOLDERS_SEPARATOR = "/";
public final static String FOLDERS_ROOT = "/";
public final static String FOLDERS_UNSORTED = "/unsorted";
public final static String FOLDERS_IMPORTED = "/imported";
public static final int FOLDER_BUFFER_SIZE = 100;
public final static String BOOKMARKS_LOG = "BOOKMARKS";
public final static String BOOKMARKS_ID = "id";
public final static String USER_ADMIN = "admin";
public final static String USER_AUTHENTICATE = "AUTHENTICATE";
public final static String USER_AUTHENTICATE_MSG = "Authentication required!";
// table storage shared with the two indexes below
private WorkTables worktables;
// index from tag name to bookmark ids
public YMarkIndex tags;
// index from folder path to bookmark ids
public YMarkIndex folders;
public YMarkTables(final Tables wt) {
this.worktables = (WorkTables)wt;
this.folders = new YMarkIndex(this.worktables, TABLES.FOLDERS.basename());
this.tags = new YMarkIndex(this.worktables, TABLES.TAGS.basename());
}
/**
 * Parses an ISO 8601 date. Supported are the plain date form
 * {@code yyyy-MM-dd} (exactly 10 characters) and the date-time form
 * {@code yyyy-MM-dd'T'HH:mm:ss} followed by either {@code Z} or a
 * six character offset such as {@code +01:00}.
 *
 * @param s the text to parse
 * @return the parsed date
 * @throws ParseException if s is null, empty or not in a supported form
 */
public static Date parseISO8601(final String s) throws ParseException {
    if (s == null || s.length() < 1) {
        throw new ParseException("parseISO8601 - empty string, nothing to parse", 0);
    }
    if (s.length() == 10) {
        return new SimpleDateFormat("yyyy-MM-dd").parse(s);
    }
    // SimpleDateFormat's 'z' needs the offset in the form GMT+hh:mm
    final StringBuilder date = new StringBuilder(s);
    if (date.charAt(date.length()-1) == 'Z') {
        date.deleteCharAt(date.length()-1);
        date.append("GMT-00:00");
    } else {
        if (date.length() < 6) {
            // too short to carry an offset: report it as a parse problem
            // instead of failing with an index error on insert()
            throw new ParseException("parseISO8601 - string too short: " + s, 0);
        }
        date.insert(date.length()-6, "GMT");
    }
    return new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssz").parse(date.toString());
}
/**
 * Formats a timestamp, stored as the decimal text of epoch milliseconds,
 * as an ISO 8601 string.
 *
 * @param date the stored timestamp bytes, may be null
 * @return the formatted date, or an empty string if there is nothing to format
 */
public static String getISO8601(final byte[] date) {
    if (date == null) {
        return "";
    }
    final String millis = UTF8.String(date);
    if (millis == null || millis.length() <= 0) {
        return "";
    }
    final Date parsed = new Date(Long.parseLong(millis));
    return ISO8601Formatter.FORMATTER.format(parsed);
}
/**
 * Computes the bookmark id (primary key) of a url.
 *
 * @param url the bookmark url
 * @return the hash of the url
 * @throws MalformedURLException if url cannot be parsed
 */
public final static byte[] getBookmarkId(String url) throws MalformedURLException {
    final DigestURI digest = new DigestURI(url, null);
    return digest.hash();
}
/**
 * Computes the primary key of a tag or folder name in an index table.
 * The name is lower-cased first, so keys are case-insensitive.
 *
 * @param tag the tag or folder name
 * @return the word hash of the lower-cased name
 */
public final static byte[] getKeyId(final String tag) {
    final String normalized = tag.toLowerCase();
    return Word.word2hash(normalized);
}
/**
 * Serializes a key set to the bytes of its separator-joined string form.
 *
 * @param urlSet the keys to serialize
 * @return the UTF-8 bytes of the joined keys
 */
public final static byte[] keySetToBytes(final HashSet<String> urlSet) {
    // explicit UTF-8: the stored bytes are read back with UTF8.String(),
    // so they must not depend on the platform default charset
    return UTF8.getBytes(keySetToString(urlSet));
}
/**
 * Joins the keys of a set with the tags separator.
 *
 * @param urlSet the keys to join
 * @return the joined keys, or an empty string for an empty set
 */
public final static String keySetToString(final HashSet<String> urlSet) {
    final StringBuilder urls = new StringBuilder(urlSet.size()*20);
    for (final String url : urlSet) {
        // the separator goes between entries only, so an empty set
        // no longer fails on removing a leading separator
        if (urls.length() > 0) {
            urls.append(TAGS_SEPARATOR);
        }
        urls.append(url);
    }
    return urls.toString();
}
/**
 * Splits a separator-joined key list into a set.
 *
 * @param keysString the joined keys, may be null or empty
 * @return the keys; empty entries are dropped, so a null or empty input
 *         gives an empty set
 */
public final static HashSet<String> keysStringToSet(final String keysString) {
    final HashSet<String> keySet = new HashSet<String>();
    if (keysString == null || keysString.length() == 0) {
        return keySet;
    }
    for (final String key : keysString.split(TAGS_SEPARATOR)) {
        // "".split(",") yields one empty element; an empty key is never a valid entry
        if (key.length() > 0) {
            keySet.add(key);
        }
    }
    return keySet;
}
/**
 * Normalizes a comma separated tag list: separators and spaces directly
 * following a separator are removed, as are one leading and one trailing
 * separator.
 *
 * @param tagsString the raw tag list, may be null or empty
 * @return the cleaned list, or the default tag if nothing remains
 */
public final static String cleanTagsString(final String tagsString) {
    if (tagsString == null || tagsString.length() == 0) {
        return YMarkTables.BOOKMARK.TAGS.deflt();
    }
    final char separator = TAGS_SEPARATOR.charAt(0);
    final StringBuilder ts = new StringBuilder(tagsString);
    // get rid of double commas and space characters following a comma
    for (int i = 0; i < ts.length()-1; i++) {
        if (ts.charAt(i) == separator) {
            final char next = ts.charAt(i+1);
            if (next == separator || next == ' ') {
                ts.deleteCharAt(i+1);
                // look at the same separator again, more may follow
                i--;
            }
        }
    }
    // get rid of heading and trailing comma; the length checks cover inputs
    // such as "," that are empty once the heading comma is gone
    if (ts.length() > 0 && ts.charAt(0) == separator) {
        ts.deleteCharAt(0);
    }
    if (ts.length() > 0 && ts.charAt(ts.length()-1) == separator) {
        ts.deleteCharAt(ts.length()-1);
    }
    if (ts.length() == 0) {
        return YMarkTables.BOOKMARK.TAGS.deflt();
    }
    return ts.toString();
}
/**
 * Normalizes a comma separated list of folder paths. On top of the
 * cleaning done by cleanTagsString(), a folder separator that is followed
 * by a list or folder separator is removed, spaces following a folder
 * separator are removed and one trailing folder separator is cut off.
 *
 * @param foldersString the raw folder list, may be null or empty
 * @return the cleaned list, or the default folder for a null or empty input
 */
public final static String cleanFoldersString(final String foldersString) {
    // must be tested on the raw input: cleanTagsString() answers an empty
    // input with the default TAG, which is not a valid folder path
    if (foldersString == null || foldersString.length() == 0) {
        return YMarkTables.BOOKMARK.FOLDERS.deflt();
    }
    final char folderSep = FOLDERS_SEPARATOR.charAt(0);
    final char listSep = TAGS_SEPARATOR.charAt(0);
    final StringBuilder fs = new StringBuilder(cleanTagsString(foldersString));
    for (int i = 0; i < fs.length()-1; i++) {
        if (fs.charAt(i) == folderSep) {
            final char next = fs.charAt(i+1);
            if (next == listSep || next == folderSep) {
                // drop the dangling folder separator itself
                fs.deleteCharAt(i);
                i--;
            } else if (next == ' ') {
                fs.deleteCharAt(i+1);
                i--;
            }
        }
    }
    if (fs.length() > 0 && fs.charAt(fs.length()-1) == folderSep) {
        fs.deleteCharAt(fs.length()-1);
    }
    return fs.toString();
}
/**
 * Clears the cache of the index that belongs to the given table name.
 *
 * @param tablename the full name of a tags or folders table
 */
public void clearIndex(String tablename) {
    final boolean isTagsTable = tablename.endsWith(TABLES.TAGS.basename());
    if (isTagsTable) {
        this.tags.clearCache();
    }
    final boolean isFoldersTable = tablename.endsWith(TABLES.FOLDERS.basename());
    if (isFoldersTable) {
        this.folders.clearCache();
    }
}
/**
 * Deletes a bookmark and removes it from the tag and folder index.
 * Nothing happens if the bookmark does not exist.
 *
 * @param bmk_user the bookmark user owning the tables
 * @param urlHash the bookmark id
 * @throws IOException
 * @throws RowSpaceExceededException
 */
public void deleteBookmark(final String bmk_user, final byte[] urlHash) throws IOException, RowSpaceExceededException {
    final String table = TABLES.BOOKMARKS.tablename(bmk_user);
    final Tables.Row bookmark = this.worktables.select(table, urlHash);
    if (bookmark == null) {
        return;
    }
    // the index entries are released before the row itself goes away
    final String tagsOfBookmark = bookmark.get(YMarkTables.BOOKMARK.TAGS.key(),YMarkTables.BOOKMARK.TAGS.deflt());
    this.tags.removeIndexEntry(bmk_user, tagsOfBookmark, urlHash);
    final String foldersOfBookmark = bookmark.get(YMarkTables.BOOKMARK.FOLDERS.key(),YMarkTables.FOLDERS_ROOT);
    this.folders.removeIndexEntry(bmk_user, foldersOfBookmark, urlHash);
    this.worktables.delete(table, urlHash);
}
/**
 * Deletes the bookmark of the given url, see deleteBookmark(String, byte[]).
 *
 * @param bmk_user the bookmark user owning the tables
 * @param url the bookmark url
 * @throws IOException also if url cannot be parsed
 * @throws RowSpaceExceededException
 */
public void deleteBookmark(final String bmk_user, final String url) throws IOException, RowSpaceExceededException {
    final byte[] urlHash = getBookmarkId(url);
    deleteBookmark(bmk_user, urlHash);
}
/**
 * Stores a bookmark. A bookmark whose url is not yet known is inserted with
 * defaults for missing tags, folders and dates; an existing bookmark is
 * updated with the supplied values. The tag and folder index is kept in sync.
 *
 * @param bmk_user the bookmark user owning the tables
 * @param bmk the bookmark values keyed by the BOOKMARK keys; must contain the url.
 *            Keys that are absent or mapped to null count as "not supplied".
 * @param importer if true, supplied tags and folders are merged with the
 *            existing ones instead of replacing them
 * @throws IOException also if the url cannot be parsed
 * @throws RowSpaceExceededException
 */
public void addBookmark(final String bmk_user, final HashMap<String,String> bmk, final boolean importer) throws IOException, RowSpaceExceededException {
    final String bmk_table = TABLES.BOOKMARKS.tablename(bmk_user);
    final String date = String.valueOf(System.currentTimeMillis());
    final byte[] urlHash = getBookmarkId(bmk.get(BOOKMARK.URL.key()));
    if (urlHash == null) {
        return;
    }
    final Tables.Row bmk_row = this.worktables.select(bmk_table, urlHash);
    if (bmk_row == null) {
        // create and insert new entry
        this.worktables.insert(bmk_table, urlHash, createBookmarkData(bmk_user, bmk, urlHash, date));
        return;
    }

    // modify and update existing entry
    for (final BOOKMARK b : BOOKMARK.values()) {
        final String supplied = bmk.get(b.key());
        switch(b) {
            case DATE_ADDED:
                if (!bmk_row.containsKey(b.key())) {
                    bmk_row.put(b.key(), date);
                }
                break;
            case DATE_MODIFIED:
                bmk_row.put(b.key(), date);
                break;
            case TAGS:
                updateIndexedKeys(this.tags, bmk_user, urlHash, bmk_row, b, supplied, importer);
                break;
            case FOLDERS:
                updateIndexedKeys(this.folders, bmk_user, urlHash, bmk_row, b, supplied, importer);
                break;
            default:
                if (supplied != null) {
                    bmk_row.put(b.key(), supplied);
                } else {
                    bmk_row.put(b.key(), bmk_row.get(b.key(), b.deflt()));
                }
        }
    }
    // update bmk_table
    this.worktables.update(bmk_table, bmk_row);
}

// Builds the row data of a new bookmark and registers its tags and folders in the index.
private Data createBookmarkData(final String bmk_user, final HashMap<String,String> bmk, final byte[] urlHash, final String date) {
    final Data data = new Data();
    for (final BOOKMARK b : BOOKMARK.values()) {
        final String supplied = bmk.get(b.key());
        switch(b) {
            case DATE_ADDED:
            case DATE_MODIFIED:
                if (supplied != null) {
                    data.put(b.key(), supplied);
                } else {
                    // explicit UTF-8 instead of the platform default charset
                    data.put(b.key(), UTF8.getBytes(date));
                }
                break;
            case TAGS: {
                final String value = (supplied != null) ? supplied : b.deflt();
                this.tags.insertIndexEntry(bmk_user, value, urlHash);
                data.put(b.key(), value);
                break;
            }
            case FOLDERS: {
                final String value = (supplied != null) ? supplied : b.deflt();
                this.folders.insertIndexEntry(bmk_user, value, urlHash);
                data.put(b.key(), value);
                break;
            }
            default:
                if (supplied != null) {
                    data.put(b.key(), supplied);
                }
        }
    }
    return data;
}

// Applies a supplied tags/folders value to an existing row and updates the matching index.
private void updateIndexedKeys(final YMarkIndex index, final String bmk_user, final byte[] urlHash,
        final Tables.Row bmk_row, final BOOKMARK b, final String supplied, final boolean importer) {
    if (supplied == null) {
        // nothing supplied: the row keeps its keys (or gets the default), and so
        // does the index - diffing against an empty set here would remove every
        // index entry of a bookmark that still carries these keys
        bmk_row.put(b.key(), bmk_row.get(b.key(), b.deflt()));
        return;
    }
    final HashSet<String> oldSet = keysStringToSet(bmk_row.get(b.key(), b.deflt()));
    final HashSet<String> newSet = keysStringToSet(supplied);
    if (importer) {
        // imports only ever add keys: merge and make sure nothing is removed
        newSet.addAll(oldSet);
        bmk_row.put(b.key(), keySetToString(newSet));
        oldSet.clear();
    } else {
        bmk_row.put(b.key(), supplied);
    }
    index.updateIndexEntry(bmk_user, urlHash, oldSet, newSet);
}
/**
 * Reads the metadata of a url from the url database of an index segment.
 *
 * @param urlHash the hash of the url
 * @param indexSegment the segment whose url metadata is queried
 * @return the metadata found; empty if the url is not in the database
 */
public static EnumMap<METADATA, String> getMetadata(final byte[] urlHash, final Segment indexSegment) {
    final EnumMap<METADATA, String> result = new EnumMap<METADATA, String>(METADATA.class);
    final URIMetadataRow entry = indexSegment.urlMetadata().load(urlHash, null, 0);
    if (entry == null) {
        return result;
    }

    result.put(METADATA.IN_URLDB, "true");
    result.put(METADATA.SIZE, String.valueOf(entry.size()));
    result.put(METADATA.FRESHDATE, ISO8601Formatter.FORMATTER.format(entry.freshdate()));
    result.put(METADATA.LOADDATE, ISO8601Formatter.FORMATTER.format(entry.loaddate()));
    result.put(METADATA.MODDATE, ISO8601Formatter.FORMATTER.format(entry.moddate()));
    result.put(METADATA.SNIPPET, String.valueOf(entry.snippet()));
    result.put(METADATA.WORDCOUNT, String.valueOf(entry.wordCount()));
    result.put(METADATA.MIMETYPE, String.valueOf(entry.doctype()));
    result.put(METADATA.LANGUAGE, UTF8.String(entry.language()));

    final URIMetadataRow.Components components = entry.metadata();
    if (components == null) {
        return result;
    }
    result.put(METADATA.TITLE, components.dc_title());
    result.put(METADATA.CREATOR, components.dc_creator());
    result.put(METADATA.KEYWORDS, components.dc_subject());
    result.put(METADATA.PUBLISHER, components.dc_publisher());
    return result;
}
/**
 * Extracts the metadata of a parsed document.
 *
 * @param document the parsed document, may be null
 * @return the metadata; IN_URLDB is always "false", all other entries
 *         are present only for a non-null document
 */
public static EnumMap<METADATA, String> getMetadata(final Document document) {
    final EnumMap<METADATA, String> result = new EnumMap<METADATA, String>(METADATA.class);
    result.put(METADATA.IN_URLDB, "false");
    if (document == null) {
        return result;
    }
    result.put(METADATA.TITLE, document.dc_title());
    result.put(METADATA.CREATOR, document.dc_creator());
    result.put(METADATA.KEYWORDS, document.dc_subject(' '));
    result.put(METADATA.PUBLISHER, document.dc_publisher());
    result.put(METADATA.DESCRIPTION, document.dc_description());
    result.put(METADATA.MIMETYPE, document.dc_format());
    result.put(METADATA.LANGUAGE, document.dc_language());
    result.put(METADATA.CHARSET, document.getCharset());
    // result.put(METADATA.SIZE, String.valueOf(document.getTextLength()));
    return result;
}
/**
 * Suggests tags for a document. The words of the document are counted,
 * words that also occur in title, description or subject get their counter
 * boosted, and the highest ranked words are returned as a tag list.
 *
 * @param document the parsed document, may be null
 * @param bmk_user the bookmark user whose tags table is consulted
 * @param count the maximum number of top ranked words to consider
 * @return the cleaned, separator-joined tags as produced by cleanTagsString()
 */
public String autoTag(final Document document, final String bmk_user, final int count) {
final StringBuilder buffer = new StringBuilder();
final Map<String, Word> words;
if(document != null) {
try {
words = new Condenser(document, true, true, LibraryProvider.dymLib).words();
// tokenize the concatenation of title, description and subject
buffer.append(document.dc_title());
buffer.append(document.dc_description());
buffer.append(document.dc_subject(' '));
final Enumeration<String> tokens = new WordTokenizer(new ByteArrayInputStream(UTF8.getBytes(buffer.toString())), LibraryProvider.dymLib);
while(tokens.hasMoreElements()) {
int max = 1;
String token = tokens.nextElement();
Word word = words.get(token);
if (words.containsKey(token)) {
// boost: tokens with an entry in the user's tags table weigh most, then tokens longer than 3 characters
if (this.worktables.has(TABLES.TAGS.tablename(bmk_user), getKeyId(token))) {
max = word.occurrences() * 1000;
} else if (token.length()>3) {
max = word.occurrences() * 100;
}
// the counter is raised by calling inc() max times
for(int i=0; i<max; i++) {
word.inc();
}
}
}
// reuse the buffer for the resulting tag list
buffer.setLength(0);
final ArrayList<String> topwords = new ArrayList<String>(sortWordCounts(words).descendingKeySet());
for(int i=0; i<count && i<topwords.size() ; i++) {
// only words whose counter exceeds 100 are taken
if(words.get(topwords.get(i)).occurrences() > 100) {
buffer.append(topwords.get(i));
buffer.append(YMarkTables.TAGS_SEPARATOR);
}
}
} catch (UnsupportedEncodingException e) {
Log.logException(e);
} catch (IOException e) {
Log.logException(e);
}
}
return YMarkTables.cleanTagsString(buffer.toString());
}
/**
 * Counts the words of a document and returns them ordered by sortWordCounts().
 *
 * @param document the parsed document, may be null
 * @return the sorted word counts, or an empty map for a null document
 */
public static TreeMap<String,Word> getWordCounts(final Document document) {
    if (document == null) {
        return new TreeMap<String, Word>();
    }
    final Condenser condenser = new Condenser(document, true, true, LibraryProvider.dymLib);
    return sortWordCounts(condenser.words());
}
/**
 * Copies a word map into a TreeMap ordered by YMarkWordCountComparator.
 *
 * @param unsorted_words the words with their counters
 * @return a new map holding the words in comparator order
 */
public static TreeMap<String,Word> sortWordCounts(final Map<String, Word> unsorted_words) {
    final YMarkWordCountComparator byCount = new YMarkWordCountComparator(unsorted_words);
    final TreeMap<String, Word> ordered = new TreeMap<String, Word>(byCount);
    ordered.putAll(unsorted_words);
    return ordered;
}
}

@ -1,27 +0,0 @@
package de.anomic.data;
import java.util.Comparator;
import java.util.Map;
import net.yacy.kelondro.data.word.Word;
/**
 * Orders words by their occurrence counter, ascending. Words with the same
 * counter are ordered by their text, so that two different words never
 * compare as equal.
 */
public class YMarkWordCountComparator implements Comparator<String> {

    // the counters are looked up in this map on every comparison
    private final Map<String,Word> words;

    /**
     * @param words the map providing the counter of every word to compare
     */
    public YMarkWordCountComparator(final Map<String,Word> words) {
        this.words = words;
    }

    /**
     * @param k1 first word, must be a key of the map
     * @param k2 second word, must be a key of the map
     * @return negative, zero or positive as k1 ranks below, equal to or above k2
     */
    public int compare(final String k1, final String k2) {
        final int c1 = this.words.get(k1).occurrences();
        final int c2 = this.words.get(k2).occurrences();
        if (c1 > c2) {
            return 1;
        }
        if (c1 < c2) {
            return -1;
        }
        // tie-break on the word itself: a TreeMap treats keys that compare as 0
        // as the same key and would keep only one word per counter value
        return k1.compareTo(k2);
    }
}

@ -0,0 +1,90 @@
// YMarkCrawlStart.java
// (C) 2011 by Stefan Förster, sof@gmx.de, Norderstedt, Germany
// first published 2010 on http://yacy.net
//
// This is a part of YaCy, a peer-to-peer based web search engine
//
// $LastChangedDate: 2011-03-09 13:50:39 +0100 (Mi, 09 Mrz 2011) $
// $LastChangedRevision: 7574 $
// $LastChangedBy: apfelmaennchen $
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package de.anomic.data.ymark;
import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.regex.Pattern;
import net.yacy.kelondro.blob.Tables;
import de.anomic.data.WorkTables;
/**
 * The parameters of the crawl start that was recorded for a url. The map is
 * filled from the query string of the first matching crawler call found in
 * the API table; keys and values are taken as they appear in that query
 * string.
 */
public class YMarkCrawlStart extends HashMap<String,String>{

    private static final long serialVersionUID = 1L;
    private final WorkTables worktables;

    /**
     * Creates an empty instance; call load() to fill it.
     *
     * @param worktables the tables holding the recorded API calls
     */
    public YMarkCrawlStart(final WorkTables worktables) {
        this.worktables = worktables;
    }

    /**
     * Creates an instance and loads the crawl start of the given url.
     *
     * @param worktables the tables holding the recorded API calls
     * @param url the crawl start url to look up
     */
    public YMarkCrawlStart(final WorkTables worktables, final String url) {
        this.worktables = worktables;
        this.clear();
        this.load(url);
    }

    /**
     * Adds the query parameters of the first recorded crawler call whose
     * crawlingURL matches the given url. The map is left unchanged if there
     * is no such call or if the API table cannot be read.
     *
     * @param url the crawl start url to look up
     */
    public void load(String url) {
        try {
            // Pattern.quote gives the same \Q...\E expression as before for ordinary
            // urls but stays a literal match if the url itself contains "\E"
            final Pattern pattern = Pattern.compile("^.*crawlingURL=" + Pattern.quote(url) + "?.*");
            final Iterator<Tables.Row> APIcalls = this.worktables.iterator(WorkTables.TABLE_API_NAME, WorkTables.TABLE_API_COL_URL, pattern);
            while (APIcalls.hasNext()) {
                final Tables.Row row = APIcalls.next();
                if (!row.get(WorkTables.TABLE_API_COL_TYPE, "").equals("crawler")) {
                    continue;
                }
                final StringBuilder query = new StringBuilder(500);
                query.append(row.get(WorkTables.TABLE_API_COL_URL, ""));
                // keep only what follows the first '?'
                query.delete(0, query.indexOf("?")+1);
                parseQuery(query);
                // only the first crawler call is used
                break;
            }
        } catch (IOException e) {
            // best effort: without access to the API table the map simply stays as it is
        }
    }

    // Splits "k1=v1&k2=v2..." and stores every pair; parts without '=' are skipped.
    private void parseQuery(final StringBuilder query) {
        int start = 0;
        while (start < query.length()) {
            int end = query.indexOf("&", start);
            if (end < 0) {
                // last parameter: it extends to the end of the string (exclusive bound)
                end = query.length();
            }
            final int eq = query.indexOf("=", start);
            if (eq >= 0 && eq < end) {
                this.put(query.substring(start, eq), query.substring(eq+1, end));
            }
            start = end+1;
        }
    }
}

@ -0,0 +1,92 @@
// YMarkDate.java
// (C) 2011 by Stefan Förster, sof@gmx.de, Norderstedt, Germany
// first published 2010 on http://yacy.net
//
// This is a part of YaCy, a peer-to-peer based web search engine
//
// $LastChangedDate: 2011-03-09 13:50:39 +0100 (Mi, 09 Mrz 2011) $
// $LastChangedRevision: 7574 $
// $LastChangedBy: apfelmaennchen $
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package de.anomic.data.ymark;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
import net.yacy.cora.date.ISO8601Formatter;
import net.yacy.cora.document.UTF8;
/**
 * A point in time held as epoch milliseconds, convertible from and to the
 * ISO 8601 text form and the byte form (decimal text of the milliseconds).
 */
public class YMarkDate {

    private long date;

    /** Creates a date set to the current time. */
    public YMarkDate() {
        this.date = System.currentTimeMillis();
    }

    /**
     * @param date the decimal text of the epoch milliseconds, as bytes
     */
    public YMarkDate(final byte[] date) {
        this.set(date);
    }

    /**
     * Parses an ISO 8601 date and stores it in this object. Supported are
     * {@code yyyy-MM-dd} (exactly 10 characters) and
     * {@code yyyy-MM-dd'T'HH:mm:ss} followed by {@code Z} or a six
     * character offset such as {@code +01:00}.
     *
     * @param s the text to parse
     * @return the parsed time in epoch milliseconds
     * @throws ParseException if s is null, empty or not in a supported form
     */
    public long parseISO8601(final String s) throws ParseException {
        if (s == null || s.length() < 1) {
            throw new ParseException("parseISO8601 - empty string, nothing to parse", 0);
        }
        final SimpleDateFormat dateformat;
        final StringBuilder date = new StringBuilder(s);
        if (s.length() == 10) {
            dateformat = new SimpleDateFormat("yyyy-MM-dd");
        } else {
            // SimpleDateFormat's 'z' needs the offset in the form GMT+hh:mm
            dateformat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssz");
            if (date.charAt(date.length()-1) == 'Z') {
                date.deleteCharAt(date.length()-1);
                date.append("GMT-00:00");
            } else {
                if (date.length() < 6) {
                    // too short to carry an offset: report it as a parse problem
                    // instead of failing with an index error on insert()
                    throw new ParseException("parseISO8601 - string too short: " + s, 0);
                }
                date.insert(date.length()-6, "GMT");
            }
        }
        this.date = dateformat.parse(date.toString()).getTime();
        return this.date;
    }

    /** @return this date formatted by the ISO8601Formatter */
    public String toISO8601() {
        return ISO8601Formatter.FORMATTER.format(new Date(this.date));
    }

    /** @return the decimal text of the epoch milliseconds as UTF-8 bytes */
    public byte[] toBytes() {
        // explicit UTF-8: set(byte[]) reads the bytes back with UTF8.String()
        return UTF8.getBytes(String.valueOf(this.date));
    }

    /** @return the decimal text of the epoch milliseconds */
    @Override
    public String toString() {
        return String.valueOf(this.date);
    }

    /** @return the epoch milliseconds */
    public long get() {
        return this.date;
    }

    /** @param date the epoch milliseconds */
    public void set(long date) {
        this.date = date;
    }

    /**
     * @param date the decimal text of the epoch milliseconds, as bytes
     * @throws NumberFormatException if the bytes do not hold a decimal number
     */
    public void set(byte[] date) {
        this.date = Long.parseLong(UTF8.String(date));
    }
}

@ -1,4 +1,30 @@
package de.anomic.data;
// YMarkHTMLImporter.java
// (C) 2011 by Stefan Förster, sof@gmx.de, Norderstedt, Germany
// first published 2010 on http://yacy.net
//
// This is a part of YaCy, a peer-to-peer based web search engine
//
// $LastChangedDate$
// $LastChangedRevision$
// $LastChangedBy$
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package de.anomic.data.ymark;
import java.io.IOException;
import java.io.InputStream;
@ -14,7 +40,7 @@ import javax.swing.text.html.parser.ParserDelegator;
import net.yacy.kelondro.logging.Log;
public class YMarksHTMLImporter extends HTMLEditorKit.ParserCallback implements Runnable {
public class YMarkHTMLImporter extends HTMLEditorKit.ParserCallback implements Runnable {
public static enum STATE {
NOTHING,
@ -35,7 +61,7 @@ public class YMarksHTMLImporter extends HTMLEditorKit.ParserCallback implements
private final BlockingQueue<HashMap<String,String>> bookmarks;
private final ParserDelegator htmlParser;
public YMarksHTMLImporter(final InputStream input, int queueSize) {
public YMarkHTMLImporter(final InputStream input, int queueSize) {
this.state = STATE.NOTHING;
this.prevTag = null;
this.bmk = new HashMap<String,String>();
@ -76,7 +102,7 @@ public class YMarksHTMLImporter extends HTMLEditorKit.ParserCallback implements
this.bmk.put(YMarkTables.BOOKMARK.VISITS.key(), YMarkTables.BOOKMARK.VISITS.deflt());
break;
case FOLDER:
this.folder.append(YMarkTables.FOLDERS_SEPARATOR);
this.folder.append(YMarkUtil.FOLDERS_SEPARATOR);
this.folder.append(data);
break;
case FOLDER_DESC:
@ -109,7 +135,7 @@ public class YMarksHTMLImporter extends HTMLEditorKit.ParserCallback implements
switch(bmk) {
case TAGS:
// mozilla shortcuturl
this.bmk.put(bmk.key(), YMarkTables.cleanTagsString(s));
this.bmk.put(bmk.key(), YMarkUtil.cleanTagsString(s));
break;
case DATE_ADDED:
case DATE_MODIFIED:
@ -138,7 +164,7 @@ public class YMarksHTMLImporter extends HTMLEditorKit.ParserCallback implements
} else if (t == HTML.Tag.DL) {
//TODO: get rid of .toString.equals()
if(!this.folder.toString().equals(YMarkTables.FOLDERS_IMPORTED)) {
folder.setLength(folder.lastIndexOf(YMarkTables.FOLDERS_SEPARATOR));
folder.setLength(folder.lastIndexOf(YMarkUtil.FOLDERS_SEPARATOR));
}
} else {
state = STATE.NOTHING;

@ -0,0 +1,201 @@
// YMarkMetadata.java
// (C) 2011 by Stefan Förster, sof@gmx.de, Norderstedt, Germany
// first published 2010 on http://yacy.net
//
// This is a part of YaCy, a peer-to-peer based web search engine
//
// $LastChangedDate: 2011-03-09 13:50:39 +0100 (Mi, 09 Mrz 2011) $
// $LastChangedRevision: 7574 $
// $LastChangedBy: apfelmaennchen $
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package de.anomic.data.ymark;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.net.MalformedURLException;
import java.util.ArrayList;
import java.util.EnumMap;
import java.util.Enumeration;
import java.util.Map;
import java.util.TreeMap;
import net.yacy.cora.date.ISO8601Formatter;
import net.yacy.cora.document.UTF8;
import net.yacy.document.Condenser;
import net.yacy.document.Document;
import net.yacy.document.LibraryProvider;
import net.yacy.document.WordTokenizer;
import net.yacy.document.Parser.Failure;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.data.meta.URIMetadataRow;
import net.yacy.kelondro.data.word.Word;
import net.yacy.repository.LoaderDispatcher;
import de.anomic.crawler.CrawlProfile;
import de.anomic.crawler.retrieval.Response;
import de.anomic.search.Segments;
public class YMarkMetadata {
private DigestURI uri;
Document document;
Segments indexSegment;
/**
 * Keys of the metadata maps produced by {@link #getMetadata()} and {@link #loadMetadata()}.
 * Not every key is filled by every method: getMetadata() reads values from the URL
 * metadata index, loadMetadata() reads them from the loaded document.
 */
public enum METADATA {
// TITLE, KEYWORDS, LANGUAGE, CREATOR, PUBLISHER and MIMETYPE are set by both methods
TITLE,
// DESCRIPTION is set by loadMetadata() only
DESCRIPTION,
// FAVICON is not set anywhere in this class
FAVICON,
KEYWORDS,
LANGUAGE,
CREATOR,
PUBLISHER,
// CHARSET is set by loadMetadata() only
CHARSET,
MIMETYPE,
// SIZE and WORDCOUNT are set by getMetadata() only
SIZE,
WORDCOUNT,
// IN_URLDB is not set anywhere in this class
IN_URLDB,
// the three dates and SNIPPET are set by getMetadata() only
FRESHDATE,
LOADDATE,
MODDATE,
SNIPPET,
// AUTOTAG is set by loadMetadata() from autoTag(5)
AUTOTAG
}
/**
 * Creates an instance for a URL only; neither a document nor an index segment is attached.
 *
 * @param uri the URL the metadata belongs to
 */
public YMarkMetadata(final DigestURI uri) {
    this(uri, null);
}

/**
 * Creates an instance for a URL whose metadata can be looked up in the given index segments.
 *
 * @param uri          the URL the metadata belongs to
 * @param indexSegment segments used by {@link #getMetadata()}
 */
public YMarkMetadata(final DigestURI uri, final Segments indexSegment) {
    this.indexSegment = indexSegment;
    this.document = null;
    this.uri = uri;
}

/**
 * Creates an instance from an already loaded document. The URL is derived from the
 * document identifier; if that identifier is not a valid URL the URL stays {@code null}.
 *
 * @param document the loaded document
 */
public YMarkMetadata(final Document document) {
    DigestURI parsed;
    try {
        parsed = new DigestURI(document.dc_identifier());
    } catch (final MalformedURLException e) {
        // an unparsable identifier is tolerated, the instance then has no URL
        parsed = null;
    }
    this.document = document;
    this.uri = parsed;
    this.indexSegment = null;
}
/**
 * Loads and parses the document behind the URL unless a document is already present.
 *
 * @param loader dispatcher used to fetch the resource (cache strategy IFEXIST)
 * @throws IOException if loading fails
 * @throws Failure     if parsing fails
 */
public void loadDocument(LoaderDispatcher loader) throws IOException, Failure {
    if (this.document != null) {
        // already loaded, nothing to do
        return;
    }
    final Response loaded = loader.load(
            loader.request(this.uri, true, false),
            CrawlProfile.CacheStrategy.IFEXIST,
            Long.MAX_VALUE,
            true);
    this.document = Document.mergeDocuments(loaded.url(), loaded.getMimeType(), loaded.parse());
}
/**
 * Reads the metadata stored for the URL in the public index segment.
 *
 * @return map of the values found; empty if no index segment or URL is available
 *         (two of the constructors leave the segment unset) or if the URL is not indexed
 */
public EnumMap<METADATA, String> getMetadata() {
    final EnumMap<METADATA, String> metadata = new EnumMap<METADATA, String>(METADATA.class);
    if (this.indexSegment == null || this.uri == null) {
        // nothing to look up; return the empty map instead of failing with a NullPointerException
        return metadata;
    }
    final URIMetadataRow urlEntry = this.indexSegment.segment(Segments.Process.PUBLIC).urlMetadata().load(this.uri.hash(), null, 0);
    if (urlEntry != null) {
        metadata.put(METADATA.SIZE, String.valueOf(urlEntry.size()));
        metadata.put(METADATA.FRESHDATE, ISO8601Formatter.FORMATTER.format(urlEntry.freshdate()));
        metadata.put(METADATA.LOADDATE, ISO8601Formatter.FORMATTER.format(urlEntry.loaddate()));
        metadata.put(METADATA.MODDATE, ISO8601Formatter.FORMATTER.format(urlEntry.moddate()));
        metadata.put(METADATA.SNIPPET, String.valueOf(urlEntry.snippet()));
        metadata.put(METADATA.WORDCOUNT, String.valueOf(urlEntry.wordCount()));
        metadata.put(METADATA.MIMETYPE, String.valueOf(urlEntry.doctype()));
        metadata.put(METADATA.LANGUAGE, UTF8.String(urlEntry.language()));
        final URIMetadataRow.Components meta = urlEntry.metadata();
        if (meta != null) {
            metadata.put(METADATA.TITLE, meta.dc_title());
            metadata.put(METADATA.CREATOR, meta.dc_creator());
            metadata.put(METADATA.KEYWORDS, meta.dc_subject());
            metadata.put(METADATA.PUBLISHER, meta.dc_publisher());
        }
    }
    return metadata;
}
/**
 * Collects the metadata of the loaded document.
 *
 * @return map of the document values including up to five automatically derived tags;
 *         empty if no document has been loaded
 */
public EnumMap<METADATA, String> loadMetadata() {
    final EnumMap<METADATA, String> result = new EnumMap<METADATA, String>(METADATA.class);
    final Document doc = this.document;
    if (doc == null) {
        return result;
    }
    result.put(METADATA.TITLE, doc.dc_title());
    result.put(METADATA.CREATOR, doc.dc_creator());
    result.put(METADATA.KEYWORDS, doc.dc_subject(' '));
    result.put(METADATA.PUBLISHER, doc.dc_publisher());
    result.put(METADATA.DESCRIPTION, doc.dc_description());
    result.put(METADATA.MIMETYPE, doc.dc_format());
    result.put(METADATA.LANGUAGE, doc.dc_language());
    result.put(METADATA.CHARSET, doc.getCharset());
    // SIZE is intentionally not filled here (the original had it disabled)
    result.put(METADATA.AUTOTAG, this.autoTag(5));
    return result;
}
/**
 * Derives tag suggestions from the loaded document.
 * Words that also occur in title, description or subject get their counter boosted;
 * afterwards the highest ranked words whose counter exceeds 100 are returned.
 *
 * @param count maximum number of ranked words to inspect
 * @return cleaned, separator-joined tag string as produced by YMarkUtil.cleanTagsString
 */
public String autoTag(final int count) {
    if (this.document == null) {
        // no document: the cleaner receives the empty string
        return YMarkUtil.cleanTagsString("");
    }

    final Map<String, Word> words = new Condenser(this.document, true, true, LibraryProvider.dymLib).words();

    // text of the descriptive fields, tokenized to find the words worth boosting
    final StringBuilder descriptive = new StringBuilder();
    descriptive.append(this.document.dc_title());
    descriptive.append(this.document.dc_description());
    descriptive.append(this.document.dc_subject(' '));
    final Enumeration<String> tokens = new WordTokenizer(
            new ByteArrayInputStream(UTF8.getBytes(descriptive.toString())), LibraryProvider.dymLib);

    while (tokens.hasMoreElements()) {
        final String token = tokens.nextElement();
        final Word word = words.get(token);
        if (!words.containsKey(token)) {
            continue;
        }
        // short tokens get one extra hit, longer ones a hundred times their current count
        final int boost = (token.length() > 3) ? word.occurrences() * 100 : 1;
        for (int n = 0; n < boost; n++) {
            word.inc();
        }
    }

    final ArrayList<String> ranked = new ArrayList<String>(sortWordCounts(words).descendingKeySet());
    final StringBuilder tags = new StringBuilder();
    final int limit = Math.min(count, ranked.size());
    for (int i = 0; i < limit; i++) {
        final String candidate = ranked.get(i);
        if (words.get(candidate).occurrences() > 100) {
            tags.append(candidate);
            tags.append(YMarkUtil.TAGS_SEPARATOR);
        }
    }
    return YMarkUtil.cleanTagsString(tags.toString());
}
/**
 * Returns the words of the loaded document ordered by sortWordCounts.
 *
 * @return the sorted word map, or an empty map if no document has been loaded
 */
public TreeMap<String,Word> getWordCounts() {
    if (this.document == null) {
        return new TreeMap<String, Word>();
    }
    final Map<String, Word> counted = new Condenser(this.document, true, true, LibraryProvider.dymLib).words();
    return sortWordCounts(counted);
}
/**
 * Copies the given words into a tree map ordered by a YMarkWordCountComparator
 * that is backed by the same input map.
 *
 * @param unsorted_words words with their counters
 * @return a new tree map containing the entries in comparator order
 */
public static TreeMap<String,Word> sortWordCounts(final Map<String, Word> unsorted_words) {
    final YMarkWordCountComparator byCount = new YMarkWordCountComparator(unsorted_words);
    final TreeMap<String, Word> ranked = new TreeMap<String, Word>(byCount);
    for (final Map.Entry<String, Word> entry : unsorted_words.entrySet()) {
        ranked.put(entry.getKey(), entry.getValue());
    }
    return ranked;
}
}

@ -0,0 +1,340 @@
// YMarkTables.java
// (C) 2011 by Stefan Förster, sof@gmx.de, Norderstedt, Germany
// first published 2010 on http://yacy.net
//
// This is a part of YaCy, a peer-to-peer based web search engine
//
// $LastChangedDate$
// $LastChangedRevision$
// $LastChangedBy$
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package de.anomic.data.ymark;
import java.io.IOException;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.TreeSet;
import java.util.regex.Pattern;
import de.anomic.data.WorkTables;
import net.yacy.kelondro.blob.Tables;
import net.yacy.kelondro.blob.Tables.Data;
import net.yacy.kelondro.index.RowSpaceExceededException;
public class YMarkTables {
/**
 * The per-user bookmark tables. Each constant carries the suffix that is appended
 * to the user name to form the table name.
 */
public enum TABLES {
    BOOKMARKS ("_bookmarks"),
    TAGS ("_tags"),
    FOLDERS ("_folders");

    private final String basename;

    private TABLES(final String suffix) {
        this.basename = suffix;
    }

    /** @return the table suffix without a user name */
    public String basename() {
        return basename;
    }

    /**
     * @param bmk_user name of the bookmark user
     * @return the user name followed by the table suffix
     */
    public String tablename(String bmk_user) {
        final StringBuilder name = new StringBuilder();
        name.append(bmk_user);
        name.append(basename);
        return name.toString();
    }
}
/**
 * URL scheme prefixes, each including the trailing "://".
 */
public enum PROTOCOLS {
    HTTP ("http://"),
    HTTPS ("https://");

    private final String protocol;

    private PROTOCOLS(final String prefix) {
        this.protocol = prefix;
    }

    /** @return the scheme prefix */
    public String protocol() {
        return protocol;
    }

    /**
     * @param s text to put behind the scheme prefix
     * @return the scheme prefix followed by {@code s}
     */
    public String protocol(String s) {
        final StringBuilder url = new StringBuilder();
        url.append(protocol);
        url.append(s);
        return url.toString();
    }
}
/**
 * The columns of a bookmark row. Each constant carries the column key, its default
 * value, the attribute names used for HTML and XBEL import/export and a type label.
 */
public enum BOOKMARK {
    //             key                 dflt            html_attrb          xbel_attrb      type
    URL            ("url",             "",             "href",             "href",         "link"),
    TITLE          ("title",           "",             "",                 "",             "meta"),
    DESC           ("desc",            "",             "",                 "",             "comment"),
    DATE_ADDED     ("date_added",      "",             "add_date",         "added",        "date"),
    DATE_MODIFIED  ("date_modified",   "",             "last_modified",    "modified",     "date"),
    DATE_VISITED   ("date_visited",    "",             "last_visited",     "visited",      "date"),
    // default was misspelt "flase", which is not a valid boolean literal
    PUBLIC         ("public",          "false",        "",                 "yacy:public",  "lock"),
    TAGS           ("tags",            "unsorted",     "shortcuturl",      "yacy:tags",    "tag"),
    VISITS         ("visits",          "0",            "",                 "yacy:visits",  "stat"),
    FOLDERS        ("folders",         "/unsorted",    "",                 "",             "folder");

    private final String key;
    private final String dflt;
    private final String html_attrb;
    private final String xbel_attrb;
    private final String type;

    /** Reverse index from column key to constant, filled once all constants exist. */
    private static final Map<String,BOOKMARK> lookup = new HashMap<String,BOOKMARK>();
    static {
        for (final BOOKMARK b : values()) {
            lookup.put(b.key(), b);
        }
    }

    private BOOKMARK(String k, String s, String a, String x, String t) {
        this.key = k;
        this.dflt = s;
        this.html_attrb = a;
        this.xbel_attrb = x;
        this.type = t;
    }

    /**
     * @param key column key
     * @return the constant with that key, or {@code null} if there is none
     */
    public static BOOKMARK get(String key) {
        return lookup.get(key);
    }

    /**
     * @param key column key
     * @return {@code true} if a constant with that key exists
     */
    public static boolean contains(String key) {
        return lookup.containsKey(key);
    }

    /** @return the column key */
    public String key() {
        return this.key;
    }

    /** @return the default value of the column */
    public String deflt() {
        return this.dflt;
    }

    /** @return the attribute name used in HTML bookmark files, may be empty */
    public String html_attrb() {
        return this.html_attrb;
    }

    /** @return the attribute name used in XBEL files, may be empty */
    public String xbel_attrb() {
        return this.xbel_attrb;
    }

    /**
     * Builds the text that closes the previous XBEL attribute value and opens this one:
     * a double quote, a line break, a blank, the attribute name and {@code ="}.
     * A local builder is used so that concurrent callers cannot corrupt each other's result.
     *
     * @return the attribute opener
     */
    public String xbel() {
        final StringBuilder opener = new StringBuilder(this.xbel_attrb.length() + 5);
        opener.append('"');
        opener.append('\n');
        opener.append(' ');
        opener.append(this.xbel_attrb);
        opener.append('=');
        opener.append('"');
        return opener.toString();
    }

    /** @return the type label of the column */
    public String type() {
        return this.type;
    }
}
public final static HashMap<String,String> POISON = new HashMap<String,String>();
public final static String FOLDERS_ROOT = "/";
public final static String FOLDERS_UNSORTED = "/unsorted";
public final static String FOLDERS_IMPORTED = "/imported";
public static final int FOLDER_BUFFER_SIZE = 100;
public final static String BOOKMARKS_LOG = "BOOKMARKS";
public final static String BOOKMARKS_ID = "id";
public final static String USER_ADMIN = "admin";
public final static String USER_AUTHENTICATE = "AUTHENTICATE";
public final static String USER_AUTHENTICATE_MSG = "Authentication required!";
private WorkTables worktables;
/**
 * Creates the bookmark accessor on top of the given table store.
 *
 * @param wt the table store; it is cast to WorkTables, so passing any other
 *           Tables implementation fails with a ClassCastException
 */
public YMarkTables(final Tables wt) {
this.worktables = (WorkTables)wt;
}
/**
 * Deletes the bookmark with the given key from the user's bookmark table.
 * Nothing happens if no such row exists.
 *
 * @param bmk_user name of the bookmark user
 * @param urlHash  primary key of the bookmark row
 */
public void deleteBookmark(final String bmk_user, final byte[] urlHash) throws IOException, RowSpaceExceededException {
    final String table = TABLES.BOOKMARKS.tablename(bmk_user);
    if (this.worktables.select(table, urlHash) == null) {
        // unknown bookmark, nothing to delete
        return;
    }
    this.worktables.delete(table, urlHash);
}
/**
 * Deletes the bookmark for the given URL from the user's bookmark table.
 *
 * @param bmk_user name of the bookmark user
 * @param url      URL of the bookmark; its id is computed with YMarkUtil.getBookmarkId
 */
public void deleteBookmark(final String bmk_user, final String url) throws IOException, RowSpaceExceededException {
    final byte[] bookmarkId = YMarkUtil.getBookmarkId(url);
    this.deleteBookmark(bmk_user, bookmarkId);
}
/**
 * Collects all folder paths below {@code root} that are used by the user's bookmarks,
 * including every intermediate path between a bookmark's folder and the root.
 *
 * @param bmk_user name of the bookmark user
 * @param root     folder path to start from; it is matched literally
 * @return sorted set of folder paths; contains {@code root} itself unless it is FOLDERS_ROOT
 * @throws IOException if the bookmark table cannot be read
 */
public TreeSet<String> getFolders(final String bmk_user, final String root) throws IOException {
    final String bmk_table = TABLES.BOOKMARKS.tablename(bmk_user);
    // quote the root so that regex metacharacters in a folder name are taken literally
    final Pattern r = Pattern.compile("(?:^|.*,)(" + Pattern.quote(root) + "/.*)(?:,|$)");
    final Iterator<Tables.Row> bit = this.worktables.iterator(bmk_table, YMarkTables.BOOKMARK.FOLDERS.key(), r);
    final TreeSet<String> folders = new TreeSet<String>();
    final StringBuilder path = new StringBuilder(200);
    while (bit.hasNext()) {
        final Tables.Row bmk_row = bit.next();
        if (!bmk_row.containsKey(BOOKMARK.FOLDERS.key())) {
            continue;
        }
        final String[] folderArray = (new String(bmk_row.get(BOOKMARK.FOLDERS.key()), "UTF8")).split(YMarkUtil.TAGS_SEPARATOR);
        for (final String folder : folderArray) {
            if (!folder.startsWith(root) || folders.contains(folder)) {
                continue;
            }
            path.setLength(0);
            path.append(folder);
            // walk up the path and register every ancestor until the root is reached
            //TODO: get rid of .toString.equals()
            while (path.length() > 0 && !path.toString().equals(root)) {
                folders.add(path.toString());
                final int cut = path.lastIndexOf(YMarkUtil.FOLDERS_SEPARATOR);
                if (cut < 0) {
                    // no separator left; setLength(-1) would throw
                    break;
                }
                path.setLength(cut);
            }
        }
    }
    if (!root.equals(YMarkTables.FOLDERS_ROOT)) { folders.add(root); }
    return folders;
}
/**
 * Returns the bookmarks whose comma separated folder list contains exactly the given folder.
 *
 * @param bmk_user name of the bookmark user
 * @param folder   folder path, matched literally
 * @return iterator over the matching bookmark rows
 * @throws IOException if the bookmark table cannot be read
 */
public Iterator<Tables.Row> getBookmarksByFolder(final String bmk_user, final String folder) throws IOException {
    final String bmk_table = TABLES.BOOKMARKS.tablename(bmk_user);
    // Pattern.quote also copes with a folder name that itself contains "\E",
    // which would end a hand-written \Q...\E section early
    final Pattern p = Pattern.compile("(?:^|.*,)(" + Pattern.quote(folder) + ")(?:,|$)");
    return this.worktables.iterator(bmk_table, YMarkTables.BOOKMARK.FOLDERS.key(), p);
}
/**
 * Returns the bookmarks whose tags column matches the given tags. The generated
 * expression has the form {@code (?:^|.*,)((?:tag4|tag2|tag5),*.*){3}}, i.e. the
 * alternation of all tags repeated once per tag.
 *
 * @param bmk_user name of the bookmark user
 * @param tagArray tags to look for, each matched literally
 * @return iterator over the matching bookmark rows
 * @throws IOException if the bookmark table cannot be read
 */
public Iterator<Tables.Row> getBookmarksByTag(final String bmk_user, final String[] tagArray) throws IOException {
    final String bmk_table = TABLES.BOOKMARKS.tablename(bmk_user);
    final StringBuilder regex = new StringBuilder((tagArray.length * 25) + 25);
    regex.append("(?:^|.*,)((?:");
    for (int i = 0; i < tagArray.length; i++) {
        // separator only between alternatives, so an empty array leaves the group intact
        if (i > 0) {
            regex.append('|');
        }
        // Pattern.quote stays correct even if a tag contains "\E"
        regex.append(Pattern.quote(tagArray[i]));
    }
    regex.append("),*.*){");
    regex.append(tagArray.length);
    regex.append('}');
    final Pattern p = Pattern.compile(regex.toString());
    return this.worktables.iterator(bmk_table, YMarkTables.BOOKMARK.TAGS.key(), p);
}
/**
 * Inserts a new bookmark or updates the existing one with the same URL.
 * <p>
 * On insert, missing dates are set to the current time and missing tags/folders to
 * their defaults; other columns are only written when a value is supplied.
 * On update, the modification date is always refreshed. Supplied tags/folders replace
 * the stored ones, or are merged with them when {@code importer} is set. Columns without
 * a supplied value keep their stored value (or receive the column default).
 * A {@code null} value in {@code bmk} is treated like a missing entry in both cases.
 *
 * @param bmk_user name of the bookmark user
 * @param bmk      column key to value map; must contain the URL
 * @param importer {@code true} to merge tags and folders instead of replacing them
 * @throws IOException               if the table cannot be accessed or the URL is malformed
 * @throws RowSpaceExceededException if the table runs out of space
 */
public void addBookmark(final String bmk_user, final HashMap<String,String> bmk, final boolean importer) throws IOException, RowSpaceExceededException {
    final String bmk_table = TABLES.BOOKMARKS.tablename(bmk_user);
    final String date = String.valueOf(System.currentTimeMillis());
    final byte[] urlHash = YMarkUtil.getBookmarkId(bmk.get(BOOKMARK.URL.key()));
    if (urlHash == null) {
        return;
    }
    final Tables.Row bmk_row = this.worktables.select(bmk_table, urlHash);
    if (bmk_row == null) {
        // create and insert new entry
        final Data data = new Data();
        for (final BOOKMARK b : BOOKMARK.values()) {
            final String value = bmk.get(b.key());
            switch (b) {
                case DATE_ADDED:
                case DATE_MODIFIED:
                    // both dates share one timestamp when none is supplied
                    data.put(b.key(), (value != null) ? value : date);
                    break;
                case TAGS:
                case FOLDERS:
                    data.put(b.key(), (value != null) ? value : b.deflt());
                    break;
                default:
                    if (value != null) {
                        data.put(b.key(), value);
                    }
            }
        }
        this.worktables.insert(bmk_table, urlHash, data);
    } else {
        // modify and update existing entry
        for (final BOOKMARK b : BOOKMARK.values()) {
            final String value = bmk.get(b.key());
            switch (b) {
                case DATE_ADDED:
                    if (!bmk_row.containsKey(b.key())) {
                        bmk_row.put(b.key(), date);
                    }
                    break;
                case DATE_MODIFIED:
                    bmk_row.put(b.key(), date);
                    break;
                case TAGS:
                case FOLDERS:
                    if (value == null) {
                        // nothing supplied: keep what is stored, or fall back to the default
                        bmk_row.put(b.key(), bmk_row.get(b.key(), b.deflt()));
                    } else if (importer) {
                        // importer mode: union of supplied and stored entries
                        final HashSet<String> merged = YMarkUtil.keysStringToSet(value);
                        merged.addAll(YMarkUtil.keysStringToSet(bmk_row.get(b.key(), b.deflt())));
                        bmk_row.put(b.key(), YMarkUtil.keySetToString(merged));
                    } else {
                        bmk_row.put(b.key(), value);
                    }
                    break;
                default:
                    bmk_row.put(b.key(), (value != null) ? value : bmk_row.get(b.key(), b.deflt()));
            }
        }
        // update bmk_table
        this.worktables.update(bmk_table, bmk_row);
    }
}
}

@ -0,0 +1,114 @@
// YMarkUtil.java
// (C) 2011 by Stefan Förster, sof@gmx.de, Norderstedt, Germany
// first published 2010 on http://yacy.net
//
// This is a part of YaCy, a peer-to-peer based web search engine
//
// $LastChangedDate: 2011-03-09 13:50:39 +0100 (Mi, 09 Mrz 2011) $
// $LastChangedRevision: 7574 $
// $LastChangedBy: apfelmaennchen $
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package de.anomic.data.ymark;
import java.net.MalformedURLException;
import java.util.HashSet;
import java.util.Iterator;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.data.word.Word;
/**
 * Static helpers for bookmark ids and for the comma separated tag and folder strings
 * stored in the bookmark tables.
 */
public class YMarkUtil {
    public final static String TAGS_SEPARATOR = ",";
    public final static String FOLDERS_SEPARATOR = "/";

    /**
     * @param url the bookmark URL
     * @return the hash of the URL, used as the bookmark's primary key
     * @throws MalformedURLException if {@code url} cannot be parsed
     */
    public final static byte[] getBookmarkId(String url) throws MalformedURLException {
        return (new DigestURI(url, null)).hash();
    }

    /**
     * @param tag a tag or key
     * @return the word hash of the lower-cased tag
     */
    public final static byte[] getKeyId(final String tag) {
        return Word.word2hash(tag.toLowerCase());
    }

    /**
     * @param urlSet the keys to join
     * @return the UTF-8 bytes of {@link #keySetToString(HashSet)}
     */
    public final static byte[] keySetToBytes(final HashSet<String> urlSet) {
        // explicit charset: the platform default would make stored bytes machine dependent
        return keySetToString(urlSet).getBytes(java.nio.charset.Charset.forName("UTF-8"));
    }

    /**
     * Joins the keys with {@link #TAGS_SEPARATOR}.
     *
     * @param urlSet the keys to join
     * @return the joined string; empty for an empty set
     */
    public final static String keySetToString(final HashSet<String> urlSet) {
        final Iterator<String> urlIter = urlSet.iterator();
        if (!urlIter.hasNext()) {
            // an empty set used to fail when removing the leading separator
            return "";
        }
        final StringBuilder urls = new StringBuilder(urlSet.size() * 20);
        urls.append(urlIter.next());
        while (urlIter.hasNext()) {
            urls.append(TAGS_SEPARATOR);
            urls.append(urlIter.next());
        }
        return urls.toString();
    }

    /**
     * Splits a {@link #TAGS_SEPARATOR} separated string into a set.
     *
     * @param keysString the joined keys
     * @return a new set with the individual keys
     */
    public final static HashSet<String> keysStringToSet(final String keysString) {
        final HashSet<String> keySet = new HashSet<String>();
        for (final String key : keysString.split(TAGS_SEPARATOR)) {
            keySet.add(key);
        }
        return keySet;
    }

    /**
     * Normalizes a tag string: removes repeated separators, blanks that directly follow
     * a separator, and a leading or trailing separator.
     *
     * @param tagsString raw tag string, may be {@code null}
     * @return the cleaned string, or the TAGS default if nothing is left
     */
    public final static String cleanTagsString(final String tagsString) {
        if (tagsString == null) {
            return YMarkTables.BOOKMARK.TAGS.deflt();
        }
        final String cleaned = stripSeparators(tagsString);
        // input such as "," is empty after cleaning and gets the default as well
        return cleaned.isEmpty() ? YMarkTables.BOOKMARK.TAGS.deflt() : cleaned;
    }

    /**
     * Normalizes a folder string: applies the tag clean-up, then removes a folder separator
     * that is followed by a tag or folder separator, a blank that follows a folder
     * separator, and a trailing folder separator.
     *
     * @param foldersString raw folder string, may be {@code null}
     * @return the cleaned string, or the FOLDERS default if the input holds no folder
     */
    public final static String cleanFoldersString(final String foldersString) {
        if (foldersString == null) {
            return YMarkTables.BOOKMARK.FOLDERS.deflt();
        }
        final char tagSep = TAGS_SEPARATOR.charAt(0);
        final char folderSep = FOLDERS_SEPARATOR.charAt(0);
        // use the raw clean-up so an empty result gets the FOLDERS default, not the TAGS one
        final StringBuilder fs = new StringBuilder(stripSeparators(foldersString));
        if (fs.length() == 0) {
            return YMarkTables.BOOKMARK.FOLDERS.deflt();
        }
        for (int i = 0; i < fs.length() - 1; i++) {
            if (fs.charAt(i) != folderSep) {
                continue;
            }
            final char next = fs.charAt(i + 1);
            if (next == tagSep || next == folderSep) {
                fs.deleteCharAt(i);
                i--; // the character that moved to position i has to be examined too
            } else if (next == ' ') {
                fs.deleteCharAt(i + 1);
                i--; // check the same separator against its new successor
            }
        }
        if (fs.charAt(fs.length() - 1) == folderSep) {
            fs.deleteCharAt(fs.length() - 1);
        }
        return fs.toString();
    }

    /**
     * Shared clean-up for separator joined strings without applying any default.
     *
     * @param keysString raw string, not {@code null}
     * @return the string without doubled, blank-followed, leading or trailing separators;
     *         may be empty
     */
    private static String stripSeparators(final String keysString) {
        final char sep = TAGS_SEPARATOR.charAt(0);
        final StringBuilder ts = new StringBuilder(keysString);
        // get rid of double commas and space characters following a comma
        for (int i = 0; i < ts.length() - 1; i++) {
            if (ts.charAt(i) == sep && (ts.charAt(i + 1) == sep || ts.charAt(i + 1) == ' ')) {
                ts.deleteCharAt(i + 1);
                i--;
            }
        }
        // get rid of heading and trailing comma; the length checks cover strings
        // that consist of separators only
        if (ts.length() > 0 && ts.charAt(0) == sep) {
            ts.deleteCharAt(0);
        }
        if (ts.length() > 0 && ts.charAt(ts.length() - 1) == sep) {
            ts.deleteCharAt(ts.length() - 1);
        }
        return ts.toString();
    }
}

@ -0,0 +1,53 @@
// YMarkWordCountComparator.java
// (C) 2011 by Stefan Förster, sof@gmx.de, Norderstedt, Germany
// first published 2010 on http://yacy.net
//
// This is a part of YaCy, a peer-to-peer based web search engine
//
// $LastChangedDate$
// $LastChangedRevision$
// $LastChangedBy$
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package de.anomic.data.ymark;
import java.util.Comparator;
import java.util.Map;
import net.yacy.kelondro.data.word.Word;
/**
 * Orders words by their occurrence counter, looked up in a backing map.
 * Words with equal counters are ordered alphabetically so that two different words
 * never compare as equal; otherwise a TreeMap using this comparator would keep only
 * one word per counter value.
 */
public class YMarkWordCountComparator implements Comparator<String> {

    private final Map<String,Word> words;

    /**
     * @param words map from word to its counter; it is read on every comparison
     */
    public YMarkWordCountComparator(final Map<String,Word> words) {
        this.words = words;
    }

    /**
     * @return a negative value, zero or a positive value if {@code k1} has fewer, the same
     *         or more occurrences than {@code k2}; ties are broken by the natural string
     *         order, so zero is returned for identical words only
     */
    public int compare(final String k1, final String k2) {
        final int c1 = occurrences(k1);
        final int c2 = occurrences(k2);
        if (c1 > c2) {
            return 1;
        }
        if (c1 < c2) {
            return -1;
        }
        return k1.compareTo(k2);
    }

    /** Counter of the given word; a word missing from the backing map counts as zero. */
    private int occurrences(final String key) {
        final Word word = this.words.get(key);
        return (word == null) ? 0 : word.occurrences();
    }
}

@ -1,4 +1,30 @@
package de.anomic.data;
// YMarkXBELImporter.java
// (C) 2011 by Stefan Förster, sof@gmx.de, Norderstedt, Germany
// first published 2010 on http://yacy.net
//
// This is a part of YaCy, a peer-to-peer based web search engine
//
// $LastChangedDate$
// $LastChangedRevision$
// $LastChangedBy$
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package de.anomic.data.ymark;
import java.io.IOException;
import java.io.InputStream;
@ -17,7 +43,7 @@ import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.helpers.XMLReaderFactory;
public class YMarksXBELImporter extends DefaultHandler implements Runnable {
public class YMarkXBELImporter extends DefaultHandler implements Runnable {
public static enum XBEL {
NOTHING (""),
@ -76,7 +102,7 @@ public class YMarksXBELImporter extends DefaultHandler implements Runnable {
private final XMLReader xmlReader;
private final String RootFolder;
public YMarksXBELImporter (final InputStream input, int queueSize, String root) throws SAXException {
public YMarkXBELImporter (final InputStream input, int queueSize, String root) throws SAXException {
this.bmk = null;
this.RootFolder = root;
@ -126,30 +152,30 @@ public class YMarksXBELImporter extends DefaultHandler implements Runnable {
}
public void startElement(final String uri, final String name, String tag, final Attributes atts) throws SAXException {
String date;
YMarkDate date = new YMarkDate();
if (tag == null) return;
tag = tag.toLowerCase();
if (XBEL.BOOKMARK.tag().equals(tag)) {
this.bmk = new HashMap<String,String>();
this.bmk.put(YMarkTables.BOOKMARK.URL.key(), atts.getValue(uri, YMarkTables.BOOKMARK.URL.xbel_attrb()));
try {
date = String.valueOf(YMarkTables.parseISO8601(atts.getValue(uri, YMarkTables.BOOKMARK.DATE_ADDED.xbel_attrb())).getTime());
date.parseISO8601(atts.getValue(uri, YMarkTables.BOOKMARK.DATE_ADDED.xbel_attrb()));
} catch (ParseException e) {
date = String.valueOf(System.currentTimeMillis());
// TODO: exception handling
}
this.bmk.put(YMarkTables.BOOKMARK.DATE_ADDED.key(), date);
this.bmk.put(YMarkTables.BOOKMARK.DATE_ADDED.key(), date.toString());
try {
date = String.valueOf(YMarkTables.parseISO8601(atts.getValue(uri, YMarkTables.BOOKMARK.DATE_VISITED.xbel_attrb())).getTime());
date.parseISO8601(atts.getValue(uri, YMarkTables.BOOKMARK.DATE_VISITED.xbel_attrb()));
} catch (ParseException e) {
date = YMarkTables.BOOKMARK.DATE_VISITED.deflt();
// TODO: exception handling
}
this.bmk.put(YMarkTables.BOOKMARK.DATE_VISITED.key(), date);
this.bmk.put(YMarkTables.BOOKMARK.DATE_VISITED.key(), date.toString());
try {
date = String.valueOf(YMarkTables.parseISO8601(atts.getValue(uri, YMarkTables.BOOKMARK.DATE_MODIFIED.xbel_attrb())).getTime());
date.parseISO8601(atts.getValue(uri, YMarkTables.BOOKMARK.DATE_MODIFIED.xbel_attrb()));
} catch (ParseException e) {
date = String.valueOf(System.currentTimeMillis());
// TODO: exception handling
}
this.bmk.put(YMarkTables.BOOKMARK.DATE_MODIFIED.key(), date);
this.bmk.put(YMarkTables.BOOKMARK.DATE_MODIFIED.key(), date.toString());
UpdateBmkRef(atts.getValue(uri, "id"), true);
outer_state = XBEL.BOOKMARK;
inner_state = XBEL.NOTHING;
@ -201,7 +227,7 @@ public class YMarksXBELImporter extends DefaultHandler implements Runnable {
// go up one folder
//TODO: get rid of .toString.equals()
if(!this.folder.toString().equals(this.RootFolder)) {
folder.setLength(folder.lastIndexOf(YMarkTables.FOLDERS_SEPARATOR));
folder.setLength(folder.lastIndexOf(YMarkUtil.FOLDERS_SEPARATOR));
}
this.outer_state = XBEL.FOLDER;
} else if (XBEL.INFO.tag().equals(tag)) {
@ -213,15 +239,15 @@ public class YMarksXBELImporter extends DefaultHandler implements Runnable {
public void characters(final char ch[], final int start, final int length) {
if (parse_value) {
buffer.append(ch, start, length);
switch(outer_state) {
buffer.append(ch, start, length);
switch(outer_state) {
case BOOKMARK:
switch(inner_state) {
case DESC:
this.bmk.put(YMarkTables.BOOKMARK.DESC.key(), this.buffer.toString());
case DESC:
this.bmk.put(YMarkTables.BOOKMARK.DESC.key(), buffer.toString());
break;
case TITLE:
this.bmk.put(YMarkTables.BOOKMARK.TITLE.key(), this.buffer.toString());
this.bmk.put(YMarkTables.BOOKMARK.TITLE.key(), buffer.toString());
break;
case METADATA:
// TODO: handle xbel bookmark metadata
@ -235,7 +261,7 @@ public class YMarksXBELImporter extends DefaultHandler implements Runnable {
case DESC:
break;
case TITLE:
this.folder.append(YMarkTables.FOLDERS_SEPARATOR);
this.folder.append(YMarkUtil.FOLDERS_SEPARATOR);
this.folder.append(this.buffer);
break;
case METADATA:
Loading…
Cancel
Save