From 244b56e9d304451f1074df751c43be6958f8c295 Mon Sep 17 00:00:00 2001 From: apfelmaennchen Date: Thu, 21 Oct 2010 19:18:17 +0000 Subject: [PATCH] an update to the new bookmark code... git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7264 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- htroot/api/ymarks/add_ymark.java | 79 +++--- htroot/api/ymarks/delete_ymark.java | 15 +- htroot/api/ymarks/get_ymark.java | 93 ++++--- htroot/api/ymarks/import_html.java | 4 +- source/de/anomic/data/WorkTables.java | 5 + source/de/anomic/data/YMarkTables.java | 229 +++++++++++------- source/de/anomic/data/YMarksHTMLImporter.java | 102 +++++--- 7 files changed, 327 insertions(+), 200 deletions(-) diff --git a/htroot/api/ymarks/add_ymark.java b/htroot/api/ymarks/add_ymark.java index e457ba327..304840ef6 100644 --- a/htroot/api/ymarks/add_ymark.java +++ b/htroot/api/ymarks/add_ymark.java @@ -1,14 +1,13 @@ import java.io.IOException; import java.net.MalformedURLException; -import java.util.Date; import java.util.HashSet; import java.util.Iterator; + import net.yacy.cora.protocol.RequestHeader; import net.yacy.kelondro.blob.Tables; import net.yacy.kelondro.blob.Tables.Data; import net.yacy.kelondro.index.RowSpaceExceededException; import net.yacy.kelondro.logging.Log; -import net.yacy.kelondro.util.DateFormatter; import de.anomic.data.YMarkTables; import de.anomic.data.userDB; import de.anomic.search.Switchboard; @@ -31,6 +30,7 @@ public class add_ymark { if(isAdmin || isAuthUser) { final String bmk_table = (isAuthUser ? user.getUserName() : YMarkTables.TABLE_BOOKMARKS_USER_ADMIN)+YMarkTables.TABLE_BOOKMARKS_BASENAME; final String tag_table = (isAuthUser ? user.getUserName() : YMarkTables.TABLE_BOOKMARKS_USER_ADMIN)+YMarkTables.TABLE_TAGS_BASENAME; + final String folder_table = (isAuthUser ? user.getUserName() : YMarkTables.TABLE_BOOKMARKS_USER_ADMIN)+YMarkTables.TABLE_FOLDERS_BASENAME; byte[] urlHash = null; String url =""; @@ -63,12 +63,13 @@ public class add_ymark { } // insert or update entry + final byte[] date = String.valueOf(System.currentTimeMillis()).getBytes(); try { if (bmk_row == null) { // create and insert new entry - Data data = new Data(); - final String tagsString = YMarkTables.cleanTagsString(post.get(YMarkTables.TABLE_BOOKMARKS_COL_TAGS,YMarkTables.TABLE_BOOKMARKS_COL_DEFAULT)); - final byte[] date = DateFormatter.formatShortMilliSecond(new Date()).getBytes(); + Data data = new Data(); + final String tagsString = YMarkTables.cleanTagsString(post.get(YMarkTables.TABLE_BOOKMARKS_COL_TAGS,YMarkTables.TABLE_BOOKMARKS_COL_DEFAULT)); + final String foldersString = YMarkTables.cleanFoldersString(post.get(YMarkTables.TABLE_BOOKMARKS_COL_FOLDERS,YMarkTables.TABLE_FOLDERS_UNSORTED)); data.put(YMarkTables.TABLE_BOOKMARKS_COL_URL, url.getBytes()); data.put(YMarkTables.TABLE_BOOKMARKS_COL_TITLE, post.get(YMarkTables.TABLE_BOOKMARKS_COL_TITLE,YMarkTables.TABLE_BOOKMARKS_COL_DEFAULT).getBytes()); @@ -76,16 +77,20 @@ public class add_ymark { data.put(YMarkTables.TABLE_BOOKMARKS_COL_PUBLIC, post.get(YMarkTables.TABLE_BOOKMARKS_COL_PUBLIC,YMarkTables.TABLE_BOOKMARKS_COL_PUBLIC_FALSE).getBytes()); data.put(YMarkTables.TABLE_BOOKMARKS_COL_TAGS, tagsString.getBytes()); data.put(YMarkTables.TABLE_BOOKMARKS_COL_VISITS, YMarkTables.TABLE_BOOKMARKS_COL_VISITS_ZERO.getBytes()); - data.put(YMarkTables.TABLE_BOOKMARKS_COL_FOLDER, post.get(YMarkTables.TABLE_BOOKMARKS_COL_FOLDER,YMarkTables.TABLE_FOLDERS_UNSORTED).getBytes()); + data.put(YMarkTables.TABLE_BOOKMARKS_COL_FOLDERS, foldersString.getBytes()); data.put(YMarkTables.TABLE_BOOKMARKS_COL_DATE_ADDED, date); data.put(YMarkTables.TABLE_BOOKMARKS_COL_DATE_MODIFIED, date); data.put(YMarkTables.TABLE_BOOKMARKS_COL_DATE_VISITED, date); - sb.tables.insert(bmk_table, urlHash, data); - + sb.tables.insert(bmk_table, urlHash, data); + final String[] folderArray = foldersString.split(YMarkTables.TABLE_TAGS_SEPARATOR); + for (final String folder : folderArray) { + sb.tables.bookmarks.updateIndexTable(folder_table, folder, urlHash, YMarkTables.TABLE_INDEX_ACTION_ADD); + } + final String[] tagArray = tagsString.split(YMarkTables.TABLE_TAGS_SEPARATOR); for (final String tag : tagArray) { - sb.tables.bookmarks.updateTAGTable(tag_table, tag, urlHash, YMarkTables.TABLE_TAGS_ACTION_ADD); + sb.tables.bookmarks.updateIndexTable(tag_table, tag, urlHash, YMarkTables.TABLE_INDEX_ACTION_ADD); } @@ -94,34 +99,27 @@ public class add_ymark { bmk_row.put(YMarkTables.TABLE_BOOKMARKS_COL_TITLE, post.get(YMarkTables.TABLE_BOOKMARKS_COL_TITLE,bmk_row.get(YMarkTables.TABLE_BOOKMARKS_COL_TITLE,YMarkTables.TABLE_BOOKMARKS_COL_DEFAULT)).getBytes()); bmk_row.put(YMarkTables.TABLE_BOOKMARKS_COL_DESC, post.get(YMarkTables.TABLE_BOOKMARKS_COL_DESC,bmk_row.get(YMarkTables.TABLE_BOOKMARKS_COL_DESC,YMarkTables.TABLE_BOOKMARKS_COL_DEFAULT)).getBytes()); bmk_row.put(YMarkTables.TABLE_BOOKMARKS_COL_PUBLIC, post.get(YMarkTables.TABLE_BOOKMARKS_COL_PUBLIC,bmk_row.get(YMarkTables.TABLE_BOOKMARKS_COL_PUBLIC,YMarkTables.TABLE_BOOKMARKS_COL_PUBLIC_FALSE)).getBytes()); - bmk_row.put(YMarkTables.TABLE_BOOKMARKS_COL_FOLDER, post.get(YMarkTables.TABLE_BOOKMARKS_COL_FOLDER,bmk_row.get(YMarkTables.TABLE_BOOKMARKS_COL_FOLDER,YMarkTables.TABLE_FOLDERS_UNSORTED)).getBytes()); + + HashSet oldSet; + HashSetnewSet; + + final String foldersString = post.get(YMarkTables.TABLE_BOOKMARKS_COL_FOLDERS,bmk_row.get(YMarkTables.TABLE_BOOKMARKS_COL_FOLDERS,YMarkTables.TABLE_FOLDERS_UNSORTED)); + oldSet = YMarkTables.keysStringToSet(bmk_row.get(YMarkTables.TABLE_BOOKMARKS_COL_FOLDERS,YMarkTables.TABLE_FOLDERS_UNSORTED)); + newSet = YMarkTables.keysStringToSet(foldersString); + updateIndex(folder_table, urlHash, oldSet, newSet); + bmk_row.put(YMarkTables.TABLE_BOOKMARKS_COL_FOLDERS, foldersString.getBytes()); final String tagsString = YMarkTables.cleanTagsString(post.get(YMarkTables.TABLE_BOOKMARKS_COL_TAGS,YMarkTables.TABLE_BOOKMARKS_COL_DEFAULT)); - HashSetold_tagSet = YMarkTables.getTagSet(bmk_row.get(YMarkTables.TABLE_BOOKMARKS_COL_TAGS,YMarkTables.TABLE_BOOKMARKS_COL_DEFAULT), false); - HashSetnew_tagSet = YMarkTables.getTagSet(tagsString, false); - bmk_row.put(YMarkTables.TABLE_BOOKMARKS_COL_TAGS, tagsString.getBytes()); + oldSet = YMarkTables.keysStringToSet(bmk_row.get(YMarkTables.TABLE_BOOKMARKS_COL_TAGS,YMarkTables.TABLE_BOOKMARKS_COL_DEFAULT)); + newSet = YMarkTables.keysStringToSet(tagsString); + updateIndex(tag_table, urlHash, oldSet, newSet); + bmk_row.put(YMarkTables.TABLE_BOOKMARKS_COL_TAGS, tagsString.getBytes()); // modify date attribute - bmk_row.put(YMarkTables.TABLE_BOOKMARKS_COL_DATE_MODIFIED, DateFormatter.formatShortMilliSecond(new Date()).getBytes()); + bmk_row.put(YMarkTables.TABLE_BOOKMARKS_COL_DATE_MODIFIED, date); // update bmk_table - sb.tables.update(bmk_table, bmk_row); - - //update tag_table - Iterator tagIter; - - new_tagSet.removeAll(old_tagSet); - tagIter = new_tagSet.iterator(); - while(tagIter.hasNext()) { - sb.tables.bookmarks.updateTAGTable(tag_table, tagIter.next(), urlHash, YMarkTables.TABLE_TAGS_ACTION_ADD); - } - - new_tagSet = YMarkTables.getTagSet(tagsString, false); - old_tagSet.removeAll(new_tagSet); - tagIter=old_tagSet.iterator(); - while(tagIter.hasNext()) { - sb.tables.bookmarks.updateTAGTable(tag_table, tagIter.next(), urlHash, YMarkTables.TABLE_TAGS_ACTION_REMOVE); - } + sb.tables.update(bmk_table, bmk_row); } } catch (IOException e) { Log.logException(e); @@ -132,5 +130,22 @@ public class add_ymark { } // return rewrite properties return prop; - } + } + + private static void updateIndex(final String index_table, final byte[] urlHash, final HashSet oldSet, final HashSet newSet) { + Iterator tagIter; + HashSet urlSet = new HashSet(newSet); + + newSet.removeAll(oldSet); + tagIter = newSet.iterator(); + while(tagIter.hasNext()) { + sb.tables.bookmarks.updateIndexTable(index_table, tagIter.next(), urlHash, YMarkTables.TABLE_INDEX_ACTION_ADD); + } + + oldSet.removeAll(urlSet); + tagIter=oldSet.iterator(); + while(tagIter.hasNext()) { + sb.tables.bookmarks.updateIndexTable(index_table, tagIter.next(), urlHash, YMarkTables.TABLE_INDEX_ACTION_REMOVE); + } + } } diff --git a/htroot/api/ymarks/delete_ymark.java b/htroot/api/ymarks/delete_ymark.java index 9bc36dd77..a1c9a879d 100644 --- a/htroot/api/ymarks/delete_ymark.java +++ b/htroot/api/ymarks/delete_ymark.java @@ -26,6 +26,7 @@ public class delete_ymark { if(isAdmin || isAuthUser) { final String bmk_table = (isAuthUser ? user.getUserName() : YMarkTables.TABLE_BOOKMARKS_USER_ADMIN)+YMarkTables.TABLE_BOOKMARKS_BASENAME; final String tag_table = (isAuthUser ? user.getUserName() : YMarkTables.TABLE_BOOKMARKS_USER_ADMIN)+YMarkTables.TABLE_TAGS_BASENAME; + final String folder_table = (isAuthUser ? user.getUserName() : YMarkTables.TABLE_BOOKMARKS_USER_ADMIN)+YMarkTables.TABLE_FOLDERS_BASENAME; byte[] urlHash = null; @@ -42,10 +43,9 @@ public class delete_ymark { bmk_row = sb.tables.select(bmk_table, urlHash); if(bmk_row != null) { final String tagsString = bmk_row.get(YMarkTables.TABLE_BOOKMARKS_COL_TAGS,YMarkTables.TABLE_BOOKMARKS_COL_DEFAULT); - final String[] tagArray = tagsString.split(YMarkTables.TABLE_TAGS_SEPARATOR); - for (final String tag : tagArray) { - sb.tables.bookmarks.updateTAGTable(tag_table, tag, urlHash,YMarkTables.TABLE_TAGS_ACTION_REMOVE); - } + removeIndexEntry(tag_table, tagsString, urlHash); + final String foldersString = bmk_row.get(YMarkTables.TABLE_BOOKMARKS_COL_FOLDERS,YMarkTables.TABLE_FOLDERS_ROOT); + removeIndexEntry(folder_table, foldersString, urlHash); } sb.tables.delete(bmk_table,urlHash); prop.put("result", "1"); @@ -60,4 +60,11 @@ public class delete_ymark { // return rewrite properties return prop; } + + private static void removeIndexEntry(final String index_table, String keysString, final byte[] urlHash) { + final String[] keyArray = keysString.split(YMarkTables.TABLE_TAGS_SEPARATOR); + for (final String tag : keyArray) { + sb.tables.bookmarks.updateIndexTable(index_table, tag, urlHash, YMarkTables.TABLE_INDEX_ACTION_REMOVE); + } + } } diff --git a/htroot/api/ymarks/get_ymark.java b/htroot/api/ymarks/get_ymark.java index ae807b13b..72258d12d 100644 --- a/htroot/api/ymarks/get_ymark.java +++ b/htroot/api/ymarks/get_ymark.java @@ -1,5 +1,6 @@ import java.io.IOException; import java.util.Iterator; +import java.util.TreeSet; import net.yacy.cora.protocol.RequestHeader; import net.yacy.kelondro.blob.Tables; @@ -13,59 +14,79 @@ import de.anomic.server.serverSwitch; public class get_ymark { + + private static Switchboard sb = null; + private static serverObjects prop = null; + public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) { - final Switchboard sb = (Switchboard) env; - final serverObjects prop = new serverObjects(); + sb = (Switchboard) env; + prop = new serverObjects(); final userDB.Entry user = sb.userDB.getUser(header); final boolean isAdmin = (sb.verifyAuthentication(header, true)); final boolean isAuthUser = user!= null && user.hasRight(userDB.Entry.BOOKMARK_RIGHT); - + final TreeSet bookmarks = new TreeSet(); + if(isAdmin || isAuthUser) { final String bmk_table = (isAuthUser ? user.getUserName() : YMarkTables.TABLE_BOOKMARKS_USER_ADMIN)+YMarkTables.TABLE_BOOKMARKS_BASENAME; final String tag_table = (isAuthUser ? user.getUserName() : YMarkTables.TABLE_BOOKMARKS_USER_ADMIN)+YMarkTables.TABLE_TAGS_BASENAME; - - if(post.containsKey(YMarkTables.TABLE_TAGS_COL_TAG)) { - final byte[] tagHash = YMarkTables.getTagId(post.get(YMarkTables.TABLE_TAGS_COL_TAG)); - Tables.Row tag_row = null; + final String folder_table = (isAuthUser ? user.getUserName() : YMarkTables.TABLE_BOOKMARKS_USER_ADMIN)+YMarkTables.TABLE_FOLDERS_BASENAME; + + if(post.containsKey(YMarkTables.TABLE_BOOKMARKS_COL_TAGS)) { + final String[] tagArray = YMarkTables.cleanTagsString(post.get(YMarkTables.TABLE_BOOKMARKS_COL_TAGS)).split(YMarkTables.TABLE_TAGS_SEPARATOR); try { - tag_row = sb.tables.select(tag_table, tagHash); - if (tag_row != null) { - final IteratorurlIter = (YMarkTables.keysStringToKeySet(new String(tag_row.get(YMarkTables.TABLE_TAGS_COL_URLS)))).iterator(); - int count = 0; - while(urlIter.hasNext()) { - final byte[] urlHash = urlIter.next().getBytes(); - Tables.Row bmk_row = null; - bmk_row = sb.tables.select(bmk_table, urlHash); - if (bmk_row != null) { - prop.putXML("bookmarks_"+count+"_id", new String(urlHash)); - prop.putXML("bookmarks_"+count+"_url", new String(bmk_row.get(YMarkTables.TABLE_BOOKMARKS_COL_URL,YMarkTables.TABLE_BOOKMARKS_COL_DEFAULT))); - prop.putXML("bookmarks_"+count+"_title", new String(bmk_row.get(YMarkTables.TABLE_BOOKMARKS_COL_TITLE,YMarkTables.TABLE_BOOKMARKS_COL_DEFAULT))); - prop.putXML("bookmarks_"+count+"_desc", new String(bmk_row.get(YMarkTables.TABLE_BOOKMARKS_COL_DESC,YMarkTables.TABLE_BOOKMARKS_COL_DEFAULT))); - prop.putXML("bookmarks_"+count+"_added", new String(bmk_row.get(YMarkTables.TABLE_BOOKMARKS_COL_DATE_ADDED,YMarkTables.TABLE_BOOKMARKS_COL_DEFAULT))); - prop.putXML("bookmarks_"+count+"_modified", new String(bmk_row.get(YMarkTables.TABLE_BOOKMARKS_COL_DATE_MODIFIED,YMarkTables.TABLE_BOOKMARKS_COL_DEFAULT))); - prop.putXML("bookmarks_"+count+"_visited", new String(bmk_row.get(YMarkTables.TABLE_BOOKMARKS_COL_DATE_VISITED,YMarkTables.TABLE_BOOKMARKS_COL_DEFAULT))); - prop.putXML("bookmarks_"+count+"_public", new String(bmk_row.get(YMarkTables.TABLE_BOOKMARKS_COL_PUBLIC,YMarkTables.TABLE_BOOKMARKS_COL_PUBLIC_FALSE))); - prop.putXML("bookmarks_"+count+"_tags", new String(bmk_row.get(YMarkTables.TABLE_BOOKMARKS_COL_TAGS,YMarkTables.TABLE_BOOKMARKS_COL_DEFAULT))); - count++; - } - } - prop.put("bookmarks", count); - } - else { - prop.put("result", "0"); - return prop; - } + bookmarks.addAll(sb.tables.bookmarks.getBookmarks(tag_table, tagArray)); + } catch (IOException e) { + Log.logException(e); + } catch (RowSpaceExceededException e) { + Log.logException(e); + } + } else if(post.containsKey(YMarkTables.TABLE_BOOKMARKS_COL_FOLDERS)) { + final String[] folderArray = YMarkTables.cleanFoldersString(post.get(YMarkTables.TABLE_BOOKMARKS_COL_FOLDERS)).split(YMarkTables.TABLE_TAGS_SEPARATOR); + try { + bookmarks.retainAll(sb.tables.bookmarks.getBookmarks(folder_table, folderArray)); } catch (IOException e) { - Log.logException(e); + Log.logException(e); } catch (RowSpaceExceededException e) { - Log.logException(e); + Log.logException(e); } } + + putBookmarks(bookmarks, bmk_table); + } else { prop.put(YMarkTables.TABLE_BOOKMARKS_USER_AUTHENTICATE,YMarkTables.TABLE_BOOKMARKS_USER_AUTHENTICATE_MSG); } // return rewrite properties return prop; } + + private static void putBookmarks(final TreeSet urlSet, final String bmk_table) { + final IteratorurlIter = urlSet.iterator(); + int count = 0; + while(urlIter.hasNext()) { + final byte[] urlHash = urlIter.next().getBytes(); + Tables.Row bmk_row = null; + try { + bmk_row = sb.tables.select(bmk_table, urlHash); + if (bmk_row != null) { + prop.putXML("bookmarks_"+count+"_id", new String(urlHash)); + prop.putXML("bookmarks_"+count+"_url", new String(bmk_row.get(YMarkTables.TABLE_BOOKMARKS_COL_URL,YMarkTables.TABLE_BOOKMARKS_COL_DEFAULT))); + prop.putXML("bookmarks_"+count+"_title", new String(bmk_row.get(YMarkTables.TABLE_BOOKMARKS_COL_TITLE,YMarkTables.TABLE_BOOKMARKS_COL_DEFAULT))); + prop.putXML("bookmarks_"+count+"_desc", new String(bmk_row.get(YMarkTables.TABLE_BOOKMARKS_COL_DESC,YMarkTables.TABLE_BOOKMARKS_COL_DEFAULT))); + prop.putXML("bookmarks_"+count+"_added", new String(bmk_row.get(YMarkTables.TABLE_BOOKMARKS_COL_DATE_ADDED,YMarkTables.TABLE_BOOKMARKS_COL_DEFAULT))); + prop.putXML("bookmarks_"+count+"_modified", new String(bmk_row.get(YMarkTables.TABLE_BOOKMARKS_COL_DATE_MODIFIED,YMarkTables.TABLE_BOOKMARKS_COL_DEFAULT))); + prop.putXML("bookmarks_"+count+"_visited", new String(bmk_row.get(YMarkTables.TABLE_BOOKMARKS_COL_DATE_VISITED,YMarkTables.TABLE_BOOKMARKS_COL_DEFAULT))); + prop.putXML("bookmarks_"+count+"_public", new String(bmk_row.get(YMarkTables.TABLE_BOOKMARKS_COL_PUBLIC,YMarkTables.TABLE_BOOKMARKS_COL_PUBLIC_FALSE))); + prop.putXML("bookmarks_"+count+"_tags", new String(bmk_row.get(YMarkTables.TABLE_BOOKMARKS_COL_TAGS,YMarkTables.TABLE_BOOKMARKS_COL_DEFAULT))); + count++; + } + } catch (IOException e) { + Log.logException(e); + } catch (RowSpaceExceededException e) { + Log.logException(e); + } + } + prop.put("bookmarks", count); + } } diff --git a/htroot/api/ymarks/import_html.java b/htroot/api/ymarks/import_html.java index e0891302d..2d1dbae2d 100644 --- a/htroot/api/ymarks/import_html.java +++ b/htroot/api/ymarks/import_html.java @@ -25,14 +25,14 @@ public class import_html { final boolean isAuthUser = user!= null && user.hasRight(userDB.Entry.BOOKMARK_RIGHT); if(isAdmin || isAuthUser) { - final String bmk_table = (isAuthUser ? user.getUserName() : YMarkTables.TABLE_BOOKMARKS_USER_ADMIN)+YMarkTables.TABLE_BOOKMARKS_BASENAME; + final String bmk_user = (isAuthUser ? user.getUserName() : YMarkTables.TABLE_BOOKMARKS_USER_ADMIN); if(post.containsKey("htmlfile")){ try { final ByteArrayInputStream byteIn = new ByteArrayInputStream(post.get("htmlfile$file").getBytes("UTF-8")); if(byteIn !=null) { final InputStreamReader reader = new InputStreamReader(byteIn,"UTF-8"); final ParserDelegator delegator = new ParserDelegator(); - final YMarksHTMLImporter htmlHandler = new YMarksHTMLImporter(sb.tables, bmk_table); + final YMarksHTMLImporter htmlHandler = new YMarksHTMLImporter(sb.tables, bmk_user); delegator.parse(reader, htmlHandler, true); } } catch (UnsupportedEncodingException e) { diff --git a/source/de/anomic/data/WorkTables.java b/source/de/anomic/data/WorkTables.java index a0afcbfdc..d41f38870 100644 --- a/source/de/anomic/data/WorkTables.java +++ b/source/de/anomic/data/WorkTables.java @@ -72,6 +72,11 @@ public class WorkTables extends Tables { this.bookmarks = new YMarkTables(this); } + public void clear(final String tablename) throws IOException { + super.clear(tablename); + this.bookmarks.cleanCache(tablename); + } + /** * recording of a api call. stores the call parameters into the API database table * @param post the post arguments of the api call diff --git a/source/de/anomic/data/YMarkTables.java b/source/de/anomic/data/YMarkTables.java index ec9ce63d4..1fc17bf40 100644 --- a/source/de/anomic/data/YMarkTables.java +++ b/source/de/anomic/data/YMarkTables.java @@ -4,8 +4,7 @@ import java.io.IOException; import java.net.MalformedURLException; import java.util.HashSet; import java.util.Iterator; -import java.util.Set; - +import net.yacy.cora.storage.ConcurrentARC; import net.yacy.kelondro.blob.Tables; import net.yacy.kelondro.blob.Tables.Data; import net.yacy.kelondro.data.meta.DigestURI; @@ -35,68 +34,44 @@ public class YMarkTables { public final static String TABLE_BOOKMARKS_COL_PUBLIC = "public"; public final static String TABLE_BOOKMARKS_COL_TAGS = "tags"; public final static String TABLE_BOOKMARKS_COL_VISITS = "visits"; - public final static String TABLE_BOOKMARKS_COL_FOLDER = "folder"; - + public final static String TABLE_BOOKMARKS_COL_FOLDERS = "folders"; public final static String TABLE_BOOKMARKS_COL_DEFAULT = ""; public final static String TABLE_BOOKMARKS_COL_PUBLIC_TRUE = "true"; public final static String TABLE_BOOKMARKS_COL_PUBLIC_FALSE = "false"; public final static String TABLE_BOOKMARKS_COL_VISITS_ZERO = "0"; - public final static String TABLE_TAGS_BASENAME = "_tags"; + public final static String TABLE_TAGS_BASENAME = "_tags"; public final static String TABLE_TAGS_SEPARATOR = ","; - public final static String TABLE_TAGS_COL_ID = "id"; - public final static String TABLE_TAGS_COL_TAG = "tag"; - public final static String TABLE_TAGS_COL_URLS = "urls"; - - public final static int TABLE_TAGS_ACTION_ADD = 1; - public final static int TABLE_TAGS_ACTION_REMOVE = 2; - + public final static String TABLE_INDEX_COL_ID = "id"; + public final static String TABLE_INDEX_COL_NAME = "name"; + public final static String TABLE_INDEX_DESC = "desc"; + public final static String TABLE_INDEX_COL_URLS = "urls"; + public final static short TABLE_INDEX_ACTION_ADD = 1; + public final static short TABLE_INDEX_ACTION_REMOVE = 2; + + public final static String TABLE_FOLDERS_BASENAME = "_folders"; public final static String TABLE_FOLDERS_SEPARATOR = "/"; public final static String TABLE_FOLDERS_ROOT = "/"; public final static String TABLE_FOLDERS_UNSORTED = "/unsorted"; public final static String TABLE_FOLDERS_IMPORTED = "/imported"; - private Tables worktables; + private WorkTables worktables; + public ConcurrentARC cache; public YMarkTables(final Tables wt) { - this.worktables = wt; + this.worktables = (WorkTables)wt; + this.cache = new ConcurrentARC(50,1); } public final static byte[] getBookmarkId(String url) throws MalformedURLException { return (new DigestURI(url, null)).hash(); } - public final static byte[] getTagId(final String tag) { + public final static byte[] getKeyId(final String tag) { return Word.word2hash(tag.toLowerCase()); } - public final static HashSet getTagSet(final String tagsString, boolean clean) { - HashSettagSet = new HashSet(); - final String[] tagArray = clean ? cleanTagsString(tagsString).split(TABLE_TAGS_SEPARATOR) : tagsString.split(TABLE_TAGS_SEPARATOR); - for (final String tag : tagArray) { - tagSet.add(tag); - } - return tagSet; - } - - public final static HashSet getTagSet(final String tagsString) { - return getTagSet(tagsString, true); - } - - public final static HashSet getTagIdSet(final String tagsString, boolean clean) { - HashSettagSet = new HashSet(); - final String[] tagArray = clean ? cleanTagsString(tagsString).split(TABLE_TAGS_SEPARATOR) : tagsString.split(TABLE_TAGS_SEPARATOR); - for (final String tag : tagArray) { - tagSet.add(getTagId(tag)); - } - return tagSet; - } - - public final static Set getTagIdSet(final String tagsString) { - return getTagIdSet(tagsString, true); - } - public final static byte[] keySetToBytes(final HashSet urlSet) { final Iterator urlIter = urlSet.iterator(); final @@ -109,12 +84,12 @@ public class YMarkTables { return urls.toString().getBytes(); } - public final static HashSet keysStringToKeySet(final String keysString) { + public final static HashSet keysStringToSet(final String keysString) { HashSet keySet = new HashSet(); final String[] keyArray = keysString.split(TABLE_TAGS_SEPARATOR); for (final String key : keyArray) { keySet.add(key); - } + } return keySet; } @@ -137,73 +112,141 @@ public class YMarkTables { public final static String cleanFoldersString(String foldersString) { foldersString = cleanTagsString(foldersString); // get rid of double and trailing slashes - while (foldersString.endsWith("/")){ + while (foldersString.endsWith(TABLE_FOLDERS_SEPARATOR)){ foldersString = foldersString.substring(0, foldersString.length() -1); } while (foldersString.contains("/,")){ foldersString = foldersString.replaceAll("/,", TABLE_TAGS_SEPARATOR); } while (foldersString.contains("//")){ - foldersString = foldersString.replaceAll("//", "/"); + foldersString = foldersString.replaceAll("//", TABLE_FOLDERS_SEPARATOR); } return foldersString; } + + public void cleanCache(final String tablename) { + final Iterator iter = this.cache.keySet().iterator(); + while(iter.hasNext()) { + final String key = iter.next(); + if (key.startsWith(tablename)) { + this.cache.remove(key); + } + } + } + + public void createIndexEntry(final String index_table, final String keyname, final HashSet urlSet) throws IOException { + final byte[] key = YMarkTables.getKeyId(keyname); + final String cacheKey = index_table+":"+keyname; + final byte[] BurlSet = keySetToBytes(urlSet); + Data tagEntry = new Data(); + + this.cache.insert(cacheKey, BurlSet); + + tagEntry.put(TABLE_INDEX_COL_NAME, keyname); + tagEntry.put(TABLE_INDEX_COL_URLS, BurlSet); + this.worktables.insert(index_table, key, tagEntry); + } + + public HashSet getBookmarks(final String index_table, final String keyname) throws IOException, RowSpaceExceededException { + final String cacheKey = index_table+":"+keyname; + if (this.cache.containsKey(cacheKey)) { + return keysStringToSet(new String(this.cache.get(cacheKey))); + } else { + final Tables.Row idx_row = this.worktables.select(index_table, YMarkTables.getKeyId(keyname)); + if (idx_row != null) { + final byte[] keys = idx_row.get(YMarkTables.TABLE_INDEX_COL_URLS); + this.cache.put(cacheKey, keys); + return keysStringToSet(new String(keys)); + } + } + return new HashSet(); + } + + public HashSet getBookmarks(final String index_table, final String[] keyArray) throws IOException, RowSpaceExceededException { + final HashSet urlSet = new HashSet(); + urlSet.addAll(getBookmarks(index_table, keyArray[0])); + if (urlSet.isEmpty()) + return urlSet; + if (keyArray.length > 1) { + for (final String keyname : keyArray) { + urlSet.retainAll(getBookmarks(index_table, keyname)); + if (urlSet.isEmpty()) + return urlSet; + } + } + return urlSet; + } /** - * YMark function that updates the tag index - * @param tag_table is the user specific tag index - * @param tag is a single tag + * YMark function that updates the tag/folder index + * @param index_table is the user specific index + * @param keyname * @param url is the url has as returned by DigestURI.hash() * @param action is either add (1) or remove (2) - * @return */ - public int updateTAGTable(final String tag_table, final String tag, final byte[] url, final int action) { - Tables.Row tag_row = null; - final byte[] tagHash = YMarkTables.getTagId(tag); - final String urlHash = new String(url); - HashSeturlSet = new HashSet(); - try { - tag_row = this.worktables.select(tag_table, tagHash); - if(tag_row == null) { - switch (action) { - case YMarkTables.TABLE_TAGS_ACTION_ADD: - urlSet.add(urlHash); - break; - default: - return 0; - } - Data tagEntry = new Data(); - tagEntry.put(YMarkTables.TABLE_TAGS_COL_TAG, tag.getBytes()); - tagEntry.put(YMarkTables.TABLE_TAGS_COL_URLS, YMarkTables.keySetToBytes(urlSet)); - this.worktables.insert(tag_table, tagHash, tagEntry); - return 1; - } else { - urlSet = YMarkTables.keysStringToKeySet(new String(tag_row.get(YMarkTables.TABLE_TAGS_COL_URLS))); - if(urlSet.contains(urlHash)) - Log.logInfo(YMarkTables.TABLE_BOOKMARKS_LOG, "ok, urlHash found!"); - switch (action) { - case YMarkTables.TABLE_TAGS_ACTION_ADD: - urlSet.add(urlHash); - break; - case YMarkTables.TABLE_TAGS_ACTION_REMOVE: - urlSet.remove(urlHash); - if(urlSet.isEmpty()) { - this.worktables.delete(tag_table, tagHash); - return 1; - } - break; - default: - return 1; - } - tag_row.put(YMarkTables.TABLE_TAGS_COL_URLS, YMarkTables.keySetToBytes(urlSet)); - this.worktables.update(tag_table, tag_row); - return 1; - } - } catch (IOException e) { + public void updateIndexTable(final String index_table, final String keyname, final byte[] url, final int action) { + final byte[] key = YMarkTables.getKeyId(keyname); + final String urlHash = new String(url); + Tables.Row row = null; + + // try to load urlSet from cache + final String cacheKey = index_table+":"+keyname; + HashSeturlSet = this.cache.containsKey(cacheKey) ? keysStringToSet(new String(this.cache.get(cacheKey))) : new HashSet(); + + try { + row = this.worktables.select(index_table, key); + + // key has no index_table entry + if(row == null) { + switch (action) { + case TABLE_INDEX_ACTION_ADD: + urlSet.add(urlHash); + createIndexEntry(index_table, keyname, urlSet); + break; + case TABLE_INDEX_ACTION_REMOVE: + // key has no index_table entry but a cache entry + // TODO: this shouldn't happen + if(!urlSet.isEmpty()) { + urlSet.remove(urlHash); + createIndexEntry(index_table, keyname, urlSet); + } + break; + default: + break; + } + } + // key has an existing index_table entry + else { + byte[] BurlSet = null; + // key has no cache entry + if (urlSet.isEmpty()) { + // load urlSet from index_table + urlSet = keysStringToSet(new String(row.get(TABLE_INDEX_COL_URLS))); + } + switch (action) { + case TABLE_INDEX_ACTION_ADD: + urlSet.add(urlHash); + break; + case TABLE_INDEX_ACTION_REMOVE: + urlSet.remove(urlHash); + break; + default: + break; + } + if (urlSet.isEmpty()) { + this.cache.remove(cacheKey); + this.worktables.delete(index_table, key); + } else { + BurlSet = keySetToBytes(urlSet); + this.cache.insert(cacheKey, BurlSet); + row.put(TABLE_INDEX_COL_URLS, BurlSet); + this.worktables.update(index_table, row); + } + } + } catch (IOException e) { Log.logException(e); } catch (RowSpaceExceededException e) { Log.logException(e); } - return 0; } } diff --git a/source/de/anomic/data/YMarksHTMLImporter.java b/source/de/anomic/data/YMarksHTMLImporter.java index 67965960c..0f9dcbd31 100644 --- a/source/de/anomic/data/YMarksHTMLImporter.java +++ b/source/de/anomic/data/YMarksHTMLImporter.java @@ -2,70 +2,106 @@ package de.anomic.data; import java.io.IOException; import java.net.MalformedURLException; -import java.util.Date; import javax.swing.text.MutableAttributeSet; import javax.swing.text.html.HTML; import javax.swing.text.html.HTMLEditorKit; -import net.yacy.kelondro.blob.Tables; import net.yacy.kelondro.blob.Tables.Data; import net.yacy.kelondro.logging.Log; public class YMarksHTMLImporter extends HTMLEditorKit.ParserCallback { - private static final int NOTHING = 0; - private static final int BOOKMARK = 2; - private static final int FOLDER = 3; + private static final short NOTHING = 0; + private static final short BOOKMARK = 2; + private static final short FOLDER = 3; + private static final String MILLIS = "000"; - private Tables worktables; - private String bmk_table; - - private int state; - private String folder = YMarkTables.TABLE_FOLDERS_IMPORTED; + private final WorkTables worktables; + private final String bmk_table; + private final String tag_table; + private final String folder_table; + private final String[] tagArray; + private final String tagsString; + + private short state; + private String folder; private String href; - private Date date_added; - private Date date_visited; - private Date date_modified; + private String date_added; + private String date_visited; + private String date_modified; - public YMarksHTMLImporter(final Tables worktables, final String bmk_table) { - this.bmk_table = bmk_table; - this.worktables = worktables; + + public YMarksHTMLImporter(final WorkTables worktables, final String user) { + this(worktables, user, YMarkTables.TABLE_FOLDERS_IMPORTED, null); + } + + public YMarksHTMLImporter(final WorkTables worktables, final String user, final String folder) { + this(worktables, user, folder, null); } - public void handleText(char[] data, int pos) { + public YMarksHTMLImporter(final WorkTables worktables, final String user, final String folder, final String tagsString) { + this.bmk_table = user + YMarkTables.TABLE_BOOKMARKS_BASENAME; + this.tag_table = user + YMarkTables.TABLE_TAGS_BASENAME; + this.folder_table = user + YMarkTables.TABLE_FOLDERS_BASENAME; + this.worktables = worktables; + + if(folder.contains(YMarkTables.TABLE_TAGS_SEPARATOR)) + this.folder = folder.substring(0, folder.indexOf(',')); + else if(!folder.startsWith(YMarkTables.TABLE_FOLDERS_ROOT)) + this.folder = YMarkTables.TABLE_FOLDERS_ROOT + folder; + else + this.folder = folder; + + this.tagsString = tagsString; + if(tagsString != null) + this.tagArray = tagsString.split(YMarkTables.TABLE_TAGS_SEPARATOR); + else + this.tagArray = null; + } + + public void handleText(char[] data, int pos) { switch (state) { case NOTHING: break; case BOOKMARK: - Data bmk = new Data(); - byte[] urlHash; try { - if(href.toLowerCase().startsWith("http://") || href.toLowerCase().startsWith("http://")) { - urlHash = YMarkTables.getBookmarkId(this.href); + final byte[] urlHash = YMarkTables.getBookmarkId(this.href); + // only import new bookmarks + if(!worktables.has(this.bmk_table, urlHash)) { // create and insert new entry - bmk.put(YMarkTables.TABLE_BOOKMARKS_COL_URL, this.href.getBytes()); + final Data bmk = new Data(); + bmk.put(YMarkTables.TABLE_BOOKMARKS_COL_URL, this.href.getBytes()); bmk.put(YMarkTables.TABLE_BOOKMARKS_COL_TITLE, (new String(data)).getBytes()); bmk.put(YMarkTables.TABLE_BOOKMARKS_COL_DESC, YMarkTables.TABLE_BOOKMARKS_COL_DEFAULT.getBytes()); bmk.put(YMarkTables.TABLE_BOOKMARKS_COL_PUBLIC, YMarkTables.TABLE_BOOKMARKS_COL_PUBLIC_FALSE.getBytes()); - bmk.put(YMarkTables.TABLE_BOOKMARKS_COL_TAGS, YMarkTables.TABLE_BOOKMARKS_COL_DEFAULT.getBytes()); bmk.put(YMarkTables.TABLE_BOOKMARKS_COL_VISITS, YMarkTables.TABLE_BOOKMARKS_COL_VISITS_ZERO.getBytes()); - bmk.put(YMarkTables.TABLE_BOOKMARKS_COL_FOLDER, this.folder.getBytes()); - // bmk.put(YMarkStatics.TABLE_BOOKMARKS_COL_DATE_ADDED, DateFormatter.formatShortMilliSecond(this.date_added).getBytes()); - // bmk.put(YMarkStatics.TABLE_BOOKMARKS_COL_DATE_MODIFIED, DateFormatter.formatShortMilliSecond(this.date_modified).getBytes()); - // bmk.put(YMarkStatics.TABLE_BOOKMARKS_COL_DATE_VISITED, DateFormatter.formatShortMilliSecond(this.date_visited).getBytes()); - worktables.insert(bmk_table, urlHash, bmk); + bmk.put(YMarkTables.TABLE_BOOKMARKS_COL_DATE_ADDED, this.date_added.getBytes()); + bmk.put(YMarkTables.TABLE_BOOKMARKS_COL_DATE_MODIFIED, this.date_modified.getBytes()); + bmk.put(YMarkTables.TABLE_BOOKMARKS_COL_DATE_VISITED, this.date_visited.getBytes()); + bmk.put(YMarkTables.TABLE_BOOKMARKS_COL_FOLDERS, this.folder.getBytes()); + this.worktables.bookmarks.updateIndexTable(this.folder_table, this.folder, urlHash, YMarkTables.TABLE_INDEX_ACTION_ADD); + Log.logInfo(YMarkTables.TABLE_BOOKMARKS_LOG, "YMarksHTMLImporter - folder: "+this.folder); + if (this.tagsString != null) { + bmk.put(YMarkTables.TABLE_BOOKMARKS_COL_TAGS, this.tagsString.getBytes()); + for (final String tag : tagArray) { + this.worktables.bookmarks.updateIndexTable(this.tag_table, tag, urlHash, YMarkTables.TABLE_INDEX_ACTION_ADD); + } + } + this.worktables.insert(bmk_table, urlHash, bmk); + Log.logInfo(YMarkTables.TABLE_BOOKMARKS_LOG, "YMarksHTMLImporter - url successfully imported: "+this.href); + } else { + Log.logInfo(YMarkTables.TABLE_BOOKMARKS_LOG, "YMarksHTMLImporter - url already exists: "+this.href); } break; } catch (MalformedURLException e) { - Log.logException(e); + Log.logInfo(YMarkTables.TABLE_BOOKMARKS_LOG, "YMarksHTMLImporter - malformed url: "+this.href); } catch (IOException e) { Log.logException(e); } break; case FOLDER: this.folder = this.folder + YMarkTables.TABLE_FOLDERS_SEPARATOR + new String(data); - Log.logInfo("IMPORT folder:", folder); break; default: break; @@ -76,9 +112,9 @@ public class YMarksHTMLImporter extends HTMLEditorKit.ParserCallback { public void handleStartTag(HTML.Tag t, MutableAttributeSet a, int pos) { if (t == HTML.Tag.A) { this.href = (String)a.getAttribute(HTML.Attribute.HREF); - // this.date_added = new Date(Long.parseLong((String)a.getAttribute("add_date"))*1000l); - // this.date_visited = new Date(Long.parseLong((String)a.getAttribute("last_visit"))*1000l); - // this.date_modified = new Date(Long.parseLong((String)a.getAttribute("last_modified"))*1000l); + this.date_added = (String)a.getAttribute("add_date")+MILLIS; + this.date_visited = (String)a.getAttribute("last_visit")+MILLIS; + this.date_modified = (String)a.getAttribute("last_modified")+MILLIS; state = BOOKMARK; } else if (t == HTML.Tag.H3) { state = FOLDER;