From d0e6c03b5118bd519e7551d3b0e32deb340d386f Mon Sep 17 00:00:00 2001 From: apfelmaennchen Date: Mon, 25 Oct 2010 22:44:05 +0000 Subject: [PATCH] some updates to the new bookmark code... git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7272 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- htroot/api/ymarks/add_ymark.java | 34 ++-- htroot/api/ymarks/delete_ymark.java | 18 +- htroot/api/ymarks/get_ymark.java | 12 +- htroot/api/ymarks/import_html.java | 2 +- source/de/anomic/data/YMarkTables.java | 178 ++++++++++-------- source/de/anomic/data/YMarksHTMLImporter.java | 152 ++++++++------- 6 files changed, 222 insertions(+), 174 deletions(-) diff --git a/htroot/api/ymarks/add_ymark.java b/htroot/api/ymarks/add_ymark.java index 9b201cdc5..f99a1decd 100644 --- a/htroot/api/ymarks/add_ymark.java +++ b/htroot/api/ymarks/add_ymark.java @@ -28,9 +28,9 @@ public class add_ymark { final boolean isAuthUser = user!= null && user.hasRight(userDB.Entry.BOOKMARK_RIGHT); if(isAdmin || isAuthUser) { - final String bmk_table = (isAuthUser ? user.getUserName() : YMarkTables.TABLE_BOOKMARKS_USER_ADMIN)+YMarkTables.TABLES.BOOKMARKS.basename(); - final String tag_table = (isAuthUser ? user.getUserName() : YMarkTables.TABLE_BOOKMARKS_USER_ADMIN)+YMarkTables.TABLES.TAGS.basename(); - final String folder_table = (isAuthUser ? user.getUserName() : YMarkTables.TABLE_BOOKMARKS_USER_ADMIN)+YMarkTables.TABLES.FOLDERS.basename(); + final String bmk_table = (isAuthUser ? user.getUserName() : YMarkTables.USER_ADMIN)+YMarkTables.TABLES.BOOKMARKS.basename(); + final String tag_table = (isAuthUser ? user.getUserName() : YMarkTables.USER_ADMIN)+YMarkTables.TABLES.TAGS.basename(); + final String folder_table = (isAuthUser ? user.getUserName() : YMarkTables.USER_ADMIN)+YMarkTables.TABLES.FOLDERS.basename(); byte[] urlHash = null; String url =""; @@ -38,18 +38,18 @@ public class add_ymark { try { url = post.get(YMarkTables.BOOKMARK.URL.key(),YMarkTables.BOOKMARK.URL.deflt()); boolean hasProtocol = false; - for (YMarkTables.PROTOCOL p : YMarkTables.PROTOCOL.values()) { + for (YMarkTables.PROTOCOLS p : YMarkTables.PROTOCOLS.values()) { hasProtocol = url.toLowerCase().startsWith(p.protocol()); } if (!hasProtocol) { - url=YMarkTables.PROTOCOL.HTTP.protocol(url); + url=YMarkTables.PROTOCOLS.HTTP.protocol(url); } urlHash = YMarkTables.getBookmarkId(url); } catch (MalformedURLException e) { Log.logException(e); } - } else if (post.containsKey(YMarkTables.TABLE_BOOKMARKS_COL_ID)) { - urlHash = post.get(YMarkTables.TABLE_BOOKMARKS_COL_ID).getBytes(); + } else if (post.containsKey(YMarkTables.BOOKMARKS_ID)) { + urlHash = post.get(YMarkTables.BOOKMARKS_ID).getBytes(); } if(urlHash == null) { prop.put("result", "0"); @@ -73,28 +73,28 @@ public class add_ymark { // create and insert new entry Data data = new Data(); final String tagsString = YMarkTables.cleanTagsString(post.get(YMarkTables.BOOKMARK.TAGS.key(),YMarkTables.BOOKMARK.TAGS.deflt())); - final String foldersString = YMarkTables.cleanFoldersString(post.get(YMarkTables.BOOKMARK.FOLDERS.key(),YMarkTables.TABLE_FOLDERS_UNSORTED)); + final String foldersString = YMarkTables.cleanFoldersString(post.get(YMarkTables.BOOKMARK.FOLDERS.key(),YMarkTables.FOLDERS_UNSORTED)); data.put(YMarkTables.BOOKMARK.URL.key(), url.getBytes()); data.put(YMarkTables.BOOKMARK.TITLE.key(), post.get(YMarkTables.BOOKMARK.TITLE.key(),YMarkTables.BOOKMARK.TITLE.deflt())); data.put(YMarkTables.BOOKMARK.DESC.key(), post.get(YMarkTables.BOOKMARK.DESC.key(),YMarkTables.BOOKMARK.DESC.deflt())); data.put(YMarkTables.BOOKMARK.PUBLIC.key(), post.get(YMarkTables.BOOKMARK.PUBLIC.key(),YMarkTables.BOOKMARK.PUBLIC.deflt())); data.put(YMarkTables.BOOKMARK.TAGS.key(), tagsString.getBytes()); - data.put(YMarkTables.BOOKMARK.VISITS.key(), YMarkTables.BOOKMARK.VISITS.b_deflt()); + data.put(YMarkTables.BOOKMARK.VISITS.key(), YMarkTables.BOOKMARK.VISITS.deflt().getBytes()); data.put(YMarkTables.BOOKMARK.FOLDERS.key(), foldersString.getBytes()); data.put(YMarkTables.BOOKMARK.DATE_ADDED.key(), date); data.put(YMarkTables.BOOKMARK.DATE_MODIFIED.key(), date); data.put(YMarkTables.BOOKMARK.DATE_VISITED.key(), date); sb.tables.insert(bmk_table, urlHash, data); - final String[] folderArray = foldersString.split(YMarkTables.TABLE_TAGS_SEPARATOR); + final String[] folderArray = foldersString.split(YMarkTables.TAGS_SEPARATOR); for (final String folder : folderArray) { - sb.tables.bookmarks.updateIndexTable(folder_table, folder, urlHash, YMarkTables.TABLE_INDEX_ACTION_ADD); + sb.tables.bookmarks.updateIndexTable(folder_table, folder, urlHash, YMarkTables.INDEX_ACTION.ADD); } - final String[] tagArray = tagsString.split(YMarkTables.TABLE_TAGS_SEPARATOR); + final String[] tagArray = tagsString.split(YMarkTables.TAGS_SEPARATOR); for (final String tag : tagArray) { - sb.tables.bookmarks.updateIndexTable(tag_table, tag, urlHash, YMarkTables.TABLE_INDEX_ACTION_ADD); + sb.tables.bookmarks.updateIndexTable(tag_table, tag, urlHash, YMarkTables.INDEX_ACTION.ADD); } @@ -107,7 +107,7 @@ public class add_ymark { HashSet oldSet; HashSetnewSet; - final String foldersString = post.get(YMarkTables.BOOKMARK.FOLDERS.key(),bmk_row.get(YMarkTables.BOOKMARK.FOLDERS.key(),YMarkTables.BOOKMARK.FOLDERS.deflt())); + final String foldersString = YMarkTables.cleanFoldersString(post.get(YMarkTables.BOOKMARK.FOLDERS.key(),YMarkTables.BOOKMARK.FOLDERS.deflt())); oldSet = YMarkTables.keysStringToSet(bmk_row.get(YMarkTables.BOOKMARK.FOLDERS.key(),YMarkTables.BOOKMARK.FOLDERS.deflt())); newSet = YMarkTables.keysStringToSet(foldersString); updateIndex(folder_table, urlHash, oldSet, newSet); @@ -130,7 +130,7 @@ public class add_ymark { } prop.put("result", "1"); } else { - prop.put(YMarkTables.TABLE_BOOKMARKS_USER_AUTHENTICATE,YMarkTables.TABLE_BOOKMARKS_USER_AUTHENTICATE_MSG); + prop.put(YMarkTables.USER_AUTHENTICATE,YMarkTables.USER_AUTHENTICATE_MSG); } // return rewrite properties return prop; @@ -143,13 +143,13 @@ public class add_ymark { newSet.removeAll(oldSet); tagIter = newSet.iterator(); while(tagIter.hasNext()) { - sb.tables.bookmarks.updateIndexTable(index_table, tagIter.next(), urlHash, YMarkTables.TABLE_INDEX_ACTION_ADD); + sb.tables.bookmarks.updateIndexTable(index_table, tagIter.next(), urlHash, YMarkTables.INDEX_ACTION.ADD); } oldSet.removeAll(urlSet); tagIter=oldSet.iterator(); while(tagIter.hasNext()) { - sb.tables.bookmarks.updateIndexTable(index_table, tagIter.next(), urlHash, YMarkTables.TABLE_INDEX_ACTION_REMOVE); + sb.tables.bookmarks.updateIndexTable(index_table, tagIter.next(), urlHash, YMarkTables.INDEX_ACTION.REMOVE); } } } diff --git a/htroot/api/ymarks/delete_ymark.java b/htroot/api/ymarks/delete_ymark.java index e04cc1d3a..8e490c3bb 100644 --- a/htroot/api/ymarks/delete_ymark.java +++ b/htroot/api/ymarks/delete_ymark.java @@ -24,15 +24,15 @@ public class delete_ymark { final boolean isAuthUser = user!= null && user.hasRight(userDB.Entry.BOOKMARK_RIGHT); if(isAdmin || isAuthUser) { - final String bmk_table = (isAuthUser ? user.getUserName() : YMarkTables.TABLE_BOOKMARKS_USER_ADMIN)+YMarkTables.TABLES.BOOKMARKS.basename(); - final String tag_table = (isAuthUser ? user.getUserName() : YMarkTables.TABLE_BOOKMARKS_USER_ADMIN)+YMarkTables.TABLES.TAGS.basename(); - final String folder_table = (isAuthUser ? user.getUserName() : YMarkTables.TABLE_BOOKMARKS_USER_ADMIN)+YMarkTables.TABLES.FOLDERS.basename(); + final String bmk_table = (isAuthUser ? user.getUserName() : YMarkTables.USER_ADMIN)+YMarkTables.TABLES.BOOKMARKS.basename(); + final String tag_table = (isAuthUser ? user.getUserName() : YMarkTables.USER_ADMIN)+YMarkTables.TABLES.TAGS.basename(); + final String folder_table = (isAuthUser ? user.getUserName() : YMarkTables.USER_ADMIN)+YMarkTables.TABLES.FOLDERS.basename(); byte[] urlHash = null; try { - if(post.containsKey(YMarkTables.TABLE_BOOKMARKS_COL_ID)) { - urlHash = post.get(YMarkTables.TABLE_BOOKMARKS_COL_ID).getBytes(); + if(post.containsKey(YMarkTables.BOOKMARKS_ID)) { + urlHash = post.get(YMarkTables.BOOKMARKS_ID).getBytes(); } else if(post.containsKey(YMarkTables.BOOKMARK.URL.key())) { urlHash = YMarkTables.getBookmarkId(post.get(YMarkTables.BOOKMARK.URL.key())); } else { @@ -44,7 +44,7 @@ public class delete_ymark { if(bmk_row != null) { final String tagsString = bmk_row.get(YMarkTables.BOOKMARK.TAGS.key(),YMarkTables.BOOKMARK.TAGS.deflt()); removeIndexEntry(tag_table, tagsString, urlHash); - final String foldersString = bmk_row.get(YMarkTables.BOOKMARK.FOLDERS.key(),YMarkTables.TABLE_FOLDERS_ROOT); + final String foldersString = bmk_row.get(YMarkTables.BOOKMARK.FOLDERS.key(),YMarkTables.FOLDERS_ROOT); removeIndexEntry(folder_table, foldersString, urlHash); } sb.tables.delete(bmk_table,urlHash); @@ -55,16 +55,16 @@ public class delete_ymark { Log.logException(e); } } else { - prop.put(YMarkTables.TABLE_BOOKMARKS_USER_AUTHENTICATE,YMarkTables.TABLE_BOOKMARKS_USER_AUTHENTICATE_MSG); + prop.put(YMarkTables.USER_AUTHENTICATE,YMarkTables.USER_AUTHENTICATE_MSG); } // return rewrite properties return prop; } private static void removeIndexEntry(final String index_table, String keysString, final byte[] urlHash) { - final String[] keyArray = keysString.split(YMarkTables.TABLE_TAGS_SEPARATOR); + final String[] keyArray = keysString.split(YMarkTables.TAGS_SEPARATOR); for (final String tag : keyArray) { - sb.tables.bookmarks.updateIndexTable(index_table, tag, urlHash, YMarkTables.TABLE_INDEX_ACTION_REMOVE); + sb.tables.bookmarks.updateIndexTable(index_table, tag, urlHash, YMarkTables.INDEX_ACTION.REMOVE); } } } diff --git a/htroot/api/ymarks/get_ymark.java b/htroot/api/ymarks/get_ymark.java index 982e82699..8a812269b 100644 --- a/htroot/api/ymarks/get_ymark.java +++ b/htroot/api/ymarks/get_ymark.java @@ -30,13 +30,13 @@ public class get_ymark { final TreeSet bookmarks = new TreeSet(); if(isAdmin || isAuthUser) { - final String bmk_table = (isAuthUser ? user.getUserName() : YMarkTables.TABLE_BOOKMARKS_USER_ADMIN)+YMarkTables.TABLES.BOOKMARKS.basename(); - final String tag_table = (isAuthUser ? user.getUserName() : YMarkTables.TABLE_BOOKMARKS_USER_ADMIN)+YMarkTables.TABLES.TAGS.basename(); - final String folder_table = (isAuthUser ? user.getUserName() : YMarkTables.TABLE_BOOKMARKS_USER_ADMIN)+YMarkTables.TABLES.FOLDERS.basename(); + final String bmk_table = (isAuthUser ? user.getUserName() : YMarkTables.USER_ADMIN)+YMarkTables.TABLES.BOOKMARKS.basename(); + final String tag_table = (isAuthUser ? user.getUserName() : YMarkTables.USER_ADMIN)+YMarkTables.TABLES.TAGS.basename(); + final String folder_table = (isAuthUser ? user.getUserName() : YMarkTables.USER_ADMIN)+YMarkTables.TABLES.FOLDERS.basename(); if(post.containsKey(YMarkTables.BOOKMARK.TAGS.key())) { tags = true; - final String[] tagArray = YMarkTables.cleanTagsString(post.get(YMarkTables.BOOKMARK.TAGS.key())).split(YMarkTables.TABLE_TAGS_SEPARATOR); + final String[] tagArray = YMarkTables.cleanTagsString(post.get(YMarkTables.BOOKMARK.TAGS.key())).split(YMarkTables.TAGS_SEPARATOR); try { bookmarks.addAll(sb.tables.bookmarks.getBookmarks(tag_table, tagArray)); } catch (IOException e) { @@ -45,7 +45,7 @@ public class get_ymark { Log.logException(e); } } else if(post.containsKey(YMarkTables.BOOKMARK.FOLDERS.key())) { - final String[] folderArray = YMarkTables.cleanFoldersString(post.get(YMarkTables.BOOKMARK.FOLDERS.key())).split(YMarkTables.TABLE_TAGS_SEPARATOR); + final String[] folderArray = YMarkTables.cleanFoldersString(post.get(YMarkTables.BOOKMARK.FOLDERS.key())).split(YMarkTables.TAGS_SEPARATOR); try { if(tags) bookmarks.retainAll(sb.tables.bookmarks.getBookmarks(folder_table, folderArray)); @@ -61,7 +61,7 @@ public class get_ymark { putBookmarks(bookmarks, bmk_table); } else { - prop.put(YMarkTables.TABLE_BOOKMARKS_USER_AUTHENTICATE,YMarkTables.TABLE_BOOKMARKS_USER_AUTHENTICATE_MSG); + prop.put(YMarkTables.USER_AUTHENTICATE,YMarkTables.USER_AUTHENTICATE_MSG); } // return rewrite properties return prop; diff --git a/htroot/api/ymarks/import_html.java b/htroot/api/ymarks/import_html.java index 2d1dbae2d..90aef17f2 100644 --- a/htroot/api/ymarks/import_html.java +++ b/htroot/api/ymarks/import_html.java @@ -25,7 +25,7 @@ public class import_html { final boolean isAuthUser = user!= null && user.hasRight(userDB.Entry.BOOKMARK_RIGHT); if(isAdmin || isAuthUser) { - final String bmk_user = (isAuthUser ? user.getUserName() : YMarkTables.TABLE_BOOKMARKS_USER_ADMIN); + final String bmk_user = (isAuthUser ? user.getUserName() : YMarkTables.USER_ADMIN); if(post.containsKey("htmlfile")){ try { final ByteArrayInputStream byteIn = new ByteArrayInputStream(post.get("htmlfile$file").getBytes("UTF-8")); diff --git a/source/de/anomic/data/YMarkTables.java b/source/de/anomic/data/YMarkTables.java index b721bab3c..498f963d8 100644 --- a/source/de/anomic/data/YMarkTables.java +++ b/source/de/anomic/data/YMarkTables.java @@ -32,13 +32,13 @@ public class YMarkTables { } } - public static enum PROTOCOL { + public static enum PROTOCOLS { HTTP ("http://"), HTTPS ("https://"); private String protocol; - private PROTOCOL(String s) { + private PROTOCOLS(String s) { this.protocol = s; } public String protocol() { @@ -48,25 +48,27 @@ public class YMarkTables { return this.protocol+s; } } - + public static enum BOOKMARK { - URL ("url", ""), - TITLE ("title", ""), - DESC ("desc", ""), - DATE_ADDED ("date_added", ""), - DATE_MODIFIED ("date_modified", ""), - DATE_VISITED ("date_visited", ""), - PUBLIC ("public", "flase"), - TAGS ("tags", "unsorted"), - VISITS ("visits", "0"), - FOLDERS ("folders", "/unsorted"); + URL ("url", "", "HREF"), + TITLE ("title", "", ""), + DESC ("desc", "", ""), + DATE_ADDED ("date_added", "", "ADD_DATE"), + DATE_MODIFIED ("date_modified", "", "LAST_MODIFIED"), + DATE_VISITED ("date_visited", "", "LAST_VISITED"), + PUBLIC ("public", "flase", ""), + TAGS ("tags", "unsorted", "SHORTCUTURL"), + VISITS ("visits", "0", ""), + FOLDERS ("folders", "/unsorted", ""); private String key; private String dflt; + private String html_attrb; - private BOOKMARK(String k, String s) { + private BOOKMARK(String k, String s, String a) { this.key = k; this.dflt = s; + this.html_attrb = a; } public String key() { return this.key; @@ -74,31 +76,50 @@ public class YMarkTables { public String deflt() { return this.dflt; } + public String html_attrb() { + return this.html_attrb.toLowerCase(); + } + } + + public static enum INDEX { + ID ("id", ""), + NAME ("name", ""), + DESC ("desc", ""), + URLS ("urls", ""); + + private String key; + private String dflt; + + private INDEX(String k, String s) { + this.key = k; + this.dflt = s; + } + public String key() { + return this.key; + } + public String deflt() { + return this.dflt; + } public byte[] b_deflt() { return dflt.getBytes(); } } - - public final static String TABLE_BOOKMARKS_LOG = "BOOKMARKS"; - public final static String TABLE_BOOKMARKS_COL_ID = "id"; - - public final static String TABLE_BOOKMARKS_USER_ADMIN = "admin"; - public final static String TABLE_BOOKMARKS_USER_AUTHENTICATE = "AUTHENTICATE"; - public final static String TABLE_BOOKMARKS_USER_AUTHENTICATE_MSG = "Authentication required!"; - - public final static String TABLE_TAGS_SEPARATOR = ","; - - public final static String TABLE_INDEX_COL_ID = "id"; - public final static String TABLE_INDEX_COL_NAME = "name"; - public final static String TABLE_INDEX_DESC = "desc"; - public final static String TABLE_INDEX_COL_URLS = "urls"; - public final static short TABLE_INDEX_ACTION_ADD = 1; - public final static short TABLE_INDEX_ACTION_REMOVE = 2; - - public final static String TABLE_FOLDERS_SEPARATOR = "/"; - public final static String TABLE_FOLDERS_ROOT = "/"; - public final static String TABLE_FOLDERS_UNSORTED = "/unsorted"; - public final static String TABLE_FOLDERS_IMPORTED = "/imported"; + + public static enum INDEX_ACTION { + ADD, + REMOVE + } + + public final static String TAGS_SEPARATOR = ","; + public final static String FOLDERS_SEPARATOR = "/"; + public final static String FOLDERS_ROOT = "/"; + public final static String FOLDERS_UNSORTED = "/unsorted"; + public final static String FOLDERS_IMPORTED = "/imported"; + public final static String BOOKMARKS_LOG = "BOOKMARKS"; + public final static String BOOKMARKS_ID = "id"; + public final static String USER_ADMIN = "admin"; + public final static String USER_AUTHENTICATE = "AUTHENTICATE"; + public final static String USER_AUTHENTICATE_MSG = "Authentication required!"; private WorkTables worktables; public ConcurrentARC cache; @@ -121,7 +142,7 @@ public class YMarkTables { final StringBuilder urls = new StringBuilder(urlSet.size()*20); while(urlIter.hasNext()) { - urls.append(TABLE_TAGS_SEPARATOR); + urls.append(TAGS_SEPARATOR); urls.append(urlIter.next()); } urls.deleteCharAt(0); @@ -130,42 +151,49 @@ public class YMarkTables { public final static HashSet keysStringToSet(final String keysString) { HashSet keySet = new HashSet(); - final String[] keyArray = keysString.split(TABLE_TAGS_SEPARATOR); + final String[] keyArray = keysString.split(TAGS_SEPARATOR); for (final String key : keyArray) { keySet.add(key); } return keySet; } - public final static String cleanTagsString(String tagsString) { - // get rid of heading, trailing and double commas since they are useless - while (tagsString.length() > 0 && tagsString.charAt(0) == TABLE_TAGS_SEPARATOR.charAt(0)) { - tagsString = tagsString.substring(1); - } - while (tagsString.endsWith(TABLE_TAGS_SEPARATOR)) { - tagsString = tagsString.substring(0,tagsString.length() -1); - } - while (tagsString.contains(",,")){ - tagsString = tagsString.replaceAll(",,", TABLE_TAGS_SEPARATOR); - } - // space characters following a comma are removed - tagsString = tagsString.replaceAll(",\\s+", TABLE_TAGS_SEPARATOR); - return tagsString; + public final static String cleanTagsString(final String tagsString) { + StringBuilder ts = new StringBuilder(tagsString); + // get rid of double commas and space characters following a comma + for (int i = 0; i < ts.length()-1; i++) { + if (ts.charAt(i) == TAGS_SEPARATOR.charAt(0)) { + if (ts.charAt(i+1) == TAGS_SEPARATOR.charAt(0) || ts.charAt(i+1) == ' ') { + ts.deleteCharAt(i+1); + i--; + } + } + } + // get rid of heading and trailing comma + if (ts.charAt(0) == TAGS_SEPARATOR.charAt(0)) + ts.deleteCharAt(0); + if (ts.charAt(ts.length()-1) == TAGS_SEPARATOR.charAt(0)) + ts.deleteCharAt(ts.length()-1); + return ts.toString(); } - public final static String cleanFoldersString(String foldersString) { - foldersString = cleanTagsString(foldersString); - // get rid of double and trailing slashes - while (foldersString.endsWith(TABLE_FOLDERS_SEPARATOR)){ - foldersString = foldersString.substring(0, foldersString.length() -1); - } - while (foldersString.contains("/,")){ - foldersString = foldersString.replaceAll("/,", TABLE_TAGS_SEPARATOR); - } - while (foldersString.contains("//")){ - foldersString = foldersString.replaceAll("//", TABLE_FOLDERS_SEPARATOR); - } - return foldersString; + public final static String cleanFoldersString(final String foldersString) { + StringBuilder fs = new StringBuilder(cleanTagsString(foldersString)); + for (int i = 0; i < fs.length()-1; i++) { + if (fs.charAt(i) == FOLDERS_SEPARATOR.charAt(0)) { + if (fs.charAt(i+1) == TAGS_SEPARATOR.charAt(0) || fs.charAt(i+1) == FOLDERS_SEPARATOR.charAt(0)) { + fs.deleteCharAt(i); + i--; + } else if (fs.charAt(i+1) == ' ') { + fs.deleteCharAt(i+1); + i--; + } + } + } + if (fs.charAt(fs.length()-1) == FOLDERS_SEPARATOR.charAt(0)) { + fs.deleteCharAt(fs.length()-1); + } + return fs.toString(); } public void cleanCache(final String tablename) { @@ -186,8 +214,8 @@ public class YMarkTables { this.cache.insert(cacheKey, BurlSet); - tagEntry.put(TABLE_INDEX_COL_NAME, keyname); - tagEntry.put(TABLE_INDEX_COL_URLS, BurlSet); + tagEntry.put(INDEX.NAME.key, keyname); + tagEntry.put(INDEX.URLS.key, BurlSet); this.worktables.insert(index_table, key, tagEntry); } @@ -196,9 +224,9 @@ public class YMarkTables { if (this.cache.containsKey(cacheKey)) { return keysStringToSet(new String(this.cache.get(cacheKey))); } else { - final Tables.Row idx_row = this.worktables.select(index_table, YMarkTables.getKeyId(keyname)); + final Tables.Row idx_row = this.worktables.select(index_table, getKeyId(keyname)); if (idx_row != null) { - final byte[] keys = idx_row.get(YMarkTables.TABLE_INDEX_COL_URLS); + final byte[] keys = idx_row.get(INDEX.URLS.key); this.cache.put(cacheKey, keys); return keysStringToSet(new String(keys)); } @@ -228,7 +256,7 @@ public class YMarkTables { * @param url is the url has as returned by DigestURI.hash() * @param action is either add (1) or remove (2) */ - public void updateIndexTable(final String index_table, final String keyname, final byte[] url, final int action) { + public void updateIndexTable(final String index_table, final String keyname, final byte[] url, final INDEX_ACTION action) { final byte[] key = YMarkTables.getKeyId(keyname); final String urlHash = new String(url); Tables.Row row = null; @@ -243,11 +271,11 @@ public class YMarkTables { // key has no index_table entry if(row == null) { switch (action) { - case TABLE_INDEX_ACTION_ADD: + case ADD: urlSet.add(urlHash); createIndexEntry(index_table, keyname, urlSet); break; - case TABLE_INDEX_ACTION_REMOVE: + case REMOVE: // key has no index_table entry but a cache entry // TODO: this shouldn't happen if(!urlSet.isEmpty()) { @@ -265,13 +293,13 @@ public class YMarkTables { // key has no cache entry if (urlSet.isEmpty()) { // load urlSet from index_table - urlSet = keysStringToSet(new String(row.get(TABLE_INDEX_COL_URLS))); + urlSet = keysStringToSet(new String(row.get(INDEX.URLS.key))); } switch (action) { - case TABLE_INDEX_ACTION_ADD: + case ADD: urlSet.add(urlHash); break; - case TABLE_INDEX_ACTION_REMOVE: + case REMOVE: urlSet.remove(urlHash); break; default: @@ -283,7 +311,7 @@ public class YMarkTables { } else { BurlSet = keySetToBytes(urlSet); this.cache.insert(cacheKey, BurlSet); - row.put(TABLE_INDEX_COL_URLS, BurlSet); + row.put(INDEX.URLS.key, BurlSet); this.worktables.update(index_table, row); } } diff --git a/source/de/anomic/data/YMarksHTMLImporter.java b/source/de/anomic/data/YMarksHTMLImporter.java index df1d5428a..fd25c4c44 100644 --- a/source/de/anomic/data/YMarksHTMLImporter.java +++ b/source/de/anomic/data/YMarksHTMLImporter.java @@ -11,53 +11,48 @@ import net.yacy.kelondro.blob.Tables.Data; import net.yacy.kelondro.logging.Log; public class YMarksHTMLImporter extends HTMLEditorKit.ParserCallback { - - private static final short NOTHING = 0; - private static final short BOOKMARK = 2; - private static final short FOLDER = 3; - private static final String MILLIS = "000"; + + public static enum STATE { + NOTHING, + BOOKMARK, + FOLDER, + BMK_DESC, + FOLDER_DESC + } + + private static final String MILLIS = "000"; private final WorkTables worktables; private final String bmk_table; private final String tag_table; private final String folder_table; - private final String[] tagArray; - private final String tagsString; - private short state; + private STATE state; + private HTML.Tag prevTag; + private Data bookmark; private String folder; - private String href; - private String date_added; - private String date_visited; - private String date_modified; + private String[] tagArray; + private byte[] urlHash; - public YMarksHTMLImporter(final WorkTables worktables, final String user) { - this(worktables, user, YMarkTables.TABLE_FOLDERS_IMPORTED, null); + this(worktables, user, YMarkTables.FOLDERS_IMPORTED); } public YMarksHTMLImporter(final WorkTables worktables, final String user, final String folder) { - this(worktables, user, folder, null); - } - - public YMarksHTMLImporter(final WorkTables worktables, final String user, final String folder, final String tagsString) { this.bmk_table = YMarkTables.TABLES.BOOKMARKS.tablename(user); this.tag_table = YMarkTables.TABLES.TAGS.tablename(user); this.folder_table = YMarkTables.TABLES.FOLDERS.tablename(user); this.worktables = worktables; - if(folder.contains(YMarkTables.TABLE_TAGS_SEPARATOR)) + this.state = STATE.NOTHING; + this.bookmark = new Data(); + + if(folder.contains(YMarkTables.TAGS_SEPARATOR)) this.folder = folder.substring(0, folder.indexOf(',')); - else if(!folder.startsWith(YMarkTables.TABLE_FOLDERS_ROOT)) - this.folder = YMarkTables.TABLE_FOLDERS_ROOT + folder; + else if(!folder.startsWith(YMarkTables.FOLDERS_ROOT)) + this.folder = YMarkTables.FOLDERS_ROOT + folder; else this.folder = folder; - - this.tagsString = tagsString; - if(tagsString != null) - this.tagArray = tagsString.split(YMarkTables.TABLE_TAGS_SEPARATOR); - else - this.tagArray = null; } public void handleText(char[] data, int pos) { @@ -66,66 +61,91 @@ public class YMarksHTMLImporter extends HTMLEditorKit.ParserCallback { break; case BOOKMARK: try { - final byte[] urlHash = YMarkTables.getBookmarkId(this.href); - // only import new bookmarks - if(!worktables.has(this.bmk_table, urlHash)) { - // create and insert new entry - final Data bmk = new Data(); - bmk.put(YMarkTables.BOOKMARK.URL.key(), this.href.getBytes()); - bmk.put(YMarkTables.BOOKMARK.TITLE.key(), (new String(data)).getBytes()); - bmk.put(YMarkTables.BOOKMARK.DESC.key(), YMarkTables.BOOKMARK.DESC.b_deflt()); - bmk.put(YMarkTables.BOOKMARK.PUBLIC.key(), YMarkTables.BOOKMARK.PUBLIC.b_deflt()); - bmk.put(YMarkTables.BOOKMARK.VISITS.key(), YMarkTables.BOOKMARK.VISITS.b_deflt()); - bmk.put(YMarkTables.BOOKMARK.DATE_ADDED.key(), this.date_added.getBytes()); - bmk.put(YMarkTables.BOOKMARK.DATE_MODIFIED.key(), this.date_modified.getBytes()); - bmk.put(YMarkTables.BOOKMARK.DATE_VISITED.key(), this.date_visited.getBytes()); - bmk.put(YMarkTables.BOOKMARK.FOLDERS.key(), this.folder.getBytes()); - this.worktables.bookmarks.updateIndexTable(this.folder_table, this.folder, urlHash, YMarkTables.TABLE_INDEX_ACTION_ADD); - Log.logInfo(YMarkTables.TABLE_BOOKMARKS_LOG, "YMarksHTMLImporter - folder: "+this.folder); - if (this.tagsString != null) { - bmk.put(YMarkTables.BOOKMARK.TAGS.key(), this.tagsString.getBytes()); - for (final String tag : tagArray) { - this.worktables.bookmarks.updateIndexTable(this.tag_table, tag, urlHash, YMarkTables.TABLE_INDEX_ACTION_ADD); - } - } - this.worktables.insert(bmk_table, urlHash, bmk); - Log.logInfo(YMarkTables.TABLE_BOOKMARKS_LOG, "YMarksHTMLImporter - url successfully imported: "+this.href); - } else { - Log.logInfo(YMarkTables.TABLE_BOOKMARKS_LOG, "YMarksHTMLImporter - url already exists: "+this.href); + if(this.urlHash != null) { + // only import new bookmarks + if(!worktables.has(this.bmk_table, this.urlHash)) { + bookmark.put(YMarkTables.BOOKMARK.FOLDERS.key(), this.folder.getBytes()); + this.worktables.bookmarks.updateIndexTable(this.folder_table, this.folder, this.urlHash, YMarkTables.INDEX_ACTION.ADD); + if (this.tagArray != null) { + for (final String tag : this.tagArray) { + this.worktables.bookmarks.updateIndexTable(this.tag_table, tag, this.urlHash, YMarkTables.INDEX_ACTION.ADD); + } + } + this.worktables.insert(bmk_table, urlHash, bookmark); + } } break; - } catch (MalformedURLException e) { - Log.logInfo(YMarkTables.TABLE_BOOKMARKS_LOG, "YMarksHTMLImporter - malformed url: "+this.href); } catch (IOException e) { Log.logException(e); } break; case FOLDER: - this.folder = this.folder + YMarkTables.TABLE_FOLDERS_SEPARATOR + new String(data); + this.folder = this.folder + YMarkTables.FOLDERS_SEPARATOR + new String(data); + Log.logInfo(YMarkTables.BOOKMARKS_LOG, "YMarksHTMLImporter - folder: "+this.folder); break; + case FOLDER_DESC: + Log.logInfo(YMarkTables.BOOKMARKS_LOG, "YMarksHTMLImporter - folder_desc: "+new String(data)); + break; + case BMK_DESC: + Log.logInfo(YMarkTables.BOOKMARKS_LOG, "YMarksHTMLImporter - bmk_desc: "+new String(data)); + break; default: break; } - state = NOTHING; } public void handleStartTag(HTML.Tag t, MutableAttributeSet a, int pos) { - if (t == HTML.Tag.A) { - this.href = (String)a.getAttribute(HTML.Attribute.HREF); - this.date_added = (String)a.getAttribute("add_date")+MILLIS; - this.date_visited = (String)a.getAttribute("last_visit")+MILLIS; - this.date_modified = (String)a.getAttribute("last_modified")+MILLIS; - state = BOOKMARK; + if (t == HTML.Tag.A) { + this.urlHash = null; + this.tagArray = null; + this.bookmark.clear(); + final String url = (String)a.getAttribute(HTML.Attribute.HREF); + try { + this.urlHash = YMarkTables.getBookmarkId(url); + this.bookmark.put(YMarkTables.BOOKMARK.URL.key(), url); + Log.logInfo(YMarkTables.BOOKMARKS_LOG, "YMarksHTMLImporter - url: "+url); + } catch (MalformedURLException e) { + Log.logInfo(YMarkTables.BOOKMARKS_LOG, "YMarksHTMLImporter - bmk_url malformed: "+url); + } + for (YMarkTables.BOOKMARK bmk : YMarkTables.BOOKMARK.values()) { + final String s = (String)a.getAttribute(bmk.html_attrb()); + if(s != null) { + switch(bmk) { + case TAGS: + this.tagArray = s.split(YMarkTables.TAGS_SEPARATOR); + this.bookmark.put(bmk.key(), YMarkTables.cleanTagsString(s)); + break; + case DATE_ADDED: + case DATE_MODIFIED: + case DATE_VISITED: + this.bookmark.put(bmk.key(), s+MILLIS); + break; + default: + break; + } + } + } + state = STATE.BOOKMARK; } else if (t == HTML.Tag.H3) { - state = FOLDER; + state = STATE.FOLDER; + } else if (t == HTML.Tag.DD && this.prevTag == HTML.Tag.A) { + state = STATE.BMK_DESC; + } else { + state = STATE.NOTHING; } + this.prevTag = t; } public void handleEndTag(HTML.Tag t, int pos) { - if (t == HTML.Tag.DL) { - if(!folder.equals(YMarkTables.TABLE_FOLDERS_IMPORTED)) { + if (t == HTML.Tag.H3) { + // for some reason the
is not recognized as StartTag + state = STATE.FOLDER_DESC; + } else if (t == HTML.Tag.DL) { + if(!folder.equals(YMarkTables.FOLDERS_IMPORTED)) { folder = folder.replaceAll("(/.[^/]*$)", ""); } + } else { + state = STATE.NOTHING; } } }