diff --git a/htroot/api/ymarks/get_ymark.xml b/htroot/api/ymarks/get_ymark.xml index 626679d3b..fbc05ea0b 100644 --- a/htroot/api/ymarks/get_ymark.xml +++ b/htroot/api/ymarks/get_ymark.xml @@ -1,6 +1,6 @@ #{bookmarks}# - + #{/bookmarks}# \ No newline at end of file diff --git a/htroot/api/ymarks/import.html b/htroot/api/ymarks/import.html index c98cbb246..a5d1ec422 100644 --- a/htroot/api/ymarks/import.html +++ b/htroot/api/ymarks/import.html @@ -33,5 +33,34 @@ +
+
+ Import XBEL Bookmarks +
+
+ +
+
+ +
+
+ +
+
+ +
+
+ +
+
+ +
+
+ +
+
\ No newline at end of file diff --git a/htroot/api/ymarks/import_html.java b/htroot/api/ymarks/import_html.java index 90aef17f2..a080ac4ee 100644 --- a/htroot/api/ymarks/import_html.java +++ b/htroot/api/ymarks/import_html.java @@ -1,14 +1,13 @@ import java.io.ByteArrayInputStream; import java.io.IOException; -import java.io.InputStreamReader; import java.io.UnsupportedEncodingException; - -import javax.swing.text.html.parser.ParserDelegator; +import java.util.HashMap; import net.yacy.cora.protocol.RequestHeader; import net.yacy.kelondro.logging.Log; -import de.anomic.data.YMarksHTMLImporter; import de.anomic.data.YMarkTables; +import de.anomic.data.YMarksHTMLImporter; +import de.anomic.data.YMarksXBELImporter; import de.anomic.data.userDB; import de.anomic.search.Switchboard; import de.anomic.server.serverObjects; @@ -16,30 +15,54 @@ import de.anomic.server.serverSwitch; public class import_html { - + + private static Switchboard sb = null; + public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) { - final Switchboard sb = (Switchboard) env; + sb = (Switchboard) env; final serverObjects prop = new serverObjects(); final userDB.Entry user = sb.userDB.getUser(header); final boolean isAdmin = (sb.verifyAuthentication(header, true)); final boolean isAuthUser = user!= null && user.hasRight(userDB.Entry.BOOKMARK_RIGHT); - + if(isAdmin || isAuthUser) { - final String bmk_user = (isAuthUser ? user.getUserName() : YMarkTables.USER_ADMIN); + final String bmk_user = (isAuthUser ? user.getUserName() : YMarkTables.USER_ADMIN); if(post.containsKey("htmlfile")){ try { final ByteArrayInputStream byteIn = new ByteArrayInputStream(post.get("htmlfile$file").getBytes("UTF-8")); if(byteIn !=null) { - final InputStreamReader reader = new InputStreamReader(byteIn,"UTF-8"); - final ParserDelegator delegator = new ParserDelegator(); - final YMarksHTMLImporter htmlHandler = new YMarksHTMLImporter(sb.tables, bmk_user); - delegator.parse(reader, htmlHandler, true); + final YMarksHTMLImporter htmlImporter = new YMarksHTMLImporter(byteIn, 100); + Thread t = new Thread(htmlImporter, "YMarks - HTML Importer"); + t.start(); + HashMap bmk; + while ((bmk = htmlImporter.take()) != YMarkTables.POISON) { + sb.tables.bookmarks.addBookmark(bmk, bmk_user); + } + } + } catch (UnsupportedEncodingException e) { + Log.logException(e); + } catch (IOException e) { + Log.logException(e); + } + prop.put("result", "1"); + } + if(post.containsKey("xbelfile")){ + try { + final ByteArrayInputStream byteIn = new ByteArrayInputStream(post.get("xbelfile$file").getBytes("UTF-8")); + if(byteIn != null) { + final YMarksXBELImporter xbelImporter = new YMarksXBELImporter(byteIn, 100); + Thread t = new Thread(xbelImporter, "YMarks - HTML Importer"); + t.start(); + HashMap bmk; + while ((bmk = xbelImporter.take()) != YMarkTables.POISON) { + sb.tables.bookmarks.addBookmark(bmk, bmk_user); + } } } catch (UnsupportedEncodingException e) { - Log.logException(e); + Log.logException(e); } catch (IOException e) { - Log.logException(e); - } + Log.logException(e); + } prop.put("result", "1"); } } diff --git a/source/de/anomic/data/YMarkTables.java b/source/de/anomic/data/YMarkTables.java index 498f963d8..e87388aaf 100644 --- a/source/de/anomic/data/YMarkTables.java +++ b/source/de/anomic/data/YMarkTables.java @@ -2,6 +2,7 @@ package de.anomic.data; import java.io.IOException; import java.net.MalformedURLException; +import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import net.yacy.cora.storage.ConcurrentARC; @@ -50,25 +51,27 @@ public class YMarkTables { } public static enum BOOKMARK { - URL ("url", "", "HREF"), - TITLE ("title", "", ""), - DESC ("desc", "", ""), - DATE_ADDED ("date_added", "", "ADD_DATE"), - DATE_MODIFIED ("date_modified", "", "LAST_MODIFIED"), - DATE_VISITED ("date_visited", "", "LAST_VISITED"), - PUBLIC ("public", "flase", ""), - TAGS ("tags", "unsorted", "SHORTCUTURL"), - VISITS ("visits", "0", ""), - FOLDERS ("folders", "/unsorted", ""); + URL ("url", "", "href", "href"), + TITLE ("title", "", "", ""), + DESC ("desc", "", "", ""), + DATE_ADDED ("date_added", "", "add_date", "added"), + DATE_MODIFIED ("date_modified", "", "last_modified", "modified"), + DATE_VISITED ("date_visited", "", "last_visited", "visited"), + PUBLIC ("public", "flase", "", ""), + TAGS ("tags", "unsorted", "shortcuturl", ""), + VISITS ("visits", "0", "", ""), + FOLDERS ("folders", "/unsorted", "", ""); private String key; private String dflt; private String html_attrb; + private String xbel_attrb; - private BOOKMARK(String k, String s, String a) { + private BOOKMARK(String k, String s, String a, String x) { this.key = k; this.dflt = s; this.html_attrb = a; + this.xbel_attrb = x; } public String key() { return this.key; @@ -77,7 +80,10 @@ public class YMarkTables { return this.dflt; } public String html_attrb() { - return this.html_attrb.toLowerCase(); + return this.html_attrb; + } + public String xbel_attrb() { + return this.xbel_attrb; } } @@ -110,6 +116,7 @@ public class YMarkTables { REMOVE } + public final static HashMap POISON = new HashMap(); public final static String TAGS_SEPARATOR = ","; public final static String FOLDERS_SEPARATOR = "/"; public final static String FOLDERS_ROOT = "/"; @@ -321,4 +328,73 @@ public class YMarkTables { Log.logException(e); } } + + public void addBookmark(final HashMap bmk, final String bmk_user) { + final String bmk_table = bmk_user + TABLES.BOOKMARKS.basename(); + final String folder_table = bmk_user + TABLES.FOLDERS.basename(); + final String tag_table = bmk_user + TABLES.TAGS.basename(); + + Tables.Row bmk_row = null; + byte[] urlHash = null; + + try { + urlHash = getBookmarkId(bmk.get(BOOKMARK.URL.key())); + } catch (MalformedURLException e) { + Log.logInfo(BOOKMARKS_LOG, "Malformed URL:"+bmk.get(BOOKMARK.URL.key())); + return; + } + if (urlHash != null) { + try { + bmk_row = this.worktables.select(bmk_table, urlHash); + } catch (IOException e) { + Log.logException(e); + } catch (RowSpaceExceededException e) { + Log.logException(e); + } + + if (bmk_row == null) { + Data data = new Data(); + for (BOOKMARK b : BOOKMARK.values()) { + switch(b) { + case DATE_ADDED: + case DATE_MODIFIED: + if(bmk.containsKey(b.key())) { + data.put(b.key(), bmk.get(b.key())); + } else { + data.put(b.key(), String.valueOf(System.currentTimeMillis()).getBytes()); + } + break; + case TAGS: + if(bmk.containsKey(b.key())) { + final String[] tagArray = bmk.get(b.key()).split(TAGS_SEPARATOR); + for (final String tag : tagArray) { + this.worktables.bookmarks.updateIndexTable(tag_table, tag, urlHash, INDEX_ACTION.ADD); + } + data.put(b.key(), bmk.get(b.key())); + } + break; + case FOLDERS: + if(bmk.containsKey(b.key())) { + final String[] folderArray = bmk.get(b.key()).split(TAGS_SEPARATOR); + for (final String folder : folderArray) { + this.worktables.bookmarks.updateIndexTable(folder_table, folder, urlHash, INDEX_ACTION.ADD); + } + data.put(b.key(), bmk.get(b.key())); + } + break; + default: + if(bmk.containsKey(b.key())) { + data.put(b.key(), bmk.get(b.key())); + } + } + } + try { + Log.logInfo(BOOKMARKS_LOG, "Add URL:"+bmk.get(BOOKMARK.URL.key())); + this.worktables.insert(bmk_table, urlHash, data); + } catch (IOException e) { + Log.logException(e); + } + } + } + } } diff --git a/source/de/anomic/data/YMarksHTMLImporter.java b/source/de/anomic/data/YMarksHTMLImporter.java index fd25c4c44..dc899b79c 100644 --- a/source/de/anomic/data/YMarksHTMLImporter.java +++ b/source/de/anomic/data/YMarksHTMLImporter.java @@ -1,16 +1,20 @@ package de.anomic.data; import java.io.IOException; -import java.net.MalformedURLException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.util.HashMap; +import java.util.concurrent.ArrayBlockingQueue; +import java.util.concurrent.BlockingQueue; import javax.swing.text.MutableAttributeSet; import javax.swing.text.html.HTML; import javax.swing.text.html.HTMLEditorKit; +import javax.swing.text.html.parser.ParserDelegator; -import net.yacy.kelondro.blob.Tables.Data; import net.yacy.kelondro.logging.Log; -public class YMarksHTMLImporter extends HTMLEditorKit.ParserCallback { +public class YMarksHTMLImporter extends HTMLEditorKit.ParserCallback implements Runnable { public static enum STATE { NOTHING, @@ -21,73 +25,61 @@ public class YMarksHTMLImporter extends HTMLEditorKit.ParserCallback { } private static final String MILLIS = "000"; - - private final WorkTables worktables; - private final String bmk_table; - private final String tag_table; - private final String folder_table; private STATE state; private HTML.Tag prevTag; - private Data bookmark; + private HashMap bmk; private String folder; - private String[] tagArray; - private byte[] urlHash; - public YMarksHTMLImporter(final WorkTables worktables, final String user) { - this(worktables, user, YMarkTables.FOLDERS_IMPORTED); - } + private final InputStream input; + private final BlockingQueue> bookmarks; + private final ParserDelegator htmlParser; - public YMarksHTMLImporter(final WorkTables worktables, final String user, final String folder) { - this.bmk_table = YMarkTables.TABLES.BOOKMARKS.tablename(user); - this.tag_table = YMarkTables.TABLES.TAGS.tablename(user); - this.folder_table = YMarkTables.TABLES.FOLDERS.tablename(user); - this.worktables = worktables; - + public YMarksHTMLImporter(final InputStream input, int queueSize) throws IOException { this.state = STATE.NOTHING; - this.bookmark = new Data(); - - if(folder.contains(YMarkTables.TAGS_SEPARATOR)) - this.folder = folder.substring(0, folder.indexOf(',')); - else if(!folder.startsWith(YMarkTables.FOLDERS_ROOT)) - this.folder = YMarkTables.FOLDERS_ROOT + folder; - else - this.folder = folder; + this.prevTag = null; + this.bmk = new HashMap(); + this.folder = YMarkTables.FOLDERS_IMPORTED; + this.bookmarks = new ArrayBlockingQueue>(queueSize); + this.input = input; + this.htmlParser = new ParserDelegator(); } + public void run() { + try { + this.htmlParser.parse(new InputStreamReader(this.input,"UTF-8"), this, true); + } catch (IOException e) { + Log.logException(e); + } finally { + try { + this.bookmarks.put(YMarkTables.POISON); + } catch (InterruptedException e) { + Log.logException(e); + } + try { + this.input.close(); + } catch (IOException e) { + Log.logException(e); + } + } + } + public void handleText(char[] data, int pos) { switch (state) { case NOTHING: break; case BOOKMARK: - try { - if(this.urlHash != null) { - // only import new bookmarks - if(!worktables.has(this.bmk_table, this.urlHash)) { - bookmark.put(YMarkTables.BOOKMARK.FOLDERS.key(), this.folder.getBytes()); - this.worktables.bookmarks.updateIndexTable(this.folder_table, this.folder, this.urlHash, YMarkTables.INDEX_ACTION.ADD); - if (this.tagArray != null) { - for (final String tag : this.tagArray) { - this.worktables.bookmarks.updateIndexTable(this.tag_table, tag, this.urlHash, YMarkTables.INDEX_ACTION.ADD); - } - } - this.worktables.insert(bmk_table, urlHash, bookmark); - } - } - break; - } catch (IOException e) { - Log.logException(e); - } + this.bmk.put(YMarkTables.BOOKMARK.TITLE.key(), new String(data)); + this.bmk.put(YMarkTables.BOOKMARK.FOLDERS.key(), this.folder); break; case FOLDER: this.folder = this.folder + YMarkTables.FOLDERS_SEPARATOR + new String(data); - Log.logInfo(YMarkTables.BOOKMARKS_LOG, "YMarksHTMLImporter - folder: "+this.folder); - break; + break; case FOLDER_DESC: - Log.logInfo(YMarkTables.BOOKMARKS_LOG, "YMarksHTMLImporter - folder_desc: "+new String(data)); + Log.logInfo(YMarkTables.BOOKMARKS_LOG, "YMarksHTMLImporter - folder: "+this.folder+" desc: "+new String(data)); break; case BMK_DESC: - Log.logInfo(YMarkTables.BOOKMARKS_LOG, "YMarksHTMLImporter - bmk_desc: "+new String(data)); + this.bmk.put(YMarkTables.BOOKMARK.DESC.key(), new String(data)); break; default: break; @@ -95,30 +87,29 @@ public class YMarksHTMLImporter extends HTMLEditorKit.ParserCallback { } public void handleStartTag(HTML.Tag t, MutableAttributeSet a, int pos) { - if (t == HTML.Tag.A) { - this.urlHash = null; - this.tagArray = null; - this.bookmark.clear(); - final String url = (String)a.getAttribute(HTML.Attribute.HREF); - try { - this.urlHash = YMarkTables.getBookmarkId(url); - this.bookmark.put(YMarkTables.BOOKMARK.URL.key(), url); - Log.logInfo(YMarkTables.BOOKMARKS_LOG, "YMarksHTMLImporter - url: "+url); - } catch (MalformedURLException e) { - Log.logInfo(YMarkTables.BOOKMARKS_LOG, "YMarksHTMLImporter - bmk_url malformed: "+url); + if (t == HTML.Tag.A) { + if (!this.bmk.isEmpty()) { + try { + this.bookmarks.put(this.bmk); + bmk = new HashMap(); + } catch (InterruptedException e) { + Log.logException(e); + } } - for (YMarkTables.BOOKMARK bmk : YMarkTables.BOOKMARK.values()) { + final String url = (String)a.getAttribute(HTML.Attribute.HREF); + this.bmk.put(YMarkTables.BOOKMARK.URL.key(), url); + + for (YMarkTables.BOOKMARK bmk : YMarkTables.BOOKMARK.values()) { final String s = (String)a.getAttribute(bmk.html_attrb()); if(s != null) { switch(bmk) { case TAGS: - this.tagArray = s.split(YMarkTables.TAGS_SEPARATOR); - this.bookmark.put(bmk.key(), YMarkTables.cleanTagsString(s)); + this.bmk.put(bmk.key(), YMarkTables.cleanTagsString(s)); break; case DATE_ADDED: case DATE_MODIFIED: case DATE_VISITED: - this.bookmark.put(bmk.key(), s+MILLIS); + this.bmk.put(bmk.key(), s+MILLIS); break; default: break; @@ -138,7 +129,6 @@ public class YMarksHTMLImporter extends HTMLEditorKit.ParserCallback { public void handleEndTag(HTML.Tag t, int pos) { if (t == HTML.Tag.H3) { - // for some reason the
is not recognized as StartTag state = STATE.FOLDER_DESC; } else if (t == HTML.Tag.DL) { if(!folder.equals(YMarkTables.FOLDERS_IMPORTED)) { @@ -148,4 +138,13 @@ public class YMarksHTMLImporter extends HTMLEditorKit.ParserCallback { state = STATE.NOTHING; } } + + public HashMap take() { + try { + return this.bookmarks.take(); + } catch (InterruptedException e) { + Log.logException(e); + return null; + } + } } diff --git a/source/de/anomic/data/YMarksXBELImporter.java b/source/de/anomic/data/YMarksXBELImporter.java new file mode 100644 index 000000000..bc45b5238 --- /dev/null +++ b/source/de/anomic/data/YMarksXBELImporter.java @@ -0,0 +1,188 @@ +package de.anomic.data; + +import java.io.IOException; +import java.io.InputStream; +import java.util.HashMap; +import java.util.concurrent.ArrayBlockingQueue; + +import javax.xml.parsers.ParserConfigurationException; +import javax.xml.parsers.SAXParser; +import javax.xml.parsers.SAXParserFactory; + +import net.yacy.kelondro.logging.Log; + +import org.xml.sax.Attributes; +import org.xml.sax.SAXException; +import org.xml.sax.SAXParseException; +import org.xml.sax.helpers.DefaultHandler; + +public class YMarksXBELImporter extends DefaultHandler implements Runnable { + + public static enum XBEL { + XBEL, + TITLE, + DESC, + BOOKMARK, + FOLDER, + SEPARATOR, + ALIAS, + INFO, + METADATA; + + public String tag() { + return this.toString().toLowerCase(); + } + } + + public static enum STATE { + NOTHING, + BOOKMARK, + FOLDER, + FOLDER_DESC + } + + private HashMap bmk; + private boolean parsingValue; + private STATE state; + private String keyname; + private String folder; + private final InputStream input; + private final StringBuilder buffer; + private final ArrayBlockingQueue> bookmarks; + private final SAXParser saxParser; + + + public YMarksXBELImporter (final InputStream input, int queueSize) throws IOException { + this.buffer = new StringBuilder(); + this.bmk = null; + this.folder = YMarkTables.FOLDERS_IMPORTED; + this.bookmarks = new ArrayBlockingQueue>(queueSize); + this.input = input; + final SAXParserFactory factory = SAXParserFactory.newInstance(); + try { + this.saxParser = factory.newSAXParser(); + } catch (ParserConfigurationException e) { + Log.logException(e); + throw new IOException(e.getMessage()); + } catch (SAXException e) { + Log.logException(e); + throw new IOException(e.getMessage()); + } + } + + public void run() { + try { + this.saxParser.parse(this.input, this); + } catch (SAXParseException e) { + Log.logException(e); + } catch (SAXException e) { + Log.logException(e); + } catch (IOException e) { + Log.logException(e); + } finally { + try { + this.bookmarks.put(YMarkTables.POISON); + } catch (InterruptedException e1) { + Log.logException(e1); + } + try { + this.input.close(); + } catch (IOException e) { + Log.logException(e); + } + } + } + + public void startElement(final String uri, final String name, String tag, final Attributes atts) throws SAXException { + if (tag == null) return; + tag = tag.toLowerCase(); + if (XBEL.BOOKMARK.tag().equals(tag)) { + this.bmk = new HashMap(); + this.bmk.put(YMarkTables.BOOKMARK.URL.key(), atts.getValue(uri, YMarkTables.BOOKMARK.URL.xbel_attrb())); + this.bmk.put(YMarkTables.BOOKMARK.DATE_ADDED.key(), atts.getValue(uri, YMarkTables.BOOKMARK.DATE_ADDED.xbel_attrb())); + this.bmk.put(YMarkTables.BOOKMARK.DATE_VISITED.key(), atts.getValue(uri, YMarkTables.BOOKMARK.DATE_VISITED.xbel_attrb())); + this.bmk.put(YMarkTables.BOOKMARK.DATE_MODIFIED.key(), atts.getValue(uri, YMarkTables.BOOKMARK.DATE_MODIFIED.xbel_attrb())); + state = STATE.BOOKMARK; + this.parsingValue = false; + } else if(XBEL.FOLDER.tag().equals(tag)) { + this.state = STATE.FOLDER; + } else if (XBEL.DESC.tag().equals(tag)) { + if(this.state == STATE.FOLDER) { + this.keyname = null; + this.state = STATE.FOLDER_DESC; + } else if (this.state == STATE.BOOKMARK) { + this.keyname = YMarkTables.BOOKMARK.DESC.key(); + } else { + Log.logInfo(YMarkTables.BOOKMARKS_LOG, "YMarksXBELImporter - state: "+this.state+" tag: "+tag); + this.parsingValue = false; + return; + } + this.parsingValue = true; + } else if (XBEL.TITLE.tag().equals(tag)) { + if(this.state == STATE.FOLDER) { + this.keyname = null; + } else if (this.state == STATE.BOOKMARK) { + this.keyname = YMarkTables.BOOKMARK.TITLE.key(); + } else { + Log.logInfo(YMarkTables.BOOKMARKS_LOG, "YMarksXBELImporter - state: "+this.state+" tag: "+tag); + this.parsingValue = false; + return; + } + this.parsingValue = true; + } else { + this.parsingValue = false; + this.state = STATE.NOTHING; + } + + } + + public void endElement(final String uri, final String name, String tag) { + if (tag == null) return; + tag = tag.toLowerCase(); + if(XBEL.BOOKMARK.tag().equals(tag)) { + // write bookmark + if (!this.bmk.isEmpty()) { + this.bmk.put(YMarkTables.BOOKMARK.FOLDERS.key(), this.folder); + try { + this.bookmarks.put(this.bmk); + bmk = new HashMap(); + } catch (InterruptedException e) { + Log.logException(e); + } + } + this.state = STATE.FOLDER; + } else if (XBEL.FOLDER.tag().equals(tag)) { + this.state = STATE.NOTHING; + // go up one folder + if(!folder.equals(YMarkTables.FOLDERS_IMPORTED)) { + folder = folder.replaceAll("(/.[^/]*$)", ""); + this.state = STATE.FOLDER; + } + } + } + + public void characters(final char ch[], final int start, final int length) { + if (parsingValue) { + buffer.append(ch, start, length); + if (this.state == STATE.BOOKMARK) { + this.bmk.put(this.keyname, this.buffer.toString()); + } else if (this.state == STATE.FOLDER) { + this.folder = this.folder + YMarkTables.FOLDERS_SEPARATOR + this.buffer.toString(); + } else if (this.state == STATE.FOLDER_DESC) { + Log.logInfo(YMarkTables.BOOKMARKS_LOG, "YMarksXBELImporter - folder: "+this.folder+" desc: "+this.buffer.toString()); + this.state = STATE.FOLDER; + } + this.buffer.setLength(0); + this.parsingValue = false; + } + } + + public HashMap take() { + try { + return this.bookmarks.take(); + } catch (InterruptedException e) { + Log.logException(e); + return null; + } + } +}