diff --git a/.classpath b/.classpath index 4df1295e9..e95a21b7f 100644 --- a/.classpath +++ b/.classpath @@ -41,9 +41,13 @@ +<<<<<<< .mine + +======= +>>>>>>> .r7657 diff --git a/build.xml b/build.xml index 8081fb740..46c690039 100644 --- a/build.xml +++ b/build.xml @@ -199,6 +199,7 @@ + diff --git a/htroot/api/ymarks/import_ymark.java b/htroot/api/ymarks/import_ymark.java index c06ab59bb..f65b352e5 100644 --- a/htroot/api/ymarks/import_ymark.java +++ b/htroot/api/ymarks/import_ymark.java @@ -13,6 +13,7 @@ import org.xml.sax.SAXException; import de.anomic.data.UserDB; import de.anomic.data.ymark.YMarkHTMLImporter; +import de.anomic.data.ymark.YMarkJSONImporter; import de.anomic.data.ymark.YMarkMetadata; import de.anomic.data.ymark.YMarkTables; import de.anomic.data.ymark.YMarkXBELImporter; @@ -66,6 +67,14 @@ public class import_ymark { putBookmark(sb, bmk_user, bmk); } prop.put("result", "1"); + } else if(post.get("importer").equals("json") && byteIn != null) { + final YMarkJSONImporter jsonImporter = new YMarkJSONImporter(byteIn, 10); + t = new Thread(jsonImporter, "YMarks - JSON Importer"); + t.start(); + while ((bmk = jsonImporter.take()) != YMarkTables.POISON) { + putBookmark(sb, bmk_user, bmk); + } + prop.put("result", "1"); } } } else { @@ -92,7 +101,7 @@ public class import_ymark { } catch (RowSpaceExceededException e) { Log.logException(e); } catch (Failure e) { - Log.logWarning(YMarkTables.BOOKMARKS_LOG.toString(), "Importer - Failure for URL: "+bmk.get(YMarkTables.BOOKMARK.URL.key())); + Log.logException(e); } } } diff --git a/htroot/api/ymarks/test_import.html b/htroot/api/ymarks/test_import.html index 5d3b2bbaa..05aee6a02 100644 --- a/htroot/api/ymarks/test_import.html +++ b/htroot/api/ymarks/test_import.html @@ -15,6 +15,7 @@
diff --git a/lib/json-simple-1.1.License b/lib/json-simple-1.1.License new file mode 100644 index 000000000..261eeb9e9 --- /dev/null +++ b/lib/json-simple-1.1.License @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). 
+ + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. 
Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative 
Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
diff --git a/lib/json-simple-1.1.jar b/lib/json-simple-1.1.jar
new file mode 100644
index 000000000..f395f4147
Binary files /dev/null and b/lib/json-simple-1.1.jar differ
diff --git a/source/de/anomic/data/ymark/YMarkJSONImporter.java b/source/de/anomic/data/ymark/YMarkJSONImporter.java
new file mode 100644
index 000000000..144ebb42d
--- /dev/null
+++ b/source/de/anomic/data/ymark/YMarkJSONImporter.java
@@ -0,0 +1,230 @@
+package de.anomic.data.ymark;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.Reader;
+import java.nio.charset.Charset;
+import java.util.HashMap;
+import java.util.concurrent.ArrayBlockingQueue;
+
+import net.yacy.kelondro.logging.Log;
+
+import org.json.simple.parser.ContentHandler;
+import org.json.simple.parser.JSONParser;
+import org.json.simple.parser.ParseException;
+
+/**
+ * Importer for JSON bookmark files whose entries are typed with the Mozilla
+ * places types {@link #FOLDER} and {@link #BOOKMARK}.
+ *
+ * The importer is its own json-simple {@link ContentHandler}: {@link #run()}
+ * parses the input, puts one map per bookmark into a bounded queue and finally
+ * appends {@link YMarkTables#POISON}. Consumers call {@link #take()} until they
+ * receive that poison element.
+ */
+public class YMarkJSONImporter implements Runnable, ContentHandler{
+
+    public final static String FOLDER = "text/x-moz-place-container";
+    public final static String BOOKMARK = "text/x-moz-place";
+
+    /** Object keys of the JSON input that the importer refers to by name. */
+    public static enum JSON_KEY {
+        annos,
+        type,
+        title,
+        children,
+        keyword,
+        dateAdded,
+        lastModified,
+        uri;
+    }
+
+    // JSON text is UTF-8 by default; never fall back to the platform charset.
+    private final static Charset JSON_CHARSET = Charset.forName("UTF-8");
+
+    private final JSONParser parser;
+    private final ArrayBlockingQueue<HashMap<String,String>> bookmarks;
+    private final Reader json;
+    private final StringBuilder folderstring;
+    private final StringBuilder value;
+    private final StringBuilder key;
+    private final HashMap<String,String> obj;
+
+    private HashMap<String,String> bmk;
+    private int depth;
+    private boolean isFolder;
+    private boolean isBookmark;
+    private boolean isAnnos;
+
+    /**
+     * @param input     stream with the JSON document, decoded as UTF-8
+     * @param queueSize capacity of the queue between parser and consumer
+     */
+    public YMarkJSONImporter(final InputStream input, int queueSize) {
+        this.parser = new JSONParser();
+        this.bookmarks = new ArrayBlockingQueue<HashMap<String,String>>(queueSize);
+        this.json = new InputStreamReader(input, JSON_CHARSET);
+        this.folderstring = new StringBuilder(256);
+        this.key = new StringBuilder(16);
+        this.value = new StringBuilder(128);
+        this.obj = new HashMap<String,String>();
+        this.bmk = new HashMap<String,String>();
+        this.depth = 0;
+        this.isAnnos = false;
+        this.isBookmark = false;
+        this.isFolder = true;
+    }
+
+    public void startJSON() throws ParseException, IOException {
+    }
+
+    public void endJSON() throws ParseException, IOException {
+    }
+
+    /**
+     * Opens a folder level for a "children" array, or starts skipping for an
+     * "annos" array. The folder title is taken from the entries read so far, so
+     * it must precede "children" in the input object.
+     */
+    public boolean startArray() throws ParseException, IOException {
+        final String key = this.key.toString();
+        if(key.equals(JSON_KEY.children.toString()) && this.isFolder) {
+            if(this.depth > 0) {
+                this.folderstring.append(YMarkUtil.FOLDERS_SEPARATOR);
+                this.folderstring.append(this.obj.get(JSON_KEY.title.toString()));
+            }
+            this.depth++;
+        } else if(key.equals(JSON_KEY.annos.toString())) {
+            this.isAnnos = true;
+        }
+        return true;
+    }
+
+    /**
+     * Leaves an "annos" array, or otherwise closes one folder level by cutting
+     * the last path segment from the folder string.
+     */
+    public boolean endArray() throws ParseException, IOException {
+        if(this.isAnnos) {
+            this.isAnnos = false;
+        } else if(this.depth > 0) {
+            // The outermost level never appended a separator; lastIndexOf then
+            // yields -1, which setLength() rejects, so clamp it to 0.
+            final int cut = this.folderstring.lastIndexOf(YMarkUtil.FOLDERS_SEPARATOR);
+            this.folderstring.setLength(cut < 0 ? 0 : cut);
+            this.depth--;
+        }
+        return true;
+    }
+
+    public boolean startObject() throws ParseException, IOException {
+        if(!this.isAnnos) {
+            this.obj.clear();
+        }
+        return true;
+    }
+
+    /**
+     * Queues the collected entries as a bookmark if the object was typed
+     * {@link #BOOKMARK}; blocks while the queue is full.
+     */
+    public boolean endObject() throws ParseException, IOException {
+        if(this.isBookmark) {
+            this.bmk.put(YMarkTables.BOOKMARK.TITLE.key(),obj.get(JSON_KEY.title.toString()));
+            this.bmk.put(YMarkTables.BOOKMARK.URL.key(),obj.get(JSON_KEY.uri.toString()));
+            this.bmk.put(YMarkTables.BOOKMARK.DATE_ADDED.key(),obj.get(JSON_KEY.dateAdded.toString()));
+            this.bmk.put(YMarkTables.BOOKMARK.DATE_MODIFIED.key(),obj.get(JSON_KEY.lastModified.toString()));
+            this.bmk.put(YMarkTables.BOOKMARK.FOLDERS.key(),this.folderstring.toString());
+            if(this.obj.containsKey(JSON_KEY.keyword.toString())) {
+                this.bmk.put(YMarkTables.BOOKMARK.TAGS.key(),obj.get(JSON_KEY.keyword.toString()));
+            }
+            try {
+                this.bookmarks.put(this.bmk);
+            } catch (InterruptedException e) {
+                Log.logException(e);
+            }
+            this.bmk = new HashMap<String,String>();
+        }
+        this.isBookmark = false;
+        return true;
+    }
+
+    public boolean startObjectEntry(String key) throws ParseException, IOException {
+        if(!this.isAnnos) {
+            this.key.setLength(0);
+            this.key.append(key);
+        }
+        return true;
+    }
+
+    public boolean primitive(Object value) throws ParseException, IOException {
+        if(!this.isAnnos) {
+            this.value.setLength(0);
+            if(value instanceof java.lang.String) {
+                this.value.append((String)value);
+            } else if(value instanceof java.lang.Boolean) {
+                this.value.append((Boolean)value);
+            } else if(value instanceof java.lang.Number) {
+                this.value.append((Number)value);
+            }
+        }
+        return true;
+    }
+
+    public boolean endObjectEntry() throws ParseException, IOException {
+        if(!this.isAnnos) {
+            final String key = this.key.toString();
+            final String value = this.value.toString();
+            if(key.equals(JSON_KEY.type.toString())) {
+                if(value.equals(FOLDER)) {
+                    this.isFolder = true;
+                } else if(value.equals(BOOKMARK)) {
+                    this.isBookmark = true;
+                }
+            }
+            this.obj.put(key, value);
+        }
+        return true;
+    }
+
+    /**
+     * Parses the whole input and, whether or not parsing succeeded, puts
+     * {@link YMarkTables#POISON} into the queue so that consumers terminate.
+     */
+    public void run() {
+        try {
+            Log.logInfo(YMarkTables.BOOKMARKS_LOG, "JSON Importer run()");
+            this.parser.parse(json, this, true);
+        } catch (IOException e) {
+            Log.logException(e);
+        } catch (ParseException e) {
+            Log.logException(e);
+        } finally {
+            try {
+                Log.logInfo(YMarkTables.BOOKMARKS_LOG, "JSON Importer inserted poison pill in queue");
+                this.bookmarks.put(YMarkTables.POISON);
+            } catch (InterruptedException e) {
+                Log.logException(e);
+            }
+        }
+    }
+
+    /**
+     * Blocks until the next bookmark is available.
+     *
+     * @return the next bookmark, or {@link YMarkTables#POISON} when the import
+     *         is finished or the waiting thread was interrupted
+     */
+    public HashMap<String,String> take() {
+        try {
+            return this.bookmarks.take();
+        } catch (InterruptedException e) {
+            Log.logException(e);
+            // Keep the interrupt visible to the caller and end its
+            // "take until POISON" loop instead of handing out null.
+            Thread.currentThread().interrupt();
+            return YMarkTables.POISON;
+        }
+    }
+}
diff --git a/source/de/anomic/data/ymark/YMarkUtil.java b/source/de/anomic/data/ymark/YMarkUtil.java
index 86c7c7a20..3506fc64b 100644
--- a/source/de/anomic/data/ymark/YMarkUtil.java
+++ b/source/de/anomic/data/ymark/YMarkUtil.java
@@ -37,12 +37,25 @@ public class YMarkUtil {
     public final static String TAGS_SEPARATOR = ",";
     public final static String FOLDERS_SEPARATOR = "/";
 
+    /**
+     * convenience function to generate url hashes for YMark bookmarks
+     * @param url a string representation of a valid url
+     * @return a
byte[] hash for the input URL string + * @throws MalformedURLException + * @see net.yacy.kelondro.data.meta.DigestURI.DigestURI(String url, byte[] hash).hash() + */ public final static byte[] getBookmarkId(String url) throws MalformedURLException { return (new DigestURI(url, null)).hash(); } - public final static byte[] getKeyId(final String tag) { - return Word.word2hash(tag.toLowerCase()); + /** + * convenience function to generate word hashes for YMark tags and folders + * @param key a tag or folder name + * @return a byte[] hash for the input string + * @see net.yacy.kelondro.data.word.Word#word2hash(String) + */ + public final static byte[] getKeyId(final String key) { + return Word.word2hash(key.toLowerCase()); } public final static byte[] keySetToBytes(final HashSet urlSet) { diff --git a/source/de/anomic/data/ymark/YMarkXBELImporter.java b/source/de/anomic/data/ymark/YMarkXBELImporter.java index 85debaeba..141b74374 100644 --- a/source/de/anomic/data/ymark/YMarkXBELImporter.java +++ b/source/de/anomic/data/ymark/YMarkXBELImporter.java @@ -240,6 +240,12 @@ public class YMarkXBELImporter extends DefaultHandler implements Runnable { public void characters(final char ch[], final int start, final int length) { if (parse_value) { buffer.append(ch, start, length); + for (int i = 0; i < buffer.length()-1; i++) { + if(buffer.charAt(i) == '\n' | buffer.charAt(i) == '\t') { + buffer.deleteCharAt(i); + i--; + } + } switch(outer_state) { case BOOKMARK: switch(inner_state) {