diff --git a/htroot/Bookmarks.java b/htroot/Bookmarks.java index bda54ac5d..78b201434 100644 --- a/htroot/Bookmarks.java +++ b/htroot/Bookmarks.java @@ -43,10 +43,10 @@ import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.util.DateFormatter; import net.yacy.repository.LoaderDispatcher; +import de.anomic.data.BookmarkHelper; import de.anomic.data.bookmarksDB; import de.anomic.data.listManager; import de.anomic.data.userDB; -import de.anomic.data.bookmarksDB.Tag; import de.anomic.http.server.RequestHeader; import de.anomic.search.Segments; import de.anomic.search.Switchboard; @@ -143,7 +143,7 @@ public class Bookmarks { pathString="/unsorted"; //default folder } tagsString=tagsString+","+pathString; - final Set tags=listManager.string2set(bookmarksDB.cleanTagsString(tagsString)); + final Set tags=listManager.string2set(BookmarkHelper.cleanTagsString(tagsString)); final bookmarksDB.Bookmark bookmark = sb.bookmarksDB.createBookmark(url, username); if(bookmark != null){ bookmark.setProperty(bookmarksDB.Bookmark.BOOKMARK_TITLE, title); @@ -233,7 +233,7 @@ public class Bookmarks { Log.logInfo("BOOKMARKS", "I try to import bookmarks from HTML-file"); try { final File file=new File(post.get("htmlfile")); - sb.bookmarksDB.importFromBookmarks(new DigestURI(file) , post.get("htmlfile$file"), tags, isPublic); + BookmarkHelper.importFromBookmarks(sb.bookmarksDB, new DigestURI(file), post.get("htmlfile$file"), tags, isPublic); } catch (final MalformedURLException e) {} Log.logInfo("BOOKMARKS", "success!!"); }else if(post.containsKey("xmlfile")){ @@ -241,7 +241,7 @@ public class Bookmarks { if((post.get("public")).equals("public")){ isPublic=true; } - sb.bookmarksDB.importFromXML(post.get("xmlfile$file"), isPublic); + BookmarkHelper.importFromXML(sb.bookmarksDB, post.get("xmlfile$file"), isPublic); }else if(post.containsKey("delete")){ final String urlHash=post.get("delete"); sb.bookmarksDB.removeBookmark(urlHash); @@ -357,8 +357,8 @@ public class Bookmarks { private 
static void printTagList(final String id, final String tagName, final int comp, final int max, final boolean opt){ int count=0; - bookmarksDB.Tag tag; - Iterator it = null; + bookmarksDB.Tag tag; + Iterator it = null; if (tagName.equals("")) { it = sb.bookmarksDB.getTagIterator(isAdmin, comp, max); diff --git a/htroot/Crawler_p.java b/htroot/Crawler_p.java index 409dad7ed..73d8f3331 100644 --- a/htroot/Crawler_p.java +++ b/htroot/Crawler_p.java @@ -45,6 +45,7 @@ import net.yacy.kelondro.util.FileUtils; import de.anomic.crawler.CrawlProfile; import de.anomic.crawler.SitemapImporter; import de.anomic.crawler.retrieval.Request; +import de.anomic.data.BookmarkHelper; import de.anomic.data.Tables; import de.anomic.data.bookmarksDB; import de.anomic.data.listManager; @@ -259,7 +260,7 @@ public class Crawler_p { if (reasonString == null) { // create a bookmark from crawl start url - Set tags=listManager.string2set(bookmarksDB.cleanTagsString(post.get("bookmarkFolder","/crawlStart"))); + Set tags=listManager.string2set(BookmarkHelper.cleanTagsString(post.get("bookmarkFolder","/crawlStart"))); tags.add("crawlStart"); if (post.get("createBookmark","off").equals("on")) { bookmarksDB.Bookmark bookmark = sb.bookmarksDB.createBookmark(crawlingStart, "admin"); diff --git a/htroot/Network.java b/htroot/Network.java index c16c51b97..bdc545348 100644 --- a/htroot/Network.java +++ b/htroot/Network.java @@ -291,6 +291,7 @@ public class Network { if(e != null) { while (e.hasNext() && conCount < maxCount) { seed = e.next(); + assert seed != null; if (seed != null) { if((post != null && post.containsKey("search")) && peerSearchPattern != null /*(wrongregex == null)*/) { boolean abort = true; diff --git a/htroot/api/bookmarks/get_bookmarks.java b/htroot/api/bookmarks/get_bookmarks.java index a884801f1..07c200135 100644 --- a/htroot/api/bookmarks/get_bookmarks.java +++ b/htroot/api/bookmarks/get_bookmarks.java @@ -6,6 +6,7 @@ import java.util.Iterator; import 
net.yacy.document.parser.html.CharacterCoding; import net.yacy.kelondro.util.DateFormatter; +import de.anomic.data.BookmarkHelper; import de.anomic.data.bookmarksDB; import de.anomic.data.userDB; import de.anomic.http.server.RequestHeader; @@ -96,7 +97,7 @@ public class get_bookmarks { if (qtype.equals("tags") && !query.equals("")) { prop.putHTML("display_folder", "1"); prop.putHTML("display_folder_foldername", query); - prop.putHTML("display_folder_folderhash", bookmarksDB.tagHash(query)); + prop.putHTML("display_folder_folderhash", BookmarkHelper.tagHash(query)); it = sb.bookmarksDB.getBookmarksIterator(query, isAdmin); count = print_XBEL(it, count); prop.put("display_xbel", count); @@ -202,7 +203,7 @@ public class get_bookmarks { } if(fn.startsWith((root.equals("/") ? root : root+"/"))){ - prop.put("display_xbel_"+count+"_elements", ""); + prop.put("display_xbel_"+count+"_elements", ""); count++; final String title = fn; // just to make sure fn stays untouched diff --git a/htroot/api/bookmarks/posts/add_p.java b/htroot/api/bookmarks/posts/add_p.java index 3e752e78a..7d862764c 100755 --- a/htroot/api/bookmarks/posts/add_p.java +++ b/htroot/api/bookmarks/posts/add_p.java @@ -2,6 +2,7 @@ import java.util.HashMap; import java.util.Set; +import de.anomic.data.BookmarkHelper; import de.anomic.data.bookmarksDB; import de.anomic.data.listManager; import de.anomic.data.userDB; @@ -45,7 +46,7 @@ public class add_p { String tagsString = post.get("tags",""); String pathString = post.get("path","/unsorted"); tagsString=tagsString+","+pathString; - final Set tags=listManager.string2set(bookmarksDB.cleanTagsString(tagsString)); + final Set tags=listManager.string2set(BookmarkHelper.cleanTagsString(tagsString)); final bookmarksDB.Bookmark bookmark = sb.bookmarksDB.createBookmark(url, username); if(bookmark != null){ bookmark.setProperty(bookmarksDB.Bookmark.BOOKMARK_TITLE, title); diff --git a/htroot/api/bookmarks/tags/getTag.java b/htroot/api/bookmarks/tags/getTag.java index 
2a7bd5d8c..1d67caeb8 100755 --- a/htroot/api/bookmarks/tags/getTag.java +++ b/htroot/api/bookmarks/tags/getTag.java @@ -2,7 +2,6 @@ import java.util.Iterator; import de.anomic.data.bookmarksDB; -import de.anomic.data.bookmarksDB.Tag; import de.anomic.http.server.RequestHeader; import de.anomic.search.Switchboard; import de.anomic.server.serverObjects; @@ -18,7 +17,7 @@ public class getTag { final Switchboard switchboard = (Switchboard) env; final boolean isAdmin=switchboard.verifyAuthentication(header, true); final serverObjects prop = new serverObjects(); - Iterator it = null; + Iterator it = null; String tagName = ""; int top = SHOW_ALL; int comp = SORT_ALPHA; diff --git a/htroot/api/bookmarks/xbel/xbel.java b/htroot/api/bookmarks/xbel/xbel.java index 2e9c88052..addeb98af 100755 --- a/htroot/api/bookmarks/xbel/xbel.java +++ b/htroot/api/bookmarks/xbel/xbel.java @@ -6,6 +6,7 @@ import java.util.Iterator; import net.yacy.document.parser.html.CharacterCoding; import net.yacy.kelondro.util.DateFormatter; +import de.anomic.data.BookmarkHelper; import de.anomic.data.bookmarksDB; import de.anomic.http.server.RequestHeader; import de.anomic.search.Switchboard; @@ -80,7 +81,7 @@ public class xbel { } if(fn.startsWith((root.equals("/") ? 
root : root+"/"))){ - prop.put("xbel_"+count+"_elements", ""); + prop.put("xbel_"+count+"_elements", ""); count++; final String title = fn; // just to make sure fn stays untouched diff --git a/source/de/anomic/data/BookmarkDate.java b/source/de/anomic/data/BookmarkDate.java new file mode 100644 index 000000000..d9c2d3ada --- /dev/null +++ b/source/de/anomic/data/BookmarkDate.java @@ -0,0 +1,165 @@ +// BookmarkDate.java +// ------------------------------------- +// part of YACY +// (C) by Michael Peter Christen; mc@yacy.net +// first published on http://www.anomic.de +// Frankfurt, Germany, 2004 +// +// Methods in this file were originally contributed by Alexander Schier +// and were refactored by Michael Christen for a better method structure 30.01.2010 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. 
+// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + +package de.anomic.data; + +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Iterator; +import java.util.Map; + +import de.anomic.data.bookmarksDB.Bookmark; + +import net.yacy.kelondro.blob.MapHeap; +import net.yacy.kelondro.logging.Log; +import net.yacy.kelondro.order.NaturalOrder; + +public class BookmarkDate { + + MapHeap datesTable; + + public BookmarkDate(File datesFile) throws IOException { + this.datesTable = new MapHeap(datesFile, 20, NaturalOrder.naturalOrder, 1024 * 64, 500, '_'); + } + + public void close() { + this.datesTable.close(); + } + + + public Entry getDate(final String date) { + Map map; + try { + map = datesTable.get(date); + } catch (final IOException e) { + map = null; + } + if (map==null) return new Entry(date); + return new Entry(date, map); + } + + // rebuilds the dates table from the bookmarks supplied by the given iterator + public void init(Iterator it) { + Log.logInfo("BOOKMARKS", "start init dates.db from bookmarks.db..."); + //final Iterator it=bookmarkIterator(true); + Bookmark bookmark; + String date; + Entry bmDate; + int count = 0; + while(it.hasNext()){ + bookmark=it.next(); + date = String.valueOf(bookmark.getTimeStamp()); + bmDate=getDate(date); + if(bmDate==null){ + bmDate=new Entry(date); + } + bmDate.add(bookmark.getUrlHash()); + bmDate.setDatesTable(); + count++; + } + Log.logInfo("BOOKMARKS", "finished init "+datesTable.size()+" dates using " + count + " bookmarks."); + } + + /** + * Inner class of BookmarkDate, which provides the date entry object-type + */ + public class Entry { + public static final String URL_HASHES="urlHashes"; + private final Map mem; + String date; + + public Entry(final String mydate){ + //round to seconds, but store as milliseconds (java 
timestamp) + date=String.valueOf((Long.parseLong(mydate)/1000)*1000); + mem=new HashMap(); + mem.put(URL_HASHES, ""); + } + + public Entry(final String mydate, final Map map){ + //round to seconds, but store as milliseconds (java timestamp) + date=String.valueOf((Long.parseLong(mydate)/1000)*1000); + mem=map; + } + public Entry(final String mydate, final ArrayList entries){ + //round to seconds, but store as milliseconds (java timestamp) + date=String.valueOf((Long.parseLong(mydate)/1000)*1000); + mem=new HashMap(); + mem.put(URL_HASHES, listManager.collection2string(entries)); + } + public void add(final String urlHash){ + final String urlHashes = mem.get(URL_HASHES); + ArrayList list; + if(urlHashes != null && !urlHashes.equals("")){ + list=listManager.string2arraylist(urlHashes); + }else{ + list=new ArrayList(); + } + if(!list.contains(urlHash) && urlHash != null && !urlHash.equals("")){ + list.add(urlHash); + } + this.mem.put(URL_HASHES, listManager.collection2string(list)); + /*if(urlHashes!=null && !urlHashes.equals("") ){ + if(urlHashes.indexOf(urlHash) <0){ + this.mem.put(URL_HASHES, urlHashes+","+urlHash); + } + }else{ + this.mem.put(URL_HASHES, urlHash); + }*/ + } + public void delete(final String urlHash){ + final ArrayList list=listManager.string2arraylist(this.mem.get(URL_HASHES)); + if(list.contains(urlHash)){ + list.remove(urlHash); + } + this.mem.put(URL_HASHES, listManager.collection2string(list)); + } + public void setDatesTable() { + if (this.size() >0) { + try { + datesTable.put(getDateString(), mem); + } catch (Exception e) { + Log.logException(e); + } + } else { + try { + datesTable.remove(getDateString()); + } catch (IOException e) { + Log.logException(e); + } + } + } + public String getDateString(){ + return date; + } + public ArrayList getBookmarkList(){ + return listManager.string2arraylist(this.mem.get(URL_HASHES)); + } + public int size(){ + return listManager.string2arraylist(this.mem.get(URL_HASHES)).size(); + } + } +} diff --git 
a/source/de/anomic/data/BookmarkHelper.java b/source/de/anomic/data/BookmarkHelper.java new file mode 100644 index 000000000..2d88b9f7a --- /dev/null +++ b/source/de/anomic/data/BookmarkHelper.java @@ -0,0 +1,255 @@ +// BookmarkHelper.java +// ------------------------------------- +// part of YACY +// (C) by Michael Peter Christen; mc@yacy.net +// first published on http://www.anomic.de +// Frankfurt, Germany, 2004 +// +// Methods in this file were originally contributed by Alexander Schier +// and were refactored by Michael Christen for a better method structure 30.01.2010 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. 
+// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +package de.anomic.data; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.UnsupportedEncodingException; +import java.io.Writer; +import java.text.ParseException; +import java.util.Date; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; +import java.util.Map.Entry; + +import javax.xml.parsers.DocumentBuilder; +import javax.xml.parsers.DocumentBuilderFactory; +import javax.xml.parsers.ParserConfigurationException; + +import org.w3c.dom.Document; +import org.w3c.dom.NamedNodeMap; +import org.w3c.dom.Node; +import org.w3c.dom.NodeList; +import org.xml.sax.SAXException; + +import de.anomic.data.bookmarksDB.Bookmark; +import net.yacy.document.parser.html.ContentScraper; +import net.yacy.document.parser.html.TransformerWriter; +import net.yacy.kelondro.data.meta.DigestURI; +import net.yacy.kelondro.data.word.Word; +import net.yacy.kelondro.logging.Log; +import net.yacy.kelondro.util.DateFormatter; +import net.yacy.kelondro.util.FileUtils; + +public class BookmarkHelper { + + public static String cleanTagsString(String tagsString) { + + // get rid of heading, trailing and double commas since they are useless + while (tagsString.length() > 0 && tagsString.charAt(0) == ',') { + tagsString = tagsString.substring(1); + } + while (tagsString.endsWith(",")) { + tagsString = tagsString.substring(0,tagsString.length() -1); + } + while (tagsString.contains(",,")){ + tagsString = tagsString.replaceAll(",,", ","); + } + // get rid of double and trailing slashes + while (tagsString.endsWith("/")){ + tagsString = tagsString.substring(0, tagsString.length() -1); + } + while (tagsString.contains("/,")){ + tagsString = 
tagsString.replaceAll("/,", ","); + } + while (tagsString.contains("//")){ + tagsString = tagsString.replaceAll("//", "/"); + } + // space characters following a comma are removed + tagsString = tagsString.replaceAll(",\\s+", ","); + + return tagsString; + } + + + /** + * returns an object of type String that contains a tagHash + * @param tagName an object of type String with the name of the tag. + * tagName is converted to lower case before hash is generated! + */ + public static String tagHash(final String tagName){ + return new String(Word.word2hash(tagName.toLowerCase())); + } + /* + private static String tagHash(final String tagName, final String user){ + return new String(Word.word2hash(user+":"+tagName.toLowerCase())); + } + */ + + + + // -------------------------------------- + // bookmarksDB's Import/Export functions + // -------------------------------------- + + public static int importFromBookmarks(bookmarksDB db, final DigestURI baseURL, final String input, final String tag, final boolean importPublic){ + try { + // convert string to input stream + final ByteArrayInputStream byteIn = new ByteArrayInputStream(input.getBytes("UTF-8")); + final InputStreamReader reader = new InputStreamReader(byteIn,"UTF-8"); + + // import stream + return importFromBookmarks(db, baseURL, reader, tag, importPublic); + } catch (final UnsupportedEncodingException e) { + return 0; + } + } + + private static int importFromBookmarks(bookmarksDB db, final DigestURI baseURL, final InputStreamReader input, final String tag, final boolean importPublic){ + + int importCount = 0; + + Map links = new HashMap(); + String title; + DigestURI url; + Bookmark bm; + final Set tags=listManager.string2set(tag); //this allow multiple default tags + try { + //load the links + final ContentScraper scraper = new ContentScraper(baseURL); + //OutputStream os = new htmlFilterOutputStream(null, scraper, null, false); + final Writer writer= new TransformerWriter(null,null,scraper, null, false); + 
FileUtils.copy(input,writer); + writer.close(); + links = scraper.getAnchors(); + } catch (final IOException e) { Log.logWarning("BOOKMARKS", "error during load of links: "+ e.getClass() +" "+ e.getMessage());} + for (Entry link: links.entrySet()) { + url= link.getKey(); + title=link.getValue(); + Log.logInfo("BOOKMARKS", "links.get(url)"); + if(title.equals("")){//cannot be displayed + title=url.toString(); + } + bm=db.new Bookmark(url.toString()); + bm.setProperty(Bookmark.BOOKMARK_TITLE, title); + bm.setTags(tags); + bm.setPublic(importPublic); + db.saveBookmark(bm); + + importCount++; + } + + db.flushTagCache(); + + return importCount; + } + + + public static int importFromXML(bookmarksDB db, final String input, final boolean importPublic){ + try { + // convert string to input stream + final ByteArrayInputStream byteIn = new ByteArrayInputStream(input.getBytes("UTF-8")); + + // import stream + return importFromXML(db, byteIn,importPublic); + } catch (final UnsupportedEncodingException e) { + return 0; + } + } + + private static int importFromXML(bookmarksDB db, final InputStream input, final boolean importPublic){ + final DocumentBuilderFactory factory=DocumentBuilderFactory.newInstance(); + factory.setValidating(false); + factory.setNamespaceAware(false); + DocumentBuilder builder; + try { + builder = factory.newDocumentBuilder(); + final Document doc=builder.parse(input); + return parseXMLimport(db, doc, importPublic); + } catch (final ParserConfigurationException e) { + } catch (final SAXException e) { + } catch (final IOException e) { + } + return 0; + + } + + private static int parseXMLimport(bookmarksDB db, final Node doc, final boolean importPublic){ + int importCount = 0; + if (doc.getNodeName().equals("post")) { + final NamedNodeMap attributes = doc.getAttributes(); + final String url=attributes.getNamedItem("href").getNodeValue(); + if(url.equals("")){ + return 0; + } + final Bookmark bm=db.new Bookmark(url); + String tagsString=""; + String title=""; 
+ String description=""; + String time=""; + if(attributes.getNamedItem("tag")!=null){ + tagsString=attributes.getNamedItem("tag").getNodeValue(); + } + if(attributes.getNamedItem("description")!=null){ + title=attributes.getNamedItem("description").getNodeValue(); + } + if(attributes.getNamedItem("extended")!=null){ + description=attributes.getNamedItem("extended").getNodeValue(); + } + if(attributes.getNamedItem("time")!=null){ + time=attributes.getNamedItem("time").getNodeValue(); + } + Set tags=new HashSet(); + + if(title != null){ + bm.setProperty(Bookmark.BOOKMARK_TITLE, title); + } + if(tagsString!=null){ + tags = listManager.string2set(tagsString.replace(' ', ',')); + } + bm.setTags(tags, true); + if(time != null){ + + Date parsedDate = null; + try { + parsedDate = DateFormatter.parseISO8601(time); + } catch (final ParseException e) { + parsedDate = new Date(); + } + bm.setTimeStamp(parsedDate.getTime()); + } + if(description!=null){ + bm.setProperty(Bookmark.BOOKMARK_DESCRIPTION, description); + } + bm.setPublic(importPublic); + db.saveBookmark(bm); + + importCount++; + } + final NodeList children=doc.getChildNodes(); + if(children != null){ + for (int i=0; i tagCache; - - // dates - MapHeap datesTable; + ConcurrentHashMap tagCache; // autoReCrawl private final BusyThread autoReCrawl; + BookmarkDate dates; + // ------------------------------------ // bookmarksDB's class constructor // ------------------------------------ @@ -127,8 +107,8 @@ public class bookmarksDB { // dates final boolean datesExisted = datesFile.exists(); //this.datesTable = new MapView(BLOBTree.toHeap(datesFile, true, true, 20, 256, '_', NaturalOrder.naturalOrder, datesFileNew), 500, '_'); - this.datesTable = new MapHeap(datesFile, 20, NaturalOrder.naturalOrder, 1024 * 64, 500, '_'); - if (!datesExisted) rebuildDates(); + this.dates = new BookmarkDate(datesFile); + if (!datesExisted) this.dates.init(bookmarkIterator(true)); // autoReCrawl Switchboard sb = Switchboard.getSwitchboard(); 
@@ -148,7 +128,7 @@ public class bookmarksDB { bookmarksTable.close(); flushTagCache(); tagsTable.close(); - datesTable.close(); + dates.close(); } // ----------------------------------------------------- @@ -310,19 +290,7 @@ public class bookmarksDB { // ------------------------------------- // bookmarksDB's public helper functions - // ------------------------------------- - - /** - * returns an object of type String that contains a tagHash - * @param tagName an object of type String with the name of the tag. - * tagName is converted to lower case before hash is generated! - */ - public static String tagHash(final String tagName){ - return new String(Word.word2hash(tagName.toLowerCase())); - } - public static String tagHash(final String tagName, final String user){ - return new String(Word.word2hash(user+":"+tagName.toLowerCase())); - } + // ------------------------------------- public Iterator getFolderList(final boolean priv){ return getFolderList("/", priv); @@ -339,7 +307,7 @@ public class bookmarksDB { tag=it.next(); if (tag.getFriendlyName().startsWith((root.equals("/") ? 
root : root+"/"))) { path = tag.getFriendlyName(); - path = cleanTagsString(path); + path = BookmarkHelper.cleanTagsString(path); while(path.length() > 0 && !path.equals(root)){ folders.add(path); path = path.replaceAll("(/.[^/]*$)", ""); // create missing folders in path @@ -350,34 +318,6 @@ public class bookmarksDB { folders.add("\uffff"); return folders.iterator(); } - - public static String cleanTagsString(String tagsString){ - - // get rid of heading, trailing and double commas since they are useless - while (tagsString.length() > 0 && tagsString.charAt(0) == ',') { - tagsString = tagsString.substring(1); - } - while (tagsString.endsWith(",")) { - tagsString = tagsString.substring(0,tagsString.length() -1); - } - while(tagsString.contains(",,")){ - tagsString = tagsString.replaceAll(",,", ","); - } - // get rid of double and trailing slashes - while(tagsString.endsWith("/")){ - tagsString = tagsString.substring(0, tagsString.length() -1); - } - while(tagsString.contains("/,")){ - tagsString = tagsString.replaceAll("/,", ","); - } - while(tagsString.contains("//")){ - tagsString = tagsString.replaceAll("//", "/"); - } - // space characters following a comma are removed - tagsString = tagsString.replaceAll(",\\s+", ","); - - return tagsString; - } // ----------------------------------------------------------- // bookmarksDB's functions for bookmarksTable / bookmarkCache @@ -426,7 +366,7 @@ public class bookmarksDB { bookmarksDB.Tag tag=null; final Iterator it=tags.iterator(); while(it.hasNext()){ - tag=getTag(tagHash(it.next())); + tag=getTag(BookmarkHelper.tagHash(it.next())); if(tag!=null){ tag.delete(urlHash); saveTag(tag); @@ -465,7 +405,7 @@ public class bookmarksDB { public Iterator getBookmarksIterator(final String tagName, final boolean priv){ final TreeSet set=new TreeSet(new bookmarkComparator(true)); - final String tagHash=tagHash(tagName); + final String tagHash=BookmarkHelper.tagHash(tagName); final Tag tag=getTag(tagHash); Set hashes=new HashSet(); 
if(tag != null){ @@ -641,7 +581,7 @@ public class bookmarksDB { tags = bm.getTags(); it = tags.iterator(); while (it.hasNext()) { - tag=getTag( tagHash(it.next()) ); + tag=getTag(BookmarkHelper.tagHash(it.next()) ); if(priv ||tag.hasPublicItems()){ set.add(tag); } @@ -672,10 +612,10 @@ public class bookmarksDB { String[] tags; while(it.hasNext()){ bookmark=it.next(); - tags = cleanTagsString(bookmark.getTagsString() + bookmark.getFoldersString()).split(","); + tags = BookmarkHelper.cleanTagsString(bookmark.getTagsString() + bookmark.getFoldersString()).split(","); tag=null; for(int i=0;i map; - try { - map = datesTable.get(date); - } catch (final IOException e) { - map = null; - } - if(map==null) return new bookmarksDate(date); - return new bookmarksDate(date, map); - } - // rebuilds the datesDB from the bookmarksDB - public void rebuildDates(){ - Log.logInfo("BOOKMARKS", "rebuilding dates.db from bookmarks.db..."); - final Iterator it=bookmarkIterator(true); - Bookmark bookmark; - String date; - bookmarksDate bmDate; - while(it.hasNext()){ - bookmark=it.next(); - date = String.valueOf(bookmark.getTimeStamp()); - bmDate=getDate(date); - if(bmDate==null){ - bmDate=new bookmarksDate(date); - } - bmDate.add(bookmark.getUrlHash()); - bmDate.setDatesTable(); - } - Log.logInfo("BOOKMARKS", "Rebuilt "+datesTable.size()+" dates using your "+bookmarksTable.size()+" bookmarks."); - } // ------------------------------------- // bookmarksDB's experimental functions @@ -727,10 +633,10 @@ public class bookmarksDB { public boolean renameTag(final String oldName, final String newName){ - final Tag oldTag=getTag(tagHash(oldName)); + final Tag oldTag=getTag(BookmarkHelper.tagHash(oldName)); if (oldTag != null) { final Set urlHashes = oldTag.getUrlHashes(); // preserve urlHashes of oldTag - removeTag(tagHash(oldName)); // remove oldHash from TagsDB + removeTag(BookmarkHelper.tagHash(oldName)); // remove oldHash from TagsDB final Iterator it = urlHashes.iterator(); Bookmark bookmark; 
Set tags = new TreeSet(String.CASE_INSENSITIVE_ORDER); @@ -747,7 +653,7 @@ public class bookmarksDB { return false; } public void addTag(final String selectTag, final String newTag){ - final Iterator it = getTag(tagHash(selectTag)).getUrlHashes().iterator(); // get urlHashes for selectTag + final Iterator it = getTag(BookmarkHelper.tagHash(selectTag)).getUrlHashes().iterator(); // get urlHashes for selectTag Bookmark bookmark; while (it.hasNext()) { // looping through all bookmarks which were tagged with selectTag bookmark = getBookmark(it.next()); @@ -756,155 +662,6 @@ public class bookmarksDB { } } - - // -------------------------------------- - // bookmarksDB's Import/Export functions - // -------------------------------------- - - public int importFromBookmarks(final DigestURI baseURL, final String input, final String tag, final boolean importPublic){ - try { - // convert string to input stream - final ByteArrayInputStream byteIn = new ByteArrayInputStream(input.getBytes("UTF-8")); - final InputStreamReader reader = new InputStreamReader(byteIn,"UTF-8"); - - // import stream - return this.importFromBookmarks(baseURL,reader,tag,importPublic); - } catch (final UnsupportedEncodingException e) { - return 0; - } - } - - public int importFromBookmarks(final DigestURI baseURL, final InputStreamReader input, final String tag, final boolean importPublic){ - - int importCount = 0; - - Map links = new HashMap(); - String title; - DigestURI url; - Bookmark bm; - final Set tags=listManager.string2set(tag); //this allow multiple default tags - try { - //load the links - final ContentScraper scraper = new ContentScraper(baseURL); - //OutputStream os = new htmlFilterOutputStream(null, scraper, null, false); - final Writer writer= new TransformerWriter(null,null,scraper, null, false); - FileUtils.copy(input,writer); - writer.close(); - links = scraper.getAnchors(); - } catch (final IOException e) { Log.logWarning("BOOKMARKS", "error during load of links: "+ e.getClass() +" "+ 
e.getMessage());} - for (Entry link: links.entrySet()) { - url= link.getKey(); - title=link.getValue(); - Log.logInfo("BOOKMARKS", "links.get(url)"); - if(title.equals("")){//cannot be displayed - title=url.toString(); - } - bm=new Bookmark(url.toString()); - bm.setProperty(Bookmark.BOOKMARK_TITLE, title); - bm.setTags(tags); - bm.setPublic(importPublic); - saveBookmark(bm); - - importCount++; - } - - flushTagCache(); - - return importCount; - } - - public int importFromXML(final String input, final boolean importPublic){ - try { - // convert string to input stream - final ByteArrayInputStream byteIn = new ByteArrayInputStream(input.getBytes("UTF-8")); - - // import stream - return this.importFromXML(byteIn,importPublic); - } catch (final UnsupportedEncodingException e) { - return 0; - } - } - - public int importFromXML(final InputStream input, final boolean importPublic){ - final DocumentBuilderFactory factory=DocumentBuilderFactory.newInstance(); - factory.setValidating(false); - factory.setNamespaceAware(false); - DocumentBuilder builder; - try { - builder = factory.newDocumentBuilder(); - final Document doc=builder.parse(input); - return parseXMLimport(doc, importPublic); - } catch (final ParserConfigurationException e) { - } catch (final SAXException e) { - } catch (final IOException e) { - } - return 0; - - } - - public int parseXMLimport(final Node doc, final boolean importPublic){ - int importCount = 0; - if(doc.getNodeName()=="post"){ - final NamedNodeMap attributes = doc.getAttributes(); - final String url=attributes.getNamedItem("href").getNodeValue(); - if(url.equals("")){ - return 0; - } - final Bookmark bm=new Bookmark(url); - String tagsString=""; - String title=""; - String description=""; - String time=""; - if(attributes.getNamedItem("tag")!=null){ - tagsString=attributes.getNamedItem("tag").getNodeValue(); - } - if(attributes.getNamedItem("description")!=null){ - title=attributes.getNamedItem("description").getNodeValue(); - } - 
if(attributes.getNamedItem("extended")!=null){ - description=attributes.getNamedItem("extended").getNodeValue(); - } - if(attributes.getNamedItem("time")!=null){ - time=attributes.getNamedItem("time").getNodeValue(); - } - Set tags=new HashSet(); - - if(title != null){ - bm.setProperty(Bookmark.BOOKMARK_TITLE, title); - } - if(tagsString!=null){ - tags = listManager.string2set(tagsString.replace(' ', ',')); - } - bm.setTags(tags, true); - if(time != null){ - - Date parsedDate = null; - try { - parsedDate = DateFormatter.parseISO8601(time); - } catch (final ParseException e) { - parsedDate = new Date(); - } - bm.setTimeStamp(parsedDate.getTime()); - } - if(description!=null){ - bm.setProperty(Bookmark.BOOKMARK_DESCRIPTION, description); - } - bm.setPublic(importPublic); - saveBookmark(bm); - - importCount++; - } - final NodeList children=doc.getChildNodes(); - if(children != null){ - for (int i=0; i(); } public Tag(final String name, final HashSet entries){ - tagHash=tagHash(name); + tagHash=BookmarkHelper.tagHash(name); mem=new HashMap(); //mem.put(URL_HASHES, listManager.arraylist2string(entries)); urlHashes=entries; mem.put(TAG_NAME, name); } public Tag(final String name){ - tagHash=tagHash(name); + tagHash=BookmarkHelper.tagHash(name); mem=new HashMap(); //mem.put(URL_HASHES, ""); urlHashes=new HashSet(); @@ -991,84 +748,7 @@ public class bookmarksDB { return urlHashes.size(); } } - /** - * Subclass of bookmarksDB, which provide the bookmarksDate object-type - */ - public class bookmarksDate{ - public static final String URL_HASHES="urlHashes"; - private final Map mem; - String date; - - public bookmarksDate(final String mydate){ - //round to seconds, but store as milliseconds (java timestamp) - date=String.valueOf((Long.parseLong(mydate)/1000)*1000); - mem=new HashMap(); - mem.put(URL_HASHES, ""); - } - - public bookmarksDate(final String mydate, final Map map){ - //round to seconds, but store as milliseconds (java timestamp) - 
date=String.valueOf((Long.parseLong(mydate)/1000)*1000); - mem=map; - } - public bookmarksDate(final String mydate, final ArrayList entries){ - //round to seconds, but store as milliseconds (java timestamp) - date=String.valueOf((Long.parseLong(mydate)/1000)*1000); - mem=new HashMap(); - mem.put(URL_HASHES, listManager.collection2string(entries)); - } - public void add(final String urlHash){ - final String urlHashes = mem.get(URL_HASHES); - ArrayList list; - if(urlHashes != null && !urlHashes.equals("")){ - list=listManager.string2arraylist(urlHashes); - }else{ - list=new ArrayList(); - } - if(!list.contains(urlHash) && urlHash != null && !urlHash.equals("")){ - list.add(urlHash); - } - this.mem.put(URL_HASHES, listManager.collection2string(list)); - /*if(urlHashes!=null && !urlHashes.equals("") ){ - if(urlHashes.indexOf(urlHash) <0){ - this.mem.put(URL_HASHES, urlHashes+","+urlHash); - } - }else{ - this.mem.put(URL_HASHES, urlHash); - }*/ - } - public void delete(final String urlHash){ - final ArrayList list=listManager.string2arraylist(this.mem.get(URL_HASHES)); - if(list.contains(urlHash)){ - list.remove(urlHash); - } - this.mem.put(URL_HASHES, listManager.collection2string(list)); - } - public void setDatesTable(){ - if (this.size() >0) { - try { - bookmarksDB.this.datesTable.put(getDateString(), mem); - } catch (Exception e) { - Log.logException(e); - } - } else { - try { - bookmarksDB.this.datesTable.remove(getDateString()); - } catch (IOException e) { - Log.logException(e); - } - } - } - public String getDateString(){ - return date; - } - public ArrayList getBookmarkList(){ - return listManager.string2arraylist(this.mem.get(URL_HASHES)); - } - public int size(){ - return listManager.string2arraylist(this.mem.get(URL_HASHES)).size(); - } - } + /** * Subclass of bookmarksDB, which provides the Bookmark object-type */ @@ -1114,7 +794,7 @@ public class bookmarksDB { }else{ entry.put(BOOKMARK_TIMESTAMP, String.valueOf(System.currentTimeMillis())); } - final 
bookmarksDate bmDate=getDate(entry.get(BOOKMARK_TIMESTAMP)); + final BookmarkDate.Entry bmDate=dates.getDate(entry.get(BOOKMARK_TIMESTAMP)); bmDate.add(this.urlHash); bmDate.setDatesTable(); @@ -1271,7 +951,7 @@ public class bookmarksDB { final Iterator it=tags.iterator(); while(it.hasNext()){ final String tagName=it.next(); - Tag tag=getTag(tagHash(tagName)); + Tag tag=getTag(BookmarkHelper.tagHash(tagName)); if(tag == null){ tag=new Tag(tagName); } @@ -1442,4 +1122,8 @@ public class bookmarksDB { } } + + public Entry getDate(final String date) { + return dates.getDate(date); + } } \ No newline at end of file diff --git a/source/de/anomic/data/wiki/tokens/LinkToken.java b/source/de/anomic/data/wiki/tokens/LinkToken.java index 01369f3ec..9224f8c92 100644 --- a/source/de/anomic/data/wiki/tokens/LinkToken.java +++ b/source/de/anomic/data/wiki/tokens/LinkToken.java @@ -33,9 +33,9 @@ import java.util.Iterator; import java.util.regex.Matcher; import java.util.regex.Pattern; +import de.anomic.data.BookmarkHelper; import de.anomic.data.bookmarksDB; import de.anomic.data.bookmarksDB.Bookmark; -import de.anomic.data.bookmarksDB.Tag; import de.anomic.data.wiki.wikiParserException; import de.anomic.search.Switchboard; @@ -152,7 +152,7 @@ public class LinkToken extends AbstractToken { } private Link[] getLinksFromBookmarkTag(final String tagName) { - final Tag tag = this.sb.bookmarksDB.getTag(bookmarksDB.tagHash(tagName)); + final bookmarksDB.Tag tag = this.sb.bookmarksDB.getTag(BookmarkHelper.tagHash(tagName)); if (tag == null) return null; final ArrayList r = new ArrayList(); final Iterator it = tag.getUrlHashes().iterator(); diff --git a/source/de/anomic/yacy/yacySeedDB.java b/source/de/anomic/yacy/yacySeedDB.java index eda7ca88d..d4c77b5a9 100644 --- a/source/de/anomic/yacy/yacySeedDB.java +++ b/source/de/anomic/yacy/yacySeedDB.java @@ -982,13 +982,17 @@ public final class yacySeedDB implements AlternativeDomainNames { public yacySeed internalNext() { if ((it == null) || 
(!(it.hasNext()))) return null; try { - while (true) { - final Map dna = it.next(); - if (dna == null) return null; + Map dna; + while (it.hasNext()) { + dna = it.next(); + assert dna != null; + if (dna == null) continue; final String hash = dna.remove("key"); - if (hash == null) { continue; } // bad seed + assert hash != null; + if (hash == null) continue; // bad seed return new yacySeed(hash, dna); } + return null; } catch (final Exception e) { Log.logException(e); yacyCore.log.logSevere("ERROR internalNext: seed.db corrupt (" + e.getMessage() + "); resetting seed.db", e); diff --git a/source/net/yacy/kelondro/blob/MapDataMining.java b/source/net/yacy/kelondro/blob/MapDataMining.java index 6514c14f7..0a6d51a0f 100644 --- a/source/net/yacy/kelondro/blob/MapDataMining.java +++ b/source/net/yacy/kelondro/blob/MapDataMining.java @@ -371,12 +371,10 @@ public class MapDataMining extends MapHeap { // the key is also included in every map that is returned; it's key is 'key' Iterator keyIterator; - boolean finish; Map n; public mapIterator(final Iterator keyIterator) { this.keyIterator = keyIterator; - this.finish = false; this.n = next0(); } @@ -391,21 +389,17 @@ public class MapDataMining extends MapHeap { } private Map next0() { - if (finish) return null; if (keyIterator == null) return null; String nextKey; Map map; while (keyIterator.hasNext()) { nextKey = new String(keyIterator.next()); - if (nextKey == null) { - finish = true; - return null; - } try { map = get(nextKey); } catch (final IOException e) { break; } + assert map != null; if (map == null) continue; // circumvention of a modified exception map.put("key", nextKey); return map;