//bookmarksDB.java //------------------------------------- //part of YACY //(C) by Michael Peter Christen; mc@anomic.de //first published on http://www.anomic.de //Frankfurt, Germany, 2004 // //This file ist contributed by Alexander Schier // //This program is free software; you can redistribute it and/or modify //it under the terms of the GNU General Public License as published by //the Free Software Foundation; either version 2 of the License, or //(at your option) any later version. // //This program is distributed in the hope that it will be useful, //but WITHOUT ANY WARRANTY; without even the implied warranty of //MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the //GNU General Public License for more details. // //You should have received a copy of the GNU General Public License //along with this program; if not, write to the Free Software //Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //Using this software in any meaning (reading, learning, copying, compiling, //running) means that you agree that the Author(s) is (are) not responsible //for cost, loss of data or any harm that may be caused directly or indirectly //by usage of this softare or this documentation. The usage of this software //is on your own risk. The installation and usage (starting/running) of this //software may allow other people or application to access your computer and //any attached devices and is highly dependent on the configuration of the //software which must be done by the user of the software; the author(s) is //(are) also not responsible for proper configuration and usage of the //software, even if provoked by documentation provided together with //the software. // //Any changes to this file according to the GPL as documented in the file //gpl.txt aside this file in the shipment you received can be done to the //lines that follows this copyright notice here, but changes must not be //done inside the copyright notive above. A re-distribution must contain //the intact and unchanged copyright notice. //Contributions and changes to the program code must be marked as such. package de.anomic.data; import java.io.ByteArrayInputStream; import java.io.File; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.UnsupportedEncodingException; import java.io.Writer; import java.text.ParseException; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Comparator; import java.util.Date; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.Map; import java.util.TreeSet; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; import org.w3c.dom.Document; import org.w3c.dom.NamedNodeMap; import org.w3c.dom.Node; import org.w3c.dom.NodeList; import org.xml.sax.SAXException; import de.anomic.htmlFilter.htmlFilterContentScraper; import de.anomic.htmlFilter.htmlFilterWriter; import de.anomic.plasma.plasmaCondenser; import de.anomic.plasma.plasmaURL; import de.anomic.kelondro.kelondroCloneableIterator; import de.anomic.kelondro.kelondroDyn; import de.anomic.kelondro.kelondroException; import de.anomic.kelondro.kelondroMapObjects; import de.anomic.kelondro.kelondroNaturalOrder; import de.anomic.kelondro.kelondroObjects; import de.anomic.kelondro.kelondroObjectsMapEntry; import de.anomic.net.URL; import de.anomic.server.serverDate; import de.anomic.server.serverFileUtils; import de.anomic.server.logging.serverLog; public class bookmarksDB { kelondroMapObjects tagsTable; //kelondroMap bookmarksTable; kelondroObjects bookmarksTable; kelondroMapObjects datesTable; HashMap tagCache; HashMap bookmarkCache; public static String tagHash(String tagName){ return plasmaCondenser.word2hash(tagName.toLowerCase()); } public static String tagHash(String tagName, String user){ return plasmaCondenser.word2hash(user+":"+tagName.toLowerCase()); } public bookmarksDB(File bookmarksFile, File tagsFile, File datesFile, long preloadTime) { // bookmarks tagCache=new HashMap(); bookmarkCache=new HashMap(); bookmarksFile.getParentFile().mkdirs(); //this.bookmarksTable = new kelondroMap(kelondroDyn.open(bookmarksFile, bufferkb * 1024, preloadTime, 12, 256, '_', true, false)); this.bookmarksTable = new kelondroObjects(new kelondroDyn(bookmarksFile, true, true, preloadTime, 12, 256, '_', kelondroNaturalOrder.naturalOrder, true, false, false), 1000); // tags tagsFile.getParentFile().mkdirs(); boolean tagsFileExisted = tagsFile.exists(); this.tagsTable = new kelondroMapObjects(new kelondroDyn(tagsFile, true, true, preloadTime, 12, 256, '_', kelondroNaturalOrder.naturalOrder, true, false, false), 500); if (!tagsFileExisted) rebuildTags(); // dates boolean datesExisted = datesFile.exists(); this.datesTable = new kelondroMapObjects(new kelondroDyn(datesFile, true, true, preloadTime, 20, 256, '_', kelondroNaturalOrder.naturalOrder, true, false, false), 500); if (!datesExisted) rebuildDates(); } public void close(){ bookmarksTable.close(); flushTagCache(); tagsTable.close(); datesTable.close(); } public int bookmarksSize(){ return bookmarksTable.size(); } public int tagSize(boolean flushed){ if(flushed) flushTagCache(); return tagsTable.size(); } public int tagsSize(){ return tagSize(false); } public void saveBookmark(Bookmark bookmark){ try { bookmarksTable.set(bookmark.getUrlHash(), bookmark); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } } public Tag loadTag(String hash){ Map map; Tag ret=null; map = tagsTable.getMap(hash); if(map!=null){ ret=new Tag(hash, map); tagCache.put(hash, ret); } return ret; } public void saveTag(Tag tag){ if(tag!=null){ tagCache.put(tag.getTagHash(), tag); } } /** * store a Tag in the tagsDB or remove an empty tag * @param tag the tagobject to be stored/removed */ public void storeTag(Tag tag){ try { if(tag.size() >0){ bookmarksDB.this.tagsTable.set(tag.getTagHash(), tag.getMap()); }else{ bookmarksDB.this.tagsTable.remove(tag.getTagHash()); } } catch (IOException e) {} } public void flushTagCache(){ Iterator it=tagCache.keySet().iterator(); while(it.hasNext()){ storeTag((Tag) tagCache.get(it.next())); } tagCache=new HashMap(); } public String addTag(Tag tag){ //tagsTable.set(tag.getTagName(), tag.getMap()); tagCache.put(tag.getTagHash(), tag); return tag.getTagName(); } public void rebuildTags(){ serverLog.logInfo("BOOKMARKS", "rebuilding tags.db from bookmarks.db..."); Iterator it=bookmarkIterator(true); Bookmark bookmark; Tag tag; String[] tags; while(it.hasNext()){ bookmark=(Bookmark) it.next(); tags = bookmark.getTagsString().split(","); tag=null; for(int i=0;i0){ bookmarksDB.this.datesTable.set(getDateString(), mem); }else{ bookmarksDB.this.datesTable.remove(getDateString()); } } catch (IOException e) {} } public String getDateString(){ return date; } public ArrayList getBookmarkList(){ return listManager.string2arraylist((String)this.mem.get(URL_HASHES)); } public int size(){ return listManager.string2arraylist(((String)this.mem.get(URL_HASHES))).size(); } } /** * Subclass, which stores the bookmark * */ public class Bookmark extends kelondroObjectsMapEntry{ public static final String BOOKMARK_URL="bookmarkUrl"; public static final String BOOKMARK_TITLE="bookmarkTitle"; public static final String BOOKMARK_DESCRIPTION="bookmarkDesc"; public static final String BOOKMARK_TAGS="bookmarkTags"; public static final String BOOKMARK_PUBLIC="bookmarkPublic"; public static final String BOOKMARK_TIMESTAMP="bookmarkTimestamp"; public static final String BOOKMARK_OWNER="bookmarkOwner"; public static final String BOOKMARK_IS_FEED="bookmarkIsFeed"; private String urlHash; private HashSet tags; private long timestamp; public Bookmark(String urlHash, Map map){ super(map); this.urlHash=urlHash; if(map.containsKey(BOOKMARK_TAGS)) tags=listManager.string2hashset((String) map.get(BOOKMARK_TAGS)); else tags=new HashSet(); loadTimestamp(); } public Bookmark(String url){ super(); if(!url.toLowerCase().startsWith("http://") && !url.toLowerCase().startsWith("https://")){ url="http://"+url; } this.urlHash=plasmaURL.urlHash(url); entry.put(BOOKMARK_URL, url); this.timestamp=System.currentTimeMillis(); tags=new HashSet(); Bookmark oldBm=getBookmark(this.urlHash); if(oldBm!=null && oldBm.entry.containsKey(BOOKMARK_TIMESTAMP)){ entry.put(BOOKMARK_TIMESTAMP, oldBm.entry.get(BOOKMARK_TIMESTAMP)); //preserve timestamp on edit }else{ entry.put(BOOKMARK_TIMESTAMP, String.valueOf(System.currentTimeMillis())); } bookmarksDate bmDate=getDate((String) entry.get(BOOKMARK_TIMESTAMP)); bmDate.add(this.urlHash); bmDate.setDatesTable(); removeBookmark(this.urlHash); //prevent empty tags } public Bookmark(String urlHash, URL url){ super(); this.urlHash=urlHash; entry.put(BOOKMARK_URL, url.toString()); tags=new HashSet(); timestamp=System.currentTimeMillis(); } public Bookmark(String urlHash, String url){ super(); this.urlHash=urlHash; entry.put(BOOKMARK_URL, url); tags=new HashSet(); timestamp=System.currentTimeMillis(); } public Bookmark(kelondroObjectsMapEntry map) { this(plasmaURL.urlHash((String)map.map().get(BOOKMARK_URL)), map.map()); } private Map toMap(){ entry.put(BOOKMARK_TAGS, listManager.hashset2string(tags)); entry.put(BOOKMARK_TIMESTAMP, String.valueOf(this.timestamp)); return entry; } private void loadTimestamp(){ if(entry.containsKey(BOOKMARK_TIMESTAMP)) this.timestamp=Long.parseLong((String)entry.get(BOOKMARK_TIMESTAMP)); } public String getUrlHash(){ return urlHash; } public String getUrl(){ return (String) entry.get(BOOKMARK_URL); } public HashSet getTags(){ return tags; } public String getTagsString(){ return listManager.hashset2string(getTags()); } public String getDescription(){ if(entry.containsKey(BOOKMARK_DESCRIPTION)){ return (String) entry.get(BOOKMARK_DESCRIPTION); } return ""; } public String getTitle(){ if(entry.containsKey(BOOKMARK_TITLE)){ return (String) entry.get(BOOKMARK_TITLE); } return (String) entry.get(BOOKMARK_URL); } public String getOwner(){ if(entry.containsKey(BOOKMARK_OWNER)){ return (String) entry.get(BOOKMARK_OWNER); } return null; //null means admin } public void setOwner(String owner){ entry.put(BOOKMARK_OWNER, owner); } public boolean getPublic(){ if(entry.containsKey(BOOKMARK_PUBLIC)){ return ((String) entry.get(BOOKMARK_PUBLIC)).equals("public"); } return false; } public boolean getFeed(){ if(entry.containsKey(BOOKMARK_IS_FEED)){ return ((String) entry.get(BOOKMARK_IS_FEED)).equals("true"); } return false; } public void setPublic(boolean isPublic){ if(isPublic){ entry.put(BOOKMARK_PUBLIC, "public"); }else{ entry.put(BOOKMARK_PUBLIC, "private"); } } public void setFeed(boolean isFeed){ if(isFeed){ entry.put(BOOKMARK_IS_FEED, "true"); }else{ entry.put(BOOKMARK_IS_FEED, "false"); } } public void setProperty(String name, String value){ entry.put(name, value); //setBookmarksTable(); } public void addTag(String tag){ tags.add(tag); } /** * set the Tags of the bookmark, and write them into the tags table. * @param tags a ArrayList with the tags */ public void setTags(HashSet tags){ setTags(tags, true); } /** * set the Tags of the bookmark * @param tags ArrayList with the tagnames * @param local sets, whether the updated tags should be stored to tagsDB */ public void setTags(HashSet mytags, boolean local){ tags.addAll(mytags); Iterator it=tags.iterator(); while(it.hasNext()){ String tagName=(String) it.next(); Tag tag=getTag(tagHash(tagName)); if(tag == null){ tag=new Tag(tagName); } tag.addUrl(getUrlHash()); if(local){ saveTag(tag); } } toMap(); } public long getTimeStamp(){ return timestamp; } public void setTimeStamp(long ts){ this.timestamp=ts; } } public class tagIterator implements Iterator{ kelondroCloneableIterator tagIter; bookmarksDB.Tag nextEntry; public tagIterator(boolean up) throws IOException { flushTagCache(); //XXX: This costs performace :-(( this.tagIter = bookmarksDB.this.tagsTable.keys(up, false); this.nextEntry = null; } public boolean hasNext() { try { return this.tagIter.hasNext(); } catch (kelondroException e) { //resetDatabase(); return false; } } public Object next() { try { return getTag((String) this.tagIter.next()); } catch (kelondroException e) { //resetDatabase(); return null; } } public void remove() { if (this.nextEntry != null) { try { String tagHash = this.nextEntry.getTagHash(); if (tagHash != null) removeTag(tagHash); } catch (kelondroException e) { //resetDatabase(); } } } } public class bookmarkIterator implements Iterator{ Iterator bookmarkIter; bookmarksDB.Bookmark nextEntry; public bookmarkIterator(boolean up) throws IOException { //flushBookmarkCache(); //XXX: this will cost performance this.bookmarkIter = bookmarksDB.this.bookmarksTable.keys(up, false); this.nextEntry = null; } public boolean hasNext() { try { return this.bookmarkIter.hasNext(); } catch (kelondroException e) { //resetDatabase(); return false; } } public Object next() { try { return getBookmark((String) this.bookmarkIter.next()); } catch (kelondroException e) { //resetDatabase(); return null; } } public void remove() { if (this.nextEntry != null) { try { Object bookmarkName = this.nextEntry.getUrlHash(); if (bookmarkName != null) removeBookmark((String) bookmarkName); } catch (kelondroException e) { //resetDatabase(); } } } } /** * Comparator to sort the Bookmarks with Timestamps */ public class bookmarkComparator implements Comparator{ private boolean newestFirst; /** * @param newestFirst newest first, or oldest first? */ public bookmarkComparator(boolean newestFirst){ this.newestFirst=newestFirst; } public int compare(Object obj1, Object obj2){ Bookmark bm1=getBookmark((String)obj1); Bookmark bm2=getBookmark((String)obj2); if(bm1==null || bm2==null) return 0; //XXX: i think this should not happen? maybe this needs further tracing of the bug if(this.newestFirst){ if(bm2.getTimeStamp() - bm1.getTimeStamp() >0) return 1; return -1; } if(bm1.getTimeStamp() - bm2.getTimeStamp() >0) return 1; return -1; } } /** * sorts the tag for name */ public class tagComparator implements Comparator{ public int compare(Object obj1, Object obj2){ return ((Tag)obj1).getTagName().compareTo(((Tag)obj2).getTagName()); } } }