You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
yacy_search_server/source/de/anomic/data/bookmarksDB.java

1441 lines
53 KiB

//bookmarksDB.java
//-------------------------------------
//part of YACY
//(C) by Michael Peter Christen; mc@yacy.net
//first published on http://www.anomic.de
//Frankfurt, Germany, 2004
//
//This file has been originally contributed by Alexander Schier
//
//This program is free software; you can redistribute it and/or modify
//it under the terms of the GNU General Public License as published by
//the Free Software Foundation; either version 2 of the License, or
//(at your option) any later version.
//
//This program is distributed in the hope that it will be useful,
//but WITHOUT ANY WARRANTY; without even the implied warranty of
//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
//GNU General Public License for more details.
//
//You should have received a copy of the GNU General Public License
//along with this program; if not, write to the Free Software
//Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package de.anomic.data;
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Serializable;
import java.io.UnsupportedEncodingException;
import java.io.Writer;
import java.net.MalformedURLException;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.Map.Entry;
import java.util.regex.Pattern;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.w3c.dom.Document;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
import de.anomic.crawler.CrawlProfile;
import de.anomic.crawler.retrieval.Request;
import de.anomic.document.Word;
import de.anomic.document.parser.html.ContentScraper;
import de.anomic.document.parser.html.TransformerWriter;
import de.anomic.kelondro.blob.Heap;
import de.anomic.kelondro.blob.MapView;
import de.anomic.kelondro.order.CloneableIterator;
import de.anomic.kelondro.order.NaturalOrder;
import de.anomic.kelondro.util.DateFormatter;
import de.anomic.kelondro.util.kelondroException;
import de.anomic.kelondro.util.FileUtils;
import de.anomic.search.Switchboard;
import de.anomic.server.serverBusyThread;
import de.anomic.server.serverInstantBusyThread;
import de.anomic.yacy.yacyNewsPool;
import de.anomic.yacy.yacyNewsRecord;
import de.anomic.yacy.yacyURL;
import de.anomic.yacy.logging.Log;
public class bookmarksDB {
// ------------------------------------
// Declaration of Class-Attributes
// ------------------------------------
// sort-order selectors understood by the getTagIterator(...) overloads:
// SORT_SIZE selects tagSizeComparator, every other value selects tagComparator
final static int SORT_ALPHA = 1;
final static int SORT_SIZE = 2;
// sentinel for the 'max' argument of getTagIterator(...): do not limit the number of tags
final static int SHOW_ALL = -1;
final static String SLEEP_TIME = "3600000"; // default sleepTime: check for recrawls every hour
// bookmarks: persistent table, keyed by url hash (see saveBookmark/getBookmark)
MapView bookmarksTable; // kelondroMap bookmarksTable;
// tags: persistent table keyed by tag hash, plus a write-back cache that is
// persisted by flushTagCache()
MapView tagsTable;
TreeMap<String, Tag> tagCache;
// dates: persistent table keyed by the date string of a bookmarksDate
MapView datesTable;
// autoReCrawl: busy thread that invokes autoReCrawl(); created and deployed in the constructor
private serverBusyThread autoReCrawl;
// ------------------------------------
// bookmarksDB's class constructor
// ------------------------------------
/**
 * Opens (or creates) the three heap-backed tables and deploys the autoReCrawl thread.
 * The tags table is rebuilt from the bookmarks when the tags file did not exist before,
 * the dates table likewise when the dates file did not exist before.
 * @param bookmarksFile backing file of the bookmarks table; its parent directory is created
 * @param tagsFile backing file of the tags table; its parent directory is created
 * @param datesFile backing file of the dates table (note: its parent directory is NOT created here)
 * @throws IOException declared for failures while opening the tables
 */
public bookmarksDB(final File bookmarksFile, final File tagsFile, final File datesFile) throws IOException {
// bookmarks
tagCache=new TreeMap<String, Tag>();
bookmarksFile.getParentFile().mkdirs();
//this.bookmarksTable = new kelondroMap(kelondroDyn.open(bookmarksFile, bufferkb * 1024, preloadTime, 12, 256, '_', true, false));
//this.bookmarksTable = new MapView(BLOBTree.toHeap(bookmarksFile, true, true, 12, 256, '_', NaturalOrder.naturalOrder, bookmarksFileNew), 1000, '_');
this.bookmarksTable = new MapView(new Heap(bookmarksFile, 12, NaturalOrder.naturalOrder, 1024 * 64), 1000, '_');
// tags
tagsFile.getParentFile().mkdirs();
// must be sampled before the Heap is opened, because the rebuild decision below depends on it
final boolean tagsFileExisted = tagsFile.exists();
//this.tagsTable = new MapView(BLOBTree.toHeap(tagsFile, true, true, 12, 256, '_', NaturalOrder.naturalOrder, tagsFileNew), 500, '_');
this.tagsTable = new MapView(new Heap(tagsFile, 12, NaturalOrder.naturalOrder, 1024 * 64), 500, '_');
if (!tagsFileExisted) rebuildTags();
// dates
// same pattern as for the tags: remember whether the file was there before opening it
final boolean datesExisted = datesFile.exists();
//this.datesTable = new MapView(BLOBTree.toHeap(datesFile, true, true, 20, 256, '_', NaturalOrder.naturalOrder, datesFileNew), 500, '_');
this.datesTable = new MapView(new Heap(datesFile, 20, NaturalOrder.naturalOrder, 1024 * 64), 500, '_');
if (!datesExisted) rebuildDates();
// autoReCrawl
Switchboard sb = Switchboard.getSwitchboard();
// the thread is bound by name to the autoReCrawl() method of this instance
this.autoReCrawl = new serverInstantBusyThread(this, "autoReCrawl", null, null);
// the configured sleep time (default SLEEP_TIME) is passed twice to deployThread below
long sleepTime = Long.parseLong(sb.getConfig("autoReCrawl_idlesleep" , SLEEP_TIME));
sb.deployThread("autoReCrawl", "autoReCrawl Scheduler", "simple scheduler for automatic re-crawls of bookmarked urls", null, autoReCrawl, 120000,
sleepTime, sleepTime, Long.parseLong(sb.getConfig("autoReCrawl_memprereq" , "-1"))
);
Log.logInfo("BOOKMARKS", "autoReCrawl - serverBusyThread initialized checking every "+(sleepTime/1000/60)+" minutes for recrawls");
}
// -----------------------------------------------------
// bookmarksDB's functions for 'destructing' the class
// -----------------------------------------------------
/**
 * Shuts the database down: closes the bookmarks table, writes all cached tags
 * to the tags table and finally closes the tags table and the dates table.
 */
public void close() {
    this.bookmarksTable.close();
    // cached tags have to reach the tags table before that table is closed
    this.flushTagCache();
    this.tagsTable.close();
    this.datesTable.close();
}
// -----------------------------------------------------
// bookmarksDB's functions for autoReCrawl
// -----------------------------------------------------
/**
 * Job method of the autoReCrawl thread: reads DATA/SETTINGS/autoReCrawl.conf below the
 * application root and calls folderReCrawl(...) once for every schedule line.
 * A schedule line consists of 13 or 14 tab-separated columns; the optional 14th column
 * is the cache strategy (default: CrawlProfile.CACHE_STRATEGY_IFFRESH). Empty lines,
 * lines starting with '#' and lines with any other column count are skipped.
 * If the file does not exist it is created as a copy of defaults/autoReCrawl.conf
 * and then read.
 * @return false if the configuration could not be read, parsed or created, true otherwise
 */
public boolean autoReCrawl() {
    // read crontab
    final File f = new File (Switchboard.getSwitchboard().getRootPath(),"DATA/SETTINGS/autoReCrawl.conf");
    BufferedReader in = null;
    try {
        in = new BufferedReader(new InputStreamReader(new FileInputStream(f)));
        Log.logInfo("BOOKMARKS", "autoReCrawl - reading schedules from " + f);
        String s;
        while (null != (s = in.readLine())) {
            if (s.startsWith("#") || s.length() == 0) continue;
            final String[] parser = s.split("\t");
            if (parser.length != 13 && parser.length != 14) continue;
            // the 14th column is optional
            int cacheStrategy = CrawlProfile.CACHE_STRATEGY_IFFRESH;
            if (parser.length == 14) cacheStrategy = Integer.parseInt(parser[13]);
            folderReCrawl(Long.parseLong(parser[0]), parser[1], parser[2], Integer.parseInt(parser[3]), Long.parseLong(parser[4]),
                    Integer.parseInt(parser[5]), Integer.parseInt(parser[6]), Boolean.parseBoolean(parser[7]),
                    Boolean.parseBoolean(parser[8]), Boolean.parseBoolean(parser[9]),
                    Boolean.parseBoolean(parser[10]), Boolean.parseBoolean(parser[11]),
                    Boolean.parseBoolean(parser[12]), cacheStrategy
            );
        }
    } catch (final FileNotFoundException ex) {
        // no configuration yet: install the default one and read that
        Log.logInfo("BOOKMARKS", "autoReCrawl - creating new autoReCrawl.conf");
        final File inputFile = new File(Switchboard.getSwitchboard().getRootPath(),"defaults/autoReCrawl.conf");
        final File outputFile = new File(Switchboard.getSwitchboard().getRootPath(),"DATA/SETTINGS/autoReCrawl.conf");
        FileReader i = null;
        FileWriter o = null;
        try {
            i = new FileReader(inputFile);
            o = new FileWriter(outputFile);
            int c;
            while ((c = i.read()) != -1) {
                o.write(c);
            }
            // close explicitly so that a failing flush is reported as an error below
            i.close();
            o.close();
        } catch (final FileNotFoundException e) {
            Log.logSevere("BOOKMARKS", "autoReCrawl - file not found error: defaults/autoReCrawl.conf", e);
            return false;
        } catch (final IOException e) {
            Log.logSevere("BOOKMARKS", "autoReCrawl - IOException: defaults/autoReCrawl.conf", e);
            return false;
        } finally {
            // release whatever is still open after a failure; closing twice is harmless
            if (i != null) try { i.close(); } catch (final IOException ignored) { /* already failing or closed */ }
            if (o != null) try { o.close(); } catch (final IOException ignored) { /* already failing or closed */ }
        }
        autoReCrawl();
        return true;
    } catch (final Exception ex) {
        Log.logSevere("BOOKMARKS", "autoReCrawl - error reading " + f, ex);
        return false;
    } finally {
        // the reader must not stay open when a schedule line fails to parse
        if (in != null) try { in.close(); } catch (final IOException ignored) { /* nothing left to do */ }
    }
    return true;
}
/**
 * Checks every bookmark of a folder (tag) against a re-crawl schedule and starts a crawl
 * for each bookmark that is due. A bookmark is due when
 * (now - bookmark timestamp) % schedule lies in [0, autoReCrawl_idlesleep).
 * @param schedule re-crawl period, in the unit of System.currentTimeMillis() differences; 0 is rejected
 * @param folder name of the tag/folder whose bookmarks are checked
 * @param crawlingfilter must-match filter; "dom" restricts to the host of the bookmark url,
 *        "sub" to the path prefix of the bookmark url, values shorter than 2 chars become ".*"
 * @param newcrawlingdepth crawl depth handed to the crawl profile
 * @param crawlingIfOlder handed to getRecrawlDate(...) of the active crawl profiles
 * @param crawlingDomFilterDepth handed to the crawl profile
 * @param crawlingDomMaxPages handed to the crawl profile
 * @param crawlingQ handed to the crawl profile
 * @param indexText handed to the crawl profile
 * @param indexMedia handed to the crawl profile
 * @param crawlOrder handed to the crawl profile; if true a news record about the crawl start is published
 * @param xsstopw handed to the crawl profile, also used as value for xdstopw and xpstopw
 * @param storeHTCache handed to the crawl profile
 * @param cacheStrategy handed to the crawl profile
 */
public void folderReCrawl(long schedule, String folder, String crawlingfilter, int newcrawlingdepth, long crawlingIfOlder,
        int crawlingDomFilterDepth, int crawlingDomMaxPages, boolean crawlingQ, boolean indexText, boolean indexMedia,
        boolean crawlOrder, boolean xsstopw, boolean storeHTCache, int cacheStrategy) {
    if (schedule == 0) {
        // the schedule is used as modulus below; 0 would raise an ArithmeticException
        Log.logWarning("BOOKMARKS", "autoReCrawl - ignoring folder " + folder + ": schedule must not be 0");
        return;
    }
    final Switchboard sb = Switchboard.getSwitchboard();
    final Iterator<String> bit = getBookmarksIterator(folder, true);
    Log.logInfo("BOOKMARKS", "autoReCrawl - processing: "+folder);
    final boolean xdstopw = xsstopw;
    final boolean xpstopw = xsstopw;
    // width of the "due" window; it is the same for every bookmark of this run
    final long sleepTime = Long.parseLong(sb.getConfig("autoReCrawl_idlesleep" , SLEEP_TIME));
    while (bit.hasNext()) {
        final Bookmark bm = getBookmark(bit.next());
        if (bm == null) continue; // the tag references a url hash without a bookmark entry
        final long interTime = (System.currentTimeMillis()-bm.getTimeStamp())%schedule;
        final Date date = new Date(bm.getTimeStamp());
        Log.logInfo("BOOKMARKS", "autoReCrawl - checking schedule for: "+"["+DateFormatter.formatISO8601(date)+"] "+bm.getUrl());
        if (interTime < 0 || interTime >= sleepTime) continue; // not due in this run
        try {
            int pos = 0;
            // set crawlingStart to BookmarkUrl
            final String crawlingStart = bm.getUrl();
            String newcrawlingMustMatch = crawlingfilter;
            final yacyURL crawlingStartURL = new yacyURL(crawlingStart, null);
            // set the crawling filter
            if (newcrawlingMustMatch.length() < 2) newcrawlingMustMatch = ".*"; // avoid that all urls are filtered out if bad value was submitted
            if (newcrawlingMustMatch.equals("dom")) {
                newcrawlingMustMatch = ".*" + crawlingStartURL.getHost() + ".*";
            }
            if (crawlingStart != null && newcrawlingMustMatch.equals("sub") && (pos = crawlingStart.lastIndexOf("/")) > 0) {
                newcrawlingMustMatch = crawlingStart.substring(0, pos + 1) + ".*";
            }
            // check if the crawl filter works correctly; an invalid filter is reported in the catch block below
            Pattern.compile(newcrawlingMustMatch);
            final String urlhash = crawlingStartURL.hash();
            sb.indexSegment.urlMetadata().remove(urlhash);
            sb.crawlQueues.noticeURL.removeByURLHash(urlhash);
            sb.crawlQueues.errorURL.remove(urlhash);
            // stack url
            sb.crawler.profilesPassiveCrawls.removeEntry(crawlingStartURL.hash()); // if there is an old entry, delete it
            final CrawlProfile.entry pe = sb.crawler.profilesActiveCrawls.newEntry(
                    folder+"/"+crawlingStartURL, crawlingStartURL,
                    newcrawlingMustMatch,
                    CrawlProfile.MATCH_BAD_URL,
                    newcrawlingdepth,
                    sb.crawler.profilesActiveCrawls.getRecrawlDate(crawlingIfOlder), crawlingDomFilterDepth, crawlingDomMaxPages,
                    crawlingQ,
                    indexText, indexMedia,
                    storeHTCache, true, crawlOrder, xsstopw, xdstopw, xpstopw, cacheStrategy);
            sb.crawlStacker.enqueueEntry(new Request(
                    sb.peers.mySeed().hash,
                    crawlingStartURL,
                    null,
                    "CRAWLING-ROOT",
                    new Date(),
                    null,
                    pe.handle(),
                    0,
                    0,
                    0
                    ));
            Log.logInfo("BOOKMARKS", "autoReCrawl - adding crawl profile for: " + crawlingStart);
            // serverLog.logInfo("BOOKMARKS", "autoReCrawl - crawl filter is set to: " + newcrawlingfilter);
            // generate a YaCyNews if the global flag was set
            if (crawlOrder) {
                final Map<String, String> m = new HashMap<String, String>(pe.map()); // must be cloned
                m.remove("specificDepth");
                m.remove("indexText");
                m.remove("indexMedia");
                m.remove("remoteIndexing");
                m.remove("xsstopw");
                m.remove("xpstopw");
                m.remove("xdstopw");
                m.remove("storeTXCache");
                m.remove("storeHTCache");
                m.remove("generalFilter");
                m.remove("specificFilter");
                m.put("intention", "Automatic ReCrawl!");
                sb.peers.newsPool.publishMyNews(yacyNewsRecord.newRecord(sb.peers.mySeed(), yacyNewsPool.CATEGORY_CRAWL_START, m));
            }
        } catch (final MalformedURLException e1) {
            // a broken bookmark must not stop the remaining ones, but it should be visible
            Log.logWarning("BOOKMARKS", "autoReCrawl - skipping malformed bookmark url: " + bm.getUrl());
        } catch (final java.util.regex.PatternSyntaxException e1) {
            Log.logWarning("BOOKMARKS", "autoReCrawl - skipping " + bm.getUrl() + ", invalid crawl filter: " + e1.getMessage());
        }
    } // while(bit.hasNext())
} // folderReCrawl()
// -------------------------------------
// bookmarksDB's public helper functions
// -------------------------------------
/**
 * Computes the hash under which a tag is stored.
 * @param tagName the name of the tag; it is lower-cased before hashing,
 *        so names that differ only in case share one hash
 * @return the result of Word.word2hash for the lower-cased name, as a String
 */
public static String tagHash(final String tagName) {
    final String lowered = tagName.toLowerCase();
    return new String(Word.word2hash(lowered));
}
/**
 * Computes a tag hash that is specific to a user.
 * @param tagName the name of the tag; it is lower-cased before hashing
 * @param user the user name; it is used as given and prefixed to the tag name, separated by ':'
 * @return the result of Word.word2hash for "user:tagname", as a String
 */
public static String tagHash(final String tagName, final String user) {
    final String qualifiedName = user + ":" + tagName.toLowerCase();
    return new String(Word.word2hash(qualifiedName));
}
/**
 * Lists all folders below the top-level folder "/".
 * @param priv passed on to getFolderList(String, boolean)
 * @return the iterator produced by getFolderList("/", priv)
 */
public Iterator<String> getFolderList(final boolean priv) {
    final String topLevel = "/";
    return this.getFolderList(topLevel, priv);
}
/**
 * Lists the folders at or below a root folder. Folders are tags whose friendly name
 * starts with the root path; for every such tag the folder itself and all of its parent
 * folders down to (but excluding) the root are listed, so that intermediate folders
 * without a tag of their own appear as well.
 * @param root the folder to start from, "/" for the top level
 * @param priv passed to getTagIterator(boolean) to select the tags to look at
 * @return iterator over the sorted folder paths; the root itself is included unless it is "/",
 *         and the last element is always the one-character string U+FFFF
 */
public Iterator<String> getFolderList(final String root, final boolean priv){
    final Set<String> folders = new TreeSet<String>();
    final String prefix = root.equals("/") ? root : root + "/";
    final Iterator<Tag> it = this.getTagIterator(priv);
    while (it.hasNext()) {
        final Tag tag = it.next();
        if (!tag.getFriendlyName().startsWith(prefix)) continue;
        String path = cleanTagsString(tag.getFriendlyName());
        while (path.length() > 0 && !path.equals(root)) {
            folders.add(path);
            final String parent = path.replaceAll("(/.[^/]*$)", ""); // create missing folders in path
            // the pattern does not match every string (e.g. '.' skips line terminators);
            // without this check an unchanged path would be processed forever
            if (parent.equals(path)) break;
            path = parent;
        }
    }
    if (!root.equals("/")) { folders.add(root); }
    // sorts after every other entry; presumably an end marker for the consumer - TODO confirm
    folders.add("\uffff");
    return folders.iterator();
}
/**
 * Normalizes a comma-separated list of tags / folder paths:
 * leading, trailing and repeated commas are removed, trailing and repeated slashes
 * are removed (also a slash directly in front of a comma), and whitespace that
 * follows a comma is removed.
 * The rules are applied repeatedly until the string does not change any more, because
 * one rule can create input for an earlier one (removing the slash of "a,/" exposes a
 * trailing comma, removing the blank of "a, ,b" creates a double comma).
 * @param tagsString the raw tags string, must not be null
 * @return the normalized tags string
 */
public static String cleanTagsString(String tagsString){
    String before;
    do {
        before = tagsString;
        // get rid of heading, trailing and double commas since they are useless
        while (tagsString.startsWith(",")) {
            tagsString = tagsString.substring(1);
        }
        while (tagsString.endsWith(",")) {
            tagsString = tagsString.substring(0, tagsString.length() - 1);
        }
        while (tagsString.contains(",,")) {
            tagsString = tagsString.replaceAll(",,", ",");
        }
        // get rid of double and trailing slashes
        while (tagsString.endsWith("/")) {
            tagsString = tagsString.substring(0, tagsString.length() - 1);
        }
        while (tagsString.contains("/,")) {
            tagsString = tagsString.replaceAll("/,", ",");
        }
        while (tagsString.contains("//")) {
            tagsString = tagsString.replaceAll("//", "/");
        }
        // space characters following a comma are removed
        tagsString = tagsString.replaceAll(",\\s+", ",");
        // every rule only removes characters, so the loop ends after finitely many passes
    } while (!before.equals(tagsString));
    return tagsString;
}
// -----------------------------------------------------------
// bookmarksDB's functions for bookmarksTable / bookmarkCache
// -----------------------------------------------------------
/**
 * Creates a new Bookmark object for a url and assigns its owner.
 * The bookmark is not stored; see saveBookmark(Bookmark) for that.
 * @param url the url to bookmark
 * @param user the owner that is set on the bookmark
 * @return the new bookmark, or null if url is null or empty, or if the bookmark
 *         has no url hash or no map representation
 */
public Bookmark createBookmark(final String url, final String user) {
    if (url == null || url.length() == 0) {
        return null;
    }
    final Bookmark created = new Bookmark(url);
    created.setOwner(user);
    if (created.getUrlHash() == null) {
        return null;
    }
    if (created.toMap() == null) {
        return null;
    }
    return created;
}
/**
 * @return the number of entries in the bookmarks table
 */
public int bookmarksSize() {
    final int entries = this.bookmarksTable.size();
    return entries;
}
/**
 * Writes a bookmark into the bookmarks table, using its url hash as key.
 * An I/O failure is logged and otherwise ignored, i.e. the bookmark is then not stored.
 * @param bookmark the bookmark to store
 */
public void saveBookmark(final Bookmark bookmark){
    try {
        bookmarksTable.put(bookmark.getUrlHash(), bookmark.entry);
    } catch (final IOException e) {
        // report through the application log like the rest of this class instead of stderr
        Log.logSevere("BOOKMARKS", "could not store bookmark " + bookmark.getUrlHash(), e);
    }
}
/**
 * Stores a bookmark via saveBookmark(Bookmark).
 * @param bookmark the bookmark to store
 * @return the url hash of the bookmark
 */
public String addBookmark(final Bookmark bookmark) {
    this.saveBookmark(bookmark);
    final String key = bookmark.getUrlHash();
    return key;
}
/**
 * Loads a bookmark from the bookmarks table.
 * @param urlHash the url hash of the bookmark; null is accepted because the
 *        Bookmark(String) constructor looks up its own hash, which is null for a malformed url
 * @return the bookmark, or null if urlHash is null, no entry exists or the table could not be read
 */
public Bookmark getBookmark(final String urlHash){
    if (urlHash == null) return null; // never hand a null key to the table
    try {
        final Map<String, String> map = bookmarksTable.get(urlHash);
        if (map == null) return null;
        return new Bookmark(map);
    } catch (final IOException e) {
        return null;
    }
}
/**
 * Removes a bookmark: its url hash is deleted from every tag of the bookmark
 * (the changed tags are put into the tag cache), then the entry is removed
 * from the bookmarks table.
 * @param urlHash the url hash of the bookmark
 * @return true if the bookmark existed and was removed from the table,
 *         false if it did not exist or the removal failed
 */
public boolean removeBookmark(final String urlHash){
    final Bookmark bookmark = getBookmark(urlHash);
    if (bookmark == null) return false; //does not exist
    for (final String tagName : bookmark.getTags()) {
        final Tag tag = getTag(tagHash(tagName));
        if (tag != null) {
            tag.delete(urlHash);
            saveTag(tag);
        }
    }
    // the bookmark is known to exist at this point, a second lookup is not needed
    try {
        bookmarksTable.remove(urlHash);
        return true;
    } catch (final IOException e) {
        Log.logWarning("BOOKMARKS", "could not remove bookmark " + urlHash + ": " + e.getMessage());
        return false;
    }
}
/**
 * Creates an iterator over the stored bookmarks.
 * @param up passed to the bookmarkIterator constructor
 * @return a new bookmarkIterator, or an empty iterator if it could not be created
 */
public Iterator<Bookmark> bookmarkIterator(final boolean up) {
    Iterator<Bookmark> result;
    try {
        result = new bookmarkIterator(up);
    } catch (final IOException e) {
        // fall back to an iterator without elements
        result = new HashSet<Bookmark>().iterator();
    }
    return result;
}
/**
 * Collects the url hashes of the stored bookmarks, ordered by bookmarkComparator(true).
 * @param priv if true all bookmarks are included, otherwise only public ones
 * @return iterator over the collected url hashes
 */
public Iterator<String> getBookmarksIterator(final boolean priv) {
    final TreeSet<String> urlHashes = new TreeSet<String>(new bookmarkComparator(true));
    for (final Iterator<Bookmark> all = bookmarkIterator(true); all.hasNext();) {
        final Bookmark candidate = all.next();
        if (!priv && !candidate.getPublic()) {
            continue; // private bookmark, but only public ones were requested
        }
        urlHashes.add(candidate.getUrlHash());
    }
    return urlHashes.iterator();
}
/**
 * Collects the url hashes of the bookmarks that carry a tag, ordered by bookmarkComparator(true).
 * @param tagName the name of the tag
 * @param priv if true all url hashes of the tag are returned, otherwise only
 *        those of bookmarks that exist and are public
 * @return iterator over the url hashes; empty if the tag does not exist
 */
public Iterator<String> getBookmarksIterator(final String tagName, final boolean priv){
    final TreeSet<String> set = new TreeSet<String>(new bookmarkComparator(true));
    final Tag tag = getTag(tagHash(tagName));
    if (tag == null) return set.iterator();
    final Set<String> hashes = tag.getUrlHashes();
    if (priv) {
        set.addAll(hashes);
    } else {
        for (final String urlHash : hashes) {
            final Bookmark bm = getBookmark(urlHash);
            // getBookmark returns null for a hash without table entry; skip it instead of failing
            if (bm != null && bm.getPublic()) {
                set.add(bm.getUrlHash());
            }
        }
    }
    return set.iterator();
}
// -------------------------------------------------
// bookmarksDB's functions for tagsTable / tagCache
// -------------------------------------------------
/**
 * @return the number of entries in the tags table, without flushing the tag cache first
 */
public int tagsSize() {
    final boolean flushFirst = false;
    return this.tagSize(flushFirst);
}
/**
 * @param flushed if true the tag cache is written to the tags table before counting
 * @return the number of entries in the tags table
 */
public int tagSize(final boolean flushed) {
    if (flushed) {
        this.flushTagCache();
    }
    final int entries = this.tagsTable.size();
    return entries;
}
/**
 * load/retrieve an object of type Tag from the tagsTable (also save it in tagCache)
 * @param hash an object of type String, containing a tagHash
 * @return the loaded tag, or null if the table has no entry for the hash or reading failed
 */
private Tag loadTag(final String hash){
    final Map<String, String> map;
    try {
        map = tagsTable.get(hash);
    } catch (final Exception e) {
        // report through the application log like the rest of this class instead of stderr
        Log.logSevere("BOOKMARKS", "could not load tag " + hash, e);
        return null;
    }
    if (map == null) return null;
    final Tag ret = new Tag(hash, map);
    tagCache.put(hash, ret);
    return ret;
}
/**
 * Looks a tag up, preferring the tag cache over the tags table.
 * @param hash an object of type String, containing a tagHash
 * @return the cached tag if the cache has an entry for the hash, otherwise the
 *         result of loadTag(hash), which is null if the tag does not exist
 */
public Tag getTag(final String hash) {
    final boolean cached = this.tagCache.containsKey(hash);
    return cached ? this.tagCache.get(hash) : this.loadTag(hash);
}
/**
 * store a Tag in tagsTable or remove an empty tag
 * (a tag is empty when it references no url hash). An I/O failure is logged
 * and otherwise ignored.
 * @param tag an object of type Tag to be stored/removed; null is ignored
 */
public void storeTag(final Tag tag){
    if (tag == null) return;
    try {
        if (tag.size() > 0) {
            bookmarksDB.this.tagsTable.put(tag.getTagHash(), tag.getMap());
        } else {
            bookmarksDB.this.tagsTable.remove(tag.getTagHash());
        }
    } catch (final IOException e) {
        // do not lose the tag silently
        Log.logSevere("BOOKMARKS", "could not store tag " + tag.getTagHash(), e);
    }
}
/**
 * Puts a Tag into the tagCache under its tag hash; the tags table is not touched.
 * See also flushTagCache(), addTag(), loadTag().
 * @param tag an object of type Tag to be saved in tagCache; null is ignored
 */
public void saveTag(final Tag tag) {
    if (tag == null) {
        return;
    }
    this.tagCache.put(tag.getTagHash(), tag);
}
/**
 * Hands every cached tag to storeTag(Tag) and then replaces the cache by an empty one.
 */
public void flushTagCache() {
    for (final Tag cached : this.tagCache.values()) {
        this.storeTag(cached);
    }
    this.tagCache = new TreeMap<String, Tag>();
}
/**
 * Puts a tag into the tag cache via saveTag(Tag).
 * TODO: is addTag() really needed - check storeTag() and saveTag()
 * @param tag the tag to cache
 * @return the tag name as returned by tag.getTagName()
 */
public String addTag(final Tag tag) {
    //tagsTable.set(tag.getTagName(), tag.getMap());
    //tagCache.put(tag.getTagHash(), tag);
    this.saveTag(tag);
    final String name = tag.getTagName();
    return name;
}
/**
 * Removes a tag from the tag cache and from the tags table.
 * An I/O failure of the table is ignored.
 * @param hash the tag hash of the tag to remove
 */
public void removeTag(final String hash) {
    // removing an absent key is a no-op, so no containsKey test is needed
    this.tagCache.remove(hash);
    try {
        this.tagsTable.remove(hash);
    } catch (final IOException e) {}
}
/**
 * Creates an iterator over the stored tags.
 * @param up passed to the tagIterator constructor
 * @return a new tagIterator, or an empty iterator if it could not be created
 */
public Iterator<Tag> tagIterator(final boolean up) {
    try {
        return new tagIterator(up);
    } catch (final IOException e) {
        // report through the application log like the rest of this class instead of stderr
        Log.logSevere("BOOKMARKS", "could not create tag iterator", e);
        return new HashSet<Tag>().iterator();
    }
}
/**
 * Lists tags in the order selected by SORT_ALPHA.
 * @param priv passed on to getTagIterator(boolean, int)
 * @return the iterator produced by getTagIterator(priv, SORT_ALPHA)
 */
public Iterator<Tag> getTagIterator(final boolean priv) {
    return this.getTagIterator(priv, SORT_ALPHA);
}
/**
 * Collects the stored tags in a sorted set.
 * @param priv if true every tag is included, otherwise only tags with public items
 * @param c SORT_SIZE selects tagSizeComparator, any other value tagComparator
 * @return iterator over the sorted tags
 */
public Iterator<Tag> getTagIterator(final boolean priv, final int c) {
    final Comparator<Tag> order;
    if (c != SORT_SIZE) {
        order = new tagComparator();
    } else {
        order = new tagSizeComparator();
    }
    final TreeSet<Tag> sorted = new TreeSet<Tag>(order);
    for (final Iterator<Tag> all = tagIterator(true); all.hasNext();) {
        final Tag candidate = all.next();
        if (priv || candidate.hasPublicItems()) {
            sorted.add(candidate);
        }
    }
    return sorted.iterator();
}
/**
 * Lists at most max tags: the first max tags in tagSizeComparator order are selected
 * and then returned in the requested order.
 * @param priv if true every tag is considered, otherwise only tags with public items
 * @param comp order of the result: SORT_SIZE selects tagSizeComparator, any other value tagComparator
 * @param max maximum number of tags, or SHOW_ALL for no limit
 * @return iterator over the selected tags
 */
public Iterator<Tag> getTagIterator(final boolean priv, final int comp, final int max){
    if (max == SHOW_ALL) {
        return getTagIterator(priv, comp);
    }
    final Iterator<Tag> it = getTagIterator(priv, SORT_SIZE);
    final Comparator<Tag> c;
    if (comp == SORT_SIZE) c = new tagSizeComparator();
    else c = new tagComparator();
    final TreeSet<Tag> set = new TreeSet<Tag>(c);
    int count = 0;
    // strictly less than: with <= one tag more than requested was taken
    while (it.hasNext() && count < max) {
        set.add(it.next());
        count++;
    }
    return set.iterator();
}
/*
public Iterator<Tag> getTagIterator(String tagName, boolean priv){
return getTagIterator(tagName, priv, SORT_ALPHA);
}
*/
/**
 * Lists the tags that occur on the bookmarks of a given tag.
 * @param tagName the tag whose bookmarks are inspected
 * @param priv if true all bookmarks and tags are considered, otherwise only
 *        public bookmarks and tags with public items
 * @param comp SORT_SIZE selects tagSizeComparator, any other value tagComparator
 * @return iterator over the sorted tags; bookmarks and tags that cannot be loaded are skipped
 */
public Iterator<Tag> getTagIterator(final String tagName, final boolean priv, final int comp){
    final Comparator<Tag> c;
    if (comp == SORT_SIZE) c = new tagSizeComparator();
    else c = new tagComparator();
    final TreeSet<Tag> set = new TreeSet<Tag>(c);
    final Iterator<String> bit = getBookmarksIterator(tagName, priv);
    while (bit.hasNext()) {
        final Bookmark bm = getBookmark(bit.next());
        if (bm == null) continue; // url hash without bookmark entry
        for (final String name : bm.getTags()) {
            final Tag tag = getTag(tagHash(name));
            if (tag == null) continue; // tag name without entry in cache or table
            if (priv || tag.hasPublicItems()) {
                set.add(tag);
            }
        }
    }
    return set.iterator();
}
/**
 * Lists at most max of the tags that occur on the bookmarks of a given tag: the first
 * max tags in tagSizeComparator order are selected and then returned in the requested order.
 * @param tagName the tag whose bookmarks are inspected
 * @param priv passed on to getTagIterator(String, boolean, int)
 * @param comp order of the result: SORT_SIZE selects tagSizeComparator, any other value tagComparator
 * @param max maximum number of tags, or SHOW_ALL for no limit
 * @return iterator over the selected tags
 */
public Iterator<Tag> getTagIterator(final String tagName, final boolean priv, final int comp, final int max){
    if (max == SHOW_ALL) {
        // keep the restriction to tagName; the unrestricted overload would list all tags
        return getTagIterator(tagName, priv, comp);
    }
    final Iterator<Tag> it = getTagIterator(tagName, priv, SORT_SIZE);
    final Comparator<Tag> c;
    if (comp == SORT_SIZE) c = new tagSizeComparator();
    else c = new tagComparator();
    final TreeSet<Tag> set = new TreeSet<Tag>(c);
    int count = 0;
    // strictly less than: with <= one tag more than requested was taken
    while (it.hasNext() && count < max) {
        set.add(it.next());
        count++;
    }
    return set.iterator();
}
/**
 * Rebuilds the tags table from the bookmarks table: for every bookmark the cleaned
 * concatenation of its tags string and folders string is split at commas, and the
 * url hash of the bookmark is added to each of the resulting tags (tags that do not
 * exist yet are created). Finally the tag cache is flushed.
 * NOTE(review): the two strings are concatenated without a separator - confirm that
 * getTagsString()/getFoldersString() provide one.
 */
public void rebuildTags() {
    Log.logInfo("BOOKMARKS", "rebuilding tags.db from bookmarks.db...");
    for (final Iterator<Bookmark> all = bookmarkIterator(true); all.hasNext();) {
        final Bookmark current = all.next();
        final String combined = current.getTagsString() + current.getFoldersString();
        final String[] names = cleanTagsString(combined).split(",");
        for (final String name : names) {
            Tag target = getTag(tagHash(name));
            if (target == null) {
                target = new Tag(name);
            }
            target.addUrl(current.getUrlHash());
            saveTag(target);
        }
    }
    flushTagCache();
    Log.logInfo("BOOKMARKS", "Rebuilt "+tagsTable.size()+" tags using your "+bookmarksTable.size()+" bookmarks.");
}
// ---------------------------------------
// bookmarksDB's functions for datesTable
// ---------------------------------------
/**
 * Loads the bookmarksDate object for a date key from the dates table.
 * @param date the key, a timestamp in milliseconds written as decimal string
 * @return a bookmarksDate backed by the stored map, or a new empty bookmarksDate
 *         if there is no entry or the table could not be read; never null
 */
public bookmarksDate getDate(final String date) {
    Map<String, String> stored = null;
    try {
        stored = this.datesTable.get(date);
    } catch (final IOException e) {
        stored = null;
    }
    return (stored == null) ? new bookmarksDate(date) : new bookmarksDate(date, stored);
}
/**
 * Rebuilds the dates table from the bookmarks table: the url hash of every bookmark
 * is added to the bookmarksDate entry of the bookmark's timestamp, and that entry
 * is written to the dates table.
 */
public void rebuildDates() {
    Log.logInfo("BOOKMARKS", "rebuilding dates.db from bookmarks.db...");
    for (final Iterator<Bookmark> all = bookmarkIterator(true); all.hasNext();) {
        final Bookmark current = all.next();
        final String key = String.valueOf(current.getTimeStamp());
        bookmarksDate entry = getDate(key);
        if (entry == null) {
            entry = new bookmarksDate(key);
        }
        entry.add(current.getUrlHash());
        entry.setDatesTable();
    }
    Log.logInfo("BOOKMARKS", "Rebuilt "+datesTable.size()+" dates using your "+bookmarksTable.size()+" bookmarks.");
}
// -------------------------------------
// bookmarksDB's experimental functions
// -------------------------------------
/**
 * Renames a tag on all bookmarks that carry it: the old tag is removed from the tag
 * cache and the tags table, then every bookmark of the old tag loses the old name,
 * gets the new name (unless that is empty) and is saved.
 * @param oldName the current name of the tag
 * @param newName the new name; the empty string only removes the old tag from the bookmarks
 * @return true if the old tag existed, false otherwise
 */
public boolean renameTag(final String oldName, final String newName){
    final String oldHash = tagHash(oldName);
    final Tag oldTag = getTag(oldHash);
    if (oldTag == null) return false;
    final Set<String> urlHashes = oldTag.getUrlHashes(); // preserve urlHashes of oldTag
    removeTag(oldHash); // remove oldHash from TagsDB
    for (final String urlHash : urlHashes) { // looping through all bookmarks which were tagged with oldName
        final Bookmark bookmark = getBookmark(urlHash);
        if (bookmark == null) continue; // url hash without bookmark entry
        final Set<String> tags = bookmark.getTags();
        tags.remove(oldName);
        bookmark.setTags(tags, true); // might not be needed, but doesn't hurt
        if (!newName.equals("")) bookmark.addTag(newName);
        saveBookmark(bookmark);
    }
    return true;
}
/**
 * Adds a tag to every bookmark that carries another tag, and saves those bookmarks.
 * Nothing happens if the selecting tag does not exist.
 * @param selectTag name of the tag that selects the bookmarks
 * @param newTag name of the tag to add to the selected bookmarks
 */
public void addTag(final String selectTag, final String newTag){
    final Tag selected = getTag(tagHash(selectTag));
    if (selected == null) return; // unknown tag: no bookmark to change
    // iterate over a copy, so the loop is unaffected if bookmark.addTag(...) updates the
    // url set of the selecting tag (its implementation is not visible here)
    final Set<String> urlHashes = new HashSet<String>(selected.getUrlHashes());
    for (final String urlHash : urlHashes) { // looping through all bookmarks which were tagged with selectTag
        final Bookmark bookmark = getBookmark(urlHash);
        if (bookmark == null) continue; // url hash without bookmark entry
        bookmark.addTag(newTag);
        saveBookmark(bookmark);
    }
}
// --------------------------------------
// bookmarksDB's Import/Export functions
// --------------------------------------
/**
 * Imports bookmarks from an html document given as String; the text is wrapped
 * into a UTF-8 reader and handed to the reader-based overload.
 * @param baseURL passed on to the reader-based overload
 * @param input the html text
 * @param tag passed on to the reader-based overload
 * @param importPublic passed on to the reader-based overload
 * @return the number of imported bookmarks, 0 if UTF-8 is not supported
 */
public int importFromBookmarks(final yacyURL baseURL, final String input, final String tag, final boolean importPublic) {
    final InputStreamReader utf8Reader;
    try {
        // convert string to input stream
        utf8Reader = new InputStreamReader(new ByteArrayInputStream(input.getBytes("UTF-8")), "UTF-8");
    } catch (final UnsupportedEncodingException e) {
        return 0;
    }
    // import stream
    return this.importFromBookmarks(baseURL, utf8Reader, tag, importPublic);
}
/**
 * Imports bookmarks from an html document: every anchor found by the ContentScraper
 * becomes a bookmark with the anchor text as title (the url itself if there is no text),
 * the given default tags and the given visibility. The tag cache is flushed at the end.
 * @param baseURL base url handed to the ContentScraper
 * @param input reader providing the html document
 * @param tag default tags for all imported bookmarks, parsed by listManager.string2set
 * @param importPublic visibility that is set on every imported bookmark
 * @return the number of bookmarks that were saved
 */
public int importFromBookmarks(final yacyURL baseURL, final InputStreamReader input, final String tag, final boolean importPublic){
    int importCount = 0;
    Map<yacyURL, String> links = new HashMap<yacyURL, String>();
    final Set<String> tags = listManager.string2set(tag); //this allow multiple default tags
    try {
        //load the links
        final ContentScraper scraper = new ContentScraper(baseURL);
        //OutputStream os = new htmlFilterOutputStream(null, scraper, null, false);
        final Writer writer = new TransformerWriter(null,null,scraper, null, false);
        FileUtils.copy(input,writer);
        writer.close();
        links = scraper.getAnchors();
    } catch (final IOException e) { Log.logWarning("BOOKMARKS", "error during load of links: "+ e.getClass() +" "+ e.getMessage());}
    for (final Entry<yacyURL, String> link: links.entrySet()) {
        final yacyURL url = link.getKey();
        String title = link.getValue();
        // log the link itself; the statement used to print a fixed text
        Log.logInfo("BOOKMARKS", "importing bookmark: " + url);
        if (title == null || title.equals("")) {//cannot be displayed
            title = url.toString();
        }
        final Bookmark bm = new Bookmark(url.toString());
        bm.setProperty(Bookmark.BOOKMARK_TITLE, title);
        bm.setTags(tags);
        bm.setPublic(importPublic);
        saveBookmark(bm);
        importCount++;
    }
    flushTagCache();
    return importCount;
}
/**
 * Imports bookmarks from an XML document given as String; the text is encoded as
 * UTF-8 and handed to the stream-based overload.
 * @param input the XML text
 * @param importPublic passed on to the stream-based overload
 * @return the number of imported bookmarks, 0 if UTF-8 is not supported
 */
public int importFromXML(final String input, final boolean importPublic) {
    final ByteArrayInputStream utf8Bytes;
    try {
        // convert string to input stream
        utf8Bytes = new ByteArrayInputStream(input.getBytes("UTF-8"));
    } catch (final UnsupportedEncodingException e) {
        return 0;
    }
    // import stream
    return this.importFromXML(utf8Bytes, importPublic);
}
/**
 * Imports bookmarks from an XML document: the stream is parsed with a non-validating,
 * non-namespace-aware DOM parser and the document is handed to parseXMLimport.
 * Parser problems are logged and lead to the result 0.
 * NOTE(review): the parser is used with its default entity handling; if the input can
 * come from untrusted sources, consider disabling DTDs / external entities - confirm.
 * @param input stream providing the XML document
 * @param importPublic visibility that is set on every imported bookmark
 * @return the number of imported bookmarks, 0 if the document could not be parsed
 */
public int importFromXML(final InputStream input, final boolean importPublic){
    final DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
    factory.setValidating(false);
    factory.setNamespaceAware(false);
    try {
        final DocumentBuilder builder = factory.newDocumentBuilder();
        final Document doc = builder.parse(input);
        return parseXMLimport(doc, importPublic);
    } catch (final ParserConfigurationException e) {
        Log.logWarning("BOOKMARKS", "XML import failed, parser configuration: " + e.getMessage());
    } catch (final SAXException e) {
        Log.logWarning("BOOKMARKS", "XML import failed, document is not well-formed: " + e.getMessage());
    } catch (final IOException e) {
        Log.logWarning("BOOKMARKS", "XML import failed, input could not be read: " + e.getMessage());
    }
    return 0;
}
/**
 * Imports the bookmarks of a DOM subtree. Every node named "post" becomes a bookmark:
 * attribute "href" is the url, "tag" the blank-separated tags, "description" the title,
 * "extended" the description and "time" an ISO8601 timestamp (the current time is used
 * if it cannot be parsed). Child nodes are processed recursively; the tag cache is
 * flushed at the end of each call.
 * @param doc the node to start from
 * @param importPublic visibility that is set on every imported bookmark
 * @return the number of bookmarks imported from this node and its descendants;
 *         a "post" node without a non-empty href yields 0 and its children are not visited
 */
public int parseXMLimport(final Node doc, final boolean importPublic){
    int importCount = 0;
    // compare the content of the name; == only tests for the same String object
    if ("post".equals(doc.getNodeName())) {
        final NamedNodeMap attributes = doc.getAttributes();
        final Node href = (attributes == null) ? null : attributes.getNamedItem("href");
        final String url = (href == null) ? null : href.getNodeValue();
        if (url == null || url.equals("")) {
            // no usable url (also covers a missing href attribute)
            return 0;
        }
        final Bookmark bm = new Bookmark(url);
        String tagsString = "";
        String title = "";
        String description = "";
        String time = "";
        if (attributes.getNamedItem("tag") != null) {
            tagsString = attributes.getNamedItem("tag").getNodeValue();
        }
        if (attributes.getNamedItem("description") != null) {
            title = attributes.getNamedItem("description").getNodeValue();
        }
        if (attributes.getNamedItem("extended") != null) {
            description = attributes.getNamedItem("extended").getNodeValue();
        }
        if (attributes.getNamedItem("time") != null) {
            time = attributes.getNamedItem("time").getNodeValue();
        }
        Set<String> tags = new HashSet<String>();
        if (title != null) {
            bm.setProperty(Bookmark.BOOKMARK_TITLE, title);
        }
        if (tagsString != null) {
            // tags are separated by blanks in the import format, by commas internally
            tags = listManager.string2set(tagsString.replace(' ', ','));
        }
        bm.setTags(tags, true);
        if (time != null) {
            Date parsedDate = null;
            try {
                parsedDate = DateFormatter.parseISO8601(time);
            } catch (final ParseException e) {
                parsedDate = new Date();
            }
            bm.setTimeStamp(parsedDate.getTime());
        }
        if (description != null) {
            bm.setProperty(Bookmark.BOOKMARK_DESCRIPTION, description);
        }
        bm.setPublic(importPublic);
        saveBookmark(bm);
        importCount++;
    }
    final NodeList children = doc.getChildNodes();
    if (children != null) {
        for (int i = 0; i < children.getLength(); i++) {
            importCount += parseXMLimport(children.item(i), importPublic);
        }
    }
    flushTagCache();
    return importCount;
}
// --------------------------------------
// bookmarksDB's Subclasses
// --------------------------------------
/**
 * Inner class of bookmarksDB which provides the Tag object-type:
 * a tag hash, a property map holding the tag name, and the set of url hashes
 * of the bookmarks that carry the tag.
 */
public class Tag {

    /** key of the property map under which getMap() stores the url hashes */
    public static final String URL_HASHES = "urlHashes";
    /** key of the property map that holds the tag name */
    public static final String TAG_NAME = "tagName";

    private final String tagHash;
    private final Map<String, String> mem;
    private Set<String> urlHashes;

    /**
     * Creates a tag from a stored property map.
     * @param hash the tag hash
     * @param map the property map; it is used directly, not copied. The url hashes
     *        are parsed from its URL_HASHES entry, if present
     */
    public Tag(final String hash, final Map<String, String> map) {
        this.tagHash = hash;
        this.mem = map;
        if (this.mem.containsKey(URL_HASHES)) {
            this.urlHashes = listManager.string2set(this.mem.get(URL_HASHES));
        } else {
            this.urlHashes = new HashSet<String>();
        }
    }

    /**
     * Creates a tag with a given set of url hashes.
     * @param name the tag name; the tag hash is computed from it
     * @param entries the url hashes; the set is used directly, not copied
     */
    public Tag(final String name, final HashSet<String> entries) {
        this.tagHash = tagHash(name);
        this.mem = new HashMap<String, String>();
        //mem.put(URL_HASHES, listManager.arraylist2string(entries));
        this.urlHashes = entries;
        this.mem.put(TAG_NAME, name);
    }

    /**
     * Creates a tag without url hashes.
     * @param name the tag name; the tag hash is computed from it
     */
    public Tag(final String name) {
        this.tagHash = tagHash(name);
        this.mem = new HashMap<String, String>();
        //mem.put(URL_HASHES, "");
        this.urlHashes = new HashSet<String>();
        this.mem.put(TAG_NAME, name);
    }

    /**
     * @return the property map, after its URL_HASHES entry has been updated
     *         from the current set of url hashes
     */
    public Map<String, String> getMap() {
        final String joined = listManager.collection2string(this.urlHashes);
        this.mem.put(URL_HASHES, joined);
        return this.mem;
    }

    /**
     * @return the friendly name converted to lower case
     */
    public String getTagName() {
        /*if(this.mem.containsKey(TAG_NAME)){
        return (String) this.mem.get(TAG_NAME);
        }
        return "";*/
        final String friendly = this.getFriendlyName();
        return friendly.toLowerCase();
    }

    /**
     * @return the hash of this tag
     */
    public String getTagHash() {
        return this.tagHash;
    }

    /**
     * @return the tag name as stored under TAG_NAME (case preserved),
     *         or "notagname" if the property map has no such entry
     */
    public String getFriendlyName() {
        /*if(this.mem.containsKey(TAG_FRIENDLY_NAME)){
        return (String) this.mem.get(TAG_FRIENDLY_NAME);
        }
        return getTagName();*/
        if (!this.mem.containsKey(TAG_NAME)) {
            return "notagname";
        }
        return this.mem.get(TAG_NAME);
    }

    /**
     * @return the set of url hashes of this tag; the internal set itself, not a copy
     */
    public Set<String> getUrlHashes() {
        return this.urlHashes;
    }

    /**
     * @return true if getBookmarksIterator(tag name, false) yields at least one url hash
     */
    public boolean hasPublicItems() {
        final Iterator<String> publicHashes = getBookmarksIterator(this.getTagName(), false);
        return publicHashes.hasNext();
    }

    /**
     * Adds a url hash to this tag.
     * @param urlHash the url hash to add
     */
    public void addUrl(final String urlHash) {
        this.urlHashes.add(urlHash);
    }

    /**
     * Removes a url hash from this tag.
     * @param urlHash the url hash to remove
     */
    public void delete(final String urlHash) {
        this.urlHashes.remove(urlHash);
    }

    /**
     * @return the number of url hashes of this tag
     */
    public int size() {
        return this.urlHashes.size();
    }
}
/**
 * Inner class of bookmarksDB which provides the bookmarksDate object-type:
 * the list of url hashes that belong to one date. The date is a timestamp in
 * milliseconds, truncated to whole seconds and kept as decimal string.
 */
public class bookmarksDate {

    /** key of the property map that holds the list of url hashes */
    public static final String URL_HASHES = "urlHashes";

    private final Map<String, String> mem;
    String date;

    /**
     * Creates an entry without url hashes.
     * @param mydate timestamp in milliseconds as decimal string
     */
    public bookmarksDate(final String mydate) {
        this.date = roundToSeconds(mydate);
        this.mem = new HashMap<String, String>();
        this.mem.put(URL_HASHES, "");
    }

    /**
     * Creates an entry from a stored property map.
     * @param mydate timestamp in milliseconds as decimal string
     * @param map the property map; it is used directly, not copied
     */
    public bookmarksDate(final String mydate, final Map<String, String> map) {
        this.date = roundToSeconds(mydate);
        this.mem = map;
    }

    /**
     * Creates an entry with a given list of url hashes.
     * @param mydate timestamp in milliseconds as decimal string
     * @param entries the url hashes of the entry
     */
    public bookmarksDate(final String mydate, final ArrayList<String> entries) {
        this.date = roundToSeconds(mydate);
        this.mem = new HashMap<String, String>();
        this.mem.put(URL_HASHES, listManager.collection2string(entries));
    }

    // round to seconds, but store as milliseconds (java timestamp)
    private String roundToSeconds(final String millis) {
        final long seconds = Long.parseLong(millis) / 1000;
        return String.valueOf(seconds * 1000);
    }

    /**
     * Adds a url hash to this entry, unless it is null, empty or already contained.
     * @param urlHash the url hash to add
     */
    public void add(final String urlHash) {
        final String stored = this.mem.get(URL_HASHES);
        final ArrayList<String> list;
        if (stored == null || stored.equals("")) {
            list = new ArrayList<String>();
        } else {
            list = listManager.string2arraylist(stored);
        }
        final boolean usable = urlHash != null && !urlHash.equals("");
        if (usable && !list.contains(urlHash)) {
            list.add(urlHash);
        }
        this.mem.put(URL_HASHES, listManager.collection2string(list));
        /*if(urlHashes!=null && !urlHashes.equals("") ){
        if(urlHashes.indexOf(urlHash) <0){
        this.mem.put(URL_HASHES, urlHashes+","+urlHash);
        }
        }else{
        this.mem.put(URL_HASHES, urlHash);
        }*/
    }

    /**
     * Removes a url hash from this entry.
     * @param urlHash the url hash to remove
     */
    public void delete(final String urlHash) {
        final ArrayList<String> list = this.getBookmarkList();
        // remove(Object) does nothing if the hash is not contained
        list.remove(urlHash);
        this.mem.put(URL_HASHES, listManager.collection2string(list));
    }

    /**
     * Writes this entry to the dates table, or removes it from the table if it
     * has no url hashes. An I/O failure is ignored.
     */
    public void setDatesTable() {
        final String key = this.getDateString();
        try {
            if (this.size() > 0) {
                bookmarksDB.this.datesTable.put(key, this.mem);
            } else {
                bookmarksDB.this.datesTable.remove(key);
            }
        } catch (final IOException e) {}
    }

    /**
     * @return the date of this entry, truncated to whole seconds, in milliseconds as decimal string
     */
    public String getDateString() {
        return this.date;
    }

    /**
     * @return the url hashes of this entry, parsed from the property map on every call
     */
    public ArrayList<String> getBookmarkList() {
        return listManager.string2arraylist(this.mem.get(URL_HASHES));
    }

    /**
     * @return the number of url hashes of this entry
     */
    public int size() {
        return this.getBookmarkList().size();
    }
}
/**
* Inner class of bookmarksDB, which provides the Bookmark object-type
*/
public class Bookmark {
    // keys of the property map ("entry") of a bookmark
    public static final String BOOKMARK_URL="bookmarkUrl";
    public static final String BOOKMARK_TITLE="bookmarkTitle";
    public static final String BOOKMARK_DESCRIPTION="bookmarkDesc";
    public static final String BOOKMARK_TAGS="bookmarkTags";
    public static final String BOOKMARK_PUBLIC="bookmarkPublic";
    public static final String BOOKMARK_TIMESTAMP="bookmarkTimestamp";
    public static final String BOOKMARK_OWNER="bookmarkOwner";
    public static final String BOOKMARK_IS_FEED="bookmarkIsFeed";

    private String urlHash;
    private Set<String> tags;
    private long timestamp;
    Map<String, String> entry;

    /**
     * Creates a bookmark from an existing property map. The map is used as is (not copied).
     * Tags and timestamp are read from the map if present.
     * @param urlHash hash of the bookmarked URL
     * @param map property map of the bookmark
     */
    public Bookmark(final String urlHash, final Map<String, String> map) {
        this.entry = map;
        this.urlHash=urlHash;
        tags=new TreeSet<String>(String.CASE_INSENSITIVE_ORDER);
        if(map.containsKey(BOOKMARK_TAGS)){
            tags.addAll(listManager.string2set(map.get(BOOKMARK_TAGS)));
        }
        loadTimestamp();
    }

    /**
     * Creates a bookmark for a URL given as string. "http://" is prepended if the
     * string starts with neither "http://" nor "https://". If a bookmark with the
     * same hash already exists, its timestamp is kept; the hash is registered in the
     * dates table under that timestamp and the old bookmark is removed.
     * If the URL is malformed the url hash is null.
     * @param url the URL to bookmark
     */
    public Bookmark(String url){
        entry = new HashMap<String, String>();
        final String lowerUrl = url.toLowerCase();
        if(!lowerUrl.startsWith("http://") && !lowerUrl.startsWith("https://")){
            url="http://"+url;
        }
        try {
            this.urlHash=(new yacyURL(url, null)).hash();
        } catch (final MalformedURLException e) {
            // NOTE(review): the null hash is handed to getBookmark()/removeBookmark() below - confirm they tolerate null
            this.urlHash = null;
        }
        entry.put(BOOKMARK_URL, url);
        tags=new HashSet<String>();
        final Bookmark oldBm=getBookmark(this.urlHash);
        if(oldBm!=null && oldBm.entry.containsKey(BOOKMARK_TIMESTAMP)){
            entry.put(BOOKMARK_TIMESTAMP, oldBm.entry.get(BOOKMARK_TIMESTAMP)); //preserve timestamp on edit
        }else{
            entry.put(BOOKMARK_TIMESTAMP, String.valueOf(System.currentTimeMillis()));
        }
        // keep the field in sync with the entry: toMap() writes the field back into the
        // entry, so a field set to "now" would silently discard the preserved timestamp
        // and disagree with the date this bookmark is registered under below
        loadTimestamp();
        final bookmarksDate bmDate=getDate(entry.get(BOOKMARK_TIMESTAMP));
        bmDate.add(this.urlHash);
        bmDate.setDatesTable();
        removeBookmark(this.urlHash); //prevent empty tags
    }

    /**
     * Creates a bookmark with the current time as timestamp and no tags.
     * @param urlHash hash of the bookmarked URL
     * @param url the bookmarked URL; stored in the form returned by toNormalform(false, true)
     */
    public Bookmark(final String urlHash, final yacyURL url) {
        entry = new HashMap<String, String>();
        this.urlHash=urlHash;
        entry.put(BOOKMARK_URL, url.toNormalform(false, true));
        tags=new HashSet<String>();
        timestamp=System.currentTimeMillis();
    }

    /**
     * Creates a bookmark with the current time as timestamp and no tags.
     * @param urlHash hash of the bookmarked URL
     * @param url the bookmarked URL, stored unchanged
     */
    public Bookmark(final String urlHash, final String url) {
        entry = new HashMap<String, String>();
        this.urlHash=urlHash;
        entry.put(BOOKMARK_URL, url);
        tags=new HashSet<String>();
        timestamp=System.currentTimeMillis();
    }

    /**
     * Creates a bookmark from a property map; the url hash is computed from the
     * URL stored under BOOKMARK_URL.
     * @param map property map of the bookmark
     * @throws MalformedURLException if the stored URL cannot be parsed
     */
    public Bookmark(final Map<String, String> map) throws MalformedURLException {
        this((new yacyURL(map.get(BOOKMARK_URL), null)).hash(), map);
    }

    /**
     * Writes the current tags and timestamp into the property map.
     * @return the property map itself (not a copy)
     */
    Map<String, String> toMap() {
        entry.put(BOOKMARK_TAGS, listManager.collection2string(tags));
        entry.put(BOOKMARK_TIMESTAMP, String.valueOf(this.timestamp));
        return entry;
    }

    /**
     * Sets the timestamp field from the property map, if the map has one.
     */
    private void loadTimestamp() {
        if(entry.containsKey(BOOKMARK_TIMESTAMP)){
            this.timestamp=Long.parseLong(entry.get(BOOKMARK_TIMESTAMP));
        }
    }

    public String getUrlHash() {
        return urlHash;
    }

    public String getUrl() {
        return entry.get(BOOKMARK_URL);
    }

    /**
     * @return the tag set of this bookmark (the internal set, not a copy)
     */
    public Set<String> getTags() {
        return tags;
    }

    /**
     * @return all tags which do not start with "/", each followed by a comma
     */
    public String getTagsString() {
        return joinTagNames(false);
    }

    /**
     * @return all tags which start with "/" (folders), each followed by a comma
     */
    public String getFoldersString(){
        return joinTagNames(true);
    }

    /**
     * Concatenates either the folder tags (leading "/") or the plain tags,
     * appending a comma after every name.
     */
    private String joinTagNames(final boolean folders){
        final String[] names = listManager.collection2string(getTags()).split(",");
        final StringBuilder joined = new StringBuilder();
        for (final String name : names){
            if(name.startsWith("/") == folders){
                joined.append(name).append(',');
            }
        }
        return joined.toString();
    }

    /**
     * @return the description, or an empty string if none is set
     */
    public String getDescription(){
        if(entry.containsKey(BOOKMARK_DESCRIPTION)){
            return entry.get(BOOKMARK_DESCRIPTION);
        }
        return "";
    }

    /**
     * @return the title, or the URL if no title is set
     */
    public String getTitle(){
        if(entry.containsKey(BOOKMARK_TITLE)){
            return entry.get(BOOKMARK_TITLE);
        }
        return entry.get(BOOKMARK_URL);
    }

    /**
     * @return the owner, or null if no owner is set
     */
    public String getOwner(){
        if(entry.containsKey(BOOKMARK_OWNER)){
            return entry.get(BOOKMARK_OWNER);
        }
        return null; //null means admin
    }

    public void setOwner(final String owner){
        entry.put(BOOKMARK_OWNER, owner);
    }

    /**
     * @return true if the bookmark is marked "public"; false otherwise, also if the flag is missing
     */
    public boolean getPublic(){
        // constant first: a missing or null value simply yields false
        return "public".equals(entry.get(BOOKMARK_PUBLIC));
    }

    /**
     * @return true if the feed flag is "true"; false otherwise, also if the flag is missing
     */
    public boolean getFeed(){
        return "true".equals(entry.get(BOOKMARK_IS_FEED));
    }

    public void setPublic(final boolean isPublic){
        entry.put(BOOKMARK_PUBLIC, isPublic ? "public" : "private");
    }

    public void setFeed(final boolean isFeed){
        entry.put(BOOKMARK_IS_FEED, isFeed ? "true" : "false");
    }

    /**
     * Sets an arbitrary property in the property map. Nothing is saved by this call.
     */
    public void setProperty(final String name, final String value){
        entry.put(name, value);
        //setBookmarksTable();
    }

    /**
     * Adds a tag, updates the tags table and saves this bookmark.
     * @param tagName name of the tag to add
     */
    public void addTag(final String tagName){
        tags.add(tagName);
        setTags(tags);
        saveBookmark(this);
    }

    /**
     * set the Tags of the bookmark, and write them into the tags table.
     * @param tags2 a Set with the tagnames
     */
    public void setTags(final Set<String> tags2){
        setTags(tags2, true);
    }

    /**
     * set the Tags of the bookmark
     * @param tags2 Set with the tagnames; the set is adopted as is (not copied)
     * @param local sets, whether the updated tags should be stored to tagsDB
     */
    public void setTags(final Set<String> tags2, final boolean local){
        tags = tags2; // TODO: check if this is safe
        // the set is replaced (not merged) - renameTag() relies on this
        for (final String tagName : tags){
            Tag tag=getTag(tagHash(tagName));
            if(tag == null){
                tag=new Tag(tagName);
            }
            tag.addUrl(getUrlHash());
            if(local){
                saveTag(tag);
            }
        }
        toMap();
    }

    public long getTimeStamp(){
        return timestamp;
    }

    public void setTimeStamp(final long ts){
        this.timestamp=ts;
    }
}
/**
* Inner class of bookmarksDB, which provides the tagIterator object-type
*/
public class tagIterator implements Iterator<Tag> {
    // iterator over the keys of the tags table
    CloneableIterator<byte[]> tagIter;

    /**
     * Flushes the tag cache and opens a key iterator on the tags table.
     * @param up passed through to tagsTable.keys(up, false) - presumably the iteration direction
     * @throws IOException if the key iterator cannot be created
     */
    public tagIterator(final boolean up) throws IOException {
        flushTagCache(); //XXX: This costs performace :-((
        this.tagIter = bookmarksDB.this.tagsTable.keys(up, false);
    }

    /**
     * @return true if another key is available; false also if the check itself fails
     */
    public boolean hasNext() {
        boolean more;
        try {
            more = this.tagIter.hasNext();
        } catch (final Exception e) {
            e.printStackTrace();
            more = false;
        }
        return more;
    }

    /**
     * @return the tag looked up for the next key, or null if fetching it fails
     */
    public Tag next() {
        try {
            final String tagHash = new String(this.tagIter.next());
            return getTag(tagHash);
        } catch (final Exception e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Removal is not implemented; calling this method has no effect.
     */
    public void remove() {
        // intentionally left empty
    }
}
/**
* Inner class of bookmarksDB, which provides the bookmarkIterator object-type
*/
public class bookmarkIterator implements Iterator<Bookmark> {
    // iterator over the keys of the bookmarks table
    Iterator<byte[]> bookmarkIter;

    /**
     * Opens a key iterator on the bookmarks table. The bookmark cache is not flushed first.
     * @param up passed through to bookmarksTable.keys(up, false) - presumably the iteration direction
     * @throws IOException if the key iterator cannot be created
     */
    public bookmarkIterator(final boolean up) throws IOException {
        this.bookmarkIter = bookmarksDB.this.bookmarksTable.keys(up, false);
    }

    /**
     * @return true if another key is available; false also on a kelondroException
     */
    public boolean hasNext() {
        boolean more;
        try {
            more = this.bookmarkIter.hasNext();
        } catch (final kelondroException e) {
            more = false;
        }
        return more;
    }

    /**
     * @return the bookmark looked up for the next key, or null on a kelondroException
     */
    public Bookmark next() {
        try {
            final byte[] key = this.bookmarkIter.next();
            final String urlHash = new String(key);
            return getBookmark(urlHash);
        } catch (final kelondroException e) {
            return null;
        }
    }

    /**
     * Removal is not implemented; calling this method has no effect.
     */
    public void remove() {
        // intentionally left empty
    }
}
/**
* Comparator to sort bookmark url hashes according to the timestamps of their bookmarks
*/
public class bookmarkComparator implements Comparator<String> {
    private final boolean newestFirst;

    /**
     * @param newestFirst newest first, or oldest first?
     */
    public bookmarkComparator(final boolean newestFirst){
        this.newestFirst=newestFirst;
    }

    /**
     * Compares two url hashes by the timestamps of the bookmarks they resolve to.
     * Bookmarks with the same timestamp are ordered by their hash, so that two
     * different hashes never compare as equal while a hash compared with itself does.
     * @param obj1 url hash of the first bookmark
     * @param obj2 url hash of the second bookmark
     * @return negative, zero or positive as required by Comparator;
     *         0 as well if one of the hashes cannot be resolved to a bookmark
     */
    public int compare(final String obj1, final String obj2) {
        final Bookmark bm1=getBookmark(obj1);
        final Bookmark bm2=getBookmark(obj2);
        if(bm1==null || bm2==null)
            return 0; //XXX: i think this should not happen? maybe this needs further tracing of the bug
        final long ts1=bm1.getTimeStamp();
        final long ts2=bm2.getTimeStamp();
        if(ts1 == ts2){
            // tie-break instead of an arbitrary -1: keeps the ordering symmetric
            return obj1.compareTo(obj2);
        }
        // compare directly; a difference of two longs may overflow
        final int oldestFirstOrder = (ts1 < ts2) ? -1 : 1;
        return this.newestFirst ? -oldestFirstOrder : oldestFirstOrder;
    }
}
/**
* Comparator to sort objects of type Tag according to their names
*/
public static class tagComparator implements Comparator<Tag>, Serializable {
    /**
     * generated serial
     */
    private static final long serialVersionUID = 3105057490088903930L;

    /**
     * Orders two tags by String.compareTo on their tag names.
     */
    public int compare(final Tag obj1, final Tag obj2){
        final String firstName = obj1.getTagName();
        final String secondName = obj2.getTagName();
        return firstName.compareTo(secondName);
    }
}
/**
 * Comparator to sort objects of type Tag according to their size, largest first.
 * Tags of the same size are ordered by their names.
 */
public static class tagSizeComparator implements Comparator<Tag>, Serializable {
    /**
     * generated serial
     */
    private static final long serialVersionUID = 4149185397646373251L;

    /**
     * @return 1 if obj1 is smaller than obj2, -1 if it is larger; for equal sizes
     *         the result of comparing the tag names (0 only for the same name)
     */
    public int compare(final Tag obj1, final Tag obj2) {
        final int size1 = obj1.size();
        final int size2 = obj2.size();
        if (size1 < size2) return 1;
        if (size1 > size2) return -1;
        // same size: order by name so that compare(a,b) and compare(b,a) agree
        return obj1.getTagName().compareTo(obj2.getTagName());
    }
}
}