You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
yacy_search_server/source/net/yacy/data/ymark/YMarkTables.java

509 lines
20 KiB

// YMarkTables.java
// (C) 2011 by Stefan Förster, sof@gmx.de, Norderstedt, Germany
// first published 2010 on http://yacy.net
//
// This is a part of YaCy, a peer-to-peer based web search engine
//
// $LastChangedDate$
// $LastChangedRevision$
// $LastChangedBy$
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package net.yacy.data.ymark;
import java.io.IOException;
import java.net.MalformedURLException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.EnumMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.concurrent.ConcurrentHashMap;
import javax.swing.event.ChangeEvent;
import javax.swing.event.ChangeListener;
import net.yacy.cora.document.ASCII;
import net.yacy.cora.protocol.ClientIdentification;
import net.yacy.cora.util.ConcurrentLog;
import net.yacy.cora.util.SpaceExceededException;
import net.yacy.data.WorkTables;
import net.yacy.document.Document;
import net.yacy.document.Parser.Failure;
import net.yacy.kelondro.blob.TableColumnIndexException;
import net.yacy.kelondro.blob.Tables;
import net.yacy.kelondro.blob.Tables.Row;
import net.yacy.kelondro.blob.TablesColumnIndex;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.repository.LoaderDispatcher;
public class YMarkTables {
/**
 * The kinds of per-user tables known to the bookmark store. Every constant
 * carries the suffix that is appended to a user name to form a table name.
 */
public static enum TABLES {
    BOOKMARKS ("_bookmarks"),
    TAGS ("_tags"),
    FOLDERS ("_folders");

    /** Table name suffix; assigned once in the constructor and never changed. */
    private final String basename;

    private TABLES(final String b) {
        this.basename = b;
    }

    /**
     * @return the table name suffix of this table kind, e.g. {@code "_bookmarks"}
     */
    public String basename() {
        return this.basename;
    }

    /**
     * @param bmk_user name of the bookmark user
     * @return {@code bmk_user} directly followed by this table kind's suffix
     */
    public String tablename(final String bmk_user) {
        return bmk_user+this.basename;
    }
}
/**
 * URL scheme prefixes, each including the trailing {@code "://"}.
 */
public static enum PROTOCOLS {
    HTTP ("http://"),
    HTTPS ("https://");

    /** Scheme prefix; assigned once in the constructor and never changed. */
    private final String protocol;

    private PROTOCOLS(final String s) {
        this.protocol = s;
    }

    /**
     * @return the bare scheme prefix, e.g. {@code "http://"}
     */
    public String protocol() {
        return this.protocol;
    }

    /**
     * @param s the text to put behind the scheme prefix
     * @return the scheme prefix directly followed by {@code s}
     */
    public String protocol(final String s) {
        return this.protocol+s;
    }
}
/** Path of the top-level folder; getFolders() does not add the root as an explicit entry when it equals this value. */
public final static String FOLDERS_ROOT = "/";
/** Log channel name used for the index-build messages written by buildIndex(). */
public final static String BOOKMARKS_LOG = "BOOKMARKS";
// NOTE(review): presumably the user name of the administrator's bookmark tables; not referenced in the visible part of this class - confirm against callers.
public final static String USER_ADMIN = "admin";
// NOTE(review): presumably the message shown when a bookmark request lacks authentication; not referenced in the visible part of this class - confirm against callers.
public final static String USER_AUTHENTICATE_MSG = "Bookmark user authentication required!";
/** Initial capacity of the StringBuilder that getFolders() uses to assemble folder paths. */
public final static int BUFFER_LENGTH = 256;
/** Backing table store; all bookmark tables and their indexes are accessed through it. */
private final WorkTables worktables;
/** Progress listeners, keyed by the name handed to getProgressListener(). */
private final Map<String, ChangeListener> progressListeners;
/** Set to true by addTags(), replaceTags() and addBookmark(); never reset to false within this class. */
public boolean dirty = false;
/**
 * Sets up the bookmark layer on top of the given table store and then runs
 * {@link #buildIndex()} once.
 *
 * @param wt the table store; it is cast to {@link WorkTables}, so any other
 *           implementation makes the constructor fail with a ClassCastException
 */
public YMarkTables(final Tables wt) {
    this.progressListeners = new ConcurrentHashMap<String, ChangeListener>();
    this.worktables = (WorkTables) wt;
    buildIndex();
}
/**
 * Creates a new {@link ProgressListener} and registers it under the given
 * name. A listener that was registered under the same name before is replaced.
 *
 * @param thread the name to register the listener under
 * @return the newly created listener
 */
public ChangeListener getProgressListener(String thread) {
    final ProgressListener listener = new ProgressListener();
    this.progressListeners.put(thread, listener);
    return listener;
}
/**
 * Drops the listener registered under the given name, if there is one.
 *
 * @param thread the name that was used in {@link #getProgressListener(String)}
 */
public void removeProgressListener(final String thread) {
    progressListeners.remove(thread);
}
/**
 * Change listener that tracks the progress of a {@link MonitoredReader} as a
 * percentage.
 */
public class ProgressListener implements ChangeListener {
    // the progress in %
    private int progress = 0;

    /**
     * Recomputes the percentage from the reader that fired the event.
     *
     * @param e the event; its source is cast to {@link MonitoredReader}
     */
    @Override
    public void stateChanged(ChangeEvent e) {
        final MonitoredReader mreader = (MonitoredReader)e.getSource();
        final double max = mreader.maxProgress();
        // Scale in floating point BEFORE dividing. Dividing first truncates to 0 for
        // every intermediate state if the reader reports integral values
        // (NOTE(review): return types of getProgress()/maxProgress() are not visible
        // here - the expression below is correct for integral and floating types alike).
        // A non-positive maximum is reported as 0% instead of dividing by zero.
        this.progress = (max > 0) ? (int)((mreader.getProgress() * 100.0) / max) : 0;
    }

    /**
     * @return the progress in percent as computed by the last change event, 0 before the first event
     */
    public int progress() {
        return this.progress;
    }
}
/**
 * Walks over all table names of the table store and, for every table whose
 * name ends with the bookmarks suffix, builds the column index if that index
 * is of type RAM or is still empty. Timing and size are written to the
 * bookmark log. I/O errors are logged; index errors are ignored.
 */
public void buildIndex() {
    for (final Iterator<String> tables = this.worktables.iterator(); tables.hasNext();) {
        final String table = tables.next();
        if (!table.endsWith(TABLES.BOOKMARKS.basename())) {
            // not a bookmark table
            continue;
        }
        try {
            final long start = System.currentTimeMillis();
            final TablesColumnIndex idx = this.worktables.getIndex(table);
            final boolean needsBuild = idx.getType() == TablesColumnIndex.INDEXTYPE.RAM || idx.size() == 0;
            if (!needsBuild) {
                continue;
            }
            ConcurrentLog.info(YMarkTables.BOOKMARKS_LOG, "buildIndex() "+YMarkEntry.BOOKMARK.indexColumns().keySet().toString());
            idx.buildIndex(YMarkEntry.BOOKMARK.indexColumns(), this.worktables.iterator(table));
            ConcurrentLog.info(YMarkTables.BOOKMARKS_LOG,
                    "build "+idx.getType().name()+" index for columns "+YMarkEntry.BOOKMARK.indexColumns().keySet().toString()
                    +" of table "+table+" containing "+this.worktables.size(table)+ " bookmarks"
                    +" ("+(System.currentTimeMillis()-start)+"ms)");
        } catch (final IOException e) {
            ConcurrentLog.logException(e);
        } catch (final TableColumnIndexException e) {
            // currently nothing to do...
        }
    }
}
/**
 * Removes the bookmark with the given id from the user's bookmark table and,
 * if an index on the folders column exists, from the table's index as well.
 * The index removal is attempted even when no row was found in the table.
 *
 * @param bmk_user name of the bookmark user
 * @param urlHash  id (primary key) of the bookmark
 * @throws IOException            passed on from the table store
 * @throws SpaceExceededException passed on from the table store
 */
public void deleteBookmark(final String bmk_user, final byte[] urlHash) throws IOException, SpaceExceededException {
    final String table = TABLES.BOOKMARKS.tablename(bmk_user);
    if (this.worktables.select(table, urlHash) != null) {
        this.worktables.delete(table, urlHash);
    }
    if (!this.worktables.hasIndex(table, YMarkEntry.BOOKMARK.FOLDERS.key())) {
        return;
    }
    try {
        this.worktables.getIndex(table).delete(urlHash);
    } catch (final TableColumnIndexException e) {
        // currently nothing to do...
    }
}
/**
 * Removes the bookmark for the given url; the bookmark id is derived from
 * the url via {@code YMarkUtil.getBookmarkId}.
 *
 * @param bmk_user name of the bookmark user
 * @param url      url of the bookmark to delete
 * @throws IOException            passed on from id computation or the table store
 * @throws SpaceExceededException passed on from the table store
 */
public void deleteBookmark(final String bmk_user, final String url) throws IOException, SpaceExceededException {
    this.deleteBookmark(bmk_user, YMarkUtil.getBookmarkId(url));
}
/**
 * Counts the tags of the given bookmark rows. Rows without a tags column are
 * skipped. A tag seen for the first time is stored as a fresh {@code YMarkTag};
 * every further occurrence increments that entry.
 *
 * @param rowIterator the bookmark rows to inspect
 * @return the tags found, sorted by tag name
 */
public TreeMap<String, YMarkTag> getTags(final Iterator<Row> rowIterator) {
    final TreeMap<String,YMarkTag> result = new TreeMap<String,YMarkTag>();
    while (rowIterator.hasNext()) {
        final Tables.Row row = rowIterator.next();
        if (!row.containsKey(YMarkEntry.BOOKMARK.TAGS.key())) {
            continue;
        }
        final String tagsString = row.get(YMarkEntry.BOOKMARK.TAGS.key(), YMarkEntry.BOOKMARK.TAGS.deflt());
        for (final String tag : YMarkUtil.keysStringToSet(tagsString)) {
            final YMarkTag known = result.get(tag);
            if (known == null) {
                result.put(tag, new YMarkTag(tag));
            } else {
                known.inc();
            }
        }
    }
    return result;
}
/**
 * Returns all tags of a user together with their usage counts. If the
 * bookmark table has an index on the tags column, the result is read from
 * that index; if there is no such index, or reading it fails, all bookmark
 * rows are scanned instead.
 *
 * @param bmk_user name of the bookmark user
 * @return the user's tags, sorted by tag name
 * @throws IOException passed on from the table store
 */
public TreeMap<String, YMarkTag> getTags(final String bmk_user) throws IOException {
    final String table = TABLES.BOOKMARKS.tablename(bmk_user);
    if (this.worktables.hasIndex(table, YMarkEntry.BOOKMARK.TAGS.key())) {
        try {
            final TreeMap<String,YMarkTag> indexed = new TreeMap<String,YMarkTag>();
            final TablesColumnIndex idx = this.worktables.getIndex(table);
            for (final Iterator<String> keys = idx.keySet(YMarkEntry.BOOKMARK.TAGS.key()).iterator(); keys.hasNext();) {
                final String tag = keys.next();
                final int count = idx.get(YMarkEntry.BOOKMARK.TAGS.key(), tag).size();
                indexed.put(tag, new YMarkTag(tag, count));
            }
            return indexed;
        } catch (final Exception e) {
            // nothing to do
        }
    }
    // no usable index: count the tags row by row
    return getTags(this.worktables.iterator(table));
}
/**
 * Collects the folder paths of a user's bookmarks that lie below the given
 * root, including every intermediate ancestor path between a folder and the
 * root. The root itself is added to the result unless it equals FOLDERS_ROOT.
 *
 * If the bookmark table has an index on the folders column, the folder names
 * are taken from the index keys; otherwise (or if reading the index throws)
 * all bookmark rows are scanned.
 *
 * @param bmk_user name of the bookmark user
 * @param root     the folder path whose descendants are requested
 * @return the sorted set of folder paths
 * @throws IOException passed on from the table store or from decoding the folders value
 */
public TreeSet<String> getFolders(final String bmk_user, String root) throws IOException {
final TreeSet<String> folders = new TreeSet<String>();
// scratch buffer that holds one folder path while it is cut back level by level
final StringBuilder path = new StringBuilder(BUFFER_LENGTH);
// prefix that every strict descendant of root starts with
final String r = root + YMarkUtil.FOLDERS_SEPARATOR;
final String bmk_table = TABLES.BOOKMARKS.tablename(bmk_user);
// if exists, try the index first
if(this.worktables.hasIndex(bmk_table, YMarkEntry.BOOKMARK.FOLDERS.key())) {
TablesColumnIndex index;
try {
index = this.worktables.getIndex(bmk_table);
final Iterator<String> fiter = index.keySet(YMarkEntry.BOOKMARK.FOLDERS.key()).iterator();
while(fiter.hasNext()) {
final String folder = fiter.next();
if(folder.startsWith(r)) {
path.setLength(0);
path.append(folder);
// walk from the folder up towards root, adding every level on the way
while(path.length() > 0 && !path.toString().equals(root)){
final String p = path.toString();
// floor(p) is the greatest element <= p, so it equals p only if p is already in the set
if(folders.isEmpty() || !p.equals(folders.floor(p))) {
folders.add(p);
}
// cut off the last path segment
path.setLength(path.lastIndexOf(YMarkUtil.FOLDERS_SEPARATOR));
}
}
}
if (!root.equals(YMarkTables.FOLDERS_ROOT)) { folders.add(root); }
return folders;
} catch (final Exception e) {
// fall through to the full scan below; entries already added to folders are kept
ConcurrentLog.logException(e);
}
}
// by default iterate all bookmarks and extract folder information
final Iterator<Tables.Row> bit = this.worktables.iterator(bmk_table);
Tables.Row bmk_row = null;
while(bit.hasNext()) {
bmk_row = bit.next();
if(bmk_row.containsKey(YMarkEntry.BOOKMARK.FOLDERS.key())) {
// NOTE(review): the folders value is split with TAGS_SEPARATOR (used as a regex by String.split);
// presumably folder lists use the same separator as tag lists - confirm against YMarkUtil
final String[] folderArray = (new String(bmk_row.get(YMarkEntry.BOOKMARK.FOLDERS.key()),"UTF8")).split(YMarkUtil.TAGS_SEPARATOR);
for (final String folder : folderArray) {
// only folders whose first root.length()+1 characters equal the prefix r are considered
if(folder.length() > root.length() && folder.substring(0, root.length()+1).equals(r)) {
if(!folders.contains(folder)) {
path.setLength(0);
path.append(folder);
//TODO: get rid of .toString.equals()
// walk from the folder up towards root, adding every level on the way
while(path.length() > 0 && !path.toString().equals(root)){
folders.add(path.toString());
// cut off the last path segment
path.setLength(path.lastIndexOf(YMarkUtil.FOLDERS_SEPARATOR));
}
}
}
}
}
}
if (!root.equals(YMarkTables.FOLDERS_ROOT)) { folders.add(root); }
return folders;
}
/**
 * @param bmk_user name of the bookmark user
 * @return the size of the user's bookmark table as reported by the table store
 * @throws IOException passed on from the table store
 */
public int getSize(final String bmk_user) throws IOException {
    return this.worktables.size(TABLES.BOOKMARKS.tablename(bmk_user));
}
/**
 * Looks up bookmarks through the folders column of the user's bookmark table.
 *
 * @param bmk_user      name of the bookmark user
 * @param foldersString the folder expression handed to the table store lookup
 * @return iterator over the matching bookmark rows
 */
public Iterator<Tables.Row> getBookmarksByFolder(final String bmk_user, final String foldersString) {
    return this.worktables.getByIndex(
            TABLES.BOOKMARKS.tablename(bmk_user),
            YMarkEntry.BOOKMARK.FOLDERS.key(),
            YMarkEntry.BOOKMARK.FOLDERS.seperator(),
            foldersString);
}
/**
 * Looks up bookmarks through the tags column of the user's bookmark table.
 *
 * @param bmk_user   name of the bookmark user
 * @param tagsString the tag expression handed to the table store lookup
 * @return iterator over the matching bookmark rows
 */
public Iterator<Tables.Row> getBookmarksByTag(final String bmk_user, final String tagsString) {
    return this.worktables.getByIndex(
            TABLES.BOOKMARKS.tablename(bmk_user),
            YMarkEntry.BOOKMARK.TAGS.key(),
            YMarkEntry.BOOKMARK.TAGS.seperator(),
            tagsString);
}
/**
 * Drains the iterator into a list, leaving out null rows, and sorts that
 * list with a {@code TablesRowComparator} built from the given arguments.
 *
 * @param rowIterator the rows to sort
 * @param sortname    passed to the comparator as sort name
 * @param sortorder   passed to the comparator as sort order
 * @return the sorted rows
 */
public List<Row> orderBookmarksBy(final Iterator<Row> rowIterator, final String sortname, final String sortorder) {
    final List<Row> rows = new ArrayList<Row>();
    while (rowIterator.hasNext()) {
        final Row candidate = rowIterator.next();
        if (candidate == null) {
            continue;
        }
        rows.add(candidate);
    }
    final TablesRowComparator order = new TablesRowComparator(sortname, sortorder);
    Collections.sort(rows, order);
    return rows;
}
/**
 * Adds tags to the bookmark of the given url by submitting an entry that
 * carries only the url and the cleaned tag string. The dirty flag is raised
 * in any case, also when the tag string is empty and nothing is submitted.
 *
 * @param bmk_user  name of the bookmark user
 * @param url       url of the bookmark
 * @param tagString the tags to add
 * @param merge     passed on as the merge-tags flag of addBookmark
 * @throws IOException passed on from addBookmark
 */
public void addTags(final String bmk_user, final String url, final String tagString, final boolean merge) throws IOException {
    if (tagString.isEmpty()) {
        this.dirty = true;
        return;
    }
    // no defaults: the entry must only touch the tags of the bookmark
    final YMarkEntry entry = new YMarkEntry(false);
    entry.put(YMarkEntry.BOOKMARK.URL.key(), url);
    final String cleanedTags = YMarkUtil.cleanTagsString(tagString);
    entry.put(YMarkEntry.BOOKMARK.TAGS.key(), cleanedTags);
    addBookmark(bmk_user, entry, merge, true);
    this.dirty = true;
}
/**
 * Replaces tags in the given bookmark rows: the tags listed in
 * {@code tagString} are removed from each row (compared in lower case) and
 * {@code replaceString} is appended. Every changed row is written back to
 * the user's bookmark table and, if the table has an index, to the index.
 * Null rows delivered by the iterator are skipped.
 *
 * @param rowIterator   the bookmark rows to modify
 * @param bmk_user      name of the bookmark user
 * @param tagString     the tags to remove
 * @param replaceString the tag string to add in their place
 * @throws IOException passed on from the table store
 */
public void replaceTags(final Iterator<Row> rowIterator, final String bmk_user, final String tagString, final String replaceString) throws IOException {
    final String bmk_table = TABLES.BOOKMARKS.tablename(bmk_user);
    final HashSet<String> remove = YMarkUtil.keysStringToSet(YMarkUtil.cleanTagsString(tagString.toLowerCase()));
    final StringBuilder t = new StringBuilder(200);
    while (rowIterator.hasNext()) {
        final Row row = rowIterator.next();
        if (row == null) {
            // nothing to update; previously this case ran into row.put() and failed
            continue;
        }
        // start from an empty buffer for every row, otherwise the tags of all
        // previously processed rows would leak into this one
        t.setLength(0);
        final HashSet<String> tags = YMarkUtil.keysStringToSet(row.get(YMarkEntry.BOOKMARK.TAGS.key(), YMarkEntry.BOOKMARK.TAGS.deflt()).toLowerCase());
        tags.removeAll(remove);
        t.append(YMarkUtil.keySetToString(tags));
        t.append(YMarkUtil.TAGS_SEPARATOR);
        t.append(replaceString);
        row.put(YMarkEntry.BOOKMARK.TAGS.key(), YMarkUtil.cleanTagsString(t.toString()));
        this.worktables.update(bmk_table, row);
        if(this.worktables.hasIndex(bmk_table)) {
            try {
                this.worktables.getIndex(bmk_table).update(YMarkEntry.BOOKMARK.TAGS.key(), YMarkEntry.BOOKMARK.TAGS.seperator(), row);
            } catch (final Exception e) {
                // index maintenance is best effort; the table row itself is already updated
            }
        }
    }
    this.dirty = true;
}
/**
 * Adds a folder to the bookmark of the given url by submitting an entry that
 * carries only the url and the folder, with tag and folder merging enabled.
 * An empty folder string is ignored.
 *
 * @param bmk_user name of the bookmark user
 * @param url      url of the bookmark
 * @param folder   the folder to add
 * @throws IOException passed on from addBookmark
 */
public void addFolder(final String bmk_user, final String url, final String folder) throws IOException {
    if (folder.isEmpty()) {
        return;
    }
    // no defaults: the entry must only add the folder
    final YMarkEntry entry = new YMarkEntry(false);
    entry.put(YMarkEntry.BOOKMARK.URL.key(), url);
    entry.put(YMarkEntry.BOOKMARK.FOLDERS.key(), folder);
    addBookmark(bmk_user, entry, true, true);
}
/**
 * Stamps the bookmark of the given url with a visit date by submitting an
 * entry that carries only the url and the string form of a new
 * {@code YMarkDate}.
 *
 * @param bmk_user name of the bookmark user
 * @param url      url of the visited bookmark
 * @throws IOException passed on from addBookmark
 */
public void visited(final String bmk_user, final String url) throws IOException {
    // created without defaults
    final YMarkEntry entry = new YMarkEntry(false);
    entry.put(YMarkEntry.BOOKMARK.URL.key(), url);
    final String visitDate = new YMarkDate().toString();
    entry.put(YMarkEntry.BOOKMARK.DATE_VISITED.key(), visitDate);
    addBookmark(bmk_user, entry, true, true);
}
/**
 * Convenience variant of createBookmark that takes the url as a string and
 * wraps it into a {@code DigestURI} before delegating.
 *
 * @param loader        loader used to fetch the document
 * @param url           url of the page to bookmark
 * @param agent         client identification used for loading
 * @param bmk_user      name of the bookmark user
 * @param autotag       whether tags are derived from the loaded document
 * @param tagsString    tags to assign
 * @param foldersString folders to assign
 * @throws IOException passed on from url construction or the delegate
 * @throws Failure     passed on from the delegate
 */
public void createBookmark(final LoaderDispatcher loader, final String url, final ClientIdentification.Agent agent, final String bmk_user, final boolean autotag, final String tagsString, final String foldersString) throws IOException, Failure {
    final DigestURI target = new DigestURI(url);
    createBookmark(loader, target, agent, bmk_user, autotag, tagsString, foldersString);
}
/**
 * Loads the document behind the url and stores a bookmark for it. If the
 * user has no bookmark for the normalized url yet, the entry is marked as
 * non-public and gets title and description from the loaded metadata.
 * Folders fall back to the default folder when the cleaned folder string is
 * empty. Tags are the optional auto tags followed by the given tags. The
 * entry is stored with tag and folder merging enabled.
 *
 * @param loader        loader used to fetch the document
 * @param url           url of the page to bookmark
 * @param agent         client identification used for loading
 * @param bmk_user      name of the bookmark user
 * @param autotag       whether tags are derived from the loaded document
 * @param tagsString    tags to assign
 * @param foldersString folders to assign
 * @throws IOException passed on from loading or the table store
 * @throws Failure     passed on from loading the document
 */
public void createBookmark(final LoaderDispatcher loader, final DigestURI url, final ClientIdentification.Agent agent, final String bmk_user, final boolean autotag, final String tagsString, final String foldersString) throws IOException, Failure {
    final YMarkEntry entry = new YMarkEntry(false);
    final YMarkMetadata meta = new YMarkMetadata(url);
    final Document doc = meta.loadDocument(loader, agent);
    final EnumMap<YMarkMetadata.METADATA, String> info = meta.loadMetadata();

    final String normalized = url.toNormalform(true);
    entry.put(YMarkEntry.BOOKMARK.URL.key(), normalized);

    final boolean known = this.worktables.has(YMarkTables.TABLES.BOOKMARKS.tablename(bmk_user), YMarkUtil.getBookmarkId(normalized));
    if (!known) {
        entry.put(YMarkEntry.BOOKMARK.PUBLIC.key(), "false");
        entry.put(YMarkEntry.BOOKMARK.TITLE.key(), info.get(YMarkMetadata.METADATA.TITLE));
        entry.put(YMarkEntry.BOOKMARK.DESC.key(), info.get(YMarkMetadata.METADATA.DESCRIPTION));
    }

    final String cleanedFolders = YMarkUtil.cleanFoldersString(foldersString);
    if (!cleanedFolders.isEmpty()) {
        entry.put(YMarkEntry.BOOKMARK.FOLDERS.key(), cleanedFolders);
    } else {
        entry.put(YMarkEntry.BOOKMARK.FOLDERS.key(), YMarkEntry.BOOKMARK.FOLDERS.deflt());
    }

    final StringBuilder allTags = new StringBuilder();
    if (autotag) {
        allTags.append(YMarkAutoTagger.autoTag(doc, 3, this.worktables.bookmarks.getTags(bmk_user)));
    }
    if (!tagsString.isEmpty()) {
        allTags.append(YMarkUtil.TAGS_SEPARATOR).append(tagsString);
    }
    entry.put(YMarkEntry.BOOKMARK.TAGS.key(), YMarkUtil.cleanTagsString(allTags.toString()));

    this.worktables.bookmarks.addBookmark(bmk_user, entry, true, true);
}
/**
 * Tells whether the user's bookmark table contains the given url hash.
 *
 * @param bmk_user name of the bookmark user
 * @param urlhash  the bookmark id as ASCII string
 * @return true if the table store reports the entry; false if it does not
 *         or if the lookup fails with an IOException
 */
public boolean hasBookmark(final String bmk_user, final String urlhash) {
    boolean found;
    try {
        found = this.worktables.has(TABLES.BOOKMARKS.tablename(bmk_user), ASCII.getBytes(urlhash));
    } catch (final IOException e) {
        found = false;
    }
    return found;
}
/**
 * Inserts a new bookmark or updates an existing one. The bookmark id is
 * derived from the entry's url; an invalid url is logged and nothing else
 * happens. If no row exists for the id, the entry is inserted (with the
 * current time as added/modified date unless it already carries an added
 * date). Otherwise every bookmark column of the existing row is updated:
 * values contained in {@code bmk} replace the stored ones, tags and folders
 * are optionally merged, the modified date is always refreshed. The table's
 * index, if present, is kept in sync on a best-effort basis. The dirty flag
 * is raised after a successful insert or update.
 *
 * @param bmk_user     name of the bookmark user
 * @param bmk          the entry with the values to store
 * @param mergeTags    true to merge new and stored tags, false to replace stored tags
 * @param mergeFolders true to merge new and stored folders, false to replace stored folders
 * @throws IOException passed on from the table store
 */
public void addBookmark(final String bmk_user, final YMarkEntry bmk, final boolean mergeTags, final boolean mergeFolders) throws IOException {
    final String bmk_table = TABLES.BOOKMARKS.tablename(bmk_user);
    final String date = String.valueOf(System.currentTimeMillis());
    byte[] urlHash = null;
    try {
        urlHash = YMarkUtil.getBookmarkId(bmk.get(YMarkEntry.BOOKMARK.URL.key()));
    } catch (final MalformedURLException e) {
        ConcurrentLog.info("BOOKMARKIMPORT", "invalid url: "+bmk.get(YMarkEntry.BOOKMARK.URL.key()));
    }
    if (urlHash == null) {
        // no usable id: nothing is stored and the dirty flag stays untouched
        return;
    }

    Tables.Row bmk_row = null;
    try {
        bmk_row = this.worktables.select(bmk_table, urlHash);
    } catch (final Exception e) {
        // a failed lookup is still handled like a missing entry, but it is no longer hidden
        ConcurrentLog.logException(e);
    }

    if (bmk_row == null) {
        // create and insert new entry
        if(!bmk.containsKey(YMarkEntry.BOOKMARK.DATE_ADDED.key())) {
            bmk.put(YMarkEntry.BOOKMARK.DATE_ADDED.key(), date);
            bmk.put(YMarkEntry.BOOKMARK.DATE_MODIFIED.key(), date);
        }
        this.worktables.insert(bmk_table, urlHash, bmk.getData());
        try {
            if(this.worktables.hasIndex(bmk_table))
                this.worktables.getIndex(bmk_table).add(YMarkEntry.BOOKMARK.indexColumns(), bmk, urlHash);
        } catch (final Exception e) {
            // nothing to do
        }
    } else {
        // modify and update existing entry
        for (final YMarkEntry.BOOKMARK b : YMarkEntry.BOOKMARK.values()) {
            switch(b) {
                case DATE_ADDED:
                    if(!bmk_row.containsKey(b.key()))
                        bmk_row.put(b.key(), date);
                    break;
                case DATE_MODIFIED:
                    bmk_row.put(b.key(), date);
                    break;
                case TAGS:
                    updateKeySetColumn(bmk_row, bmk, b, mergeTags);
                    break;
                case FOLDERS:
                    updateKeySetColumn(bmk_row, bmk, b, mergeFolders);
                    break;
                default:
                    if(bmk.containsKey(b.key())) {
                        bmk_row.put(b.key(), bmk.get(b.key()));
                    } else {
                        bmk_row.put(b.key(), bmk_row.get(b.key(), b.deflt()));
                    }
            }
        }
        // update bmk_table
        this.worktables.update(bmk_table, bmk_row);
        try {
            if(this.worktables.hasIndex(bmk_table))
                this.worktables.getIndex(bmk_table).update(YMarkEntry.BOOKMARK.indexColumns(), bmk_row);
        } catch (final Exception e) {
            // nothing to do
        }
    }
    this.dirty = true;
}

/**
 * Updates one set-valued column (tags or folders) of an existing row from a new entry.
 * Without a new value the stored value (or the column default) is written back;
 * with a new value it either replaces the stored one or, when merging, is united
 * with it, dropping the column default if other values are present.
 */
private static void updateKeySetColumn(final Tables.Row bmk_row, final YMarkEntry bmk, final YMarkEntry.BOOKMARK column, final boolean merge) {
    final HashSet<String> oldSet = YMarkUtil.keysStringToSet(bmk_row.get(column.key(), column.deflt()));
    if (!bmk.containsKey(column.key())) {
        bmk_row.put(column.key(), bmk_row.get(column.key(), column.deflt()));
        return;
    }
    if (!merge) {
        bmk_row.put(column.key(), bmk.get(column.key()));
        return;
    }
    final HashSet<String> newSet = YMarkUtil.keysStringToSet(bmk.get(column.key()));
    newSet.addAll(oldSet);
    if (newSet.size() > 1 && newSet.contains(column.deflt())) {
        newSet.remove(column.deflt());
    }
    bmk_row.put(column.key(), YMarkUtil.keySetToString(newSet));
}
}