From 4214f250d0e34ca914f45eb46c8a464d55ee2e24 Mon Sep 17 00:00:00 2001
From: reger
Date: Fri, 9 Jan 2015 02:06:30 +0100
Subject: [PATCH] Add option for extended search (Autosearch) to Bookmarks.html
asking all connected peers for the search term added as description to the
bookmark created by the bookmark icon. Intended for searches/research
projects with insufficient results from local and DHT-selected remote
target peers.
Function: the process checks newly created bookmarks for a description starting with "query=...", uses it to ask every peer for 20 search results, and adds them to the local index in a background job.
A link to start/stop the process is added to /Bookmarks.html
---
htroot/Bookmarks.html | 29 ++-
htroot/Bookmarks.java | 38 +++
source/net/yacy/data/BookmarksDB.java | 9 +
source/net/yacy/search/AutoSearch.java | 322 +++++++++++++++++++++++++
4 files changed, 397 insertions(+), 1 deletion(-)
create mode 100644 source/net/yacy/search/AutoSearch.java
diff --git a/htroot/Bookmarks.html b/htroot/Bookmarks.html
index 504732c83..22701bf1f 100644
--- a/htroot/Bookmarks.html
+++ b/htroot/Bookmarks.html
@@ -202,13 +202,40 @@ To see a list of all APIs, please visit the #[name]# (#[num]#)
#{/optlist}#
-
+
This starts a search of new or modified bookmarks since startup
+ in folder "search" with description "query="
+ Every peer online will be asked for results.
+
+
diff --git a/htroot/Bookmarks.java b/htroot/Bookmarks.java
index 266d94cf4..d7fc531e6 100644
--- a/htroot/Bookmarks.java
+++ b/htroot/Bookmarks.java
@@ -55,7 +55,9 @@ import net.yacy.data.BookmarksDB.Tag;
import net.yacy.document.Document;
import net.yacy.document.Parser;
import net.yacy.kelondro.data.meta.URIMetadataNode;
+import net.yacy.kelondro.workflow.BusyThread;
import net.yacy.peers.NewsPool;
+import net.yacy.search.AutoSearch;
import net.yacy.search.Switchboard;
import net.yacy.server.serverObjects;
import net.yacy.server.serverSwitch;
@@ -372,6 +374,42 @@ public class Bookmarks {
count = 0;
count = recurseFolders(BookmarkHelper.getFolderList("/", sb.bookmarksDB.getTagIterator(isAdmin)), "/", 0, true, "");
prop.put("display_folderlist", count);
+
+ // Autosearch control: report the state of the busy thread registered
+ // under the name "autosearch" and handle the start/stop requests
+ // posted to this page.
+ final BusyThread bt = sb.getThread("autosearch");
+ if (bt != null) {
+     // a thread named "autosearch" is currently deployed
+     prop.put("display_autosearchrunning", "1");
+     prop.put("display_autosearchrunning_msg", "");
+     if (post != null && post.containsKey("stopautosearch")) {
+         sb.terminateThread("autosearch", false);
+         prop.put("display_autosearchrunning_msg", "autosearch will terminate");
+         prop.put("display_autosearchrunning", "0");
+     }
+     final int jobs = bt.getJobCount();
+     prop.put("display_autosearchrunning_jobcount", jobs);
+
+     // result count and current query are only available when the
+     // deployed thread really is an AutoSearch instance
+     int cnt = 0;
+     String qstr = "";
+     if (bt instanceof AutoSearch) {
+         final AutoSearch autoSearch = (AutoSearch) bt;
+         cnt = autoSearch.gotresults;
+         // read the field once, then substitute the "---" placeholder for null
+         qstr = autoSearch.currentQuery;
+         if (qstr == null) {
+             qstr = "---";
+         }
+     }
+     prop.put("display_autosearchrunning_totalcount", cnt);
+     prop.put("display_autosearchrunning_query", qstr);
+
+ } else {
+     // no "autosearch" thread deployed; start one on request
+     prop.put("display_autosearchrunning", "0");
+     prop.put("display_autosearchrunning_msg", "");
+     if (post != null && post.containsKey("startautosearch")) {
+         sb.deployThread(
+                 "autosearch",
+                 "Auto Search",
+                 "query all peers for given search terms",
+                 null,
+                 new AutoSearch(Switchboard.getSwitchboard()),
+                 1000);
+         // fixed typo in the status message ("autsearch" -> "autosearch")
+         prop.put("display_autosearchrunning_msg", "autosearch job started");
+     }
+ }
}
return prop; // return from serverObjects respond()
}
diff --git a/source/net/yacy/data/BookmarksDB.java b/source/net/yacy/data/BookmarksDB.java
index 049513af3..c232b85af 100644
--- a/source/net/yacy/data/BookmarksDB.java
+++ b/source/net/yacy/data/BookmarksDB.java
@@ -208,6 +208,15 @@ public class BookmarksDB {
return set.iterator();
}
+ /**
+  * Creates an iterator backed by a new {@code bookmarkIterator(true)}.
+  *
+  * @return the new iterator, or {@code null} if creating it failed with an
+  *         {@link IOException} (the exception is logged, not rethrown)
+  */
+ public Iterator getBookmarksIterator() {
+     try {
+         return new bookmarkIterator(true);
+     } catch (final IOException e) {
+         // log and fall back to null
+         ConcurrentLog.logException(e);
+         return null;
+     }
+ }
+
public Iterator getBookmarksIterator(final String tagName, final boolean priv){
final TreeSet set=new TreeSet(new bookmarkComparator(true));
final String tagHash=BookmarkHelper.tagHash(tagName);
diff --git a/source/net/yacy/search/AutoSearch.java b/source/net/yacy/search/AutoSearch.java
new file mode 100644
index 000000000..3548df031
--- /dev/null
+++ b/source/net/yacy/search/AutoSearch.java
@@ -0,0 +1,322 @@
+/**
+ * AutoSearch.java
+ * Copyright 2015 by Burkhard Buelte
+ * First released 09.01.2015 at http://yacy.net
+ *
+ * This is a part of YaCy, a peer-to-peer based web search engine
+ *
+ * LICENSE
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program in the file lgpl21.txt
+ * If not, see <http://www.gnu.org/licenses/>.
+ */
+
+package net.yacy.search;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Properties;
+import java.util.Set;
+import net.yacy.cora.document.feed.RSSFeed;
+import net.yacy.cora.document.id.DigestURL;
+import net.yacy.cora.document.id.MultiProtocolURL;
+import static net.yacy.cora.federate.opensearch.SRURSSConnector.loadSRURSS;
+import net.yacy.cora.federate.solr.connector.RemoteSolrConnector;
+import net.yacy.cora.federate.solr.connector.SolrConnector;
+import net.yacy.cora.federate.solr.instance.RemoteInstance;
+import net.yacy.cora.federate.yacy.CacheStrategy;
+import net.yacy.cora.protocol.ClientIdentification;
+import net.yacy.cora.util.ConcurrentLog;
+import net.yacy.data.BookmarksDB.Bookmark;
+import net.yacy.kelondro.workflow.AbstractBusyThread;
+import net.yacy.peers.Seed;
+import net.yacy.search.schema.CollectionSchema;
+import org.apache.solr.client.solrj.SolrQuery;
+import org.apache.solr.client.solrj.response.QueryResponse;
+import org.apache.solr.common.SolrDocument;
+import org.apache.solr.common.SolrDocumentList;
+import org.apache.solr.common.params.CommonParams;
+
+/**
+ * AutoSearch retrieves queries from Bookmarks or a property file (if existing)
+ * and loops over a list of connected peers, asking each for results which are
+ * added to the local index.
+ */
+public class AutoSearch extends AbstractBusyThread {
+
+ private Set querystack; // serach query
+ public String currentQuery = null; // current query
+ private Set currentTargets = null; // peer hashes
+ final Switchboard sb;
+ public int gotresults;
+ private long lastInitTime; // to recognize new data (Bookmarks) to import
+
+ public AutoSearch(Switchboard xsb) {
+ super(3000, 1000); // set lower limits of cycle delay
+ this.setIdleSleep(60000); // set actual cycle delays
+ this.setBusySleep(10000);
+ this.sb = xsb;
+
+ gotresults = 0;
+ querystack = new HashSet();
+
+ this.lastInitTime = System.currentTimeMillis() - 600000; // init to now - 10 min
+ if (!checkBookmarkDB()) {
+ try {
+ // check for old queries in temp property file
+ File pfile = new File(xsb.dataPath, "DATA/SETTINGS/autosearch.conf");
+ if (pfile.exists()) {
+ ConcurrentLog.info(AutoSearch.class.getName(), "read queries from file " + pfile.getAbsolutePath());
+ Properties prop = new Properties();
+ FileInputStream fileIn = new FileInputStream(pfile);
+ prop.load(fileIn);
+ if (prop.size() > 0) {
+ Set