From 8e751d754a38630c4d22d665f6bb3846b1dc9f84 Mon Sep 17 00:00:00 2001
From: reger
Date: Fri, 9 Jan 2015 01:31:57 +0100
Subject: [PATCH 1/4] - add javadoc to busythread with hint about the init
parameter usage - remove obsolete 10_httpd config parameter
---
htroot/PerformanceQueues_p.java | 3 --
.../kelondro/workflow/AbstractBusyThread.java | 40 +++++++++++++++----
2 files changed, 32 insertions(+), 11 deletions(-)
diff --git a/htroot/PerformanceQueues_p.java b/htroot/PerformanceQueues_p.java
index 289bd3d5e..c6651b9ad 100644
--- a/htroot/PerformanceQueues_p.java
+++ b/htroot/PerformanceQueues_p.java
@@ -180,7 +180,6 @@ public class PerformanceQueues_p {
// check values to prevent short-cut loops
if (idlesleep < 1000) idlesleep = 1000;
- if (threadName.equals("10_httpd")) { idlesleep = 0; busysleep = 0; memprereq = 0; loadprereq = 9; }
sb.setThreadPerformance(threadName, idlesleep, busysleep, memprereq, loadprereq);
idlesleep = sb.getConfigLong(threadName + "_idlesleep", idlesleep);
@@ -194,7 +193,6 @@ public class PerformanceQueues_p {
loadprereq = Double.parseDouble(d(defaultSettings.get(threadName + "_loadprereq"), String.valueOf(memprereq)));
// check values to prevent short-cut loops
if (idlesleep < 1000) idlesleep = 1000;
- if (threadName.equals("10_httpd")) { idlesleep = 0; busysleep = 0; memprereq = 0; loadprereq = 9; }
//if (threadName.equals(plasmaSwitchboardConstants.CRAWLJOB_LOCAL_CRAWL) && (busysleep < 50)) busysleep = 50;
sb.setThreadPerformance(threadName, idlesleep, busysleep, memprereq, loadprereq);
}
@@ -204,7 +202,6 @@ public class PerformanceQueues_p {
prop.put("table_" + c + "_loadprereq", loadprereq);
// disallow setting of memprereq for indexer to prevent db from throwing OOMs
// prop.put("table_" + c + "_disabled", /*(threadName.endsWith("_indexing")) ? 1 :*/ "0");
- prop.put("table_" + c + "_disabled", threadName.equals("10_httpd") ? "1" : "0" ); // httpd hardcoded defaults
prop.put("table_" + c + "_recommendation", threadName.endsWith("_indexing") ? "1" : "0");
prop.putNum("table_" + c + "_recommendation_value", rwi == null ? 0 : threadName.endsWith("_indexing") ? (rwi.minMem() / 1024) : 0);
c++;
diff --git a/source/net/yacy/kelondro/workflow/AbstractBusyThread.java b/source/net/yacy/kelondro/workflow/AbstractBusyThread.java
index a8e3477ef..de15a0d80 100644
--- a/source/net/yacy/kelondro/workflow/AbstractBusyThread.java
+++ b/source/net/yacy/kelondro/workflow/AbstractBusyThread.java
@@ -41,13 +41,23 @@ public abstract class AbstractBusyThread extends AbstractThread implements BusyT
private boolean intermissionObedient = true;
private final Object syncObject = new Object();
- private long idleSleep = Long.MIN_VALUE;
- private long busySleep = Long.MIN_VALUE;
-
- public AbstractBusyThread(long idleSleep, long busySleep) {
+ private final long idleSleep; // min allowed idle sleep
+ private final long busySleep; // min allowed busy sleep
+
+ /**
+ * Initializes the AbstractBusyThread with min allowed sleep time (in milliseconds)
+ * and sets the actual sleep time to these values.
+ *
+ * @param minidleSleep defines min idle sleep time that can be set via setIdleSleep()
+ * @param minbusySleep defines min busy sleep time that can be set via setBusySleep()
+ */
+ public AbstractBusyThread(long minidleSleep, long minbusySleep) {
super();
- this.idleSleep = idleSleep;
- this.busySleep = busySleep;
+ this.idleSleep = minidleSleep; // set min allowed
+ this.busySleep = minbusySleep;
+
+ this.idlePause = minidleSleep; // initialize actual value (may be changed by the set methods)
+ this.busyPause = minbusySleep; // init here makes sure getIdleSleep() returns at least min allowed
}
@Override
@@ -55,7 +65,14 @@ public abstract class AbstractBusyThread extends AbstractThread implements BusyT
// sets a sleep time before execution of the job-loop
startup = milliseconds;
}
-
+
+ /**
+ * Set the delay between idle cycles to the larger of the input argument
+ * and the min allowed delay defined on init
+ *
+ * @param milliseconds requested delay between idle cycles in milliseconds
+ * @return the actually set sleep time
+ */
@Override
public final long setIdleSleep(final long milliseconds) {
// sets a sleep time for pauses between two jobs
@@ -67,7 +84,14 @@ public abstract class AbstractBusyThread extends AbstractThread implements BusyT
public final long getIdleSleep() {
return idlePause;
}
-
+
+ /**
+ * Set the delay between busy cycles to the larger of the input argument
+ * and the min allowed delay defined on init
+ *
+ * @param milliseconds requested delay between busy cycles in milliseconds
+ * @return the actually set sleep time
+ */
@Override
public final long setBusySleep(final long milliseconds) {
// sets a sleep time for pauses between two jobs
From bb37cb32e443430049e5ee4eaae30e45c4f5b0fa Mon Sep 17 00:00:00 2001
From: reger
Date: Fri, 9 Jan 2015 01:33:45 +0100
Subject: [PATCH 2/4] Add title import for bookmark icon if available in index
---
htroot/yacysearch.java | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/htroot/yacysearch.java b/htroot/yacysearch.java
index f3b77f0cc..24e7688d0 100644
--- a/htroot/yacysearch.java
+++ b/htroot/yacysearch.java
@@ -596,6 +596,10 @@ public class yacysearch {
//bmk.setProperty(Bookmark.BOOKMARK_QUERY, originalquerystring);
bmk.addTag("/search"); // add to bookmark folder
bmk.addTag("searchresult"); // add tag
+ String urlhash = post.get("bookmarkref");
+ final URIMetadataNode urlentry = indexSegment.fulltext().getMetadata(UTF8.getBytes(urlhash));
+ if (urlentry != null && !urlentry.dc_title().isEmpty())
+ bmk.setProperty(Bookmark.BOOKMARK_TITLE,urlentry.dc_title());
sb.bookmarksDB.saveBookmark(bmk);
// do the same for YMarks ?
From 4214f250d0e34ca914f45eb46c8a464d55ee2e24 Mon Sep 17 00:00:00 2001
From: reger
Date: Fri, 9 Jan 2015 02:06:30 +0100
Subject: [PATCH 3/4] Add option for extended search (Autosearch) to
Bookmarks.html asking all connected peers for the search term added as
description to the bookmark created by the bookmark icon. Intended for
searches/research projects with insufficient results from local and DHT
selected remote target peers.
Function: the process checks newly created bookmarks for a description starting with "query=...", uses it to ask every peer for 20 search results, and adds them to the local index in a background job.
A link to start/stop the process was added to /Bookmarks.html
---
htroot/Bookmarks.html | 29 ++-
htroot/Bookmarks.java | 38 +++
source/net/yacy/data/BookmarksDB.java | 9 +
source/net/yacy/search/AutoSearch.java | 322 +++++++++++++++++++++++++
4 files changed, 397 insertions(+), 1 deletion(-)
create mode 100644 source/net/yacy/search/AutoSearch.java
diff --git a/htroot/Bookmarks.html b/htroot/Bookmarks.html
index 504732c83..22701bf1f 100644
--- a/htroot/Bookmarks.html
+++ b/htroot/Bookmarks.html
@@ -202,13 +202,40 @@ To see a list of all APIs, please visit the #[name]# (#[num]#)
#{/optlist}#
-
+
This starts a search of new or modified bookmarks since startup
+ in folder "search" with description "query="
+ Every peer online will be asked for results.
+
+
diff --git a/htroot/Bookmarks.java b/htroot/Bookmarks.java
index 266d94cf4..d7fc531e6 100644
--- a/htroot/Bookmarks.java
+++ b/htroot/Bookmarks.java
@@ -55,7 +55,9 @@ import net.yacy.data.BookmarksDB.Tag;
import net.yacy.document.Document;
import net.yacy.document.Parser;
import net.yacy.kelondro.data.meta.URIMetadataNode;
+import net.yacy.kelondro.workflow.BusyThread;
import net.yacy.peers.NewsPool;
+import net.yacy.search.AutoSearch;
import net.yacy.search.Switchboard;
import net.yacy.server.serverObjects;
import net.yacy.server.serverSwitch;
@@ -372,6 +374,42 @@ public class Bookmarks {
count = 0;
count = recurseFolders(BookmarkHelper.getFolderList("/", sb.bookmarksDB.getTagIterator(isAdmin)), "/", 0, true, "");
prop.put("display_folderlist", count);
+
+ BusyThread bt = sb.getThread("autosearch");
+ if (bt != null) {
+ prop.put("display_autosearchrunning","1");
+ prop.put("display_autosearchrunning_msg", "" );
+ if (post != null && post.containsKey("stopautosearch")) {
+ sb.terminateThread("autosearch", false);
+ prop.put("display_autosearchrunning_msg", "autosearch will terminate");
+ prop.put("display_autosearchrunning","0");
+ }
+ int jobs = bt.getJobCount();
+ prop.put("display_autosearchrunning_jobcount", jobs);
+ int cnt=0;
+ String qstr = "";
+ if (bt instanceof AutoSearch) {
+ cnt = ((AutoSearch) bt).gotresults;
+ qstr = ((AutoSearch) bt).currentQuery;
+ if (qstr == null) qstr = "---";
+ }
+ prop.put("display_autosearchrunning_totalcount", cnt);
+ prop.put("display_autosearchrunning_query", qstr);
+
+ } else {
+ prop.put("display_autosearchrunning", "0");
+ prop.put("display_autosearchrunning_msg", "");
+ if (post != null && post.containsKey("startautosearch")) {
+ sb.deployThread(
+ "autosearch",
+ "Auto Search",
+ "query all peers for given search terms",
+ null,
+ new AutoSearch(Switchboard.getSwitchboard()),
+ 1000);
+ prop.put("display_autosearchrunning_msg", "autsearch job started");
+ }
+ }
}
return prop; // return from serverObjects respond()
}
diff --git a/source/net/yacy/data/BookmarksDB.java b/source/net/yacy/data/BookmarksDB.java
index 049513af3..c232b85af 100644
--- a/source/net/yacy/data/BookmarksDB.java
+++ b/source/net/yacy/data/BookmarksDB.java
@@ -208,6 +208,15 @@ public class BookmarksDB {
return set.iterator();
}
+ public Iterator getBookmarksIterator() {
+ try {
+ return new bookmarkIterator(true);
+ } catch (IOException ex) {
+ ConcurrentLog.logException(ex);
+ }
+ return null;
+ }
+
public Iterator getBookmarksIterator(final String tagName, final boolean priv){
final TreeSet set=new TreeSet(new bookmarkComparator(true));
final String tagHash=BookmarkHelper.tagHash(tagName);
diff --git a/source/net/yacy/search/AutoSearch.java b/source/net/yacy/search/AutoSearch.java
new file mode 100644
index 000000000..3548df031
--- /dev/null
+++ b/source/net/yacy/search/AutoSearch.java
@@ -0,0 +1,322 @@
+/**
+ * AutoSearch.java
+ * Copyright 2015 by Burkhard Buelte
+ * First released 09.01.2015 at http://yacy.net
+ *
+ * This is a part of YaCy, a peer-to-peer based web search engine
+ *
+ * LICENSE
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program in the file lgpl21.txt
+ * If not, see <http://www.gnu.org/licenses/>.
+ */
+
+package net.yacy.search;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Properties;
+import java.util.Set;
+import net.yacy.cora.document.feed.RSSFeed;
+import net.yacy.cora.document.id.DigestURL;
+import net.yacy.cora.document.id.MultiProtocolURL;
+import static net.yacy.cora.federate.opensearch.SRURSSConnector.loadSRURSS;
+import net.yacy.cora.federate.solr.connector.RemoteSolrConnector;
+import net.yacy.cora.federate.solr.connector.SolrConnector;
+import net.yacy.cora.federate.solr.instance.RemoteInstance;
+import net.yacy.cora.federate.yacy.CacheStrategy;
+import net.yacy.cora.protocol.ClientIdentification;
+import net.yacy.cora.util.ConcurrentLog;
+import net.yacy.data.BookmarksDB.Bookmark;
+import net.yacy.kelondro.workflow.AbstractBusyThread;
+import net.yacy.peers.Seed;
+import net.yacy.search.schema.CollectionSchema;
+import org.apache.solr.client.solrj.SolrQuery;
+import org.apache.solr.client.solrj.response.QueryResponse;
+import org.apache.solr.common.SolrDocument;
+import org.apache.solr.common.SolrDocumentList;
+import org.apache.solr.common.params.CommonParams;
+
+/**
+ * AutoSearch retrieves queries from Bookmarks or a property file (if existing)
+ * and loops over a list of connected peers, asking each for results which are
+ * added to the local index.
+ */
+public class AutoSearch extends AbstractBusyThread {
+
+ private Set querystack; // search query
+ public String currentQuery = null; // current query
+ private Set currentTargets = null; // peer hashes
+ final Switchboard sb;
+ public int gotresults;
+ private long lastInitTime; // to recognize new data (Bookmarks) to import
+
+ public AutoSearch(Switchboard xsb) {
+ super(3000, 1000); // set lower limits of cycle delay
+ this.setIdleSleep(60000); // set actual cycle delays
+ this.setBusySleep(10000);
+ this.sb = xsb;
+
+ gotresults = 0;
+ querystack = new HashSet();
+
+ this.lastInitTime = System.currentTimeMillis() - 600000; // init to now - 10 min
+ if (!checkBookmarkDB()) {
+ try {
+ // check for old queries in temp property file
+ File pfile = new File(xsb.dataPath, "DATA/SETTINGS/autosearch.conf");
+ if (pfile.exists()) {
+ ConcurrentLog.info(AutoSearch.class.getName(), "read queries from file " + pfile.getAbsolutePath());
+ Properties prop = new Properties();
+ FileInputStream fileIn = new FileInputStream(pfile);
+ prop.load(fileIn);
+ if (prop.size() > 0) {
+ Set