Add an option for extended search (AutoSearch) to Bookmarks.html, which asks all connected peers for the search term that was added as the description of a bookmark created via the bookmark icon.

Intended for searches/research projects where the local index and the DHT-selected remote target peers do not deliver sufficient results.

Function: the process checks newly created bookmarks for a description starting with "query=...", takes the text after that prefix as the search term, asks every peer for 20 search results, and adds them to the local index in a background job.
A link to start/stop the process was added to /Bookmarks.html
pull/1/head
reger 10 years ago
parent bb37cb32e4
commit 4214f250d0

@ -202,13 +202,40 @@ To see a list of all APIs, please visit the <a href="http://www.yacy-websuche.de
#{optlist}#<option value="#[name]#"#[selected]#>#[name]# (#[num]#)</option>
#{/optlist}#
</select>
<input type="submit" value=" ... " />
<input type="submit" class="btn btn-xs btn-primary" value=" ... " />
</div></form>
<p>
#{taglist}#<span style="font-size:#[size]#em"><a href="Bookmarks.html?tag=#[name]#">#[name]#</a></span>
#{/taglist}#
</p>
</fieldset>
<!-- show AutoSearch infobox -->
<fieldset>
<legend>Auto Search</legend>
#(autosearchrunning)#
<form action="Bookmarks.html" method="post">
<p>start autosearch of new bookmarks <input type="submit" name="startautosearch" class="btn btn-primary btn-xs" value="start it"/></p>
</form>
<div class="info">#[msg]#</div>
::
<table>
<tr>
<td>autosearch queue:</td><td>#[jobcount]#</td><td><form action="Bookmarks.html" method="post"><input type="submit" name="stopautosearch" class="btn btn-danger btn-xs" value="stop it"/></form></td>
</tr>
<tr>
<td>received results:</td><td>#[totalcount]#</td><td></td>
</tr>
<tr>
<td>current query:</td><td colspan="2">#[query]#</td>
</tr>
</table>
#(/autosearchrunning)#
<br />
<p>This starts a search of new or modified bookmarks since startup
in folder "search" with description "query="<br />
Every peer online will be asked for results.
</p>
</fieldset>
</div>
<!-- show BookmarkList -->
<div class="bookmarkList">

@ -55,7 +55,9 @@ import net.yacy.data.BookmarksDB.Tag;
import net.yacy.document.Document;
import net.yacy.document.Parser;
import net.yacy.kelondro.data.meta.URIMetadataNode;
import net.yacy.kelondro.workflow.BusyThread;
import net.yacy.peers.NewsPool;
import net.yacy.search.AutoSearch;
import net.yacy.search.Switchboard;
import net.yacy.server.serverObjects;
import net.yacy.server.serverSwitch;
@ -372,6 +374,42 @@ public class Bookmarks {
count = 0;
count = recurseFolders(BookmarkHelper.getFolderList("/", sb.bookmarksDB.getTagIterator(isAdmin)), "/", 0, true, "");
prop.put("display_folderlist", count);
BusyThread bt = sb.getThread("autosearch");
if (bt != null) {
prop.put("display_autosearchrunning","1");
prop.put("display_autosearchrunning_msg", "" );
if (post != null && post.containsKey("stopautosearch")) {
sb.terminateThread("autosearch", false);
prop.put("display_autosearchrunning_msg", "autosearch will terminate");
prop.put("display_autosearchrunning","0");
}
int jobs = bt.getJobCount();
prop.put("display_autosearchrunning_jobcount", jobs);
int cnt=0;
String qstr = "";
if (bt instanceof AutoSearch) {
cnt = ((AutoSearch) bt).gotresults;
qstr = ((AutoSearch) bt).currentQuery;
if (qstr == null) qstr = "---";
}
prop.put("display_autosearchrunning_totalcount", cnt);
prop.put("display_autosearchrunning_query", qstr);
} else {
prop.put("display_autosearchrunning", "0");
prop.put("display_autosearchrunning_msg", "");
if (post != null && post.containsKey("startautosearch")) {
sb.deployThread(
"autosearch",
"Auto Search",
"query all peers for given search terms",
null,
new AutoSearch(Switchboard.getSwitchboard()),
1000);
prop.put("display_autosearchrunning_msg", "autsearch job started");
}
}
}
return prop; // return from serverObjects respond()
}

@ -208,6 +208,15 @@ public class BookmarksDB {
return set.iterator();
}
/**
 * Creates an iterator over the stored bookmarks.
 *
 * @return a new {@code bookmarkIterator}, or {@code null} if creating it
 *         failed with an {@link IOException} (the exception is logged, not
 *         rethrown) - callers have to check for {@code null}
 */
public Iterator<Bookmark> getBookmarksIterator() {
    Iterator<Bookmark> result = null;
    try {
        // NOTE(review): the meaning of the 'true' flag is defined by bookmarkIterator - confirm there
        result = new bookmarkIterator(true);
    } catch (final IOException ioe) {
        ConcurrentLog.logException(ioe);
    }
    return result;
}
public Iterator<String> getBookmarksIterator(final String tagName, final boolean priv){
final TreeSet<String> set=new TreeSet<String>(new bookmarkComparator(true));
final String tagHash=BookmarkHelper.tagHash(tagName);

@ -0,0 +1,322 @@
/**
* AutoSearch.java
* Copyright 2015 by Burkhard Buelte
* First released 09.01.2015 at http://yacy.net
*
* This is a part of YaCy, a peer-to-peer based web search engine
*
* LICENSE
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file lgpl21.txt
* If not, see <http://www.gnu.org/licenses/>.
*/
package net.yacy.search;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Properties;
import java.util.Set;
import net.yacy.cora.document.feed.RSSFeed;
import net.yacy.cora.document.id.DigestURL;
import net.yacy.cora.document.id.MultiProtocolURL;
import static net.yacy.cora.federate.opensearch.SRURSSConnector.loadSRURSS;
import net.yacy.cora.federate.solr.connector.RemoteSolrConnector;
import net.yacy.cora.federate.solr.connector.SolrConnector;
import net.yacy.cora.federate.solr.instance.RemoteInstance;
import net.yacy.cora.federate.yacy.CacheStrategy;
import net.yacy.cora.protocol.ClientIdentification;
import net.yacy.cora.util.ConcurrentLog;
import net.yacy.data.BookmarksDB.Bookmark;
import net.yacy.kelondro.workflow.AbstractBusyThread;
import net.yacy.peers.Seed;
import net.yacy.search.schema.CollectionSchema;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.params.CommonParams;
/**
* AutoSearch retrieves queries from Bookmarks or a property file (if existing)
* and loops to a list of connected peers and asks each for results which are
* added to the local index.
*/
public class AutoSearch extends AbstractBusyThread {
private Set<String> querystack; // serach query
public String currentQuery = null; // current query
private Set<String> currentTargets = null; // peer hashes
final Switchboard sb;
public int gotresults;
private long lastInitTime; // to recognize new data (Bookmarks) to import
public AutoSearch(Switchboard xsb) {
super(3000, 1000); // set lower limits of cycle delay
this.setIdleSleep(60000); // set actual cycle delays
this.setBusySleep(10000);
this.sb = xsb;
gotresults = 0;
querystack = new HashSet<String>();
this.lastInitTime = System.currentTimeMillis() - 600000; // init to now - 10 min
if (!checkBookmarkDB()) {
try {
// check for old queries in temp property file
File pfile = new File(xsb.dataPath, "DATA/SETTINGS/autosearch.conf");
if (pfile.exists()) {
ConcurrentLog.info(AutoSearch.class.getName(), "read queries from file " + pfile.getAbsolutePath());
Properties prop = new Properties();
FileInputStream fileIn = new FileInputStream(pfile);
prop.load(fileIn);
if (prop.size() > 0) {
Set<Object> all = prop.keySet();
for (Object s : all) {
String query = prop.getProperty((String) s);
if (query != null && !query.isEmpty()) {
querystack.add(query);
}
}
}
fileIn.close();
}
} catch (final IOException e) {
ConcurrentLog.warn(AutoSearch.class.getName(), "Error reading config file");
}
}
}
/**
* Save current queries to a (temporary) property file to allow continue
* after a restart. Existing file will be overwritten or deleted.
*/
private void saveasPropFile() {
File pfile = new File(sb.dataPath, "DATA/SETTINGS/autosearch.conf");
if (querystack.size() == 0) {
if (pfile.exists()) {
pfile.delete();
}
} else {
try {
Properties prop = new Properties();
for (String s : querystack) {
prop.put("query" + s.hashCode(), s);
}
OutputStream fileOut = new FileOutputStream(pfile);
prop.store(fileOut, "AutoSearch query list");
fileOut.close();
} catch (FileNotFoundException ex) {
ConcurrentLog.warn(AutoSearch.class.getName(), "can not create file " + pfile.getAbsolutePath());
} catch (IOException ex) {
ConcurrentLog.warn(AutoSearch.class.getName(), "IO error writing to file " + pfile.getAbsolutePath());
}
}
}
/**
* Get peers to query (peers connected)
*
* @return Set of peer hashes to contact
*/
private void initPeerList() {
if (currentTargets == null) {
currentTargets = new HashSet<String>();
}
// TODO: DHT peers could be excluded
Iterator<Seed> it = Switchboard.getSwitchboard().peers.seedsConnected(true, false, null, 0);
int i=0;
while (it.hasNext() && i<5) {
Seed s = it.next();
currentTargets.add(s.hash); i++;
}
}
/**
* Check BookmarkDB for existing queries return true if new entry added to
* query queue. Store queries in (temporary) property file
*
* @return true if new query from bookmark was added
*/
private boolean checkBookmarkDB() {
int added = 0;
Iterator<Bookmark> it = Switchboard.getSwitchboard().bookmarksDB.getBookmarksIterator();
if (it != null) {
while (it.hasNext()) {
Bookmark bmk = it.next();
// get search bookmarks only
if (bmk.getFoldersString().startsWith("/search")) {
// take only new created or edited bookmarks
if (bmk.getTimeStamp() >= this.lastInitTime) {
final String query = bmk.getDescription();
if (!query.isEmpty() && query.startsWith("query=")) {
{
querystack.add(query.substring(6));
added++;
ConcurrentLog.info(AutoSearch.class.getName(), "add query from Bookmarks " + query);
}
}
}
}
}
}
if (added > 0) {
this.lastInitTime = System.currentTimeMillis();
saveasPropFile();
return true;
} else {
return false;
}
}
/**
* Process query queue, select one query and peer to ask next
*
* @return true if something processed
*/
@Override
public boolean job() {
if (currentQuery == null && querystack != null && querystack.size() > 0) {
currentQuery = querystack.iterator().next();
querystack.remove(currentQuery); // imediate remove to asure no repeat
initPeerList(); // late initialization of peerlist to get currently connected
}
// ask next peer for search term
if (currentQuery != null && !currentQuery.isEmpty()) {
if (currentTargets != null && !currentTargets.isEmpty()) {
while (currentTargets.size() > 0) { // loop only to skip disconnected peers
String peerhash = currentTargets.iterator().next();
currentTargets.remove(peerhash);
Seed seed = Switchboard.getSwitchboard().peers.getConnected(peerhash);
if (seed != null) {
processSingleTarget(seed);
return true; // just one query per busycycle is intended
}
}
}
currentQuery = null;
}
// no search targets
checkBookmarkDB();
// TODO: do idle processing
// analyse content of local index
// extend search with learned new search terms
// follow most promising links
ConcurrentLog.fine(AutoSearch.class.getName(), "nothing to do");
return this.querystack.size() > 0;
}
/**
* Calls one peer for search results of the current query and adds it to the
* local index. Depending on peers SolrAvailable flag the a solr query or
* opensearch/rss query is used.
*
* @param seed the peer to ask
*/
private void processSingleTarget(Seed seed) {
ConcurrentLog.fine(AutoSearch.class.getName(), "ask " + seed.getIP() + " " + seed.getName() + " for query=" + currentQuery);
if (seed.getFlagSolrAvailable()) { // do a solr query
SolrDocumentList docList = null;
SolrQuery solrQuery = new SolrQuery();
// use remote defaults and ranking (to query their index right)
solrQuery.set(CommonParams.Q, currentQuery + " AND (" + CollectionSchema.httpstatus_i.name() + ":200)"); // except this yacy special
solrQuery.set("q.op", "AND"); // except ... no one word matches please
solrQuery.set(CommonParams.ROWS, "20");
this.setName("Protocol.solrQuery(" + solrQuery.getQuery() + " to " + seed.hash + ")");
try {
RemoteInstance instance = new RemoteInstance("http://" + seed.getPublicAddress(seed.getIP()) + "/solr/", null, null, 10000); // this is a 'patch configuration' which considers 'solr' as default collection
try {
SolrConnector solrConnector = new RemoteSolrConnector(instance, true, null);
if (!solrConnector.isClosed()) {
try {
QueryResponse rsp = solrConnector.getResponseByParams(solrQuery);
docList = rsp.getResults();
} catch (Throwable e) {
} finally {
solrConnector.close();
}
}
} catch (Throwable ee) {
} finally {
instance.close();
}
if (docList != null) {
for (SolrDocument d : docList) {
sb.index.fulltext().putDocument(sb.index.fulltext().getDefaultConfiguration().toSolrInputDocument(d));
this.gotresults++;
}
ConcurrentLog.info(AutoSearch.class.getName(), "added " + docList.size() + " results from " + seed.getName() + " to index for solrquery=" + currentQuery);
}
} catch (Throwable eee) {
}
} else { // do a yacysearch.rss query
final String rssSearchServiceURL = "http://" + seed.getPublicAddress(seed.getIP()) + "/yacysearch.rss";
try {
RSSFeed feed = loadSRURSS(
rssSearchServiceURL,
currentQuery,
0,
20,
CacheStrategy.IFFRESH,
false, // just local, as we ask others too
ClientIdentification.yacyInternetCrawlerAgent);
final List<DigestURL> urls = new ArrayList<DigestURL>();
for (final MultiProtocolURL entry : feed.getLinks()) {
urls.add(new DigestURL(entry, (byte[]) null));
this.gotresults++;
}
sb.addToIndex(urls, null, "AutoSearch", null, true);
ConcurrentLog.info(AutoSearch.class.getName(), "added " + urls.size() + " results from " + seed.getName() + " to index for query=" + currentQuery);
} catch (IOException ex) {
ConcurrentLog.info(AutoSearch.class.getName(), "no answer from " + seed.getName());
}
}
}
/**
* Estimate of queries to perform
*/
@Override
public int getJobCount() {
if (currentTargets != null) {
int cnt = currentTargets.size();
cnt += querystack.size() * sb.peers.sizeConnected();
return cnt;
} else {
return 0;
}
}
@Override
public void freemem() {
}
@Override
public void close() {
this.saveasPropFile(); // saves or deletes property file with queries
}
}
Loading…
Cancel
Save