- added additional config page (ConfigSearchPage_p) for easy setup of the search page layout (to not overload the ConfigPortal page) - currently a redundant setting with part of the ConfigPortal page - added missing config for the filetype and protocol navigators - adjusted init of SearchEvent to check the navigation config setting - renamed RankingProcess.getTopicNavigator to getTopics (to distinguish it from the added SearchEvent.getTopicNavigator)
pull/1/head
parent
24db2fcd9d
commit
f143804382
@ -0,0 +1,235 @@
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<!-- ConfigSearchPage_p.html: admin page to configure the search result page
     layout. Shows a clickable mock-up of the search page; each check box
     toggles one feature and is backed by a switchboard config key that is
     read/written by ConfigSearchPage_p.java. -->
<html xmlns="http://www.w3.org/1999/xhtml">
  <head>
    <title>YaCy '#[clientname]#': Search Page</title>
    #%env/templates/metas.template%#
    <link rel="stylesheet" type="text/css" media="screen" href="/env/highslide.css" />
    <script type="text/javascript" src="/js/yacysearch.js"></script>
    <script type="text/javascript" src="/js/highslide/highslide.js"></script>
    <script type="text/javascript">hs.outlineType = 'rounded-white';</script>
    <!-- jQuery + jQuery UI are needed for the accordion/tagcloud preview below -->
    <script src="/yacy/ui/js/jquery-1.3.2.min.js" type="text/javascript"></script>
    <script src="/yacy/ui/js/jquery-ui-1.7.2.min.js" type="text/javascript"></script>
    <script src="/yacy/ui/js/jquery.tagcloud.min.js" type="text/javascript"></script>
    <script src="/yacy/ui/js/jquery.tinysort.min.js" type="text/javascript"></script>
    <link media="screen" type="text/css" href="/yacy/ui/css/themes/start/ui.base.css" rel="stylesheet" />
    <link media="screen" type="text/css" href="/yacy/ui/css/themes/start/ui.theme.css" rel="stylesheet" />
    <link media="screen" type="text/css" href="/yacy/ui/css/autocomplete.css" rel="stylesheet" />
  </head>

  <body id="ConfigSearchPage" >
    #%env/templates/header.template%#
    #%env/templates/submenuSearchConfiguration.template%#
    <!-- initialise the jQuery UI accordions used by the navigator preview sidebars -->
    <script type="text/javascript">
    //<![CDATA[
    $(function() {
      $.extend($.ui.accordion.defaults, {
        autoHeight: false,
        clearStyle: true,
        collapsible: true,
        header: "h3"
      });
      $("#sidebar#[vocabulary]#").accordion({});
      $("#sidebarDomains").accordion({});
      $("#sidebarProtocols").accordion({});
      $("#sidebarFiletypes").accordion({});
      $("#sidebarAuthors").accordion({});
      $("#sidebarLanguages").accordion({});
      $("#sidebarNameSpace").accordion({});
      $("#sidebarTopics").tagcloud({type:"sphere",power:.25,seed:0,sizemin:10,sizemax:20,height:80,colormin:"682",colormax:"20C"}).find("li").tsort();
      $("#sidebarAbout").accordion({});
    });
    //]]>
    </script>

    <h2>Search Result Page Layout Configuration</h2>
    <p>
      Below is a generic template of the search result page. Mark the check boxes for features you would like to be displayed.
      To change colors and styles use the <a href="ConfigAppearance_p.html">Appearance</a> menu for different skins and languages.
      Other portal settings can be adjusted in <a href="ConfigPortal.html">Generic Search Portal</a> menu.
    </p>
    <h4>Page Template</h4>

    <form action="ConfigSearchPage_p.html" method="post" enctype="multipart/form-data" id="ConfigSearchPage" accept-charset="UTF-8">
      <table>
        <tr>
          <td style="border-width: 1px; border-color: grey; border-style: solid; padding: 5px;">
            <!-- top menu preview; the check box toggles the "publicTopmenu" config -->
            <table width="90%">
              <tr>
                <td valign="top"><input type="checkbox" name="publicTopmenu" value="true" #(publicTopmenu)#::checked="checked" #(/publicTopmenu)# /></td>
                <td><div class="SubMenu">
                  <ul class="SubMenu">
                    <li style="width: 15%;"><a href="/Status.html" target="LayouTest" class="MenuItemLink">Administration</a></li>
                    <li style="width: 15%;"><a href="/index.html" target="LayouTest" class="MenuItemLink">Web Search</a></li>
                    <li style="width: 15%;"><a href="/yacyinteractive.html" target="LayouTest" class="MenuItemLink">File Search</a></li>
                    <li style="width: 15%;"><a href="/HostBrowser.html?hosts=" target="LayouTest" class="MenuItemLink">HostBrowser</a></li>
                    <li style="width: 15%;"><a href="/ViewProfile.html?hash=localhash" target="LayouTest" class="MenuItemLink">About Us</a></li>
                    <li style="width: 15%;"><a href="http://www.yacy-websearch.net/wiki/" target="LayouTest" class="MenuItemLink">Help / YaCy Wiki</a></li>
                  </ul>
                </div></td>
              </tr>
            </table>
            <h2>#[promoteSearchPageGreeting]#</h2>
            <div class="yacylogo">
              <a href="#[promoteSearchPageGreeting.homepage]#" target="LayouTest" class="yacylogo"><img src="#[promoteSearchPageGreeting.smallImage]#" /></a>
            </div>
            <!-- search input preview; the check boxes toggle the content-domain
                 options (search.text / image / audio / video / app) -->
            <div style="width: 550px; float: left;">
              <fieldset class="yacys">
                <input id="search" class="searchinput" name="query" type="text" size="40" maxlength="80" value="search words" />
                <input id="Enter" type="button" name="Enter" value="Search" onclick="var w = window.open('yacysearch.html','LayoutTest'); w.focus();" />
                <div class="yacysearch">
                  <input type="checkbox" id="text" name="search.text" value="true" #(search.text)#::checked="checked" #(/search.text)# /><label for="text">Text</label>
                  <input type="checkbox" id="image" name="search.image" value="true" #(search.image)#::checked="checked" #(/search.image)# /><label for="image">Images</label>
                  <input type="checkbox" id="audio" name="search.audio" value="true" #(search.audio)#::checked="checked" #(/search.audio)# /><label for="audio">Audio</label>
                  <input type="checkbox" id="video" name="search.video" value="true" #(search.video)#::checked="checked" #(/search.video)# /><label for="video">Video</label>
                  <input type="checkbox" id="app" name="search.app" value="true" #(search.app)#::checked="checked" #(/search.app)# /><label for="app">Applications</label>
                  <input type="checkbox" id="options" name="search.options" value="true" #(search.options)#::checked="checked" #(/search.options)# /><a href="" target="LayouTest">more options</a>
                </div>
              </fieldset>
            </div>
            <!-- the sidebar navigation -->
            <!-- each row: check box backed by one "search.navigation.*" form field,
                 plus a static preview of the corresponding navigator widget -->
            <div style="float: right; width: 27%;">
              <fieldset>
                <table>
                  <tr>
                    <td><input type="checkbox" name="search.navigation.topics" value="true" #(search.navigation.topics)#::checked="checked" #(/search.navigation.topics)# /></td>
                    <td>
                      <div style="float: right; margin-top: 0px; width: 100%; height: 80px">
                        <div>
                          <!-- dummy entries only; the value attribute sets the tagcloud weight -->
                          <ul id="sidebarTopics" style="padding-left: 0px;">
                            <li value="3"><a href="yacysearch.html" target="LayouTest">Tag</a></li>
                            <li value="4"><a href="yacysearch.html" target="LayouTest">Topics</a></li>
                            <li value="3"><a href="yacysearch.html" target="LayouTest">Cloud</a></li>
                            <li value="2"><a href="yacysearch.html" target="LayouTest">Topics</a></li>
                            <li value="1"><a href="yacysearch.html" target="LayouTest">Cloud</a></li>
                            <li value="1"><a href="yacysearch.html" target="LayouTest">Tag</a></li>
                            <li value="1"><a href="yacysearch.html" target="LayouTest">Cloud</a></li>
                            <li value="1"><a href="yacysearch.html" target="LayouTest">Topic</a></li>
                            <li value="1"><a href="yacysearch.html" target="LayouTest">Tag</a></li>
                            <li value="1"><a href="yacysearch.html" target="LayouTest">Cloud</a></li>
                          </ul>
                        </div>
                      </div>
                    </td>
                  </tr>
                  <tr>
                    <td><input type="checkbox" name="search.navigation.protocol" value="true" #(search.navigation.protocol)#::checked="checked" #(/search.navigation.protocol)# /></td>
                    <td><div id="sidebarProtocols" style="float: right; margin-top: 0px; width: 210px;">
                      <h3 style="padding-left: 25px;">Protocol Navigator</h3>
                    </div></td>
                  </tr>
                  <tr>
                    <td><input type="checkbox" name="search.navigation.filetype" value="true" #(search.navigation.filetype)#::checked="checked" #(/search.navigation.filetype)# /></td>
                    <td><div id="sidebarFiletypes" style="float: right; margin-top: 0px; width: 210px;">
                      <h3 style="padding-left: 25px;">Filetype Navigator</h3>
                    </div></td>
                  </tr>
                  <tr>
                    <td><input type="checkbox" name="search.navigation.hosts" value="true" #(search.navigation.hosts)#::checked="checked" #(/search.navigation.hosts)# /></td>
                    <td><div id="sidebarDomains" style="float: right; margin-top: 0px; width: 210px;">
                      <h3 style="padding-left: 25px;">Domain Navigator</h3>
                    </div></td>
                  </tr>
                  <tr>
                    <td><input type="checkbox" name="search.navigation.namespace" value="true" #(search.navigation.namespace)#::checked="checked" #(/search.navigation.namespace)# /></td>
                    <td><div id="sidebarNameSpace" style="float: right; margin-top: 0px; width: 210px;">
                      <h3 style="padding-left: 25px;">Wiki Name Space Navigator</h3>
                    </div></td>
                  </tr>
                  <tr>
                    <td><input type="checkbox" name="search.navigation.authors" value="true" #(search.navigation.authors)#::checked="checked" #(/search.navigation.authors)# /></td>
                    <td><div id="sidebarAuthors" style="float: right; margin-top: 0px; width: 210px;">
                      <h3 style="padding-left: 25px;">Author Navigator</h3>
                    </div></td>
                  </tr>
                  <!-- the vocabulary and about widgets have no check box (not configurable here) -->
                  <tr>
                    <td></td>
                    <td><div id="vocabulary" style="float: right; margin-top: 0px; width: 210px;">
                      <h3 style="padding-left: 25px;">Vocabulary Navigator</h3>
                    </div></td>
                  </tr>
                  <tr>
                    <td></td>
                    <td><div id="sidebarAbout" style="float: right; margin-top: 5px; width: 210px;">
                      <h3 style="padding-left: 25px;">about#[about.headline]#</h3>
                      <div>#[about.body]#</div>
                    </div></td>
                  </tr>
                </table>
              </fieldset>
            </div>
            <!-- the search result -->
            <!-- a single mock result entry; the second table row holds the
                 "search.result.show.*" check boxes, aligned under the link
                 they control -->
            <div style="float: left; width: 70%;">
              <fieldset>
                <div class="searchresults">
                  <h4 class="linktitle">
                    <img width="16" height="16" src="env/grafics/dfltfvcn.ico" class="favicon" alt="" />
                    <a href="yacysearch.html" target="LayouTest">Title of Result</a>
                  </h4>
                  <p class="snippet">
                    <span class="snippetLoaded" id="hhash">Description and text snippet of the search result</span>
                  </p>
                  <p class="url">
                    <a href="yacysearch.html" id="urlhash" target="LayouTest">http://url-of-the-search-result.net</a>
                  </p>
                  <p class="urlinfo">
                    <table border="0">
                      <tr>
                        <td width="15px"></td>
                        <td>#[content_showDate_date]#</td>
                        <td> | 42 kbyte</td>
                        <td> | <a href="api/yacydoc.html" target="LayouTest" onclick="return hs.htmlExpand(this, { objectType: 'ajax'} )">Metadata</a></td>
                        <td> | <a href="ViewFile.html" target="LayouTest">Parser</a></td>
                        <td> | <a href="yacysearch.html" target="LayouTest">Pictures</a></td>
                        <td> | <a href="CacheResource_p.html" target="LayouTest">Cache</a></td>
                        <td> | <a href="proxy.html" target="LayouTest">Augmented Browsing</a></td>
                        <td> | <a href="HostBrowser.html" target="LayouTest"><img src="/env/grafics/minitree.png" width="15" height="8" /></a></td>
                      </tr>
                      <tr>
                        <td></td>
                        <td align="center"><input type="checkbox" name="search.result.show.date" value="true" #(search.result.show.date)#::checked="checked" #(/search.result.show.date)# /></td>
                        <td align="center"><input type="checkbox" name="search.result.show.size" value="true" #(search.result.show.size)#::checked="checked" #(/search.result.show.size)# /></td>
                        <td align="center"><input type="checkbox" name="search.result.show.metadata" value="true" #(search.result.show.metadata)#::checked="checked" #(/search.result.show.metadata)# /></td>
                        <td align="center"><input type="checkbox" name="search.result.show.parser" value="true" #(search.result.show.parser)#::checked="checked" #(/search.result.show.parser)# /></td>
                        <td align="center"><input type="checkbox" name="search.result.show.pictures" value="true" #(search.result.show.pictures)#::checked="checked" #(/search.result.show.pictures)# /></td>
                        <td align="center"><input type="checkbox" name="search.result.show.cache" value="true" #(search.result.show.cache)#::checked="checked" #(/search.result.show.cache)# /></td>
                        <td align="center"><input type="checkbox" name="search.result.show.proxy" value="true" #(search.result.show.proxy)#::checked="checked" #(/search.result.show.proxy)# /></td>
                        <td align="center"><input type="checkbox" name="search.result.show.hostbrowser" value="true" #(search.result.show.hostbrowser)#::checked="checked" #(/search.result.show.hostbrowser)# /></td>
                      </tr>
                      <tr>
                        <td></td>
                        <td colspan="8"><input type="checkbox" name="search.result.show.tags" value="true" #(search.result.show.tags)#::checked="checked" #(/search.result.show.tags)# />Tags</td>
                      </tr>
                    </table>
                  </p>
                </div>
              </fieldset>
            </div>
          </td>
        </tr>
        <tr>
          <td>
            <fieldset>
              <dt> </dt>
              <dd>
                <!-- "searchpage_set" stores the form, "searchpage_default" resets to factory values -->
                <input type="submit" name="searchpage_set" value="Save Settings" class="submitready" />
                <input type="submit" name="searchpage_default" value="Set Default Values" class="submitready" />
              </dd>
            </fieldset>
          </td>
        </tr>
      </table>
    </form>

    <!-- collapse all accordion previews after the page has loaded -->
    <script type="text/javascript">
    //<![CDATA[
    $(function() { $("#sidebarProtocols").accordion('activate', false); });
    $(function() { $("#sidebarFiletypes").accordion('activate', false); });
    $(function() { $("#sidebarDomains").accordion('activate', false); });
    $(function() { $("#sidebarNameSpace").accordion('activate', false); });
    $(function() { $("#sidebarAuthors").accordion('activate', false); });
    $(function() { $("#vocabulary").accordion('activate', false); });
    $(function() { $("#sidebarAbout").accordion('activate', false); });
    //]]>
    </script>
  </body>
</html>
|
@ -0,0 +1,145 @@
|
||||
// ConfigSearchPage_p.java
|
||||
// -----------------------
|
||||
// part of YaCy
|
||||
// (C) by Michael Peter Christen; mc@yacy.net
|
||||
// first published on http://yacy.net
|
||||
// Frankfurt, Germany, 4.7.2008
|
||||
//
|
||||
//$LastChangedDate$
|
||||
//$LastChangedRevision$
|
||||
//$LastChangedBy$
|
||||
//
|
||||
// LICENSE
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation; either version 2 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program; if not, write to the Free Software
|
||||
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
|
||||
import java.sql.Date;
|
||||
import net.yacy.cora.date.GenericFormatter;
|
||||
import net.yacy.cora.protocol.RequestHeader;
|
||||
import net.yacy.data.WorkTables;
|
||||
import net.yacy.search.Switchboard;
|
||||
import net.yacy.search.SwitchboardConstants;
|
||||
import net.yacy.server.serverObjects;
|
||||
import net.yacy.server.serverSwitch;
|
||||
|
||||
public class ConfigSearchPage_p {
|
||||
|
||||
public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) {
|
||||
final serverObjects prop = new serverObjects();
|
||||
final Switchboard sb = (Switchboard) env;
|
||||
|
||||
if (post != null) {
|
||||
// AUTHENTICATE
|
||||
if (!sb.verifyAuthentication(header)) {
|
||||
// force log-in
|
||||
prop.authenticationRequired();
|
||||
return prop;
|
||||
}
|
||||
|
||||
if (post.containsKey("searchpage_set")) {
|
||||
final String newGreeting = post.get(SwitchboardConstants.GREETING, "");
|
||||
// store this call as api call
|
||||
sb.tables.recordAPICall(post, "ConfigPortal.html", WorkTables.TABLE_API_TYPE_CONFIGURATION, "new portal design. greeting: " + newGreeting);
|
||||
|
||||
sb.setConfig("publicTopmenu", post.getBoolean("publicTopmenu"));
|
||||
sb.setConfig("search.options", post.getBoolean("search.options"));
|
||||
|
||||
sb.setConfig("search.text", post.getBoolean("search.text"));
|
||||
sb.setConfig("search.image", post.getBoolean("search.image"));
|
||||
sb.setConfig("search.audio", post.getBoolean("search.audio"));
|
||||
sb.setConfig("search.video", post.getBoolean("search.video"));
|
||||
sb.setConfig("search.app", post.getBoolean("search.app"));
|
||||
|
||||
sb.setConfig("search.result.show.date", post.getBoolean("search.result.show.date"));
|
||||
sb.setConfig("search.result.show.size", post.getBoolean("search.result.show.size"));
|
||||
sb.setConfig("search.result.show.metadata", post.getBoolean("search.result.show.metadata"));
|
||||
sb.setConfig("search.result.show.parser", post.getBoolean("search.result.show.parser"));
|
||||
sb.setConfig("search.result.show.pictures", post.getBoolean("search.result.show.pictures"));
|
||||
sb.setConfig("search.result.show.cache", post.getBoolean("search.result.show.cache"));
|
||||
sb.setConfig("search.result.show.proxy", post.getBoolean("search.result.show.proxy"));
|
||||
sb.setConfig("search.result.show.hostbrowser", post.getBoolean("search.result.show.hostbrowser"));
|
||||
sb.setConfig("search.result.show.tags", post.getBoolean("search.result.show.tags"));
|
||||
|
||||
// construct navigation String
|
||||
String nav = "";
|
||||
if (post.getBoolean("search.navigation.filetype")) nav += "filetype,";
|
||||
if (post.getBoolean("search.navigation.protocol")) nav += "protocol,";
|
||||
if (post.getBoolean("search.navigation.hosts")) nav += "hosts,";
|
||||
if (post.getBoolean("search.navigation.authors")) nav += "authors,";
|
||||
if (post.getBoolean("search.navigation.namespace")) nav += "namespace,";
|
||||
if (post.getBoolean("search.navigation.topics")) nav += "topics,";
|
||||
if (nav.endsWith(",")) nav = nav.substring(0, nav.length() - 1);
|
||||
sb.setConfig("search.navigation", nav);
|
||||
}
|
||||
if (post.containsKey("searchpage_default")) {
|
||||
sb.setConfig("publicTopmenu", true);
|
||||
sb.setConfig("search.navigation", "hosts,authors,namespace,topics");
|
||||
sb.setConfig("search.options", true);
|
||||
sb.setConfig("search.text", true);
|
||||
sb.setConfig("search.image", true);
|
||||
sb.setConfig("search.audio", false);
|
||||
sb.setConfig("search.video", false);
|
||||
sb.setConfig("search.app", false);
|
||||
sb.setConfig("search.result.show.date", true);
|
||||
sb.setConfig("search.result.show.size", false);
|
||||
sb.setConfig("search.result.show.metadata", false);
|
||||
sb.setConfig("search.result.show.parser", false);
|
||||
sb.setConfig("search.result.show.pictures", false);
|
||||
sb.setConfig("search.result.show.cache", true);
|
||||
sb.setConfig("search.result.show.proxy", false);
|
||||
sb.setConfig("search.result.show.hostbrowser", true);
|
||||
sb.setConfig("search.result.show.tags", false);
|
||||
}
|
||||
}
|
||||
|
||||
prop.putHTML(SwitchboardConstants.GREETING, sb.getConfig(SwitchboardConstants.GREETING, ""));
|
||||
prop.putHTML(SwitchboardConstants.GREETING_HOMEPAGE, sb.getConfig(SwitchboardConstants.GREETING_HOMEPAGE, ""));
|
||||
prop.putHTML(SwitchboardConstants.GREETING_LARGE_IMAGE, sb.getConfig(SwitchboardConstants.GREETING_LARGE_IMAGE, ""));
|
||||
prop.putHTML(SwitchboardConstants.GREETING_SMALL_IMAGE, sb.getConfig(SwitchboardConstants.GREETING_SMALL_IMAGE, ""));
|
||||
prop.putHTML(SwitchboardConstants.INDEX_FORWARD, sb.getConfig(SwitchboardConstants.INDEX_FORWARD, ""));
|
||||
prop.put("publicTopmenu", sb.getConfigBool("publicTopmenu", false) ? 1 : 0);
|
||||
prop.put("search.options", sb.getConfigBool("search.options", false) ? 1 : 0);
|
||||
|
||||
prop.put("search.text", sb.getConfigBool("search.text", false) ? 1 : 0);
|
||||
prop.put("search.image", sb.getConfigBool("search.image", false) ? 1 : 0);
|
||||
prop.put("search.audio", sb.getConfigBool("search.audio", false) ? 1 : 0);
|
||||
prop.put("search.video", sb.getConfigBool("search.video", false) ? 1 : 0);
|
||||
prop.put("search.app", sb.getConfigBool("search.app", false) ? 1 : 0);
|
||||
|
||||
prop.put("search.result.show.date", sb.getConfigBool("search.result.show.date", false) ? 1 : 0);
|
||||
prop.put("search.result.show.size", sb.getConfigBool("search.result.show.size", false) ? 1 : 0);
|
||||
prop.put("search.result.show.metadata", sb.getConfigBool("search.result.show.metadata", false) ? 1 : 0);
|
||||
prop.put("search.result.show.parser", sb.getConfigBool("search.result.show.parser", false) ? 1 : 0);
|
||||
prop.put("search.result.show.pictures", sb.getConfigBool("search.result.show.pictures", false) ? 1 : 0);
|
||||
prop.put("search.result.show.cache", sb.getConfigBool("search.result.show.cache", false) ? 1 : 0);
|
||||
prop.put("search.result.show.proxy", sb.getConfigBool("search.result.show.proxy", false) ? 1 : 0);
|
||||
prop.put("search.result.show.hostbrowser", sb.getConfigBool("search.result.show.hostbrowser", false) ? 1 : 0);
|
||||
prop.put("search.result.show.tags", sb.getConfigBool("search.result.show.tags", false) ? 1 : 0);
|
||||
|
||||
prop.put("search.navigation.filetype", sb.getConfig("search.navigation", "").indexOf("filetype",0) >= 0 ? 1 : 0);
|
||||
prop.put("search.navigation.protocol", sb.getConfig("search.navigation", "").indexOf("protocol",0) >= 0 ? 1 : 0);
|
||||
prop.put("search.navigation.hosts", sb.getConfig("search.navigation", "").indexOf("hosts",0) >= 0 ? 1 : 0);
|
||||
prop.put("search.navigation.authors", sb.getConfig("search.navigation", "").indexOf("authors",0) >= 0 ? 1 : 0);
|
||||
prop.put("search.navigation.namespace", sb.getConfig("search.navigation", "").indexOf("namespace",0) >= 0 ? 1 : 0);
|
||||
prop.put("search.navigation.topics", sb.getConfig("search.navigation", "").indexOf("topics",0) >= 0 ? 1 : 0);
|
||||
|
||||
prop.put("about.headline", sb.getConfig("about.headline", "About"));
|
||||
prop.put("about.body", sb.getConfig("about.body", ""));
|
||||
|
||||
prop.put("content_showDate_date", GenericFormatter.RFC1123_SHORT_FORMATTER.format(new Date(System.currentTimeMillis())));
|
||||
return prop;
|
||||
}
|
||||
|
||||
}
|
@ -1,440 +1,440 @@
|
||||
// search.java
|
||||
// (C) 2004 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
|
||||
// first published on http://yacy.net
|
||||
//
|
||||
// This is a part of YaCy, a peer-to-peer based web search engine
|
||||
//
|
||||
// $LastChangedDate$
|
||||
// $LastChangedRevision$
|
||||
// $LastChangedBy$
|
||||
//
|
||||
// LICENSE
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation; either version 2 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program; if not, write to the Free Software
|
||||
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
|
||||
// You must compile this file with
|
||||
// javac -classpath .:../../Classes search.java
|
||||
// if the shell's current path is htroot/yacy
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashSet;
|
||||
import java.util.Iterator;
|
||||
import java.util.Map;
|
||||
import java.util.TreeMap;
|
||||
import java.util.TreeSet;
|
||||
|
||||
import net.yacy.cora.document.ASCII;
|
||||
import net.yacy.cora.document.analysis.Classification;
|
||||
import net.yacy.cora.document.analysis.Classification.ContentDomain;
|
||||
import net.yacy.cora.document.RSSMessage;
|
||||
import net.yacy.cora.lod.vocabulary.Tagging;
|
||||
import net.yacy.cora.protocol.Domains;
|
||||
import net.yacy.cora.protocol.HeaderFramework;
|
||||
import net.yacy.cora.protocol.RequestHeader;
|
||||
import net.yacy.cora.sorting.ScoreMap;
|
||||
import net.yacy.cora.sorting.WeakPriorityBlockingQueue;
|
||||
import net.yacy.cora.storage.HandleSet;
|
||||
import net.yacy.cora.util.SpaceExceededException;
|
||||
import net.yacy.kelondro.data.meta.DigestURI;
|
||||
import net.yacy.kelondro.data.word.WordReference;
|
||||
import net.yacy.kelondro.data.word.WordReferenceFactory;
|
||||
import net.yacy.kelondro.data.word.WordReferenceRow;
|
||||
import net.yacy.kelondro.index.RowHandleSet;
|
||||
import net.yacy.kelondro.rwi.ReferenceContainer;
|
||||
import net.yacy.kelondro.util.Bitfield;
|
||||
import net.yacy.kelondro.util.ByteBuffer;
|
||||
import net.yacy.kelondro.util.ISO639;
|
||||
import net.yacy.kelondro.util.MemoryControl;
|
||||
import net.yacy.peers.EventChannel;
|
||||
import net.yacy.peers.Network;
|
||||
import net.yacy.peers.Protocol;
|
||||
import net.yacy.peers.Seed;
|
||||
import net.yacy.peers.graphics.ProfilingGraph;
|
||||
import net.yacy.search.EventTracker;
|
||||
import net.yacy.search.Switchboard;
|
||||
import net.yacy.search.SwitchboardConstants;
|
||||
import net.yacy.search.index.Segment;
|
||||
import net.yacy.search.query.AccessTracker;
|
||||
import net.yacy.search.query.QueryGoal;
|
||||
import net.yacy.search.query.QueryParams;
|
||||
import net.yacy.search.query.SearchEvent;
|
||||
import net.yacy.search.query.SearchEventCache;
|
||||
import net.yacy.search.query.SearchEventType;
|
||||
import net.yacy.search.ranking.RankingProfile;
|
||||
import net.yacy.search.snippet.ResultEntry;
|
||||
import net.yacy.server.serverCore;
|
||||
import net.yacy.server.serverObjects;
|
||||
import net.yacy.server.serverSwitch;
|
||||
import net.yacy.utils.crypt;
|
||||
|
||||
public final class search {
|
||||
|
||||
public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) {
|
||||
// return variable that accumulates replacements
|
||||
final Switchboard sb = (Switchboard) env;
|
||||
sb.remoteSearchLastAccess = System.currentTimeMillis();
|
||||
|
||||
final serverObjects prop = new serverObjects();
|
||||
// set nice default values for error cases
|
||||
prop.put("searchtime", "0");
|
||||
prop.put("references", "");
|
||||
prop.put("joincount", "0");
|
||||
prop.put("linkcount", "0");
|
||||
prop.put("links", "");
|
||||
prop.put("indexcount", "");
|
||||
prop.put("indexabstract", "");
|
||||
|
||||
if (post == null || env == null) return prop;
|
||||
if (!Protocol.authentifyRequest(post, env)) return prop;
|
||||
final String client = header.get(HeaderFramework.CONNECTION_PROP_CLIENTIP);
|
||||
|
||||
//System.out.println("yacy: search received request = " + post.toString());
|
||||
|
||||
final String oseed = post.get("myseed", ""); // complete seed of the requesting peer
|
||||
// final String youare = post.get("youare", ""); // seed hash of the target peer, used for testing network stability
|
||||
final String query = post.get("query", ""); // a string of word hashes that shall be searched and combined
|
||||
final String exclude= post.get("exclude", "");// a string of word hashes that shall not be within the search result
|
||||
final String urls = post.get("urls", ""); // a string of url hashes that are preselected for the search: no other may be returned
|
||||
final String abstracts = post.get("abstracts", ""); // a string of word hashes for abstracts that shall be generated, or 'auto' (for maxcount-word), or '' (for none)
|
||||
final int count = Math.min((int) sb.getConfigLong(SwitchboardConstants.REMOTESEARCH_MAXCOUNT_DEFAULT, 100), post.getInt("count", 10)); // maximum number of wanted results
|
||||
final long maxtime = Math.min((int) sb.getConfigLong(SwitchboardConstants.REMOTESEARCH_MAXTIME_DEFAULT, 3000), post.getLong("time", 3000)); // maximum waiting time
|
||||
final int maxdist= post.getInt("maxdist", Integer.MAX_VALUE);
|
||||
final String prefer = post.get("prefer", "");
|
||||
final String modifier = post.get("modifier", "").trim();
|
||||
final String contentdom = post.get("contentdom", "all");
|
||||
final String filter = post.get("filter", ".*"); // a filter on the url
|
||||
String sitehash = post.get("sitehash", ""); if (sitehash.isEmpty()) sitehash = null;
|
||||
String author = post.get("author", ""); if (author.isEmpty()) author = null;
|
||||
String language = post.get("language", "");
|
||||
if (language == null || language.isEmpty() || !ISO639.exists(language)) {
|
||||
// take language from the user agent
|
||||
String agent = header.get("User-Agent");
|
||||
if (agent == null) agent = System.getProperty("user.language");
|
||||
language = (agent == null) ? "en" : ISO639.userAgentLanguageDetection(agent);
|
||||
if (language == null) language = "en";
|
||||
}
|
||||
final int partitions = post.getInt("partitions", 30);
|
||||
String profile = post.get("profile", ""); // remote profile hand-over
|
||||
if (profile.length() > 0) profile = crypt.simpleDecode(profile);
|
||||
//final boolean includesnippet = post.get("includesnippet", "false").equals("true");
|
||||
Bitfield constraint = ((post.containsKey("constraint")) && (post.get("constraint", "").length() > 0)) ? new Bitfield(4, post.get("constraint", "______")) : null;
|
||||
if (constraint != null) {
|
||||
// check bad handover parameter from older versions
|
||||
boolean allon = true;
|
||||
for (int i = 0; i < 32; i++) {
|
||||
if (!constraint.get(i)) {allon = false; break;}
|
||||
}
|
||||
if (allon) constraint = null;
|
||||
}
|
||||
// Date remoteTime = yacyCore.parseUniversalDate((String) post.get(yacySeed.MYTIME)); // read remote time
|
||||
|
||||
// test:
|
||||
// http://localhost:8090/yacy/search.html?query=4galTpdpDM5Q (search for linux)
|
||||
// http://localhost:8090/yacy/search.html?query=gh8DKIhGKXws (search for book)
|
||||
// http://localhost:8090/yacy/search.html?query=UEhMGfGv2vOE (search for kernel)
|
||||
// http://localhost:8090/yacy/search.html?query=ZX-LjaYo74PP (search for help)
|
||||
// http://localhost:8090/yacy/search.html?query=uDqIalxDfM2a (search for mail)
|
||||
// http://localhost:8090/yacy/search.html?query=4galTpdpDM5Qgh8DKIhGKXws&abstracts=auto (search for linux and book, generate abstract automatically)
|
||||
// http://localhost:8090/yacy/search.html?query=&abstracts=4galTpdpDM5Q (only abstracts for linux)
|
||||
|
||||
if (sb.isRobinsonMode() && !sb.isPublicRobinson()) {
|
||||
// if we are a robinson cluster, answer only if this client is known by our network definition
|
||||
return prop;
|
||||
}
|
||||
|
||||
// check the search tracker
|
||||
TreeSet<Long> trackerHandles = sb.remoteSearchTracker.get(client);
|
||||
if (trackerHandles == null) trackerHandles = new TreeSet<Long>();
|
||||
boolean block = false;
|
||||
synchronized (trackerHandles) {
|
||||
if (trackerHandles.tailSet(Long.valueOf(System.currentTimeMillis() - 3000)).size() > 1) {
|
||||
block = true;
|
||||
}
|
||||
}
|
||||
if (!block) synchronized (trackerHandles) {
|
||||
if (trackerHandles.tailSet(Long.valueOf(System.currentTimeMillis() - 60000)).size() > 12) {
|
||||
block = true;
|
||||
}
|
||||
}
|
||||
if (!block) synchronized (trackerHandles) {
|
||||
if (trackerHandles.tailSet(Long.valueOf(System.currentTimeMillis() - 600000)).size() > 36) {
|
||||
block = true;
|
||||
}
|
||||
}
|
||||
if (block && Domains.isLocal(client, null)) block = false; // check isLocal here to prevent dns lookup for client
|
||||
if (block) {
|
||||
return prop;
|
||||
}
|
||||
|
||||
// tell all threads to do nothing for a specific time
|
||||
sb.intermissionAllThreads(100);
|
||||
|
||||
EventTracker.delete(EventTracker.EClass.SEARCH);
|
||||
final HandleSet abstractSet = (abstracts.isEmpty() || abstracts.equals("auto")) ? null : QueryParams.hashes2Set(abstracts);
|
||||
|
||||
// store accessing peer
|
||||
Seed remoteSeed;
|
||||
try {
|
||||
remoteSeed = Seed.genRemoteSeed(oseed, false, client);
|
||||
} catch (final IOException e) {
|
||||
Network.log.logInfo("yacy.search: access with bad seed: " + e.getMessage());
|
||||
remoteSeed = null;
|
||||
}
|
||||
if (sb.peers == null) {
|
||||
Network.log.logSevere("yacy.search: seed cache not initialized");
|
||||
} else {
|
||||
sb.peers.peerActions.peerArrival(remoteSeed, true);
|
||||
}
|
||||
|
||||
// prepare search
|
||||
final HandleSet queryhashes = QueryParams.hashes2Set(query);
|
||||
final HandleSet excludehashes = (exclude.isEmpty()) ? new RowHandleSet(WordReferenceRow.urlEntryRow.primaryKeyLength, WordReferenceRow.urlEntryRow.objectOrder, 0) : QueryParams.hashes2Set(exclude);
|
||||
final long timestamp = System.currentTimeMillis();
|
||||
|
||||
// prepare a search profile
|
||||
final RankingProfile rankingProfile = (profile.isEmpty()) ? new RankingProfile(Classification.ContentDomain.contentdomParser(contentdom)) : new RankingProfile("", profile);
|
||||
|
||||
// prepare an abstract result
|
||||
final StringBuilder indexabstract = new StringBuilder(6000);
|
||||
int indexabstractContainercount = 0;
|
||||
QueryParams theQuery = null;
|
||||
SearchEvent theSearch = null;
|
||||
ArrayList<WeakPriorityBlockingQueue.Element<ResultEntry>> accu = null;
|
||||
if (query.isEmpty() && abstractSet != null) {
|
||||
// this is _not_ a normal search, only a request for index abstracts
|
||||
final Segment indexSegment = sb.index;
|
||||
QueryGoal qg = new QueryGoal(abstractSet, new RowHandleSet(WordReferenceRow.urlEntryRow.primaryKeyLength, WordReferenceRow.urlEntryRow.objectOrder, 0), abstractSet);
|
||||
theQuery = new QueryParams(
|
||||
qg,
|
||||
modifier,
|
||||
maxdist,
|
||||
prefer,
|
||||
ContentDomain.contentdomParser(contentdom),
|
||||
language,
|
||||
new HashSet<Tagging.Metatag>(),
|
||||
null, // no snippet computation
|
||||
count,
|
||||
0,
|
||||
filter, null, null, null,
|
||||
QueryParams.Searchdom.LOCAL,
|
||||
-1,
|
||||
null,
|
||||
false,
|
||||
sitehash,
|
||||
null,
|
||||
null,
|
||||
author,
|
||||
DigestURI.TLD_any_zone_filter,
|
||||
client,
|
||||
false,
|
||||
indexSegment,
|
||||
rankingProfile,
|
||||
header.get(RequestHeader.USER_AGENT, ""),
|
||||
false, false, 0.0d, 0.0d, 0.0d
|
||||
);
|
||||
Network.log.logInfo("INIT HASH SEARCH (abstracts only): " + QueryParams.anonymizedQueryHashes(theQuery.getQueryGoal().getIncludeHashes()) + " - " + theQuery.itemsPerPage() + " links");
|
||||
|
||||
final long timer = System.currentTimeMillis();
|
||||
//final Map<byte[], ReferenceContainer<WordReference>>[] containers = sb.indexSegment.index().searchTerm(theQuery.queryHashes, theQuery.excludeHashes, plasmaSearchQuery.hashes2StringSet(urls));
|
||||
final TreeMap<byte[], ReferenceContainer<WordReference>> incc = indexSegment.termIndex().searchConjunction(theQuery.getQueryGoal().getIncludeHashes(), QueryParams.hashes2Handles(urls));
|
||||
|
||||
EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.EventSearch(theQuery.id(true), SearchEventType.COLLECTION, "", incc.size(), System.currentTimeMillis() - timer), false);
|
||||
if (incc != null) {
|
||||
final Iterator<Map.Entry<byte[], ReferenceContainer<WordReference>>> ci = incc.entrySet().iterator();
|
||||
Map.Entry<byte[], ReferenceContainer<WordReference>> entry;
|
||||
byte[] wordhash;
|
||||
while (ci.hasNext()) {
|
||||
entry = ci.next();
|
||||
wordhash = entry.getKey();
|
||||
final ReferenceContainer<WordReference> container = entry.getValue();
|
||||
indexabstractContainercount += container.size();
|
||||
indexabstract.append("indexabstract.");
|
||||
indexabstract.append(ASCII.String(wordhash));
|
||||
indexabstract.append("=");
|
||||
indexabstract.append(WordReferenceFactory.compressIndex(container, null, 1000).toString());
|
||||
indexabstract.append(serverCore.CRLF_STRING);
|
||||
}
|
||||
}
|
||||
|
||||
prop.put("indexcount", "");
|
||||
prop.put("joincount", "0");
|
||||
prop.put("references", "");
|
||||
|
||||
} else {
|
||||
// retrieve index containers from search request
|
||||
RowHandleSet allHashes = new RowHandleSet(WordReferenceRow.urlEntryRow.primaryKeyLength, WordReferenceRow.urlEntryRow.objectOrder, 0);
|
||||
try {allHashes.putAll(queryhashes);} catch (SpaceExceededException e) {}
|
||||
try {allHashes.putAll(excludehashes);} catch (SpaceExceededException e) {}
|
||||
QueryGoal qg = new QueryGoal(queryhashes, excludehashes, allHashes);
|
||||
theQuery = new QueryParams(
|
||||
qg,
|
||||
modifier,
|
||||
maxdist,
|
||||
prefer,
|
||||
ContentDomain.contentdomParser(contentdom),
|
||||
language,
|
||||
new HashSet<Tagging.Metatag>(),
|
||||
null, // no snippet computation
|
||||
count,
|
||||
0,
|
||||
filter, null, null, null,
|
||||
QueryParams.Searchdom.LOCAL,
|
||||
-1,
|
||||
constraint,
|
||||
false,
|
||||
sitehash,
|
||||
null,
|
||||
null,
|
||||
author,
|
||||
DigestURI.TLD_any_zone_filter,
|
||||
client,
|
||||
false,
|
||||
sb.index,
|
||||
rankingProfile,
|
||||
header.get(RequestHeader.USER_AGENT, ""),
|
||||
false, false, 0.0d, 0.0d, 0.0d
|
||||
);
|
||||
Network.log.logInfo("INIT HASH SEARCH (query-" + abstracts + "): " + QueryParams.anonymizedQueryHashes(theQuery.getQueryGoal().getIncludeHashes()) + " - " + theQuery.itemsPerPage() + " links");
|
||||
EventChannel.channels(EventChannel.REMOTESEARCH).addMessage(new RSSMessage("Remote Search Request from " + ((remoteSeed == null) ? "unknown" : remoteSeed.getName()), QueryParams.anonymizedQueryHashes(theQuery.getQueryGoal().getIncludeHashes()), ""));
|
||||
|
||||
// make event
|
||||
theSearch = SearchEventCache.getEvent(theQuery, sb.peers, sb.tables, null, abstracts.length() > 0, sb.loader, count, maxtime, (int) sb.getConfigLong(SwitchboardConstants.DHT_BURST_ROBINSON, 0), (int) sb.getConfigLong(SwitchboardConstants.DHT_BURST_MULTIWORD, 0));
|
||||
|
||||
// set statistic details of search result and find best result index set
|
||||
prop.put("joincount", Integer.toString(theQuery.getResultCount()));
|
||||
if (theQuery.getResultCount() > 0) {
|
||||
accu = theSearch.completeResults(maxtime);
|
||||
}
|
||||
if (theQuery.getResultCount() <= 0 || abstracts.isEmpty()) {
|
||||
prop.put("indexcount", "");
|
||||
} else {
|
||||
// attach information about index abstracts
|
||||
final StringBuilder indexcount = new StringBuilder(6000);
|
||||
Map.Entry<byte[], Integer> entry;
|
||||
final Iterator<Map.Entry<byte[], Integer>> i = theSearch.abstractsCount();
|
||||
while (i.hasNext()) {
|
||||
entry = i.next();
|
||||
indexcount.append("indexcount.").append(ASCII.String(entry.getKey())).append('=').append((entry.getValue()).toString()).append(serverCore.CRLF_STRING);
|
||||
}
|
||||
if (abstractSet != null) {
|
||||
// if a specific index-abstract is demanded, attach it here
|
||||
final Iterator<byte[]> j = abstractSet.iterator();
|
||||
byte[] wordhash;
|
||||
while (j.hasNext()) {
|
||||
wordhash = j.next();
|
||||
indexabstractContainercount += theSearch.abstractsCount(wordhash);
|
||||
indexabstract.append("indexabstract.").append(ASCII.String(wordhash)).append("=").append(theSearch.abstractsString(wordhash)).append(serverCore.CRLF_STRING);
|
||||
}
|
||||
}
|
||||
prop.put("indexcount", indexcount.toString());
|
||||
|
||||
// generate compressed index for maxcounthash
|
||||
// this is not needed if the search is restricted to specific
|
||||
// urls, because it is a re-search
|
||||
if ((theSearch.getAbstractsMaxCountHash() == null) || (urls.length() != 0) || (queryhashes.size() <= 1) || (abstracts.isEmpty())) {
|
||||
prop.put("indexabstract", "");
|
||||
} else if (abstracts.equals("auto")) {
|
||||
// automatically attach the index abstract for the index that has the most references. This should be our target dht position
|
||||
indexabstractContainercount += theSearch.abstractsCount(theSearch.getAbstractsMaxCountHash());
|
||||
indexabstract.append("indexabstract.").append(ASCII.String(theSearch.getAbstractsMaxCountHash())).append("=").append(theSearch.abstractsString(theSearch.getAbstractsMaxCountHash())).append(serverCore.CRLF_STRING);
|
||||
if ((theSearch.getAbstractsNearDHTHash() != null) && (!(ByteBuffer.equals(theSearch.getAbstractsNearDHTHash(), theSearch.getAbstractsMaxCountHash())))) {
|
||||
// in case that the neardhthash is different from the maxcounthash attach also the neardhthash-container
|
||||
indexabstractContainercount += theSearch.abstractsCount(theSearch.getAbstractsNearDHTHash());
|
||||
indexabstract.append("indexabstract.").append(ASCII.String(theSearch.getAbstractsNearDHTHash())).append("=").append(theSearch.abstractsString(theSearch.getAbstractsNearDHTHash())).append(serverCore.CRLF_STRING);
|
||||
}
|
||||
//System.out.println("DEBUG-ABSTRACTGENERATION: maxcounthash = " + maxcounthash);
|
||||
//System.out.println("DEBUG-ABSTRACTGENERATION: neardhthash = "+ neardhthash);
|
||||
//yacyCore.log.logFine("DEBUG HASH SEARCH: " + indexabstract);
|
||||
}
|
||||
}
|
||||
if (partitions > 0) sb.searchQueriesGlobal += 1d / partitions; // increase query counter
|
||||
|
||||
// prepare reference hints
|
||||
final long timer = System.currentTimeMillis();
|
||||
final ScoreMap<String> topicNavigator = theSearch.rankingProcess.getTopicNavigator(5);
|
||||
final StringBuilder refstr = new StringBuilder(6000);
|
||||
final Iterator<String> navigatorIterator = topicNavigator.keys(false);
|
||||
int i = 0;
|
||||
String name;
|
||||
while (i < 5 && navigatorIterator.hasNext()) {
|
||||
name = navigatorIterator.next();
|
||||
refstr.append(",").append(name);
|
||||
i++;
|
||||
}
|
||||
prop.put("references", (refstr.length() > 0) ? refstr.substring(1) : refstr.toString());
|
||||
EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.EventSearch(theQuery.id(true), SearchEventType.REFERENCECOLLECTION, "", i, System.currentTimeMillis() - timer), false);
|
||||
}
|
||||
prop.put("indexabstract", indexabstract.toString());
|
||||
|
||||
// prepare result
|
||||
if (theQuery.getResultCount() == 0 || accu == null || accu.isEmpty()) {
|
||||
|
||||
// no results
|
||||
prop.put("links", "");
|
||||
prop.put("linkcount", "0");
|
||||
prop.put("references", "");
|
||||
|
||||
} else {
|
||||
// result is a List of urlEntry elements
|
||||
final long timer = System.currentTimeMillis();
|
||||
final StringBuilder links = new StringBuilder(6000);
|
||||
String resource = null;
|
||||
WeakPriorityBlockingQueue.Element<ResultEntry> entry;
|
||||
for (int i = 0; i < accu.size(); i++) {
|
||||
entry = accu.get(i);
|
||||
resource = entry.getElement().resource();
|
||||
if (resource != null) {
|
||||
links.append("resource").append(i).append('=').append(resource).append(serverCore.CRLF_STRING);
|
||||
}
|
||||
}
|
||||
theQuery.transmitcount = accu.size() + 1;
|
||||
prop.put("links", links.toString());
|
||||
prop.put("linkcount", accu.size());
|
||||
EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.EventSearch(theQuery.id(true), SearchEventType.RESULTLIST, "", accu.size(), System.currentTimeMillis() - timer), false);
|
||||
}
|
||||
|
||||
// prepare search statistics
|
||||
theQuery.remotepeer = client == null ? null : sb.peers.lookupByIP(Domains.dnsResolve(client), -1, true, false, false);
|
||||
theQuery.searchtime = System.currentTimeMillis() - timestamp;
|
||||
theQuery.urlretrievaltime = (theSearch == null) ? 0 : theSearch.getURLRetrievalTime();
|
||||
theQuery.snippetcomputationtime = (theSearch == null) ? 0 : theSearch.getSnippetComputationTime();
|
||||
AccessTracker.add(AccessTracker.Location.remote, theQuery);
|
||||
|
||||
// update the search tracker
|
||||
synchronized (trackerHandles) {
|
||||
trackerHandles.add(theQuery.starttime); // thats the time when the handle was created
|
||||
// we don't need too much entries in the list; remove superfluous
|
||||
while (trackerHandles.size() > 36) if (!trackerHandles.remove(trackerHandles.first())) break;
|
||||
}
|
||||
sb.remoteSearchTracker.put(client, trackerHandles);
|
||||
if (MemoryControl.shortStatus()) sb.remoteSearchTracker.clear();
|
||||
|
||||
// log
|
||||
Network.log.logInfo("EXIT HASH SEARCH: " +
|
||||
QueryParams.anonymizedQueryHashes(theQuery.getQueryGoal().getIncludeHashes()) + " - " + theQuery.getResultCount() + " links found, " +
|
||||
prop.get("linkcount", "?") + " links selected, " +
|
||||
indexabstractContainercount + " index abstracts, " +
|
||||
(System.currentTimeMillis() - timestamp) + " milliseconds");
|
||||
|
||||
prop.put("searchtime", System.currentTimeMillis() - timestamp);
|
||||
|
||||
final int links = prop.getInt("linkcount",0);
|
||||
sb.peers.mySeed().incSI(links);
|
||||
sb.peers.mySeed().incSU(links);
|
||||
return prop;
|
||||
}
|
||||
|
||||
}
|
||||
// search.java
|
||||
// (C) 2004 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
|
||||
// first published on http://yacy.net
|
||||
//
|
||||
// This is a part of YaCy, a peer-to-peer based web search engine
|
||||
//
|
||||
// $LastChangedDate$
|
||||
// $LastChangedRevision$
|
||||
// $LastChangedBy$
|
||||
//
|
||||
// LICENSE
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation; either version 2 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program; if not, write to the Free Software
|
||||
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
|
||||
// You must compile this file with
|
||||
// javac -classpath .:../../Classes search.java
|
||||
// if the shell's current path is htroot/yacy
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashSet;
|
||||
import java.util.Iterator;
|
||||
import java.util.Map;
|
||||
import java.util.TreeMap;
|
||||
import java.util.TreeSet;
|
||||
|
||||
import net.yacy.cora.document.ASCII;
|
||||
import net.yacy.cora.document.analysis.Classification;
|
||||
import net.yacy.cora.document.analysis.Classification.ContentDomain;
|
||||
import net.yacy.cora.document.RSSMessage;
|
||||
import net.yacy.cora.lod.vocabulary.Tagging;
|
||||
import net.yacy.cora.protocol.Domains;
|
||||
import net.yacy.cora.protocol.HeaderFramework;
|
||||
import net.yacy.cora.protocol.RequestHeader;
|
||||
import net.yacy.cora.sorting.ScoreMap;
|
||||
import net.yacy.cora.sorting.WeakPriorityBlockingQueue;
|
||||
import net.yacy.cora.storage.HandleSet;
|
||||
import net.yacy.cora.util.SpaceExceededException;
|
||||
import net.yacy.kelondro.data.meta.DigestURI;
|
||||
import net.yacy.kelondro.data.word.WordReference;
|
||||
import net.yacy.kelondro.data.word.WordReferenceFactory;
|
||||
import net.yacy.kelondro.data.word.WordReferenceRow;
|
||||
import net.yacy.kelondro.index.RowHandleSet;
|
||||
import net.yacy.kelondro.rwi.ReferenceContainer;
|
||||
import net.yacy.kelondro.util.Bitfield;
|
||||
import net.yacy.kelondro.util.ByteBuffer;
|
||||
import net.yacy.kelondro.util.ISO639;
|
||||
import net.yacy.kelondro.util.MemoryControl;
|
||||
import net.yacy.peers.EventChannel;
|
||||
import net.yacy.peers.Network;
|
||||
import net.yacy.peers.Protocol;
|
||||
import net.yacy.peers.Seed;
|
||||
import net.yacy.peers.graphics.ProfilingGraph;
|
||||
import net.yacy.search.EventTracker;
|
||||
import net.yacy.search.Switchboard;
|
||||
import net.yacy.search.SwitchboardConstants;
|
||||
import net.yacy.search.index.Segment;
|
||||
import net.yacy.search.query.AccessTracker;
|
||||
import net.yacy.search.query.QueryGoal;
|
||||
import net.yacy.search.query.QueryParams;
|
||||
import net.yacy.search.query.SearchEvent;
|
||||
import net.yacy.search.query.SearchEventCache;
|
||||
import net.yacy.search.query.SearchEventType;
|
||||
import net.yacy.search.ranking.RankingProfile;
|
||||
import net.yacy.search.snippet.ResultEntry;
|
||||
import net.yacy.server.serverCore;
|
||||
import net.yacy.server.serverObjects;
|
||||
import net.yacy.server.serverSwitch;
|
||||
import net.yacy.utils.crypt;
|
||||
|
||||
public final class search {
|
||||
|
||||
public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) {
|
||||
// return variable that accumulates replacements
|
||||
final Switchboard sb = (Switchboard) env;
|
||||
sb.remoteSearchLastAccess = System.currentTimeMillis();
|
||||
|
||||
final serverObjects prop = new serverObjects();
|
||||
// set nice default values for error cases
|
||||
prop.put("searchtime", "0");
|
||||
prop.put("references", "");
|
||||
prop.put("joincount", "0");
|
||||
prop.put("linkcount", "0");
|
||||
prop.put("links", "");
|
||||
prop.put("indexcount", "");
|
||||
prop.put("indexabstract", "");
|
||||
|
||||
if (post == null || env == null) return prop;
|
||||
if (!Protocol.authentifyRequest(post, env)) return prop;
|
||||
final String client = header.get(HeaderFramework.CONNECTION_PROP_CLIENTIP);
|
||||
|
||||
//System.out.println("yacy: search received request = " + post.toString());
|
||||
|
||||
final String oseed = post.get("myseed", ""); // complete seed of the requesting peer
|
||||
// final String youare = post.get("youare", ""); // seed hash of the target peer, used for testing network stability
|
||||
final String query = post.get("query", ""); // a string of word hashes that shall be searched and combined
|
||||
final String exclude= post.get("exclude", "");// a string of word hashes that shall not be within the search result
|
||||
final String urls = post.get("urls", ""); // a string of url hashes that are preselected for the search: no other may be returned
|
||||
final String abstracts = post.get("abstracts", ""); // a string of word hashes for abstracts that shall be generated, or 'auto' (for maxcount-word), or '' (for none)
|
||||
final int count = Math.min((int) sb.getConfigLong(SwitchboardConstants.REMOTESEARCH_MAXCOUNT_DEFAULT, 100), post.getInt("count", 10)); // maximum number of wanted results
|
||||
final long maxtime = Math.min((int) sb.getConfigLong(SwitchboardConstants.REMOTESEARCH_MAXTIME_DEFAULT, 3000), post.getLong("time", 3000)); // maximum waiting time
|
||||
final int maxdist= post.getInt("maxdist", Integer.MAX_VALUE);
|
||||
final String prefer = post.get("prefer", "");
|
||||
final String modifier = post.get("modifier", "").trim();
|
||||
final String contentdom = post.get("contentdom", "all");
|
||||
final String filter = post.get("filter", ".*"); // a filter on the url
|
||||
String sitehash = post.get("sitehash", ""); if (sitehash.isEmpty()) sitehash = null;
|
||||
String author = post.get("author", ""); if (author.isEmpty()) author = null;
|
||||
String language = post.get("language", "");
|
||||
if (language == null || language.isEmpty() || !ISO639.exists(language)) {
|
||||
// take language from the user agent
|
||||
String agent = header.get("User-Agent");
|
||||
if (agent == null) agent = System.getProperty("user.language");
|
||||
language = (agent == null) ? "en" : ISO639.userAgentLanguageDetection(agent);
|
||||
if (language == null) language = "en";
|
||||
}
|
||||
final int partitions = post.getInt("partitions", 30);
|
||||
String profile = post.get("profile", ""); // remote profile hand-over
|
||||
if (profile.length() > 0) profile = crypt.simpleDecode(profile);
|
||||
//final boolean includesnippet = post.get("includesnippet", "false").equals("true");
|
||||
Bitfield constraint = ((post.containsKey("constraint")) && (post.get("constraint", "").length() > 0)) ? new Bitfield(4, post.get("constraint", "______")) : null;
|
||||
if (constraint != null) {
|
||||
// check bad handover parameter from older versions
|
||||
boolean allon = true;
|
||||
for (int i = 0; i < 32; i++) {
|
||||
if (!constraint.get(i)) {allon = false; break;}
|
||||
}
|
||||
if (allon) constraint = null;
|
||||
}
|
||||
// Date remoteTime = yacyCore.parseUniversalDate((String) post.get(yacySeed.MYTIME)); // read remote time
|
||||
|
||||
// test:
|
||||
// http://localhost:8090/yacy/search.html?query=4galTpdpDM5Q (search for linux)
|
||||
// http://localhost:8090/yacy/search.html?query=gh8DKIhGKXws (search for book)
|
||||
// http://localhost:8090/yacy/search.html?query=UEhMGfGv2vOE (search for kernel)
|
||||
// http://localhost:8090/yacy/search.html?query=ZX-LjaYo74PP (search for help)
|
||||
// http://localhost:8090/yacy/search.html?query=uDqIalxDfM2a (search for mail)
|
||||
// http://localhost:8090/yacy/search.html?query=4galTpdpDM5Qgh8DKIhGKXws&abstracts=auto (search for linux and book, generate abstract automatically)
|
||||
// http://localhost:8090/yacy/search.html?query=&abstracts=4galTpdpDM5Q (only abstracts for linux)
|
||||
|
||||
if (sb.isRobinsonMode() && !sb.isPublicRobinson()) {
|
||||
// if we are a robinson cluster, answer only if this client is known by our network definition
|
||||
return prop;
|
||||
}
|
||||
|
||||
// check the search tracker
|
||||
TreeSet<Long> trackerHandles = sb.remoteSearchTracker.get(client);
|
||||
if (trackerHandles == null) trackerHandles = new TreeSet<Long>();
|
||||
boolean block = false;
|
||||
synchronized (trackerHandles) {
|
||||
if (trackerHandles.tailSet(Long.valueOf(System.currentTimeMillis() - 3000)).size() > 1) {
|
||||
block = true;
|
||||
}
|
||||
}
|
||||
if (!block) synchronized (trackerHandles) {
|
||||
if (trackerHandles.tailSet(Long.valueOf(System.currentTimeMillis() - 60000)).size() > 12) {
|
||||
block = true;
|
||||
}
|
||||
}
|
||||
if (!block) synchronized (trackerHandles) {
|
||||
if (trackerHandles.tailSet(Long.valueOf(System.currentTimeMillis() - 600000)).size() > 36) {
|
||||
block = true;
|
||||
}
|
||||
}
|
||||
if (block && Domains.isLocal(client, null)) block = false; // check isLocal here to prevent dns lookup for client
|
||||
if (block) {
|
||||
return prop;
|
||||
}
|
||||
|
||||
// tell all threads to do nothing for a specific time
|
||||
sb.intermissionAllThreads(100);
|
||||
|
||||
EventTracker.delete(EventTracker.EClass.SEARCH);
|
||||
final HandleSet abstractSet = (abstracts.isEmpty() || abstracts.equals("auto")) ? null : QueryParams.hashes2Set(abstracts);
|
||||
|
||||
// store accessing peer
|
||||
Seed remoteSeed;
|
||||
try {
|
||||
remoteSeed = Seed.genRemoteSeed(oseed, false, client);
|
||||
} catch (final IOException e) {
|
||||
Network.log.logInfo("yacy.search: access with bad seed: " + e.getMessage());
|
||||
remoteSeed = null;
|
||||
}
|
||||
if (sb.peers == null) {
|
||||
Network.log.logSevere("yacy.search: seed cache not initialized");
|
||||
} else {
|
||||
sb.peers.peerActions.peerArrival(remoteSeed, true);
|
||||
}
|
||||
|
||||
// prepare search
|
||||
final HandleSet queryhashes = QueryParams.hashes2Set(query);
|
||||
final HandleSet excludehashes = (exclude.isEmpty()) ? new RowHandleSet(WordReferenceRow.urlEntryRow.primaryKeyLength, WordReferenceRow.urlEntryRow.objectOrder, 0) : QueryParams.hashes2Set(exclude);
|
||||
final long timestamp = System.currentTimeMillis();
|
||||
|
||||
// prepare a search profile
|
||||
final RankingProfile rankingProfile = (profile.isEmpty()) ? new RankingProfile(Classification.ContentDomain.contentdomParser(contentdom)) : new RankingProfile("", profile);
|
||||
|
||||
// prepare an abstract result
|
||||
final StringBuilder indexabstract = new StringBuilder(6000);
|
||||
int indexabstractContainercount = 0;
|
||||
QueryParams theQuery = null;
|
||||
SearchEvent theSearch = null;
|
||||
ArrayList<WeakPriorityBlockingQueue.Element<ResultEntry>> accu = null;
|
||||
if (query.isEmpty() && abstractSet != null) {
|
||||
// this is _not_ a normal search, only a request for index abstracts
|
||||
final Segment indexSegment = sb.index;
|
||||
QueryGoal qg = new QueryGoal(abstractSet, new RowHandleSet(WordReferenceRow.urlEntryRow.primaryKeyLength, WordReferenceRow.urlEntryRow.objectOrder, 0), abstractSet);
|
||||
theQuery = new QueryParams(
|
||||
qg,
|
||||
modifier,
|
||||
maxdist,
|
||||
prefer,
|
||||
ContentDomain.contentdomParser(contentdom),
|
||||
language,
|
||||
new HashSet<Tagging.Metatag>(),
|
||||
null, // no snippet computation
|
||||
count,
|
||||
0,
|
||||
filter, null, null, null,
|
||||
QueryParams.Searchdom.LOCAL,
|
||||
-1,
|
||||
null,
|
||||
false,
|
||||
sitehash,
|
||||
null,
|
||||
null,
|
||||
author,
|
||||
DigestURI.TLD_any_zone_filter,
|
||||
client,
|
||||
false,
|
||||
indexSegment,
|
||||
rankingProfile,
|
||||
header.get(RequestHeader.USER_AGENT, ""),
|
||||
false, false, 0.0d, 0.0d, 0.0d
|
||||
);
|
||||
Network.log.logInfo("INIT HASH SEARCH (abstracts only): " + QueryParams.anonymizedQueryHashes(theQuery.getQueryGoal().getIncludeHashes()) + " - " + theQuery.itemsPerPage() + " links");
|
||||
|
||||
final long timer = System.currentTimeMillis();
|
||||
//final Map<byte[], ReferenceContainer<WordReference>>[] containers = sb.indexSegment.index().searchTerm(theQuery.queryHashes, theQuery.excludeHashes, plasmaSearchQuery.hashes2StringSet(urls));
|
||||
final TreeMap<byte[], ReferenceContainer<WordReference>> incc = indexSegment.termIndex().searchConjunction(theQuery.getQueryGoal().getIncludeHashes(), QueryParams.hashes2Handles(urls));
|
||||
|
||||
EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.EventSearch(theQuery.id(true), SearchEventType.COLLECTION, "", incc.size(), System.currentTimeMillis() - timer), false);
|
||||
if (incc != null) {
|
||||
final Iterator<Map.Entry<byte[], ReferenceContainer<WordReference>>> ci = incc.entrySet().iterator();
|
||||
Map.Entry<byte[], ReferenceContainer<WordReference>> entry;
|
||||
byte[] wordhash;
|
||||
while (ci.hasNext()) {
|
||||
entry = ci.next();
|
||||
wordhash = entry.getKey();
|
||||
final ReferenceContainer<WordReference> container = entry.getValue();
|
||||
indexabstractContainercount += container.size();
|
||||
indexabstract.append("indexabstract.");
|
||||
indexabstract.append(ASCII.String(wordhash));
|
||||
indexabstract.append("=");
|
||||
indexabstract.append(WordReferenceFactory.compressIndex(container, null, 1000).toString());
|
||||
indexabstract.append(serverCore.CRLF_STRING);
|
||||
}
|
||||
}
|
||||
|
||||
prop.put("indexcount", "");
|
||||
prop.put("joincount", "0");
|
||||
prop.put("references", "");
|
||||
|
||||
} else {
|
||||
// retrieve index containers from search request
|
||||
RowHandleSet allHashes = new RowHandleSet(WordReferenceRow.urlEntryRow.primaryKeyLength, WordReferenceRow.urlEntryRow.objectOrder, 0);
|
||||
try {allHashes.putAll(queryhashes);} catch (SpaceExceededException e) {}
|
||||
try {allHashes.putAll(excludehashes);} catch (SpaceExceededException e) {}
|
||||
QueryGoal qg = new QueryGoal(queryhashes, excludehashes, allHashes);
|
||||
theQuery = new QueryParams(
|
||||
qg,
|
||||
modifier,
|
||||
maxdist,
|
||||
prefer,
|
||||
ContentDomain.contentdomParser(contentdom),
|
||||
language,
|
||||
new HashSet<Tagging.Metatag>(),
|
||||
null, // no snippet computation
|
||||
count,
|
||||
0,
|
||||
filter, null, null, null,
|
||||
QueryParams.Searchdom.LOCAL,
|
||||
-1,
|
||||
constraint,
|
||||
false,
|
||||
sitehash,
|
||||
null,
|
||||
null,
|
||||
author,
|
||||
DigestURI.TLD_any_zone_filter,
|
||||
client,
|
||||
false,
|
||||
sb.index,
|
||||
rankingProfile,
|
||||
header.get(RequestHeader.USER_AGENT, ""),
|
||||
false, false, 0.0d, 0.0d, 0.0d
|
||||
);
|
||||
Network.log.logInfo("INIT HASH SEARCH (query-" + abstracts + "): " + QueryParams.anonymizedQueryHashes(theQuery.getQueryGoal().getIncludeHashes()) + " - " + theQuery.itemsPerPage() + " links");
|
||||
EventChannel.channels(EventChannel.REMOTESEARCH).addMessage(new RSSMessage("Remote Search Request from " + ((remoteSeed == null) ? "unknown" : remoteSeed.getName()), QueryParams.anonymizedQueryHashes(theQuery.getQueryGoal().getIncludeHashes()), ""));
|
||||
|
||||
// make event
|
||||
theSearch = SearchEventCache.getEvent(theQuery, sb.peers, sb.tables, null, abstracts.length() > 0, sb.loader, count, maxtime, (int) sb.getConfigLong(SwitchboardConstants.DHT_BURST_ROBINSON, 0), (int) sb.getConfigLong(SwitchboardConstants.DHT_BURST_MULTIWORD, 0));
|
||||
|
||||
// set statistic details of search result and find best result index set
|
||||
prop.put("joincount", Integer.toString(theQuery.getResultCount()));
|
||||
if (theQuery.getResultCount() > 0) {
|
||||
accu = theSearch.completeResults(maxtime);
|
||||
}
|
||||
if (theQuery.getResultCount() <= 0 || abstracts.isEmpty()) {
|
||||
prop.put("indexcount", "");
|
||||
} else {
|
||||
// attach information about index abstracts
|
||||
final StringBuilder indexcount = new StringBuilder(6000);
|
||||
Map.Entry<byte[], Integer> entry;
|
||||
final Iterator<Map.Entry<byte[], Integer>> i = theSearch.abstractsCount();
|
||||
while (i.hasNext()) {
|
||||
entry = i.next();
|
||||
indexcount.append("indexcount.").append(ASCII.String(entry.getKey())).append('=').append((entry.getValue()).toString()).append(serverCore.CRLF_STRING);
|
||||
}
|
||||
if (abstractSet != null) {
|
||||
// if a specific index-abstract is demanded, attach it here
|
||||
final Iterator<byte[]> j = abstractSet.iterator();
|
||||
byte[] wordhash;
|
||||
while (j.hasNext()) {
|
||||
wordhash = j.next();
|
||||
indexabstractContainercount += theSearch.abstractsCount(wordhash);
|
||||
indexabstract.append("indexabstract.").append(ASCII.String(wordhash)).append("=").append(theSearch.abstractsString(wordhash)).append(serverCore.CRLF_STRING);
|
||||
}
|
||||
}
|
||||
prop.put("indexcount", indexcount.toString());
|
||||
|
||||
// generate compressed index for maxcounthash
|
||||
// this is not needed if the search is restricted to specific
|
||||
// urls, because it is a re-search
|
||||
if ((theSearch.getAbstractsMaxCountHash() == null) || (urls.length() != 0) || (queryhashes.size() <= 1) || (abstracts.isEmpty())) {
|
||||
prop.put("indexabstract", "");
|
||||
} else if (abstracts.equals("auto")) {
|
||||
// automatically attach the index abstract for the index that has the most references. This should be our target dht position
|
||||
indexabstractContainercount += theSearch.abstractsCount(theSearch.getAbstractsMaxCountHash());
|
||||
indexabstract.append("indexabstract.").append(ASCII.String(theSearch.getAbstractsMaxCountHash())).append("=").append(theSearch.abstractsString(theSearch.getAbstractsMaxCountHash())).append(serverCore.CRLF_STRING);
|
||||
if ((theSearch.getAbstractsNearDHTHash() != null) && (!(ByteBuffer.equals(theSearch.getAbstractsNearDHTHash(), theSearch.getAbstractsMaxCountHash())))) {
|
||||
// in case that the neardhthash is different from the maxcounthash attach also the neardhthash-container
|
||||
indexabstractContainercount += theSearch.abstractsCount(theSearch.getAbstractsNearDHTHash());
|
||||
indexabstract.append("indexabstract.").append(ASCII.String(theSearch.getAbstractsNearDHTHash())).append("=").append(theSearch.abstractsString(theSearch.getAbstractsNearDHTHash())).append(serverCore.CRLF_STRING);
|
||||
}
|
||||
//System.out.println("DEBUG-ABSTRACTGENERATION: maxcounthash = " + maxcounthash);
|
||||
//System.out.println("DEBUG-ABSTRACTGENERATION: neardhthash = "+ neardhthash);
|
||||
//yacyCore.log.logFine("DEBUG HASH SEARCH: " + indexabstract);
|
||||
}
|
||||
}
|
||||
if (partitions > 0) sb.searchQueriesGlobal += 1d / partitions; // increase query counter
|
||||
|
||||
// prepare reference hints
|
||||
final long timer = System.currentTimeMillis();
|
||||
final ScoreMap<String> topicNavigator = theSearch.rankingProcess.getTopics(5);
|
||||
final StringBuilder refstr = new StringBuilder(6000);
|
||||
final Iterator<String> navigatorIterator = topicNavigator.keys(false);
|
||||
int i = 0;
|
||||
String name;
|
||||
while (i < 5 && navigatorIterator.hasNext()) {
|
||||
name = navigatorIterator.next();
|
||||
refstr.append(",").append(name);
|
||||
i++;
|
||||
}
|
||||
prop.put("references", (refstr.length() > 0) ? refstr.substring(1) : refstr.toString());
|
||||
EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.EventSearch(theQuery.id(true), SearchEventType.REFERENCECOLLECTION, "", i, System.currentTimeMillis() - timer), false);
|
||||
}
|
||||
prop.put("indexabstract", indexabstract.toString());
|
||||
|
||||
// prepare result
|
||||
if (theQuery.getResultCount() == 0 || accu == null || accu.isEmpty()) {
|
||||
|
||||
// no results
|
||||
prop.put("links", "");
|
||||
prop.put("linkcount", "0");
|
||||
prop.put("references", "");
|
||||
|
||||
} else {
|
||||
// result is a List of urlEntry elements
|
||||
final long timer = System.currentTimeMillis();
|
||||
final StringBuilder links = new StringBuilder(6000);
|
||||
String resource = null;
|
||||
WeakPriorityBlockingQueue.Element<ResultEntry> entry;
|
||||
for (int i = 0; i < accu.size(); i++) {
|
||||
entry = accu.get(i);
|
||||
resource = entry.getElement().resource();
|
||||
if (resource != null) {
|
||||
links.append("resource").append(i).append('=').append(resource).append(serverCore.CRLF_STRING);
|
||||
}
|
||||
}
|
||||
theQuery.transmitcount = accu.size() + 1;
|
||||
prop.put("links", links.toString());
|
||||
prop.put("linkcount", accu.size());
|
||||
EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.EventSearch(theQuery.id(true), SearchEventType.RESULTLIST, "", accu.size(), System.currentTimeMillis() - timer), false);
|
||||
}
|
||||
|
||||
// prepare search statistics
|
||||
theQuery.remotepeer = client == null ? null : sb.peers.lookupByIP(Domains.dnsResolve(client), -1, true, false, false);
|
||||
theQuery.searchtime = System.currentTimeMillis() - timestamp;
|
||||
theQuery.urlretrievaltime = (theSearch == null) ? 0 : theSearch.getURLRetrievalTime();
|
||||
theQuery.snippetcomputationtime = (theSearch == null) ? 0 : theSearch.getSnippetComputationTime();
|
||||
AccessTracker.add(AccessTracker.Location.remote, theQuery);
|
||||
|
||||
// update the search tracker
|
||||
synchronized (trackerHandles) {
|
||||
trackerHandles.add(theQuery.starttime); // thats the time when the handle was created
|
||||
// we don't need too much entries in the list; remove superfluous
|
||||
while (trackerHandles.size() > 36) if (!trackerHandles.remove(trackerHandles.first())) break;
|
||||
}
|
||||
sb.remoteSearchTracker.put(client, trackerHandles);
|
||||
if (MemoryControl.shortStatus()) sb.remoteSearchTracker.clear();
|
||||
|
||||
// log
|
||||
Network.log.logInfo("EXIT HASH SEARCH: " +
|
||||
QueryParams.anonymizedQueryHashes(theQuery.getQueryGoal().getIncludeHashes()) + " - " + theQuery.getResultCount() + " links found, " +
|
||||
prop.get("linkcount", "?") + " links selected, " +
|
||||
indexabstractContainercount + " index abstracts, " +
|
||||
(System.currentTimeMillis() - timestamp) + " milliseconds");
|
||||
|
||||
prop.put("searchtime", System.currentTimeMillis() - timestamp);
|
||||
|
||||
final int links = prop.getInt("linkcount",0);
|
||||
sb.peers.mySeed().incSI(links);
|
||||
sb.peers.mySeed().incSU(links);
|
||||
return prop;
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -1,426 +1,426 @@
|
||||
// RankingProcess.java
|
||||
// (C) 2007 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
|
||||
// first published 07.11.2007 on http://yacy.net
|
||||
//
|
||||
// This is a part of YaCy, a peer-to-peer based web search engine
|
||||
//
|
||||
// $LastChangedDate$
|
||||
// $LastChangedRevision$
|
||||
// $LastChangedBy$
|
||||
//
|
||||
// LICENSE
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation; either version 2 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program; if not, write to the Free Software
|
||||
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
|
||||
package net.yacy.search.query;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
import java.util.Map;
|
||||
import java.util.SortedMap;
|
||||
import java.util.concurrent.BlockingQueue;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import net.yacy.cora.document.analysis.Classification.ContentDomain;
|
||||
import net.yacy.cora.document.MultiProtocolURI;
|
||||
import net.yacy.cora.document.UTF8;
|
||||
import net.yacy.cora.federate.yacy.CacheStrategy;
|
||||
import net.yacy.cora.lod.vocabulary.Tagging;
|
||||
import net.yacy.cora.sorting.ConcurrentScoreMap;
|
||||
import net.yacy.cora.sorting.ScoreMap;
|
||||
import net.yacy.cora.sorting.WeakPriorityBlockingQueue;
|
||||
import net.yacy.cora.sorting.WeakPriorityBlockingQueue.ReverseElement;
|
||||
import net.yacy.cora.storage.HandleSet;
|
||||
import net.yacy.cora.util.SpaceExceededException;
|
||||
import net.yacy.document.Condenser;
|
||||
import net.yacy.document.LibraryProvider;
|
||||
import net.yacy.kelondro.data.meta.URIMetadataRow;
|
||||
import net.yacy.kelondro.data.word.Word;
|
||||
import net.yacy.kelondro.data.word.WordReference;
|
||||
import net.yacy.kelondro.data.word.WordReferenceVars;
|
||||
import net.yacy.kelondro.index.RowHandleSet;
|
||||
import net.yacy.kelondro.logging.Log;
|
||||
import net.yacy.kelondro.rwi.ReferenceContainer;
|
||||
import net.yacy.kelondro.rwi.TermSearch;
|
||||
import net.yacy.kelondro.util.Bitfield;
|
||||
import net.yacy.peers.graphics.ProfilingGraph;
|
||||
import net.yacy.search.EventTracker;
|
||||
import net.yacy.search.Switchboard;
|
||||
import net.yacy.search.index.Segment;
|
||||
import net.yacy.search.ranking.ReferenceOrder;
|
||||
import net.yacy.search.snippet.ResultEntry;
|
||||
|
||||
public final class RankingProcess extends Thread {
|
||||
|
||||
protected static final int max_results_preparation = 3000, max_results_preparation_special = -1; // -1 means 'no limit'
|
||||
|
||||
//private final SearchEvent searchEvent;
|
||||
private final QueryParams query;
|
||||
private SortedMap<byte[], ReferenceContainer<WordReference>> localSearchInclusion;
|
||||
private final ScoreMap<String> ref; // reference score computation for the commonSense heuristic
|
||||
private final long maxtime;
|
||||
protected final WeakPriorityBlockingQueue<WordReferenceVars> rwiStack;
|
||||
protected final ConcurrentHashMap<String, WeakPriorityBlockingQueue<WordReferenceVars>> doubleDomCache; // key = domhash (6 bytes); value = like stack
|
||||
private final int[] flagcount; // flag counter
|
||||
private final AtomicInteger feedersAlive, feedersTerminated;
|
||||
private boolean addRunning;
|
||||
protected final AtomicInteger receivedRemoteReferences;
|
||||
protected final ReferenceOrder order;
|
||||
protected final HandleSet urlhashes; // map for double-check; String/Long relation, addresses ranking number (backreference for deletion)
|
||||
private final Map<String, String> taggingPredicates; // a map from tagging vocabulary names to tagging predicate uris
|
||||
private boolean remote;
|
||||
|
||||
protected RankingProcess(final QueryParams query, boolean remote) {
|
||||
// we collect the urlhashes and construct a list with urlEntry objects
|
||||
// attention: if minEntries is too high, this method will not terminate within the maxTime
|
||||
// sortorder: 0 = hash, 1 = url, 2 = ranking
|
||||
this.query = query;
|
||||
this.remote = remote;
|
||||
this.localSearchInclusion = null;
|
||||
this.ref = new ConcurrentScoreMap<String>();
|
||||
this.maxtime = query.maxtime;
|
||||
int stackMaxsize = query.snippetCacheStrategy == null || query.snippetCacheStrategy == CacheStrategy.CACHEONLY ? max_results_preparation_special : max_results_preparation;
|
||||
this.rwiStack = new WeakPriorityBlockingQueue<WordReferenceVars>(stackMaxsize, false);
|
||||
this.doubleDomCache = new ConcurrentHashMap<String, WeakPriorityBlockingQueue<WordReferenceVars>>();
|
||||
this.flagcount = new int[32];
|
||||
for ( int i = 0; i < 32; i++ ) {
|
||||
this.flagcount[i] = 0;
|
||||
}
|
||||
this.feedersAlive = new AtomicInteger(0);
|
||||
this.feedersTerminated = new AtomicInteger(0);
|
||||
this.addRunning = true;
|
||||
this.receivedRemoteReferences = new AtomicInteger(0);
|
||||
this.order = new ReferenceOrder(this.query.ranking, UTF8.getBytes(this.query.targetlang));
|
||||
this.urlhashes = new RowHandleSet(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, 100);
|
||||
this.taggingPredicates = new HashMap<String, String>();
|
||||
for (Tagging t: LibraryProvider.autotagging.getVocabularies()) {
|
||||
this.taggingPredicates.put(t.getName(), t.getPredicate());
|
||||
}
|
||||
}
|
||||
|
||||
/** @return the reference order used to normalize and rank incoming word references */
public ReferenceOrder getOrder() {
    return this.order;
}

/**
 * Tells whether all feeder threads have finished delivering references.
 * A remote search expects more than one terminated feeder (local + at least one remote).
 */
protected boolean feedingIsFinished() {
    return
        this.feedersTerminated.intValue() > (this.remote ? 1 : 0) &&
        this.feedersAlive.get() == 0;// &&
        //(!this.remote || this.remote_indexCount > 0);
}

/**
 * method to signal the incoming stack that one feeder has terminated
 */
public void oneFeederTerminated() {
    this.feedersTerminated.incrementAndGet();
    final int c = this.feedersAlive.decrementAndGet();
    assert c >= 0 : "feeders = " + c;
}

/** Signals that one more feeder thread has started delivering references. */
public void oneFeederStarted() {
    this.feedersAlive.addAndGet(1);
}

/** @return the query parameters this process was created for */
public QueryParams getQuery() {
    return this.query;
}

/** @return per-bit counters of how often each of the 32 reference flags was seen */
public int[] flagCount() {
    return this.flagcount;
}

/** Marks the start of an add() phase (references are still being inserted). */
protected void addBegin() {
    this.addRunning = true;
}

/** Marks the end of all add() phases (no more references will be inserted). */
public void addFinalize() {
    this.addRunning = false;
}

/** @return true while references may still be added to the stack */
protected boolean addRunning() {
    return this.addRunning;
}
|
||||
|
||||
public boolean rwiIsEmpty() {
|
||||
if ( !this.rwiStack.isEmpty() ) {
|
||||
return false;
|
||||
}
|
||||
for ( final WeakPriorityBlockingQueue<WordReferenceVars> s : this.doubleDomCache.values() ) {
|
||||
if ( !s.isEmpty() ) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
protected int rwiQueueSize() {
|
||||
int c = this.rwiStack.sizeQueue();
|
||||
for ( final WeakPriorityBlockingQueue<WordReferenceVars> s : this.doubleDomCache.values() ) {
|
||||
c += s.sizeQueue();
|
||||
}
|
||||
return c;
|
||||
}
|
||||
|
||||
protected boolean testFlags(final Bitfield flags) {
|
||||
if (this.query.constraint == null) return true;
|
||||
// test if ientry matches with filter
|
||||
// if all = true: let only entries pass that has all matching bits
|
||||
// if all = false: let all entries pass that has at least one matching bit
|
||||
if (this.query.allofconstraint) {
|
||||
for ( int i = 0; i < 32; i++ ) {
|
||||
if ((this.query.constraint.get(i)) && (!flags.get(i))) return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
for (int i = 0; i < 32; i++) {
|
||||
if ((this.query.constraint.get(i)) && (flags.get(i))) return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
public void run() {

    // do a search: this thread is itself one feeder for the local index
    oneFeederStarted();

    // sort the local containers and truncate it to a limited count,
    // so following sortings together with the global results will be fast
    try {
        final long timer = System.currentTimeMillis();
        // query the local term index with the include/exclude word hashes of the query goal
        final TermSearch<WordReference> search =
            this.query
                .getSegment()
                .termIndex()
                .query(
                    this.query.getQueryGoal().getIncludeHashes(),
                    this.query.getQueryGoal().getExcludeHashes(),
                    null,
                    Segment.wordReferenceFactory,
                    this.query.maxDistance);
        // keep the per-word inclusion containers for later abstract generation (searchContainerMap)
        this.localSearchInclusion = search.inclusion();
        final ReferenceContainer<WordReference> index = search.joined();
        // record the JOIN step for the profiling graph
        EventTracker.update(
            EventTracker.EClass.SEARCH,
            new ProfilingGraph.EventSearch(
                this.query.id(true),
                SearchEventType.JOIN,
                this.query.getQueryGoal().getOriginalQueryString(false),
                index.size(),
                System.currentTimeMillis() - timer),
            false);
        if ( !index.isEmpty() ) {
            // feed the joined local result into the ranking stack; -1 = fullResource unknown
            add(index, true, "local index: " + this.query.getSegment().getLocation(), -1, this.maxtime);
            this.addFinalize();
        }
    } catch ( final Exception e ) {
        Log.logException(e);
    } finally {
        // always balance oneFeederStarted(), even on failure, so feedingIsFinished() can fire
        oneFeederTerminated();
    }
}
|
||||
|
||||
/**
 * Adds a container of word references (local or remote) to the ranking stack.
 * Entries are normalized, filtered against doubles, flag constraints, content
 * domain and site constraints, and then inserted ranked into the rwi stack.
 *
 * @param index        the reference container to add; must not be null
 * @param local        true if the container came from the local index, false for remote peers
 * @param resourceName name of the source, used for event tracking/logging
 * @param fullResource total size of the source resource; -1 when unknown (local case)
 * @param maxtime      maximum time in milliseconds to spend in normalization and polling
 */
public void add(
    final ReferenceContainer<WordReference> index,
    final boolean local,
    final String resourceName,
    final int fullResource,
    final long maxtime) {
    // we collect the urlhashes and construct a list with urlEntry objects
    // attention: if minEntries is too high, this method will not terminate within the maxTime
    //Log.logInfo("RWIProcess", "added a container, size = " + index.size());

    this.addRunning = true;
    assert (index != null);
    if (index.isEmpty()) return;
    // update the per-source statistics counters on the query object
    if (local) {
        this.query.local_rwi_stored.addAndGet(fullResource);
    } else {
        assert fullResource >= 0 : "fullResource = " + fullResource;
        this.query.remote_stored.addAndGet(fullResource);
        this.query.remote_peerCount.incrementAndGet();
    }
    long timer = System.currentTimeMillis();

    // normalize entries
    final BlockingQueue<WordReferenceVars> decodedEntries = this.order.normalizeWith(index, maxtime);
    int is = index.size();
    EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.EventSearch(
        this.query.id(true),
        SearchEventType.NORMALIZING,
        resourceName,
        is,
        System.currentTimeMillis() - timer), false);
    if (!local) this.receivedRemoteReferences.addAndGet(is);

    // iterate over normalized entries and select some that are better than currently stored
    timer = System.currentTimeMillis();

    // apply all constraints
    long timeout = System.currentTimeMillis() + maxtime;
    try {
        WordReferenceVars iEntry;
        long remaining;
        pollloop: while ( true ) {
            remaining = timeout - System.currentTimeMillis();
            if (remaining <= 0) {
                Log.logWarning("RWIProcess", "terminated 'add' loop before poll time-out = " + remaining + ", decodedEntries.size = " + decodedEntries.size());
                break;
            }
            // block for the next normalized entry, but never past the overall timeout
            iEntry = decodedEntries.poll(remaining, TimeUnit.MILLISECONDS);
            if (iEntry == null) {
                Log.logWarning("RWIProcess", "terminated 'add' loop after poll time-out = " + remaining + ", decodedEntries.size = " + decodedEntries.size());
                break pollloop;
            }
            // the normalizer pushes a poison pill to signal the end of the stream
            if (iEntry == WordReferenceVars.poison) {
                break pollloop;
            }
            assert (iEntry.urlhash().length == index.row().primaryKeyLength);

            // doublecheck for urls
            if (this.urlhashes.has(iEntry.urlhash())) continue pollloop;

            // increase flag counts
            Bitfield flags = iEntry.flags();
            for (int j = 0; j < 32; j++) {
                if (flags.get(j)) this.flagcount[j]++;
            }

            // check constraints
            if (!this.testFlags(flags)) continue pollloop;

            // check document domain: entries lacking the matching content flag are dropped
            if (this.query.contentdom.getCode() > 0 &&
                ((this.query.contentdom == ContentDomain.AUDIO && !(flags.get(Condenser.flag_cat_hasaudio))) ||
                 (this.query.contentdom == ContentDomain.VIDEO && !(flags.get(Condenser.flag_cat_hasvideo))) ||
                 (this.query.contentdom == ContentDomain.IMAGE && !(flags.get(Condenser.flag_cat_hasimage))) ||
                 (this.query.contentdom == ContentDomain.APP && !(flags.get(Condenser.flag_cat_hasapp))))) {
                continue pollloop;
            }

            // count domZones
            //this.domZones[DigestURI.domDomain(iEntry.metadataHash())]++;

            // check site constraints
            final String hosthash = iEntry.hosthash();
            if ( this.query.nav_sitehash == null ) {
                if (this.query.siteexcludes != null && this.query.siteexcludes.contains(hosthash)) continue pollloop;
            } else {
                // filter out all domains that do not match with the site constraint
                if (!hosthash.equals(this.query.nav_sitehash)) continue pollloop;
            }

            // finally extend the double-check and insert result to stack
            this.urlhashes.putUnique(iEntry.urlhash());
            rankingtryloop: while (true) {
                try {
                    this.rwiStack.put(new ReverseElement<WordReferenceVars>(iEntry, this.order.cardinal(iEntry))); // inserts the element and removes the worst (which is smallest)
                    break rankingtryloop;
                } catch ( final ArithmeticException e ) {
                    // this may happen if the concurrent normalizer changes values during cardinal computation
                    continue rankingtryloop;
                }
            }
            // increase counter for statistics
            if (local) this.query.local_rwi_available.incrementAndGet(); else this.query.remote_available.incrementAndGet();
        }
        if (System.currentTimeMillis() >= timeout) Log.logWarning("RWIProcess", "rwi normalization ended with timeout = " + maxtime);

    // NOTE(review): both exceptions are silently swallowed here; for InterruptedException
    // best practice would be to re-set the interrupt flag — confirm whether the caller relies
    // on this best-effort behavior before changing it.
    } catch ( final InterruptedException e ) {
    } catch ( final SpaceExceededException e ) {
    }

    //if ((query.neededResults() > 0) && (container.size() > query.neededResults())) remove(true, true);
    EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.EventSearch(
        this.query.id(true),
        SearchEventType.PRESORT,
        resourceName,
        index.size(),
        System.currentTimeMillis() - timer), false);
}
|
||||
|
||||
/**
 * Gives direct access to the per-word inclusion containers of the local search.
 * This is needed for abstract generation and is only available after the local
 * term-index query was executed (run()/execQuery); before that it is null.
 *
 * @return map from word hash to its local reference container, or null
 */
protected Map<byte[], ReferenceContainer<WordReference>> searchContainerMap() {
    // direct access to the result maps is needed for abstract generation
    // this is only available if execQuery() was called before
    return this.localSearchInclusion;
}
|
||||
|
||||
public ScoreMap<String> getTopicNavigator(final int count) {
|
||||
// create a list of words that had been computed by statistics over all
|
||||
// words that appeared in the url or the description of all urls
|
||||
final ScoreMap<String> result = new ConcurrentScoreMap<String>();
|
||||
if ( this.ref.sizeSmaller(2) ) {
|
||||
this.ref.clear(); // navigators with one entry are not useful
|
||||
}
|
||||
final Map<String, Float> counts = new HashMap<String, Float>();
|
||||
final Iterator<String> i = this.ref.keys(false);
|
||||
String word;
|
||||
int c;
|
||||
float q, min = Float.MAX_VALUE, max = Float.MIN_VALUE;
|
||||
int ic = count;
|
||||
while ( ic-- > 0 && i.hasNext() ) {
|
||||
word = i.next();
|
||||
if ( word == null ) {
|
||||
continue;
|
||||
}
|
||||
c = this.query.getSegment().getQueryCount(word);
|
||||
if ( c > 0 ) {
|
||||
q = ((float) this.ref.get(word)) / ((float) c);
|
||||
min = Math.min(min, q);
|
||||
max = Math.max(max, q);
|
||||
counts.put(word, q);
|
||||
}
|
||||
}
|
||||
if ( max > min ) {
|
||||
for ( final Map.Entry<String, Float> ce : counts.entrySet() ) {
|
||||
result.set(ce.getKey(), (int) (((double) count) * (ce.getValue() - min) / (max - min)));
|
||||
}
|
||||
}
|
||||
return this.ref;
|
||||
}
|
||||
|
||||
// matches words that consist only of lowercase ascii letters
private final static Pattern lettermatch = Pattern.compile("[a-z]+");

/**
 * Adds candidate words to the topic reference score map. Words are lowercased
 * and filtered: too short, part of the built-in noise-word list, part of the
 * query itself, non-letter, or listed in the bad/stop word sets are skipped.
 *
 * @param words candidate words, typically split from a result title
 */
public void addTopic(final String[] words) {
    String word;
    for ( final String w : words ) {
        word = w.toLowerCase();
        // NOTE(review): the indexOf membership test matches substrings, so e.g.
        // "dex" (contained in "index") is also filtered — confirm this is intended
        if ( word.length() > 2
            && "http_html_php_ftp_www_com_org_net_gov_edu_index_home_page_for_usage_the_and_zum_der_die_das_und_the_zur_bzw_mit_blog_wiki_aus_bei_off"
                .indexOf(word) < 0
            && !this.query.getQueryGoal().getIncludeHashes().has(Word.word2hash(word))
            && lettermatch.matcher(word).matches()
            && !Switchboard.badwords.contains(word)
            && !Switchboard.stopwords.contains(word) ) {
            this.ref.inc(word);
        }
    }
}
|
||||
|
||||
/**
 * Extracts topic candidate words from a result entry's title and feeds them
 * into the topic reference scores via addTopic().
 *
 * @param resultEntry the result to take title words from; ignored if url or title is null
 */
protected void addTopics(final ResultEntry resultEntry) {
    // take out relevant information for reference computation
    if ((resultEntry.url() == null) || (resultEntry.title() == null)) return;
    final String[] descrcomps = MultiProtocolURI.splitpattern.split(resultEntry.title().toLowerCase()); // words in the description

    // add references
    addTopic(descrcomps);
}
|
||||
|
||||
|
||||
}
|
||||
// RankingProcess.java
|
||||
// (C) 2007 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
|
||||
// first published 07.11.2007 on http://yacy.net
|
||||
//
|
||||
// This is a part of YaCy, a peer-to-peer based web search engine
|
||||
//
|
||||
// $LastChangedDate$
|
||||
// $LastChangedRevision$
|
||||
// $LastChangedBy$
|
||||
//
|
||||
// LICENSE
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation; either version 2 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program; if not, write to the Free Software
|
||||
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
|
||||
package net.yacy.search.query;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
import java.util.Map;
|
||||
import java.util.SortedMap;
|
||||
import java.util.concurrent.BlockingQueue;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import net.yacy.cora.document.analysis.Classification.ContentDomain;
|
||||
import net.yacy.cora.document.MultiProtocolURI;
|
||||
import net.yacy.cora.document.UTF8;
|
||||
import net.yacy.cora.federate.yacy.CacheStrategy;
|
||||
import net.yacy.cora.lod.vocabulary.Tagging;
|
||||
import net.yacy.cora.sorting.ConcurrentScoreMap;
|
||||
import net.yacy.cora.sorting.ScoreMap;
|
||||
import net.yacy.cora.sorting.WeakPriorityBlockingQueue;
|
||||
import net.yacy.cora.sorting.WeakPriorityBlockingQueue.ReverseElement;
|
||||
import net.yacy.cora.storage.HandleSet;
|
||||
import net.yacy.cora.util.SpaceExceededException;
|
||||
import net.yacy.document.Condenser;
|
||||
import net.yacy.document.LibraryProvider;
|
||||
import net.yacy.kelondro.data.meta.URIMetadataRow;
|
||||
import net.yacy.kelondro.data.word.Word;
|
||||
import net.yacy.kelondro.data.word.WordReference;
|
||||
import net.yacy.kelondro.data.word.WordReferenceVars;
|
||||
import net.yacy.kelondro.index.RowHandleSet;
|
||||
import net.yacy.kelondro.logging.Log;
|
||||
import net.yacy.kelondro.rwi.ReferenceContainer;
|
||||
import net.yacy.kelondro.rwi.TermSearch;
|
||||
import net.yacy.kelondro.util.Bitfield;
|
||||
import net.yacy.peers.graphics.ProfilingGraph;
|
||||
import net.yacy.search.EventTracker;
|
||||
import net.yacy.search.Switchboard;
|
||||
import net.yacy.search.index.Segment;
|
||||
import net.yacy.search.ranking.ReferenceOrder;
|
||||
import net.yacy.search.snippet.ResultEntry;
|
||||
|
||||
public final class RankingProcess extends Thread {
|
||||
|
||||
protected static final int max_results_preparation = 3000, max_results_preparation_special = -1; // -1 means 'no limit'
|
||||
|
||||
//private final SearchEvent searchEvent;
|
||||
private final QueryParams query;
|
||||
private SortedMap<byte[], ReferenceContainer<WordReference>> localSearchInclusion;
|
||||
private final ScoreMap<String> ref; // reference score computation for the commonSense heuristic
|
||||
private final long maxtime;
|
||||
protected final WeakPriorityBlockingQueue<WordReferenceVars> rwiStack;
|
||||
protected final ConcurrentHashMap<String, WeakPriorityBlockingQueue<WordReferenceVars>> doubleDomCache; // key = domhash (6 bytes); value = like stack
|
||||
private final int[] flagcount; // flag counter
|
||||
private final AtomicInteger feedersAlive, feedersTerminated;
|
||||
private boolean addRunning;
|
||||
protected final AtomicInteger receivedRemoteReferences;
|
||||
protected final ReferenceOrder order;
|
||||
protected final HandleSet urlhashes; // map for double-check; String/Long relation, addresses ranking number (backreference for deletion)
|
||||
private final Map<String, String> taggingPredicates; // a map from tagging vocabulary names to tagging predicate uris
|
||||
private boolean remote;
|
||||
|
||||
/**
 * Creates a ranking process for one query. The process collects url hashes and
 * constructs a ranked stack of urlEntry objects.
 *
 * @param query  the query parameters; supplies maxtime, ranking profile and target language
 * @param remote true if remote peers also feed this process
 */
protected RankingProcess(final QueryParams query, boolean remote) {
    // we collect the urlhashes and construct a list with urlEntry objects
    // attention: if minEntries is too high, this method will not terminate within the maxTime
    // sortorder: 0 = hash, 1 = url, 2 = ranking
    this.query = query;
    this.remote = remote;
    this.localSearchInclusion = null;
    this.ref = new ConcurrentScoreMap<String>();
    this.maxtime = query.maxtime;
    // unlimited stack (-1) when snippets come from cache only; otherwise cap preparation size
    int stackMaxsize = query.snippetCacheStrategy == null || query.snippetCacheStrategy == CacheStrategy.CACHEONLY ? max_results_preparation_special : max_results_preparation;
    this.rwiStack = new WeakPriorityBlockingQueue<WordReferenceVars>(stackMaxsize, false);
    this.doubleDomCache = new ConcurrentHashMap<String, WeakPriorityBlockingQueue<WordReferenceVars>>();
    this.flagcount = new int[32];
    // NOTE(review): this loop is redundant — Java zero-initializes int arrays
    for ( int i = 0; i < 32; i++ ) {
        this.flagcount[i] = 0;
    }
    this.feedersAlive = new AtomicInteger(0);
    this.feedersTerminated = new AtomicInteger(0);
    this.addRunning = true;
    this.receivedRemoteReferences = new AtomicInteger(0);
    this.order = new ReferenceOrder(this.query.ranking, UTF8.getBytes(this.query.targetlang));
    this.urlhashes = new RowHandleSet(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, 100);
    // map vocabulary names to their tagging predicate uris for later annotation lookups
    this.taggingPredicates = new HashMap<String, String>();
    for (Tagging t: LibraryProvider.autotagging.getVocabularies()) {
        this.taggingPredicates.put(t.getName(), t.getPredicate());
    }
}
|
||||
|
||||
/** @return the reference order used to normalize and rank incoming word references */
public ReferenceOrder getOrder() {
    return this.order;
}

/**
 * Tells whether all feeder threads have finished delivering references.
 * A remote search expects more than one terminated feeder (local + at least one remote).
 */
protected boolean feedingIsFinished() {
    return
        this.feedersTerminated.intValue() > (this.remote ? 1 : 0) &&
        this.feedersAlive.get() == 0;// &&
        //(!this.remote || this.remote_indexCount > 0);
}

/**
 * method to signal the incoming stack that one feeder has terminated
 */
public void oneFeederTerminated() {
    this.feedersTerminated.incrementAndGet();
    final int c = this.feedersAlive.decrementAndGet();
    assert c >= 0 : "feeders = " + c;
}

/** Signals that one more feeder thread has started delivering references. */
public void oneFeederStarted() {
    this.feedersAlive.addAndGet(1);
}

/** @return the query parameters this process was created for */
public QueryParams getQuery() {
    return this.query;
}

/** @return per-bit counters of how often each of the 32 reference flags was seen */
public int[] flagCount() {
    return this.flagcount;
}

/** Marks the start of an add() phase (references are still being inserted). */
protected void addBegin() {
    this.addRunning = true;
}

/** Marks the end of all add() phases (no more references will be inserted). */
public void addFinalize() {
    this.addRunning = false;
}

/** @return true while references may still be added to the stack */
protected boolean addRunning() {
    return this.addRunning;
}

/**
 * Checks whether no reverse-word-index entries are buffered anywhere.
 * @return true if both the main rwi stack and every per-domain cache queue are empty
 */
public boolean rwiIsEmpty() {
    if ( !this.rwiStack.isEmpty() ) {
        return false;
    }
    for ( final WeakPriorityBlockingQueue<WordReferenceVars> s : this.doubleDomCache.values() ) {
        if ( !s.isEmpty() ) {
            return false;
        }
    }
    return true;
}

/**
 * Sums the queued entry counts of the main rwi stack and all per-domain caches.
 * @return total number of queued reverse-word-index entries
 */
protected int rwiQueueSize() {
    int c = this.rwiStack.sizeQueue();
    for ( final WeakPriorityBlockingQueue<WordReferenceVars> s : this.doubleDomCache.values() ) {
        c += s.sizeQueue();
    }
    return c;
}

/**
 * Tests an entry's flag bitfield against the query constraint.
 *
 * @param flags the flag bitfield of the candidate entry
 * @return true if the entry passes the constraint (or no constraint is set)
 */
protected boolean testFlags(final Bitfield flags) {
    if (this.query.constraint == null) return true;
    // test if ientry matches with filter
    // if all = true: let only entries pass that has all matching bits
    // if all = false: let all entries pass that has at least one matching bit
    if (this.query.allofconstraint) {
        for ( int i = 0; i < 32; i++ ) {
            if ((this.query.constraint.get(i)) && (!flags.get(i))) return false;
        }
        return true;
    }
    for (int i = 0; i < 32; i++) {
        if ((this.query.constraint.get(i)) && (flags.get(i))) return true;
    }
    return false;
}
|
||||
|
||||
@Override
public void run() {

    // do a search: this thread is itself one feeder for the local index
    oneFeederStarted();

    // sort the local containers and truncate it to a limited count,
    // so following sortings together with the global results will be fast
    try {
        final long timer = System.currentTimeMillis();
        // query the local term index with the include/exclude word hashes of the query goal
        final TermSearch<WordReference> search =
            this.query
                .getSegment()
                .termIndex()
                .query(
                    this.query.getQueryGoal().getIncludeHashes(),
                    this.query.getQueryGoal().getExcludeHashes(),
                    null,
                    Segment.wordReferenceFactory,
                    this.query.maxDistance);
        // keep the per-word inclusion containers for later abstract generation (searchContainerMap)
        this.localSearchInclusion = search.inclusion();
        final ReferenceContainer<WordReference> index = search.joined();
        // record the JOIN step for the profiling graph
        EventTracker.update(
            EventTracker.EClass.SEARCH,
            new ProfilingGraph.EventSearch(
                this.query.id(true),
                SearchEventType.JOIN,
                this.query.getQueryGoal().getOriginalQueryString(false),
                index.size(),
                System.currentTimeMillis() - timer),
            false);
        if ( !index.isEmpty() ) {
            // feed the joined local result into the ranking stack; -1 = fullResource unknown
            add(index, true, "local index: " + this.query.getSegment().getLocation(), -1, this.maxtime);
            this.addFinalize();
        }
    } catch ( final Exception e ) {
        Log.logException(e);
    } finally {
        // always balance oneFeederStarted(), even on failure, so feedingIsFinished() can fire
        oneFeederTerminated();
    }
}
|
||||
|
||||
/**
 * Adds a container of word references (RWI entries) from the local index or a remote
 * peer to the ranking process: entries are normalized concurrently, filtered against
 * all query constraints and pushed onto the ranking stack.
 *
 * @param index        container with the word references to be ranked; must not be null
 * @param local        true if the container comes from the local index, false if from a remote peer
 * @param resourceName name of the source, used for event tracking/profiling only
 * @param fullResource total number of references the source holds (may exceed index.size());
 *                     callers pass -1 for local containers (see local call site above)
 * @param maxtime      maximum time in milliseconds to spend normalizing and polling entries
 */
public void add(
    final ReferenceContainer<WordReference> index,
    final boolean local,
    final String resourceName,
    final int fullResource,
    final long maxtime) {
    // we collect the urlhashes and construct a list with urlEntry objects
    // attention: if minEntries is too high, this method will not terminate within the maxTime
    //Log.logInfo("RWIProcess", "added a container, size = " + index.size());

    this.addRunning = true;
    assert (index != null);
    if (index.isEmpty()) return;
    // update source statistics: local vs. remote reference counters
    if (local) {
        this.query.local_rwi_stored.addAndGet(fullResource);
    } else {
        assert fullResource >= 0 : "fullResource = " + fullResource;
        this.query.remote_stored.addAndGet(fullResource);
        this.query.remote_peerCount.incrementAndGet();
    }
    long timer = System.currentTimeMillis();

    // normalize entries
    // normalizeWith() decodes the container concurrently; results arrive on a blocking queue
    final BlockingQueue<WordReferenceVars> decodedEntries = this.order.normalizeWith(index, maxtime);
    int is = index.size();
    EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.EventSearch(
        this.query.id(true),
        SearchEventType.NORMALIZING,
        resourceName,
        is,
        System.currentTimeMillis() - timer), false);
    if (!local) this.receivedRemoteReferences.addAndGet(is);

    // iterate over normalized entries and select some that are better than currently stored
    timer = System.currentTimeMillis();

    // apply all constraints
    long timeout = System.currentTimeMillis() + maxtime;
    try {
        WordReferenceVars iEntry;
        long remaining;
        // poll normalized entries until the queue is poisoned or the time budget is used up
        pollloop: while ( true ) {
            remaining = timeout - System.currentTimeMillis();
            if (remaining <= 0) {
                Log.logWarning("RWIProcess", "terminated 'add' loop before poll time-out = " + remaining + ", decodedEntries.size = " + decodedEntries.size());
                break;
            }
            iEntry = decodedEntries.poll(remaining, TimeUnit.MILLISECONDS);
            if (iEntry == null) {
                Log.logWarning("RWIProcess", "terminated 'add' loop after poll time-out = " + remaining + ", decodedEntries.size = " + decodedEntries.size());
                break pollloop;
            }
            // poison object signals the end of the normalizer's output
            if (iEntry == WordReferenceVars.poison) {
                break pollloop;
            }
            assert (iEntry.urlhash().length == index.row().primaryKeyLength);

            // doublecheck for urls
            if (this.urlhashes.has(iEntry.urlhash())) continue pollloop;

            // increase flag counts
            Bitfield flags = iEntry.flags();
            for (int j = 0; j < 32; j++) {
                if (flags.get(j)) this.flagcount[j]++;
            }

            // check constraints
            if (!this.testFlags(flags)) continue pollloop;

            // check document domain
            // non-TEXT content domains require the matching category flag on the entry
            if (this.query.contentdom.getCode() > 0 &&
                ((this.query.contentdom == ContentDomain.AUDIO && !(flags.get(Condenser.flag_cat_hasaudio))) ||
                 (this.query.contentdom == ContentDomain.VIDEO && !(flags.get(Condenser.flag_cat_hasvideo))) ||
                 (this.query.contentdom == ContentDomain.IMAGE && !(flags.get(Condenser.flag_cat_hasimage))) ||
                 (this.query.contentdom == ContentDomain.APP && !(flags.get(Condenser.flag_cat_hasapp))))) {
                continue pollloop;
            }

            // count domZones
            //this.domZones[DigestURI.domDomain(iEntry.metadataHash())]++;

            // check site constraints
            final String hosthash = iEntry.hosthash();
            if ( this.query.nav_sitehash == null ) {
                if (this.query.siteexcludes != null && this.query.siteexcludes.contains(hosthash)) continue pollloop;
            } else {
                // filter out all domains that do not match with the site constraint
                if (!hosthash.equals(this.query.nav_sitehash)) continue pollloop;
            }

            // finally extend the double-check and insert result to stack
            this.urlhashes.putUnique(iEntry.urlhash());
            // the cardinal computation may race with the concurrent normalizer; retry on failure
            rankingtryloop: while (true) {
                try {
                    this.rwiStack.put(new ReverseElement<WordReferenceVars>(iEntry, this.order.cardinal(iEntry))); // inserts the element and removes the worst (which is smallest)
                    break rankingtryloop;
                } catch ( final ArithmeticException e ) {
                    // this may happen if the concurrent normalizer changes values during cardinal computation
                    continue rankingtryloop;
                }
            }
            // increase counter for statistics
            if (local) this.query.local_rwi_available.incrementAndGet(); else this.query.remote_available.incrementAndGet();
        }
        if (System.currentTimeMillis() >= timeout) Log.logWarning("RWIProcess", "rwi normalization ended with timeout = " + maxtime);

    } catch ( final InterruptedException e ) {
        // NOTE(review): interrupt is swallowed without re-interrupting the thread — confirm
        // whether callers rely on this best-effort termination before changing it
    } catch ( final SpaceExceededException e ) {
        // NOTE(review): out-of-space during putUnique/put is silently ignored (best-effort add)
    }

    //if ((query.neededResults() > 0) && (container.size() > query.neededResults())) remove(true, true);
    EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.EventSearch(
        this.query.id(true),
        SearchEventType.PRESORT,
        resourceName,
        index.size(),
        System.currentTimeMillis() - timer), false);
}
|
||||
|
||||
protected Map<byte[], ReferenceContainer<WordReference>> searchContainerMap() {
|
||||
// direct access to the result maps is needed for abstract generation
|
||||
// this is only available if execQuery() was called before
|
||||
return this.localSearchInclusion;
|
||||
}
|
||||
|
||||
public ScoreMap<String> getTopics(final int count) {
|
||||
// create a list of words that had been computed by statistics over all
|
||||
// words that appeared in the url or the description of all urls
|
||||
final ScoreMap<String> result = new ConcurrentScoreMap<String>();
|
||||
if ( this.ref.sizeSmaller(2) ) {
|
||||
this.ref.clear(); // navigators with one entry are not useful
|
||||
}
|
||||
final Map<String, Float> counts = new HashMap<String, Float>();
|
||||
final Iterator<String> i = this.ref.keys(false);
|
||||
String word;
|
||||
int c;
|
||||
float q, min = Float.MAX_VALUE, max = Float.MIN_VALUE;
|
||||
int ic = count;
|
||||
while ( ic-- > 0 && i.hasNext() ) {
|
||||
word = i.next();
|
||||
if ( word == null ) {
|
||||
continue;
|
||||
}
|
||||
c = this.query.getSegment().getQueryCount(word);
|
||||
if ( c > 0 ) {
|
||||
q = ((float) this.ref.get(word)) / ((float) c);
|
||||
min = Math.min(min, q);
|
||||
max = Math.max(max, q);
|
||||
counts.put(word, q);
|
||||
}
|
||||
}
|
||||
if ( max > min ) {
|
||||
for ( final Map.Entry<String, Float> ce : counts.entrySet() ) {
|
||||
result.set(ce.getKey(), (int) (((double) count) * (ce.getValue() - min) / (max - min)));
|
||||
}
|
||||
}
|
||||
return this.ref;
|
||||
}
|
||||
|
||||
private final static Pattern lettermatch = Pattern.compile("[a-z]+");
|
||||
|
||||
public void addTopic(final String[] words) {
|
||||
String word;
|
||||
for ( final String w : words ) {
|
||||
word = w.toLowerCase();
|
||||
if ( word.length() > 2
|
||||
&& "http_html_php_ftp_www_com_org_net_gov_edu_index_home_page_for_usage_the_and_zum_der_die_das_und_the_zur_bzw_mit_blog_wiki_aus_bei_off"
|
||||
.indexOf(word) < 0
|
||||
&& !this.query.getQueryGoal().getIncludeHashes().has(Word.word2hash(word))
|
||||
&& lettermatch.matcher(word).matches()
|
||||
&& !Switchboard.badwords.contains(word)
|
||||
&& !Switchboard.stopwords.contains(word) ) {
|
||||
this.ref.inc(word);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
protected void addTopics(final ResultEntry resultEntry) {
|
||||
// take out relevant information for reference computation
|
||||
if ((resultEntry.url() == null) || (resultEntry.title() == null)) return;
|
||||
final String[] descrcomps = MultiProtocolURI.splitpattern.split(resultEntry.title().toLowerCase()); // words in the description
|
||||
|
||||
// add references
|
||||
addTopic(descrcomps);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
Loading…
Reference in new issue