removed ContentControl servlet and functionality

This was not used at all (as far as I know) and was blocking a smooth
integration of ivy in the context of an existing JSON parser.
pull/533/head
Michael Peter Christen 2 years ago
parent b54f4ad35f
commit fc98ca7a9c

@@ -3,119 +3,6 @@
<classpathentry excluding="api/|env/|processing/domaingraph/applet/|yacy/|api/bookmarks/|api/bookmarks/posts/|api/bookmarks/tags/|api/bookmarks/xbel/|solr/|gsa/|solr/collection1/|api/blacklists/|proxymsg/|p2p/" kind="src" path="htroot"/>
<classpathentry excluding="bookmarks/|bookmarks/posts/|bookmarks/tags/|bookmarks/xbel/|blacklists/" kind="src" path="htroot/api"/>
<classpathentry excluding="posts/|tags/|xbel/" kind="src" path="htroot/api/bookmarks"/>
<classpathentry kind="con" path="org.eclipse.jdt.junit.JUNIT_CONTAINER/4"/>
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER">
<attributes>
<attribute name="module" value="true"/>
</attributes>
</classpathentry>
<classpathentry kind="lib" path="lib/apache-mime4j-0.6.jar"/>
<classpathentry kind="lib" path="lib/avatica-core-1.13.0.jar"/>
<classpathentry kind="lib" path="lib/calcite-core-1.18.0.jar"/>
<classpathentry kind="lib" path="lib/calcite-linq4j-1.18.0.jar"/>
<classpathentry kind="lib" path="lib/jchardet-1.0.jar"/>
<classpathentry kind="lib" path="lib/common-image-3.3.2.jar"/>
<classpathentry kind="lib" path="lib/common-io-3.3.2.jar"/>
<classpathentry kind="lib" path="lib/common-lang-3.3.2.jar"/>
<classpathentry kind="lib" path="lib/commons-codec-1.14.jar"/>
<classpathentry kind="lib" path="lib/commons-fileupload-1.4.jar"/>
<classpathentry kind="lib" path="lib/commons-io-2.7.jar"/>
<classpathentry kind="lib" path="lib/commons-lang-2.6.jar"/>
<classpathentry kind="lib" path="lib/commons-lang3-3.12.0.jar"/>
<classpathentry kind="lib" path="lib/commons-logging-1.2.jar"/>
<classpathentry kind="lib" path="lib/commons-math3-3.4.1.jar"/>
<classpathentry kind="lib" path="lib/fontbox-2.0.15.jar"/>
<classpathentry kind="lib" path="lib/http2-client-9.4.34.v20201102.jar"/>
<classpathentry kind="lib" path="lib/http2-common-9.4.34.v20201102.jar"/>
<classpathentry kind="lib" path="lib/http2-http-client-transport-9.4.34.v20201102.jar"/>
<classpathentry kind="lib" path="lib/httpclient-4.5.12.jar"/>
<classpathentry kind="lib" path="lib/httpcore-4.4.13.jar"/>
<classpathentry kind="lib" path="lib/httpmime-4.5.12.jar"/>
<classpathentry kind="lib" path="lib/icu4j-63.1.jar"/>
<classpathentry kind="lib" path="lib/imageio-bmp-3.3.2.jar"/>
<classpathentry kind="lib" path="lib/imageio-core-3.3.2.jar"/>
<classpathentry kind="lib" path="lib/imageio-metadata-3.3.2.jar"/>
<classpathentry kind="lib" path="lib/imageio-tiff-3.3.2.jar"/>
<classpathentry kind="lib" path="lib/jackson-annotations-2.11.2.jar"/>
<classpathentry kind="lib" path="lib/jackson-core-2.11.2.jar"/>
<classpathentry kind="lib" path="lib/jackson-databind-2.11.2.jar"/>
<classpathentry kind="lib" path="lib/oro-2.0.8.jar"/>
<classpathentry kind="lib" path="lib/jaudiotagger-2.2.5.jar"/>
<classpathentry kind="lib" path="lib/javax.servlet-api-3.1.0.jar"/>
<classpathentry kind="lib" path="lib/jcifs-1.3.17.jar"/>
<classpathentry kind="lib" path="lib/jcl-over-slf4j-1.7.25.jar"/>
<classpathentry kind="lib" path="lib/jetty-client-9.4.35.v20201120.jar"/>
<classpathentry kind="lib" path="lib/jetty-continuation-9.4.35.v20201120.jar"/>
<classpathentry kind="lib" path="lib/jetty-deploy-9.4.35.v20201120.jar"/>
<classpathentry kind="lib" path="lib/jetty-http-9.4.35.v20201120.jar"/>
<classpathentry kind="lib" path="lib/jetty-io-9.4.35.v20201120.jar"/>
<classpathentry kind="lib" path="lib/jetty-jmx-9.4.35.v20201120.jar"/>
<classpathentry kind="lib" path="lib/jetty-proxy-9.4.35.v20201120.jar"/>
<classpathentry kind="lib" path="lib/jetty-security-9.4.35.v20201120.jar"/>
<classpathentry kind="lib" path="lib/jetty-server-9.4.35.v20201120.jar"/>
<classpathentry kind="lib" path="lib/jetty-servlet-9.4.35.v20201120.jar"/>
<classpathentry kind="lib" path="lib/jetty-servlets-9.4.35.v20201120.jar"/>
<classpathentry kind="lib" path="lib/jetty-util-9.4.35.v20201120.jar"/>
<classpathentry kind="lib" path="lib/jetty-webapp-9.4.35.v20201120.jar"/>
<classpathentry kind="lib" path="lib/jetty-xml-9.4.35.v20201120.jar"/>
<classpathentry kind="lib" path="lib/jsch-0.1.54.jar"/>
<classpathentry kind="lib" path="lib/json-simple-1.1.1.jar"/>
<classpathentry kind="lib" path="lib/jsonic-1.3.10.jar"/>
<classpathentry kind="lib" path="lib/jwat-archive-common-1.1.1.jar"/>
<classpathentry kind="lib" path="lib/jwat-common-1.1.1.jar"/>
<classpathentry kind="lib" path="lib/jwat-gzip-1.1.1.jar"/>
<classpathentry kind="lib" path="lib/jwat-warc-1.1.1.jar"/>
<classpathentry kind="lib" path="lib/lucene-analyzers-common-8.8.1.jar"/>
<classpathentry kind="lib" path="lib/lucene-analyzers-phonetic-8.8.1.jar"/>
<classpathentry kind="lib" path="lib/lucene-backward-codecs-8.8.1.jar"/>
<classpathentry kind="lib" path="lib/lucene-classification-8.8.1.jar"/>
<classpathentry kind="lib" path="lib/lucene-codecs-8.8.1.jar"/>
<classpathentry kind="lib" path="lib/lucene-core-8.8.1.jar"/>
<classpathentry kind="lib" path="lib/lucene-grouping-8.8.1.jar"/>
<classpathentry kind="lib" path="lib/lucene-highlighter-8.8.1.jar"/>
<classpathentry kind="lib" path="lib/lucene-join-8.8.1.jar"/>
<classpathentry kind="lib" path="lib/lucene-memory-8.8.1.jar"/>
<classpathentry kind="lib" path="lib/lucene-misc-8.8.1.jar"/>
<classpathentry kind="lib" path="lib/lucene-queries-8.8.1.jar"/>
<classpathentry kind="lib" path="lib/lucene-queryparser-8.8.1.jar"/>
<classpathentry kind="lib" path="lib/lucene-spatial-extras-8.8.1.jar"/>
<classpathentry kind="lib" path="lib/lucene-suggest-8.8.1.jar"/>
<classpathentry kind="lib" path="lib/opentracing-api-0.33.0.jar"/>
<classpathentry kind="lib" path="lib/opentracing-noop-0.33.0.jar"/>
<classpathentry kind="lib" path="lib/opentracing-util-0.33.0.jar"/>
<classpathentry kind="lib" path="lib/metadata-extractor-2.11.0.jar"/>
<classpathentry kind="lib" path="lib/metrics-core-3.2.2.jar"/>
<classpathentry kind="lib" path="lib/metrics-jmx-4.1.5.jar"/>
<classpathentry kind="lib" path="lib/org.restlet.jar"/>
<classpathentry kind="lib" path="lib/pdfbox-2.0.15.jar"/>
<classpathentry kind="lib" path="lib/poi-3.17.jar"/>
<classpathentry kind="lib" path="lib/poi-scratchpad-3.17.jar"/>
<classpathentry kind="lib" path="lib/rrd4j-3.2.jar"/>
<classpathentry kind="lib" path="lib/solr-core-8.8.1.jar"/>
<classpathentry kind="lib" path="lib/solr-solrj-8.8.1.jar"/>
<classpathentry kind="lib" path="lib/spatial4j-0.6.jar"/>
<classpathentry kind="lib" path="lib/stax2-api-3.1.4.jar"/>
<classpathentry kind="lib" path="lib/weupnp-0.1.4.jar"/>
<classpathentry kind="lib" path="lib/woodstox-core-asl-4.4.1.jar"/>
<classpathentry kind="lib" path="lib/xml-apis-1.4.01.jar"/>
<classpathentry kind="lib" path="lib/xmpcore-5.1.3.jar"/>
<classpathentry kind="lib" path="lib/xz-1.8.jar"/>
<classpathentry kind="lib" path="lib/zookeeper-3.4.14.jar"/>
<classpathentry kind="lib" path="libt/hamcrest-2.2.jar"/>
<classpathentry kind="lib" path="libt/hamcrest-core-2.2.jar"/>
<classpathentry kind="lib" path="libt/hamcrest-library-2.2.jar"/>
<classpathentry kind="lib" path="lib/commons-collections4-4.4.jar"/>
<classpathentry kind="lib" path="lib/guava-25.1-jre.jar"/>
<classpathentry kind="lib" path="lib/hazelcast-4.2.jar"/>
<classpathentry kind="lib" path="lib/commons-compress-1.21.jar"/>
<classpathentry kind="lib" path="lib/bcmail-jdk15on-1.69.jar"/>
<classpathentry kind="lib" path="lib/bcpkix-jdk15on-1.69.jar"/>
<classpathentry kind="lib" path="lib/bcprov-jdk15on-1.69.jar"/>
<classpathentry kind="lib" path="lib/jsoup-1.14.2.jar"/>
<classpathentry kind="lib" path="lib/log4j-over-slf4j-1.7.32.jar"/>
<classpathentry kind="lib" path="lib/slf4j-api-1.7.32.jar"/>
<classpathentry kind="lib" path="lib/slf4j-jdk14-1.7.32.jar"/>
<classpathentry kind="lib" path="lib/langdetect-1.1-20120112.jar"/>
<classpathentry kind="src" path="htroot/api/blacklists"/>
<classpathentry kind="src" path="htroot/api/bookmarks/posts"/>
<classpathentry kind="src" path="htroot/api/bookmarks/tags"/>
@@ -126,6 +13,16 @@
<classpathentry kind="src" path="htroot/yacy"/>
<classpathentry kind="src" path="source"/>
<classpathentry kind="src" path="test/java"/>
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER">
<attributes>
<attribute name="module" value="true"/>
</attributes>
</classpathentry>
<classpathentry kind="con" path="org.apache.ivyde.eclipse.cpcontainer.IVYDE_CONTAINER/?project=yacy&amp;ivyXmlPath=ivy.xml&amp;confs=compile">
<attributes>
<attribute name="module" value="true"/>
</attributes>
</classpathentry>
<classpathentry kind="lib" path="lib/J7Zip-modified-1.0.2.jar"/>
<classpathentry kind="output" path="gen"/>
</classpath>

@@ -23,5 +23,6 @@
</buildSpec>
<natures>
<nature>org.eclipse.jdt.core.javanature</nature>
<nature>org.apache.ivyde.eclipse.ivynature</nature>
</natures>
</projectDescription>

@@ -1370,16 +1370,6 @@ core.service.webgraph.tmp = false
parserAugmentation = false
parserAugmentation.RDFa = false
# Content control settings
contentcontrol.enabled = false
contentcontrol.bookmarklist = contentcontrol
contentcontrol.mandatoryfilterlist = yacy
contentcontrol.smwimport.enabled = false
contentcontrol.smwimport.baseurl =
contentcontrol.smwimport.purgelistoninit = true
contentcontrol.smwimport.targetlist = contentcontrol
contentcontrol.smwimport.defaultcategory = yacy
# host browser settings
# Allow the administrator to stack URLs to the local crawl queue from the host browser page, automatically (when a path is unknown) or manually through a "load and index" link
browser.autoload = false

@@ -1,95 +0,0 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>YaCy '#[clientname]#': Content Control</title>
#%env/templates/metas.template%#
</head>
<body id="Settings">
#%env/templates/header.template%#
#%env/templates/submenuBlacklist.template%#
<h2>Content Control</h2>
<form id="contentcontrolsettings" action="ContentControl_p.html" method="post" enctype="multipart/form-data">
<fieldset><legend id="augmentation">Peer Content Control URL Filter</legend>
<p>
With this settings you can activate or deactivate content control on this peer.
</p>
<dl>
<dt><label for="content">Use content control filtering:</label></dt>
<dd>
<input type="checkbox" name="contentcontrolenabled" id="contentcontrolenabled" #(contentcontrolenabled_checked)#:: checked="checked"#(/contentcontrolenabled_checked)# />Enabled<br/>
<p class="help">
Enables or disables content control.
</p>
</dd>
<dt><label for="content">Use this table to create filter:</label></dt>
<dd>
<input type="text" name="contentcontrolbml" value="#[contentcontrolbml]#" size="60" /><br/><br/>
<p class="help">
Define a table. Default: contentcontrol
</p>
</dd>
<dt></dt>
<dd><input type="submit" name="contentcontrolSettings" value="Submit" class="btn btn-primary"/></dd>
</dl>
</fieldset>
</form>
<form id="contentcontrolExtraSettings" action="ContentControl_p.html" method="post" enctype="multipart/form-data">
<fieldset><legend id="urlproxy">Content Control SMW Import Settings</legend>
<p>
With this settings you can define the content control import settings. You can define a <a href="http://wiki.sciety.org/mediawiki/extensions/yacy-smwextension/" target="_blank">Semantic Media Wiki with the appropriate extensions.</a>
</p>
<dl>
<dt><label for="content">SMW import to content control list:</label></dt>
<dd>
<input type="checkbox" name="ccsmwimport" id="ccsmwimport" #(ccsmwimport_checked)#:: checked="checked"#(/ccsmwimport_checked)# />Enabled<br/>
<p class="help">
Enable or disable constant background synchronization of content control list from SMW (Semantic Mediawiki). Requires restart!
</p>
</dd>
<dt><label for="content">SMW import base URL:</label></dt>
<dd>
<input type="text" name="ccsmwimporturl" value="#[ccsmwimporturl]#" size="60" /><br/><br/>
<p class="help">
Define base URL for SMW special page "Ask". Example: http://my.wiki.cc/wiki/Special:Ask
</p>
</dd>
<dt><label for="content">SMW import target table:</label></dt>
<dd>
<input type="text" name="ccsmwimportlist" value="#[ccsmwimportlist]#" size="60" /><br/><br/>
<p class="help">
Define import target table. Default: contentcontrol
</p>
</dd>
<dt><label for="content">Purge content control list on initial sync:</label></dt>
<dd>
<input type="checkbox" name="ccsmwpurge" id="ccsmwpurge" #(ccsmwpurge_checked)#:: checked="checked"#(/ccsmwpurge_checked)# />Enabled<br/>
<p class="help">
Purge content control list on initial synchronisation after startup.
</p>
</dd>
<dt></dt>
<dd><input type="submit" name="contentcontrolExtraSettings" value="Submit" class="btn btn-primary"/></dd>
</dl>
</fieldset>
</form>
#%env/templates/footer.template%#
</body>
</html>

@@ -1,68 +0,0 @@
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.server.serverObjects;
import net.yacy.server.serverSwitch;
public final class ContentControl_p {
public static serverObjects respond(@SuppressWarnings("unused") final RequestHeader header,
final serverObjects post, final serverSwitch env) {
final serverObjects prop = new serverObjects();
if (post != null) {
if (post.containsKey("contentcontrolExtraSettings")) {
env.setConfig("contentcontrol.smwimport.baseurl",
post.get("ccsmwimporturl"));
env.setConfig("contentcontrol.smwimport.enabled",
"on".equals(post.get("ccsmwimport")) ? true : false);
env.setConfig("contentcontrol.smwimport.purgelistoninit",
"on".equals(post.get("ccsmwpurge")) ? true : false);
env.setConfig("contentcontrol.smwimport.targetlist",
post.get("ccsmwimportlist"));
}
if (post.containsKey("contentcontrolSettings")) {
env.setConfig("contentcontrol.enabled",
"on".equals(post.get("contentcontrolenabled")) ? true : false);
env.setConfig("contentcontrol.bookmarklist",
post.get("contentcontrolbml"));
}
}
prop.putHTML("ccsmwimportlist",
env.getConfig("contentcontrol.smwimport.targetlist", "contentcontrol"));
prop.put("ccsmwpurge_checked", env.getConfigBool(
"contentcontrol.smwimport.purgelistoninit", false) ? "1" : "0");
prop.putHTML("ccsmwimporturl",
env.getConfig("contentcontrol.smwimport.baseurl", ""));
prop.put("ccsmwimport_checked", env.getConfigBool(
"contentcontrol.smwimport.enabled", false) ? "1" : "0");
prop.put("contentcontrolenabled_checked",
env.getConfigBool("contentcontrol.enabled", false) ? "1" : "0");
prop.putHTML("contentcontrolbml",
env.getConfig("contentcontrol.bookmarklist", ""));
// return rewrite properties
return prop;
}
}

@@ -5,6 +5,5 @@
<li><a href="BlacklistCleaner_p.html" class="MenuItemLink #(authorized)#lock::unlock#(/authorized)#">Blacklist Cleaner</a></li>
<li><a href="BlacklistTest_p.html" class="MenuItemLink #(authorized)#lock::unlock#(/authorized)#">Blacklist Test</a></li>
<li><a href="BlacklistImpExp_p.html" class="MenuItemLink #(authorized)#lock::unlock#(/authorized)#">Import/Export</a></li>
<li><a href="ContentControl_p.html" class="MenuItemLink #(authorized)#lock::unlock#(/authorized)#">Content Control</a></li>
</ul>
</div>

@@ -13,7 +13,6 @@
<dependency org="com.cybozu.labs" name="langdetect" rev="1.1-20120112" conf="compile->master"/>
<dependency org="com.drewnoakes" name="metadata-extractor" rev="2.11.0" />
<dependency org="com.fasterxml.jackson.core" name="jackson-databind" rev="2.11.2"/>
<dependency org="com.googlecode.json-simple" name="json-simple" rev="1.1.1" conf="compile->master" />
<dependency org="com.google.guava" name="guava" rev="25.1-jre" conf="compile->master"/>
<dependency org="com.hazelcast" name="hazelcast" rev="4.2" />
<dependency org="com.ibm.icu" name="icu4j" rev="63.1"/>
@@ -94,7 +93,6 @@
<!-- This does not match langdetect.jar from pre-ivy -->
<dependency org="org.tukaani" name="xz" rev="1.8"/>
<dependency org="oro" name="oro" rev="2.0.8"/>
<dependency org="xml-apis" name="xml-apis" rev="1.4.01"/>
<dependency org="junit" name="junit" rev="4.13" conf="test->default"/>
<dependency org="org.hamcrest" name="hamcrest" rev="2.2" conf="test->default"/>

@@ -953,30 +953,6 @@ Duration==Dauer
#ID==ID
#-----------------------------
#File: ContentControl_p.html
#---------------------------
Content Control<==Inhaltskontrolle<
Peer Content Control URL Filter==Peer Inhaltskontrolle URL Filter
With this settings you can activate or deactivate content control on this peer.==Mit dieser Einstellung kann die Inhaltskontrolle auf diesem Peer an- oder abgeschalten werden.
Use content control filtering:==Verwende Inhaltskontrollfilter:
>Enabled<==>Aktiviert<
Enables or disables content control.==Schaltet Inhaltskontrolle an- oder ab.
Use this table to create filter:==Verwenden Sie diese Tabelle, um Filter zu erzeugen:
Define a table. Default:==Definieren Sie ein Tabelle. Standardeinstellung:
Content Control SMW Import Settings==Inhaltskontrolle SMW Importeinstellungen
With this settings you can define the content control import settings. You can define a==Mit diesen Einstellungen können Sie die Importeinstellungen für die Inhaltskontrolle definieren. Definieren Sie ein
Semantic Media Wiki with the appropriate extensions.==Semantisches Media Wiki mit den passenden Erweiterungen.
SMW import to content control list:==SMW Import für die Inhalts-Kontroll-Liste:
Enable or disable constant background synchronization of content control list from SMW (Semantic Mediawiki). Requires restart!==Konstante Synchronisation der Inhalts-Kontroll-Liste vom SMW (Semantisches Medienwiki) im Hintergrund. Benötigt Neustart!
SMW import base URL:==SMW Import Basis URL:
Define base URL for SMW special page "Ask". Example: ==Definiere Basis URL für SMW Spezialseite "Ask". Beispiel:
SMW import target table:==SMW Import Ziele Tabelle:
Define import target table. Default: contentcontrol==Definieren Import Ziel Tabelle. Standardeinstellung: contentcontrol
Purge content control list on initial sync:==Verwerfe Inhalts-Kontroll-Listen bei der ersten Synchronisation:
Purge content control list on initial synchronisation after startup.==Verwerfe Inhalts-Kontroll-Listen bei der ersten Synchronisation nach dem Start.
"Submit"=="Absenden"
#-----------------------------
#File: CookieMonitorIncoming_p.html
#---------------------------

@@ -530,13 +530,6 @@ Duration==Duración
ID==ID
#-----------------------------
#File: ContentControl_p.html
#---------------------------
Content Control<==Control de contenido<
>Enabled<==>Habilitado
"Submit"=="Enviar"
#-----------------------------
#File: CookieMonitorIncoming_p.html
#---------------------------

@@ -510,13 +510,6 @@ Duration==Durata
ID==ID
#-----------------------------
#File: ContentControl_p.html
#---------------------------
Content Control<==Controllo dei contenuti<
>Enabled<==>Abilitato
"Submit"=="Invia"
#-----------------------------
#File: CookieMonitorIncoming_p.html
#---------------------------

@@ -714,13 +714,6 @@ Last Deploy==最後の展開
Connection Tracking==接続の追跡
#-----------------------------
#File: ContentControl_p.html
#---------------------------
Content Control<==コンテントの制御<
"Submit"=="確定する"
#-----------------------------
#File: CookieMonitorIncoming_p.html
#---------------------------
Incoming Cookies Monitor==着信したCookieのモニター

@@ -2429,71 +2429,6 @@
</body>
</file>
<file original="ContentControl_p.html" source-language="en" datatype="html">
<body>
<trans-unit id="3f3b9286" xml:space="preserve" approved="no" translate="yes">
<source>Content Control&lt;</source>
</trans-unit>
<trans-unit id="d21676d1" xml:space="preserve" approved="no" translate="yes">
<source>Peer Content Control URL Filter</source>
</trans-unit>
<trans-unit id="542e1ecb" xml:space="preserve" approved="no" translate="yes">
<source>With this settings you can activate or deactivate content control on this peer.</source>
</trans-unit>
<trans-unit id="2bd01413" xml:space="preserve" approved="no" translate="yes">
<source>Use content control filtering:</source>
</trans-unit>
<trans-unit id="4e4f2379" xml:space="preserve" approved="no" translate="yes">
<source>&gt;Enabled&lt;</source>
</trans-unit>
<trans-unit id="ff54fe20" xml:space="preserve" approved="no" translate="yes">
<source>Enables or disables content control.</source>
</trans-unit>
<trans-unit id="81cdc1a8" xml:space="preserve" approved="no" translate="yes">
<source>Use this table to create filter:</source>
</trans-unit>
<trans-unit id="2a641f75" xml:space="preserve" approved="no" translate="yes">
<source>Define a table. Default:</source>
</trans-unit>
<trans-unit id="c3a262b1" xml:space="preserve" approved="no" translate="yes">
<source>Content Control SMW Import Settings</source>
</trans-unit>
<trans-unit id="fe0fc485" xml:space="preserve" approved="no" translate="yes">
<source>With this settings you can define the content control import settings. You can define a</source>
</trans-unit>
<trans-unit id="a00319d4" xml:space="preserve" approved="no" translate="yes">
<source>Semantic Media Wiki with the appropriate extensions.</source>
</trans-unit>
<trans-unit id="3f00f0c5" xml:space="preserve" approved="no" translate="yes">
<source>SMW import to content control list:</source>
</trans-unit>
<trans-unit id="446815ef" xml:space="preserve" approved="no" translate="yes">
<source>Enable or disable constant background synchronization of content control list from SMW (Semantic Mediawiki). Requires restart!</source>
</trans-unit>
<trans-unit id="d9bff282" xml:space="preserve" approved="no" translate="yes">
<source>SMW import base URL:</source>
</trans-unit>
<trans-unit id="ecfbe3e8" xml:space="preserve" approved="no" translate="yes">
<source>Define base URL for SMW special page "Ask". Example: </source>
</trans-unit>
<trans-unit id="d0d7e963" xml:space="preserve" approved="no" translate="yes">
<source>SMW import target table:</source>
</trans-unit>
<trans-unit id="84acd3e4" xml:space="preserve" approved="no" translate="yes">
<source>Define import target table. Default: contentcontrol</source>
</trans-unit>
<trans-unit id="70ed825" xml:space="preserve" approved="no" translate="yes">
<source>Purge content control list on initial sync:</source>
</trans-unit>
<trans-unit id="642de9e8" xml:space="preserve" approved="no" translate="yes">
<source>Purge content control list on initial synchronisation after startup.</source>
</trans-unit>
<trans-unit id="bfcc5088" xml:space="preserve" approved="no" translate="yes">
<source>"Submit"</source>
</trans-unit>
</body>
</file>
<file original="ContentIntegrationPHPBB3_p.html" source-language="en" datatype="html">
<body>
<trans-unit id="c7bfa2ca" xml:space="preserve" approved="no" translate="yes">

@@ -1059,30 +1059,6 @@ Duration==Длительность
#ID==ID
#-----------------------------
#File: ContentControl_p.html
#---------------------------
Content Control<==Управление контентом<
Peer Content Control URL Filter==Управление контентом узла
With this settings you can activate or deactivate content control on this peer.==Эти настройки позволяют включить или отключить управление контентом для вашего узла.
Use content control filtering:==Использовать фильтр управления контентом:
>Enabled<==>Включить<
Enables or disables content control.==Включение или отключение управления контентом.
Use this table to create filter:==Использовать это поле для создания фильтра:
Define a table. Default:==Задать значение поля. По-умолчанию:
Content Control SMW Import Settings==Импорт настроек управления контентом SMW
With this settings you can define the content control import settings. You can define a==Эти настройки позволяют задать параметры импорта настроек управления контентом
Semantic Media Wiki with the appropriate extensions.==Semantic Media Wiki с соответствующими расширениями.
SMW import to content control list:== Импорт SMW в список управления контентом:
Enable or disable constant background synchronization of content control list from SMW (Semantic Mediawiki). Requires restart!==Включение или отключение постоянной фоновой синхронизации списка управления контентом из SMW (Semantic Mediawiki). Потребуется перезапуск программы!
SMW import base URL:==Ссылка на импортируемую базу SMW:
Define base URL for SMW special page "Ask". Example: ==Укажите ссылку на базу SMW на специальной странице "Ask". Например:
SMW import target table:==Поле назначения импорта SMW:
Define import target table. Default: contentcontrol==Укажите поле назначения импорта. По-умолчанию: contentcontrol
Purge content control list on initial sync:==Удалить список управления контентом в начале синхронизации:
Purge content control list on initial synchronisation after startup.==Удалить список управления контентом в начале синхронизации после запуска программы.
"Submit"=="Сохранить"
#-----------------------------
#File: CookieMonitorIncoming_p.html
#---------------------------

@@ -1033,31 +1033,6 @@ For minTokenLen = 2 the quantRate value should not be below 0.24; for minTokenLe
The quantRate is a measurement for the number of words that take part in a signature computation. The higher the number==quantRate是参与签名计算的单词数量的度量。 数字越高,越少
#-----------------------------
#File: ContentControl_p.html
#---------------------------
Content Control<==内容控制<
Peer Content Control URL Filter==节点内容控制地址过滤器
With this settings you can activate or deactivate content control on this peer==使用此设置你可以激活或取消激活此YaCy节点上的内容控制
Use content control filtering:==使用内容控制过滤:
>Enabled<==>已启用<
Enables or disables content control==启用或禁用内容控制
Use this table to create filter:==使用此表创建过滤器:
Define a table. Default:==定义一个表格. 默认:
Content Control SMW Import Settings==内容控制SMW导入设置
With this settings you can define the content control import settings. You can define a==使用此设置,你可以定义内容控制导入设置. 你可以定义一个
Semantic Media Wiki with the appropriate extensions==语义媒体百科与适当的扩展
SMW import to content control list:==SMW导入到内容控制列表:
Enable or disable constant background synchronization of content control list from SMW (Semantic Mediawiki). Requires restart!==启用或禁用来自SMWSemantic Mediawiki的内容控制列表的恒定后台同步。 需要重启!
SMW import base URL:==SMW导入基URL:
Define base URL for SMW special page "Ask". Example: ==为SMW特殊页面“Ask”定义基础地址.例:
SMW import target table:==SMW导入目标表:
Define import target table. Default: contentcontrol==定义导入目标表. 默认值:contentcontrol
Purge content control list on initial sync:==在初始同步时清除内容控制列表:
Purge content control list on initial synchronisation after startup.==重启后,清除初始同步的内容控制列表.
"Submit"=="提交"
Define base URL for SMW special page "Ask". Example:==为SMW特殊页面“Ask”定义基础地址.例:
#-----------------------------
#File: ContentIntegrationPHPBB3_p.html
#---------------------------
Content Integration: Retrieval from phpBB3 Databases==内容集成: 从phpBB3数据库中导入

@@ -1,90 +0,0 @@
package net.yacy.contentcontrol;
import java.io.IOException;
import java.util.Iterator;
import net.yacy.kelondro.blob.Tables;
import net.yacy.kelondro.blob.Tables.Row;
import net.yacy.repository.FilterEngine;
import net.yacy.search.Switchboard;
public class ContentControlFilterUpdateThread implements Runnable {
private final Switchboard sb;
private Boolean locked = false;
private static FilterEngine networkfilter;
public ContentControlFilterUpdateThread(final Switchboard sb) {
this.sb = sb;
}
@Override
public final void run() {
if (!this.locked) {
this.locked = true;
if (this.sb.getConfigBool("contentcontrol.enabled", false) == true) {
if (SMWListSyncThread.dirty) {
networkfilter = updateFilter();
SMWListSyncThread.dirty = false;
}
}
this.locked = false;
}
return;
}
private static FilterEngine updateFilter () {
FilterEngine newfilter = new FilterEngine();
Switchboard sb = Switchboard.getSwitchboard();
Iterator<Tables.Row> it;
try {
it = sb.tables.iterator(sb.getConfig("contentcontrol.bookmarklist",
"contentcontrol"));
while (it.hasNext()) {
Row b = it.next();
if (!b.get("filter", "").equals("")) {
newfilter.add(b.get("filter", ""), null);
}
}
} catch (final IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
return newfilter;
}
public static FilterEngine getNetworkFilter() {
FilterEngine f = networkfilter;
if (f != null && f.size() > 0)
return f;
return null;
}
}

@@ -1,163 +0,0 @@
package net.yacy.contentcontrol;
import java.io.IOException;
import java.io.Reader;
import java.util.HashMap;
import java.util.Map.Entry;
import java.util.concurrent.ArrayBlockingQueue;
import net.yacy.cora.util.ConcurrentLog;
import org.json.simple.parser.ContentHandler;
import org.json.simple.parser.JSONParser;
import org.json.simple.parser.ParseException;
public class SMWListImporter implements Runnable, ContentHandler{
// Importer Variables
private final ArrayBlockingQueue<SMWListRow> listEntries;
private final Reader importFile;
private SMWListRow row;
private final JSONParser parser;
// Parser Variables
private final StringBuilder value;
private final StringBuilder key;
private final HashMap<String,String> obj;
private Boolean isElement;
public SMWListImporter(final Reader importFile, final int queueSize) {
this.listEntries = new ArrayBlockingQueue<SMWListRow>(queueSize);
this.importFile = importFile;
this.row = new SMWListRow();
this.parser = new JSONParser();
this.value = new StringBuilder(128);
this.key = new StringBuilder(16);
this.obj = new HashMap<String,String>();
this.isElement = false;
}
@Override
public void startJSON() throws ParseException, IOException {
}
@Override
public void endJSON() throws ParseException, IOException {
}
@Override
public boolean startArray() throws ParseException, IOException {
final String key = this.key.toString();
if (key.equals("items")) {
this.isElement = true;
}
return true;
}
@Override
public boolean endArray() throws ParseException, IOException {
return true;
}
@Override
public boolean startObject() throws ParseException, IOException {
return true;
}
@Override
public boolean endObject() throws ParseException, IOException {
if(this.isElement) {
for (Entry<String, String> e: this.obj.entrySet()) {
this.row.add (e.getKey(), e.getValue());
}
try {
this.listEntries.put(this.row);
//this.count++;
} catch (final InterruptedException e) {
ConcurrentLog.logException(e);
}
this.obj.clear();
this.row = new SMWListRow();
}
return true;
}
@Override
public boolean startObjectEntry(String key) throws ParseException, IOException {
this.key.setLength(0);
this.key.append(key);
return true;
}
@Override
public boolean primitive(Object value) throws ParseException, IOException {
this.value.setLength(0);
if(value instanceof java.lang.String) {
this.value.append((String)value);
} else if(value instanceof java.lang.Boolean) {
this.value.append(value);
} else if(value instanceof java.lang.Number) {
this.value.append(value);
}
return true;
}
@Override
public boolean endObjectEntry() throws ParseException, IOException {
final String key = this.key.toString();
final String value = this.value.toString();
this.obj.put(key, value);
return true;
}
@Override
public void run() {
try {
ConcurrentLog.info("SMWLISTSYNC", "Importer run()");
this.parser.parse(this.importFile, this, true);
} catch (final IOException e) {
ConcurrentLog.logException(e);
} catch (final ParseException e) {
ConcurrentLog.logException(e);
} finally {
try {
ConcurrentLog.info("SMWLISTSYNC", "Importer inserted poison pill in queue");
this.listEntries.put(SMWListRow.POISON);
} catch (final InterruptedException e) {
ConcurrentLog.logException(e);
}
}
}
public SMWListRow take() {
try {
return this.listEntries.take();
} catch (final InterruptedException e) {
ConcurrentLog.logException(e);
return null;
}
}
}

@@ -1,117 +0,0 @@
package net.yacy.contentcontrol;
import java.io.IOException;
import java.io.Reader;
import java.util.Iterator;
import java.util.concurrent.ArrayBlockingQueue;
import net.yacy.cora.util.ConcurrentLog;
import org.json.simple.JSONArray;
import org.json.simple.JSONObject;
import org.json.simple.parser.JSONParser;
import org.json.simple.parser.ParseException;
public class SMWListImporterFormatObsolete implements Runnable{
private final ArrayBlockingQueue<SMWListRow> listEntries;
private final Reader importFile;
private final JSONParser parser;
public SMWListImporterFormatObsolete(final Reader importFile, final int queueSize) {
this.listEntries = new ArrayBlockingQueue<SMWListRow>(queueSize);
this.importFile = importFile;
this.parser = new JSONParser();
}
@Override
public void run() {
try {
ConcurrentLog.info("SMWLISTSYNC", "Importer run()");
Object obj = this.parser.parse(this.importFile);
JSONObject jsonObject = (JSONObject) obj;
JSONArray items = (JSONArray) jsonObject.get("items");
@SuppressWarnings("unchecked")
Iterator<JSONObject> iterator = items.iterator();
while (iterator.hasNext()) {
this.parseItem (iterator.next());
}
} catch (final IOException e) {
ConcurrentLog.logException(e);
} catch (final ParseException e) {
ConcurrentLog.logException(e);
} finally {
try {
ConcurrentLog.info("SMWLISTSYNC", "Importer inserted poison pill in queue");
this.listEntries.put(SMWListRow.POISON);
} catch (final InterruptedException e) {
ConcurrentLog.logException(e);
}
}
}
private void parseItem(JSONObject jsonObject) {
try {
SMWListRow row = new SMWListRow();
@SuppressWarnings("unchecked")
Iterator<String> iterator = jsonObject.keySet().iterator();
while (iterator.hasNext()) {
String entryKey = iterator.next();
Object value = jsonObject.get (entryKey);
String valueKey = "";
if (value instanceof java.lang.String) {
valueKey = value.toString();
} else if (value instanceof JSONArray) {
valueKey = jsonListAll ((JSONArray) value);
}
row.add (entryKey, valueKey);
}
this.listEntries.put(row);
} catch (final Exception e) {
ConcurrentLog.info("SMWLISTSYNC", "import of entry failed");
}
}
private String jsonListAll(JSONArray value) {
String res = "";
@SuppressWarnings("unchecked")
Iterator<Object> iterator = value.listIterator();
while (iterator.hasNext()) {
Object val = iterator.next();
res += val.toString()+",";
}
if (res.endsWith (",")) {
res = res.substring (0, res.length()-1);
}
return res;
}
public SMWListRow take() {
try {
return this.listEntries.take();
} catch (final InterruptedException e) {
ConcurrentLog.logException(e);
return null;
}
}
}

@@ -1,24 +0,0 @@
package net.yacy.contentcontrol;
import net.yacy.kelondro.blob.Tables;
public class SMWListRow {
private Tables.Data data;
public static final SMWListRow POISON = new SMWListRow();
public static final SMWListRow EMPTY = new SMWListRow();
public SMWListRow() {
this.data = new Tables.Data();
}
public void add (String key, String value) {
this.data.put(key, value);
}
public Tables.Data getData() {
return this.data;
}
}

@@ -1,201 +0,0 @@
package net.yacy.contentcontrol;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import net.yacy.cora.document.encoding.UTF8;
import net.yacy.cora.protocol.ClientIdentification;
import net.yacy.cora.protocol.http.HTTPClient;
import net.yacy.cora.util.CommonPattern;
import net.yacy.cora.util.ConcurrentLog;
import net.yacy.search.Switchboard;
public class SMWListSyncThread implements Runnable {
private final Switchboard sb;
private Boolean locked = false;
private String lastsync = "1900-01-01T01:00:00";
private String currenttimestamp = "1900-01-01T01:00:00";
private long offset = 0;
private final long limit = 500;
private long currentmax = 0;
private boolean runningjob = false;
private String targetList;
private String parameters;
private String query;
public static Boolean dirty = false;
public SMWListSyncThread(final Switchboard sb, final String targetList, final String query, final String parameters, final Boolean purgeOnInit) {
this.sb = sb;
this.targetList = targetList;
this.parameters = parameters;
this.query = query;
if (purgeOnInit) {
this.sb.tables.clear(targetList);
}
}
private final String wikiurlify (String s) {
String ret = s;
ret = ret.replace("-", "-2D");
ret = ret.replace("+", "-2B");
ret = ret.replace(" ", "-20");
ret = ret.replace("[", "-5B");
ret = ret.replace("]", "-5D");
ret = ret.replace(":", "-3A");
ret = ret.replace(">", "-3E");
ret = ret.replace("?", "-3F");
return ret;
}
@Override
public final void run() {
if (!this.locked) {
this.locked = true;
if (this.sb.getConfigBool("contentcontrol.smwimport.enabled", false) == true) {
if (!this.runningjob) {
// we have to count all new elements first
try {
if (!this.sb.getConfig("contentcontrol.smwimport.baseurl","").equals("")) {
URL urlCount;
urlCount = new URL(
this.sb.getConfig(
"contentcontrol.smwimport.baseurl",
"")
+ wikiurlify ("/[["+this.query+"]] [[Modification date::>" +this.lastsync+ "]]")
+ wikiurlify (this.parameters)
+ "/mainlabel%3D"
+ "/offset%3D0"
+ "/limit%3D200000"
+ "/format%3Dystat");
String reply = UTF8.String(new HTTPClient(ClientIdentification.yacyInternetCrawlerAgent).GETbytes(urlCount.toString(), null, null, false));
String overallcount = CommonPattern.COMMA.split(reply)[0];
String lastsyncstring = CommonPattern.COMMA.split(reply)[1];
this.currentmax = Integer.parseInt(overallcount);
if (this.currentmax > 0) {
ConcurrentLog.info("SMWLISTSYNC",
"import job counts "
+ this.currentmax
+ " new elements between "
+ this.lastsync + " and "
+ this.currenttimestamp);
this.currenttimestamp = this.lastsync;
this.runningjob = true;
this.lastsync = lastsyncstring;
this.offset = 0;
}
} else {
ConcurrentLog.warn("SMWLISTSYNC",
"No SMWimport URL defined");
}
} catch (final MalformedURLException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (final IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
} else {
// there are new elements to be imported
ConcurrentLog.info("SMWLISTSYNC",
"importing max. " + this.limit
+ " elements at " + this.offset + " of "
+ this.currentmax + ", since "
+ this.currenttimestamp);
URL urlImport;
try {
if (!this.sb.getConfig("contentcontrol.smwimport.baseurl","").equals("")) {
urlImport = new URL(
this.sb.getConfig(
"contentcontrol.smwimport.baseurl",
"")
+ wikiurlify ("/[["+this.query+"]] [[Modification date::>" +this.currenttimestamp+ "]]")
+ wikiurlify (this.parameters)
+ "/mainlabel%3D"
+ "/syntax%3Dobsolete"
+ "/offset%3D" + this.offset
+ "/limit%3D" + this.limit
+ "/format%3Djson");
this.offset += this.limit;
if (this.offset > this.currentmax) {
this.runningjob = false;
}
InputStreamReader reader = null;
try {
reader = new InputStreamReader(
urlImport.openStream(), StandardCharsets.UTF_8);
} catch (final Exception e) {
ConcurrentLog.logException(e);
this.runningjob = false;
}
if (reader != null) {
SMWListImporterFormatObsolete smwListImporter = null;
try {
smwListImporter = new SMWListImporterFormatObsolete(
reader, 200);
} catch (final Exception e) {
// TODO: display an error message
ConcurrentLog.logException(e);
this.runningjob = false;
}
Thread t;
SMWListRow row;
t = new Thread(smwListImporter,"SMW List Importer");
t.start();
while ((row = smwListImporter.take()) != SMWListRow.POISON) {
if (row == SMWListRow.EMPTY) {
this.runningjob = false;
} else {
try {
this.sb.tables.insert(targetList, row.getData());
dirty = true;
} catch (final Exception e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
}
}
}
} catch (final MalformedURLException e2) {
// TODO Auto-generated catch block
e2.printStackTrace();
}
}
this.locked = false;
}
}
return;
}
}

@@ -26,31 +26,31 @@ import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import net.yacy.cora.federate.solr.instance.ServerShard;
import net.yacy.cora.util.ConcurrentLog;
import net.yacy.search.schema.CollectionSchema;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer;
import org.apache.solr.client.solrj.impl.XMLResponseParser;
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer;
import org.apache.solr.client.solrj.impl.XMLResponseParser;
import org.apache.solr.client.solrj.request.ContentStreamUpdateRequest;
import org.apache.solr.client.solrj.request.LukeRequest;
import org.apache.solr.client.solrj.request.UpdateRequest;
import org.apache.solr.client.solrj.response.LukeResponse.FieldInfo;
import org.apache.solr.client.solrj.response.LukeResponse;
import org.apache.solr.client.solrj.response.LukeResponse.FieldInfo;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.util.NamedList;
import net.yacy.cora.federate.solr.instance.ServerShard;
import net.yacy.cora.util.ConcurrentLog;
import net.yacy.search.schema.CollectionSchema;
public abstract class SolrServerConnector extends AbstractSolrConnector implements SolrConnector {
protected final static ConcurrentLog log = new ConcurrentLog(SolrServerConnector.class.getName());
public final static org.apache.lucene.analysis.CharArrayMap<Byte> classLoaderSynchro = new org.apache.lucene.analysis.CharArrayMap<Byte>(0, true);
public final static org.apache.lucene.analysis.CharArrayMap<Byte> classLoaderSynchro = new org.apache.lucene.analysis.CharArrayMap<>(0, true);
// pre-instantiate this object to prevent sun.misc.Launcher$AppClassLoader deadlocks
// this is a very nasty problem; solr instantiates objects dynamically which can cause deadlocks
static {
@@ -158,8 +158,8 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen
@Override
public void deleteByIds(final Collection<String> ids) throws IOException {
if (this.server == null) return;
List<String> l = new ArrayList<String>();
for (String s: ids) l.add(s);
final List<String> l = new ArrayList<>();
for (final String s: ids) l.add(s);
synchronized (this.server) {
try {
this.server.deleteById(l, -1);
@@ -247,7 +247,7 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen
@Override
public void add(final Collection<SolrInputDocument> solrdocs) throws IOException, SolrException {
if (this.server == null) return;
for (SolrInputDocument solrdoc : solrdocs) {
for (final SolrInputDocument solrdoc : solrdocs) {
if (solrdoc.containsKey("_version_")) solrdoc.setField("_version_",0L); // prevent Solr "version conflict"
}
synchronized (this.server) {
@@ -278,8 +278,8 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen
this.server.add(solrdocs, -1);
} catch (final Throwable ee) {
ConcurrentLog.logException(ee);
List<String> ids = new ArrayList<String>();
for (SolrInputDocument solrdoc : solrdocs) ids.add((String) solrdoc.getFieldValue(CollectionSchema.id.getSolrFieldName()));
final List<String> ids = new ArrayList<>();
for (final SolrInputDocument solrdoc : solrdocs) ids.add((String) solrdoc.getFieldValue(CollectionSchema.id.getSolrFieldName()));
log.warn(e.getMessage() + " IDs=" + ids.toString());
throw new IOException(ee);
}
@@ -300,11 +300,11 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen
public SolrDocumentList getDocumentListByParams(ModifiableSolrParams params) throws IOException {
if (this.server == null) throw new IOException("server disconnected");
// during the solr query we set the thread name to the query string to get more debugging info in thread dumps
String q = params.get(CommonParams.Q);
String fq = params.get(CommonParams.FQ);
String sort = params.get(CommonParams.SORT);
String fl = params.get(CommonParams.FL);
String threadname = Thread.currentThread().getName();
final String q = params.get(CommonParams.Q);
final String fq = params.get(CommonParams.FQ);
final String sort = params.get(CommonParams.SORT);
final String fl = params.get(CommonParams.FL);
final String threadname = Thread.currentThread().getName();
QueryResponse rsp;
int retry = 0;
Throwable error = null;
@@ -322,7 +322,7 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen
clearCaches(); // prevent further OOM if this was caused by OOM
}
ConcurrentLog.severe("SolrServerConnector", "Failed to query remote Solr: " + error.getMessage() + ", query:" + q + (fq == null ? "" : ", fq = " + fq));
try {Thread.sleep(1000);} catch (InterruptedException e) {}
try {Thread.sleep(1000);} catch (final InterruptedException e) {}
}
throw new IOException("Error executing query", error);
}
@@ -342,10 +342,10 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen
public int getSegmentCount() {
if (this.server == null) return 0;
try {
LukeResponse lukeResponse = getIndexBrowser(false);
NamedList<Object> info = lukeResponse.getIndexInfo();
final LukeResponse lukeResponse = getIndexBrowser(false);
final NamedList<Object> info = lukeResponse.getIndexInfo();
if (info == null) return 0;
Integer segmentCount = (Integer) info.get("segmentCount");
final Integer segmentCount = (Integer) info.get("segmentCount");
if (segmentCount == null) return 1;
return segmentCount.intValue();
} catch (final Throwable e) {
@@ -363,19 +363,19 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen
if (this.server instanceof ServerShard) {
// the server can be a single shard; we don't know here
// to test that, we submit requests to bots variants
if (useluke == 1) return getSizeLukeRequest();
if (useluke == -1) return getSizeQueryRequest();
long ls = getSizeLukeRequest();
long qs = getSizeQueryRequest();
if (this.useluke == 1) return getSizeLukeRequest();
if (this.useluke == -1) return getSizeQueryRequest();
final long ls = getSizeLukeRequest();
final long qs = getSizeQueryRequest();
if (ls == 0 && qs == 0) {
// we don't know if this is caused by an error or not; don't change the useluke value
return 0;
}
if (ls == qs) {
useluke = 1;
this.useluke = 1;
return ls;
}
useluke = -1;
this.useluke = -1;
return qs;
}
return getSizeLukeRequest();
@@ -398,9 +398,9 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen
private long getSizeLukeRequest() {
if (this.server == null) return 0;
try {
LukeResponse lukeResponse = getIndexBrowser(false);
final LukeResponse lukeResponse = getIndexBrowser(false);
if (lukeResponse == null) return 0;
Integer numDocs = lukeResponse.getNumDocs();
final Integer numDocs = lukeResponse.getNumDocs();
if (numDocs == null) return 0;
return numDocs.longValue();
} catch (final Throwable e) {
@@ -419,7 +419,7 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen
LukeResponse lukeResponse = null;
try {
lukeResponse = lukeRequest.process(this.server);
} catch (IOException e) {
} catch (final IOException e) {
throw new SolrServerException(e.getMessage());
}
return lukeResponse;

@@ -27,10 +27,6 @@ import java.util.Collection;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import net.yacy.cora.document.encoding.ASCII;
import net.yacy.cora.util.ConcurrentLog;
import net.yacy.kelondro.util.MemoryControl;
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer;
import org.apache.solr.core.CoreContainer;
@@ -38,17 +34,21 @@ import org.apache.solr.core.SolrCore;
import com.google.common.io.Files;
import net.yacy.cora.document.encoding.ASCII;
import net.yacy.cora.util.ConcurrentLog;
import net.yacy.kelondro.util.MemoryControl;
public class EmbeddedInstance implements SolrInstance {
private final static String[] confFiles = {"solrconfig.xml", "schema.xml", "stopwords.txt", "synonyms.txt", "protwords.txt", "currency.xml", "elevate.xml", "xslt/example.xsl", "xslt/json.xsl", "lang/"};
// additional a optional solrcore.properties (or solrcore.x86.properties for 32bit systems is copied
private CoreContainer coreContainer;
private String defaultCoreName;
private SolrCore defaultCore;
private SolrClient defaultCoreServer;
private File containerPath;
private Map<String, SolrCore> cores;
private Map<String, SolrClient> server;
private final String defaultCoreName;
private final SolrCore defaultCore;
private final SolrClient defaultCoreServer;
private final File containerPath;
private final Map<String, SolrCore> cores;
private final Map<String, SolrClient> server;
public EmbeddedInstance(final File solr_config, final File containerPath, String givenDefaultCoreName, String[] initializeCoreNames) throws IOException {
super();
@@ -56,30 +56,30 @@ public class EmbeddedInstance implements SolrInstance {
this.containerPath = containerPath;
// ensure that default core path exists
File defaultCorePath = new File(containerPath, givenDefaultCoreName);
final File defaultCorePath = new File(containerPath, givenDefaultCoreName);
if (!defaultCorePath.exists()) defaultCorePath.mkdirs();
// migrate old conf directory
File oldConf = new File(containerPath, "conf");
File confDir = new File(defaultCorePath, "conf");
final File oldConf = new File(containerPath, "conf");
final File confDir = new File(defaultCorePath, "conf");
if (oldConf.exists()) oldConf.renameTo(confDir);
// migrate old data directory
File oldData = new File(containerPath, "data");
File dataDir = new File(defaultCorePath, "data");
final File oldData = new File(containerPath, "data");
final File dataDir = new File(defaultCorePath, "data");
if (oldData.exists()) oldData.renameTo(dataDir);
// create index subdirectory in data if it does not exist
File indexDir = new File(dataDir, "index");
final File indexDir = new File(dataDir, "index");
if (!indexDir.exists()) indexDir.mkdirs();
// initialize the cores' configuration
for (String coreName: initializeCoreNames) {
for (final String coreName: initializeCoreNames) {
initializeCoreConf(solr_config, containerPath, coreName);
}
// initialize the coreContainer
File configFile = new File(solr_config, "solr.xml"); // the configuration file for all cores
final File configFile = new File(solr_config, "solr.xml"); // the configuration file for all cores
this.coreContainer = CoreContainer.createAndLoad(containerPath.toPath(), configFile.toPath()); // this may take indefinitely long if solr files are broken
if (this.coreContainer == null) throw new IOException("cannot create core container dir = " + containerPath + ", configFile = " + configFile);
@@ -94,9 +94,9 @@ public class EmbeddedInstance implements SolrInstance {
this.defaultCoreServer = new EmbeddedSolrServer(this.coreContainer, this.defaultCoreName);
// initialize core cache
this.cores = new ConcurrentHashMap<String, SolrCore>();
this.cores = new ConcurrentHashMap<>();
this.cores.put(this.defaultCoreName, this.defaultCore);
this.server = new ConcurrentHashMap<String, SolrClient>();
this.server = new ConcurrentHashMap<>();
this.server.put(this.defaultCoreName, this.defaultCoreServer);
}
@@ -113,11 +113,11 @@ public class EmbeddedInstance implements SolrInstance {
private static void initializeCoreConf(final File solr_config, final File containerPath, String coreName) {
// ensure that default core path exists
File corePath = new File(containerPath, coreName);
final File corePath = new File(containerPath, coreName);
if (!corePath.exists()) corePath.mkdirs();
// check if core.properties exists in each path (thats new in Solr 5.0)
File core_properties = new File(corePath, "core.properties");
final File core_properties = new File(corePath, "core.properties");
if (!core_properties.exists()) {
// create the file
try (
@@ -130,25 +130,25 @@ public class EmbeddedInstance implements SolrInstance {
fos.write(ASCII.getBytes("config=${solrconfig:solrconfig.xml}\n"));
fos.write(ASCII.getBytes("schema=${schema:schema.xml}\n"));
fos.write(ASCII.getBytes("coreNodeName=${coreNodeName:}\n"));
} catch (IOException e) {
} catch (final IOException e) {
ConcurrentLog.logException(e);
}
}
// ensure necessary subpaths exist
File conf = new File(corePath, "conf");
final File conf = new File(corePath, "conf");
conf.mkdirs();
File data = new File(corePath, "data");
final File data = new File(corePath, "data");
data.mkdirs();
// (over-)write configuration into conf path
File source, target;
for (String cf: confFiles) {
for (final String cf: confFiles) {
source = new File(solr_config, cf);
if (source.isDirectory()) {
target = new File(conf, cf);
target.mkdirs();
for (String cfl: source.list()) {
for (final String cfl: source.list()) {
try {
Files.copy(new File(source, cfl), new File(target, cfl));
} catch (final IOException e) {
@@ -168,7 +168,7 @@ public class EmbeddedInstance implements SolrInstance {
// copy the solrcore.properties
// for 32bit systems (os.arch name not containing '64') take the solrcore.x86.properties as solrcore.properties if exists
String os = System.getProperty("os.arch");
final String os = System.getProperty("os.arch");
if (os.contains("64")) {
source = new File(solr_config, "solrcore.properties");
} else {
@@ -242,7 +242,7 @@ public class EmbeddedInstance implements SolrInstance {
@Override
public synchronized void close() {
for (SolrCore core: cores.values()) core.close();
for (final SolrCore core: this.cores.values()) core.close();
if (this.coreContainer != null) try {
this.coreContainer.shutdown();
this.coreContainer = null;

@@ -36,7 +36,6 @@ import java.util.Set;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.atomic.AtomicInteger;
import net.yacy.contentcontrol.ContentControlFilterUpdateThread;
import net.yacy.cora.date.ISO8601Formatter;
import net.yacy.cora.document.encoding.ASCII;
import net.yacy.cora.document.encoding.UTF8;
@@ -424,7 +423,7 @@ public final class CrawlStacker implements WorkflowTask<Request>{
if (dbocc != null) {
return CRAWL_REJECT_REASON_DOUBLE_IN_PREFIX + ": " + dbocc.name();
}
String urls = url.toNormalform(false);
final String urls = url.toNormalform(false);
final long oldDate = this.indexSegment.getLoadTime(url.hash());
// deny urls that exceed allowed number of occurrences
@@ -574,26 +573,6 @@
}
}
if (Switchboard.getSwitchboard().getConfigBool(
"contentcontrol.enabled", false) == true) {
if (!Switchboard.getSwitchboard()
.getConfig("contentcontrol.mandatoryfilterlist", "")
.equals("")) {
final FilterEngine f = ContentControlFilterUpdateThread.getNetworkFilter();
if (f != null) {
if (!f.isListed(url, null)) {
return "the url '"
+ url
+ "' does not belong to the network mandatory filter list";
}
}
}
}
final boolean local = url.isLocal();
if (this.acceptLocalURLs && local) return null;
if (this.acceptGlobalURLs && !local) return null;

@@ -114,8 +114,6 @@ import com.hazelcast.config.NetworkConfig;
import com.hazelcast.core.Hazelcast;
import com.hazelcast.core.HazelcastInstance;
import net.yacy.contentcontrol.ContentControlFilterUpdateThread;
import net.yacy.contentcontrol.SMWListSyncThread;
import net.yacy.cora.date.AbstractFormatter;
import net.yacy.cora.date.GenericFormatter;
import net.yacy.cora.date.ISO8601Formatter;
@@ -501,8 +499,6 @@ public final class Switchboard extends serverSwitch {
// load the network definition
try {
this.overwriteNetworkDefinition(this.getSysinfo());
} catch (final FileNotFoundException e) {
ConcurrentLog.logException(e);
} catch (final IOException e) {
ConcurrentLog.logException(e);
}
@@ -1267,27 +1263,6 @@ public final class Switchboard extends serverSwitch {
Long.parseLong(this.getConfig(SwitchboardConstants.INDEX_DIST_MEMPREREQ, "1000000")),
Double.parseDouble(this.getConfig(SwitchboardConstants.INDEX_DIST_LOADPREREQ, "9.0")));
// content control: initialize list sync thread
this.deployThread(
"720_ccimport",
"Content Control Import",
"this is the content control import thread",
null,
InstantBusyThread.createFromRunnable(
new SMWListSyncThread(this, sb.getConfig("contentcontrol.bookmarklist", "contentcontrol"),
"Category:Content Source", "/?Url/?Filter/?Category/?Modification date",
sb.getConfigBool("contentcontrol.smwimport.purgelistoninit", false)),
3000, 3000),
2000);
this.deployThread(
"730_ccfilter",
"Content Control Filter",
"this is the content control filter update thread",
null,
InstantBusyThread.createFromRunnable(new ContentControlFilterUpdateThread(this), 3000, 3000),
2000);
// set network-specific performance attributes
if ( this.firstInit ) {
this.setRemotecrawlPPM(Math.max(1, (int) this.getConfigLong("network.unit.remotecrawl.speed", 60)));
@@ -2059,7 +2034,7 @@ public final class Switchboard extends serverSwitch {
if ( this.dhtDispatcher != null ) {
this.dhtDispatcher.close();
}
// de.anomic.http.client.Client.closeAllConnections();
// de.anomic.http.client.Client.closeAllConnections();
this.wikiDB.close();
this.blogDB.close();
this.blogCommentDB.close();
@@ -2243,8 +2218,6 @@ public final class Switchboard extends serverSwitch {
if ( gzfile.exists() ) {
FileUtils.deletedelete(outfile);
}
} catch (final FileNotFoundException e ) {
ConcurrentLog.logException(e);
} catch (final IOException e ) {
/* Catch but log any IO exception that can occur on copy, automatic closing or streams creation */
ConcurrentLog.logException(e);
@ -3112,8 +3085,6 @@ public final class Switchboard extends serverSwitch {
Document[] documents = null;
try {
documents = this.parseDocument(in.queueEntry);
} catch (final InterruptedException e ) {
documents = null;
} catch (final Exception e ) {
documents = null;
}
@ -4291,7 +4262,7 @@ public final class Switchboard extends serverSwitch {
this.log.info("dhtTransferJob: too many connections in httpc pool : "
+ ConnectionInfo.getCount());
// close unused connections
// Client.cleanup();
// Client.cleanup();
} else if ( kbytesUp > 128 ) {
this.log.info("dhtTransferJob: too much upload(1), currently uploading: " + kbytesUp + " Kb");
} else {
@ -4331,7 +4302,7 @@ public final class Switchboard extends serverSwitch {
this.log.info("dhtTransferJob: too many connections in httpc pool : "
+ ConnectionInfo.getCount());
// close unused connections
// Client.cleanup();
// Client.cleanup();
} else if ( kbytesUp > 256 ) {
this.log.info("dhtTransferJob: too much upload(2), currently uploading: " + kbytesUp + " Kb");
} else {

@ -51,7 +51,6 @@ import java.util.regex.Pattern;
import org.apache.solr.common.SolrDocument;
import net.yacy.contentcontrol.ContentControlFilterUpdateThread;
import net.yacy.cora.date.ISO8601Formatter;
import net.yacy.cora.document.analysis.Classification;
import net.yacy.cora.document.analysis.Classification.ContentDomain;
@ -95,7 +94,6 @@ import net.yacy.peers.RemoteSearch;
import net.yacy.peers.SeedDB;
import net.yacy.peers.graphics.ProfilingGraph;
import net.yacy.repository.Blacklist.BlacklistType;
import net.yacy.repository.FilterEngine;
import net.yacy.repository.LoaderDispatcher;
import net.yacy.search.EventTracker;
import net.yacy.search.Switchboard;
@ -186,9 +184,9 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
/** a set of words that are used to match with the snippets */
private final Set<String> snippetFetchWords;
private final boolean deleteIfSnippetFail;
private long urlRetrievalAllTime;
private long snippetComputationAllTime;
private ConcurrentHashMap<String, LinkedHashSet<String>> snippets;
private final long urlRetrievalAllTime;
private final long snippetComputationAllTime;
private final ConcurrentHashMap<String, LinkedHashSet<String>> snippets;
private final boolean remote;
/** add received results to local index (default=true) */
@ -283,7 +281,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
return Math.max(
this.local_rwi_available.get() + this.remote_rwi_available.get() +
this.remote_solr_available.get() + Math.max(0, this.local_solr_stored.get() - this.local_solr_evicted.get()),
imageViewed.size() + sizeSpare()
this.imageViewed.size() + sizeSpare()
);
}
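// Illustrative example with assumed counts (not part of the commit): with 5 local RWI,
// 3 remote RWI and 7 remote Solr results available, plus 20 stored minus 4 evicted
// local Solr results, the first operand is 5 + 3 + 7 + Math.max(0, 20 - 4) = 31;
// with 8 viewed images and 4 spares the second operand is 12, so Math.max(31, 12) = 31.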
@ -324,17 +322,17 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
long ab = MemoryControl.available();
if (ab < 1024 * 1024 * 200) {
int eb = SearchEventCache.size();
final int eb = SearchEventCache.size();
SearchEventCache.cleanupEvents(false);
int en = SearchEventCache.size();
final int en = SearchEventCache.size();
if (en < eb) {
log.info("Cleaned up search event cache (1) " + eb + "->" + en + ", " + (ab - MemoryControl.available()) / 1024 / 1024 + " MB freed");
}
}
ab = MemoryControl.available();
int eb = SearchEventCache.size();
final int eb = SearchEventCache.size();
SearchEventCache.cleanupEvents(Math.max(1, (int) (MemoryControl.available() / (1024 * 1024 * 120))));
int en = SearchEventCache.size();
final int en = SearchEventCache.size();
if (en < eb) {
log.info("Cleaned up search event cache (2) " + eb + "->" + en + ", " + (ab - MemoryControl.available()) / 1024 / 1024 + " MB freed");
}
@ -348,7 +346,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
this.imagePageCounter = query.offset;
}
this.loader = loader;
this.nodeStack = new WeakPriorityBlockingQueue<URIMetadataNode>(max_results_node, false);
this.nodeStack = new WeakPriorityBlockingQueue<>(max_results_node, false);
this.maxExpectedRemoteReferences = new AtomicInteger(0);
this.expectedRemoteReferences = new AtomicInteger(0);
this.excludeintext_image = Switchboard.getSwitchboard().getConfigBool("search.excludeintext.image", true);
@ -377,7 +375,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
this.protocolNavigator = protocolNavEnabled ? new ConcurrentScoreMap<>(this) : null;
this.dateNavigator = dateNavEnabled ? new ConcurrentScoreMap<>(this) : null;
this.topicNavigatorCount = topicsNavEnabled ? MAX_TOPWORDS : 0;
this.vocabularyNavigator = new TreeMap<String, ScoreMap<String>>();
this.vocabularyNavigator = new TreeMap<>();
// prepare configured search navigation (plugins)
this.navigatorPlugins = NavigatorPlugins.initFromCfgStrings(navConfigs);
if(this.navigatorPlugins != null) {
@ -386,14 +384,14 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
}
}
this.snippets = new ConcurrentHashMap<String, LinkedHashSet<String>>();
this.snippets = new ConcurrentHashMap<>();
this.secondarySearchSuperviser = (this.query.getQueryGoal().getIncludeHashes().size() > 1) ? new SecondarySearchSuperviser(this) : null; // generate abstracts only for combined searches
if (this.secondarySearchSuperviser != null) this.secondarySearchSuperviser.start();
this.secondarySearchThreads = null;
this.preselectedPeerHashes = preselectedPeerHashes;
this.IAResults = new TreeMap<byte[], String>(Base64Order.enhancedCoder);
this.IACount = new TreeMap<byte[], Integer>(Base64Order.enhancedCoder);
this.heuristics = new TreeMap<byte[], HeuristicResult>(Base64Order.enhancedCoder);
this.IAResults = new TreeMap<>(Base64Order.enhancedCoder);
this.IACount = new TreeMap<>(Base64Order.enhancedCoder);
this.heuristics = new TreeMap<>(Base64Order.enhancedCoder);
this.IAmaxcounthash = null;
this.IAneardhthash = null;
this.remote = (peers != null && peers.sizeConnected() > 0) && (this.query.domType == QueryParams.Searchdom.CLUSTER || (this.query.domType == QueryParams.Searchdom.GLOBAL && Switchboard.getSwitchboard().getConfigBool(SwitchboardConstants.INDEX_RECEIVE_ALLOW_SEARCH, false)));
@ -420,10 +418,10 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
// attention: if minEntries is too high, this method will not terminate within the maxTime
// sortorder: 0 = hash, 1 = url, 2 = ranking
this.localSearchInclusion = null;
this.ref = new ConcurrentScoreMap<String>(this);
this.ref = new ConcurrentScoreMap<>(this);
this.maxtime = query.maxtime;
this.rwiStack = new WeakPriorityBlockingQueue<WordReferenceVars>(max_results_rwi, false);
this.doubleDomCache = new ConcurrentHashMap<String, WeakPriorityBlockingQueue<WordReferenceVars>>();
this.rwiStack = new WeakPriorityBlockingQueue<>(max_results_rwi, false);
this.doubleDomCache = new ConcurrentHashMap<>();
this.flagcount = new int[32];
for ( int i = 0; i < 32; i++ ) {
this.flagcount[i] = 0;
@ -435,8 +433,8 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
this.receivedRemoteReferences = new AtomicInteger(0);
this.order = new ReferenceOrder(this.query.ranking, this.query.targetlang);
this.urlhashes = new RowHandleSet(Word.commonHashLength, Word.commonHashOrder, 100);
this.taggingPredicates = new HashMap<String, String>();
for (Tagging t: LibraryProvider.autotagging.getVocabularies()) {
this.taggingPredicates = new HashMap<>();
for (final Tagging t: LibraryProvider.autotagging.getVocabularies()) {
this.taggingPredicates.put(t.getName(), t.getPredicate());
}
@ -453,8 +451,8 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
this.rwiProcess = null;
if (query.getSegment().connectedRWI() && !Switchboard.getSwitchboard().getConfigBool(SwitchboardConstants.DEBUG_SEARCH_LOCAL_DHT_OFF, false)) {
// we start the local search only if this peer is doing a remote search or when it is doing a local search and the peer is old
rwiProcess = new RWIProcess(this.localsolrsearch);
rwiProcess.start();
this.rwiProcess = new RWIProcess(this.localsolrsearch);
this.rwiProcess.start();
}
if (this.remote) {
@ -465,8 +463,8 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
this.primarySearchThreadsL = null;
this.nodeSearchThreads = null;
} else {
this.primarySearchThreadsL = new ArrayList<RemoteSearch>();
this.nodeSearchThreads = new ArrayList<Thread>();
this.primarySearchThreadsL = new ArrayList<>();
this.nodeSearchThreads = new ArrayList<>();
// start this concurrently because the remote search needs an enumeration
// of the remote peers, which may block in some cases, e.g. when DHT is active
// at the same time.
@ -502,7 +500,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
if ( generateAbstracts ) {
// we need the results now
try {
if (rwiProcess != null && query.getSegment().connectedRWI()) rwiProcess.join();
if (this.rwiProcess != null && query.getSegment().connectedRWI()) this.rwiProcess.join();
} catch (final Throwable e ) {
}
// compute index abstracts
@ -535,7 +533,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
// give process time to accumulate a certain amount of data
// before a reading process wants to get results from it
try {
if (rwiProcess != null && query.getSegment().connectedRWI() && rwiProcess.isAlive()) rwiProcess.join(100);
if (this.rwiProcess != null && query.getSegment().connectedRWI() && this.rwiProcess.isAlive()) this.rwiProcess.join(100);
} catch (final Throwable e ) {
}
// this will reduce the maximum waiting time until results are available to 100 milliseconds
@ -547,14 +545,14 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
this.deleteIfSnippetFail = deleteIfSnippetFail;
this.urlRetrievalAllTime = 0;
this.snippetComputationAllTime = 0;
this.resultList = new WeakPriorityBlockingQueue<URIMetadataNode>(Math.max(max_results_node, 10 * query.itemsPerPage()), true); // this is the result, enriched with snippets, ranked and ordered by ranking
this.resultList = new WeakPriorityBlockingQueue<>(Math.max(max_results_node, 10 * query.itemsPerPage()), true); // this is the result, enriched with snippets, ranked and ordered by ranking
// snippets do not need to match with the complete query hashes,
// only with the query minus the stopwords which had not been used for the search
boolean filtered = false;
// check if query contains stopword
if (Switchboard.stopwordHashes != null) {
Iterator<byte[]> it = query.getQueryGoal().getIncludeHashes().iterator();
final Iterator<byte[]> it = query.getQueryGoal().getIncludeHashes().iterator();
while (it.hasNext()) {
if (Switchboard.stopwordHashes.contains((it.next()))) {
filtered = true;
@ -600,7 +598,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
@Override
public void run() {
if (query.getSegment().termIndex() == null) return; // nothing to do; this index is not used
if (SearchEvent.this.query.getSegment().termIndex() == null) return; // nothing to do; this index is not used
// do a search
oneFeederStarted();
@ -634,7 +632,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
SearchEvent.this.query.modifier.sitehost != null && SearchEvent.this.query.modifier.sitehost.length() > 0
) {
// try again with sitehost
String newGoal = Domains.getSmartSLD(SearchEvent.this.query.modifier.sitehost);
final String newGoal = Domains.getSmartSLD(SearchEvent.this.query.modifier.sitehost);
search =
SearchEvent.this.query
.getSegment()
@ -695,7 +693,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
// normalize entries
final BlockingQueue<WordReferenceVars> decodedEntries = this.order.normalizeWith(index, maxtime, local);
int is = index.size();
final int is = index.size();
EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.EventSearch(
this.query.id(true),
SearchEventType.NORMALIZING,
@ -708,7 +706,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
timer = System.currentTimeMillis();
// apply all constraints
long timeout = maxtime == Long.MAX_VALUE ? Long.MAX_VALUE : System.currentTimeMillis() + maxtime;
final long timeout = maxtime == Long.MAX_VALUE ? Long.MAX_VALUE : System.currentTimeMillis() + maxtime;
int successcounter = 0;
try {
WordReferenceVars iEntry;
@ -716,7 +714,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
String acceptableAlternativeSitehash = null;
if (this.query.modifier.sitehost != null && this.query.modifier.sitehost.length() > 0) try {
acceptableAlternativeSitehash = DigestURL.hosthash(this.query.modifier.sitehost.startsWith("www.") ? this.query.modifier.sitehost.substring(4) : "www." + this.query.modifier.sitehost, 80);
} catch (MalformedURLException e1) {}
} catch (final MalformedURLException e1) {}
pollloop: while ( true ) {
remaining = timeout - System.currentTimeMillis();
if (remaining <= 0) {
@ -740,7 +738,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
}
// increase flag counts
Bitfield flags = iEntry.flags();
final Bitfield flags = iEntry.flags();
for (int j = 0; j < 32; j++) {
if (flags.get(j)) this.flagcount[j]++;
}
@ -806,7 +804,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
this.urlhashes.putUnique(iEntry.urlhash());
rankingtryloop: while (true) {
try {
this.rwiStack.put(new ReverseElement<WordReferenceVars>(iEntry, this.order.cardinal(iEntry))); // inserts the element and removes the worst (which is smallest)
this.rwiStack.put(new ReverseElement<>(iEntry, this.order.cardinal(iEntry))); // inserts the element and removes the worst (which is smallest)
break rankingtryloop;
} catch (final ArithmeticException e ) {
// this may happen if the concurrent normalizer changes values during cardinal computation
@ -821,8 +819,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
}
if (System.currentTimeMillis() >= timeout) ConcurrentLog.warn("SearchEvent", "rwi normalization ended with timeout = " + maxtime);
} catch (final InterruptedException e ) {
} catch (final SpaceExceededException e ) {
} catch (final InterruptedException | SpaceExceededException e ) {
}
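// Illustrative sketch (not part of the commit): the two empty catch blocks above are
// collapsed into a single Java 7 multi-catch clause; the general pattern, with a
// hypothetical doWork() helper, is:
//     try {
//         doWork(); // may throw either exception type
//     } catch (final InterruptedException | SpaceExceededException e) {
//         // one shared (here: empty) handler for both exception types
//     }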
//if ((query.neededResults() > 0) && (container.size() > query.neededResults())) remove(true, true);
@ -847,7 +844,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
// stop all threads
if (this.localsolrsearch != null) {
if (localsolrsearch.isAlive()) synchronized (this.localsolrsearch) {this.localsolrsearch.interrupt();}
if (this.localsolrsearch.isAlive()) synchronized (this.localsolrsearch) {this.localsolrsearch.interrupt();}
}
if (this.nodeSearchThreads != null) {
for (final Thread search : this.nodeSearchThreads) {
@ -969,7 +966,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
long timer = System.currentTimeMillis();
// normalize entries
int is = nodeList.size();
final int is = nodeList.size();
EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.EventSearch(this.query.id(true), SearchEventType.NORMALIZING, resourceName, is, System.currentTimeMillis() - timer), false);
if (!local) {
this.receivedRemoteReferences.addAndGet(is);
@ -985,7 +982,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
// apply all constraints
try {
pollloop: for (URIMetadataNode iEntry: nodeList) {
pollloop: for (final URIMetadataNode iEntry: nodeList) {
// check url related eventual constraints (protocol, tld, sitehost, and filetype)
final String matchingResult = QueryParams.matchesURL(this.query.modifier, this.query.tld, iEntry.url());
@ -1019,7 +1016,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
}
// check constraints
Bitfield flags = iEntry.flags();
final Bitfield flags = iEntry.flags();
if (!this.testFlags(flags)) {
if (log.isFine()) log.fine("dropped Node: flag test");
updateCountsOnSolrEntryToEvict(iEntry, facets, local, !incrementNavigators);
@ -1049,7 +1046,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
}
// filter out media links in text search, if wanted
String ext = MultiProtocolURL.getFileExtension(iEntry.url().getFileName());
final String ext = MultiProtocolURL.getFileExtension(iEntry.url().getFileName());
if (this.query.contentdom == ContentDomain.TEXT && Classification.isImageExtension(ext) && this.excludeintext_image) {
if (log.isFine()) log.fine("dropped Node: file name domain does not match");
updateCountsOnSolrEntryToEvict(iEntry, facets, local, !incrementNavigators);
@ -1097,12 +1094,12 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
long score;
// determine nodestack ranking (will be altered by postranking)
// so far Solr score is used (with an arbitrary factor to get values similar to rwi ranking values)
Float scorex = (Float) iEntry.getFieldValue("score"); // this is a special field containing the ranking score of a Solr search result
final Float scorex = (Float) iEntry.getFieldValue("score"); // this is a special field containing the ranking score of a Solr search result
if (scorex != null && scorex > 0)
score = (long) ((1000000.0f * scorex) - iEntry.urllength()); // we modify the score here since the solr score is equal in many cases and then the order would simply depend on the url hash which would be silly
else
score = this.order.cardinal(iEntry);
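// Illustrative example with assumed values (not from the commit): for a Solr score
// of 0.5f and a URL of length 57, score = (long) ((1000000.0f * 0.5f) - 57) = 499943,
// so documents with an identical Solr score are still separated by URL length instead
// of falling back to the arbitrary order of their url hashes.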
this.nodeStack.put(new ReverseElement<URIMetadataNode>(iEntry, score)); // inserts the element and removes the worst (which is smallest)
this.nodeStack.put(new ReverseElement<>(iEntry, score)); // inserts the element and removes the worst (which is smallest)
break rankingtryloop;
} catch (final ArithmeticException e ) {
// this may happen if the concurrent normalizer changes values during cardinal computation
@ -1131,8 +1128,8 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
private void incrNavigatorsFromSolrFacets(final Map<String, ReversibleScoreMap<String>> facets) {
if(facets != null && !facets.isEmpty()) {
/* Iterate over active navigator plugins to let them update the counters */
for (String s : this.navigatorPlugins.keySet()) {
Navigator navi = this.navigatorPlugins.get(s);
for (final String s : this.navigatorPlugins.keySet()) {
final Navigator navi = this.navigatorPlugins.get(s);
if (navi != null) {
navi.incFacet(facets);
}
@ -1144,8 +1141,8 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
* is expressed as a spatial filter not producing facets counts (see QueryParams.getFacetsFilterQueries()). */
fcts = facets.get(CollectionSchema.coordinate_p_0_coordinate.getSolrFieldName());
if (fcts != null) {
for (String coordinate: fcts) {
int hc = fcts.get(coordinate);
for (final String coordinate: fcts) {
final int hc = fcts.get(coordinate);
if (hc == 0) continue;
this.locationNavigator.inc(coordinate, hc);
}
@ -1161,9 +1158,9 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
fcts = facets.get(CollectionSchema.url_protocol_s.getSolrFieldName());
if (fcts != null) {
// remove all protocols that we don't know
Iterator<String> i = fcts.iterator();
final Iterator<String> i = fcts.iterator();
while (i.hasNext()) {
String protocol = i.next();
final String protocol = i.next();
if (PROTOCOL_NAVIGATOR_SUPPORTED_VALUES.indexOf(protocol) < 0) {
i.remove();
}
@ -1173,15 +1170,15 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
}
// get the vocabulary navigation
Set<String> genericFacets = new LinkedHashSet<>();
for (Tagging v: LibraryProvider.autotagging.getVocabularies()) genericFacets.add(v.getName());
final Set<String> genericFacets = new LinkedHashSet<>();
for (final Tagging v: LibraryProvider.autotagging.getVocabularies()) genericFacets.add(v.getName());
genericFacets.addAll(ProbabilisticClassifier.getContextNames());
for (String vocName: genericFacets) {
for (final String vocName: genericFacets) {
fcts = facets.get(CollectionSchema.VOCABULARY_PREFIX + vocName + CollectionSchema.VOCABULARY_TERMS_SUFFIX);
if (fcts != null) {
ScoreMap<String> vocNav = this.vocabularyNavigator.get(vocName);
if (vocNav == null) {
vocNav = new ConcurrentScoreMap<String>();
vocNav = new ConcurrentScoreMap<>();
this.vocabularyNavigator.put(vocName, vocNav);
}
vocNav.inc(fcts);
@ -1199,8 +1196,8 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
final Map<String, ReversibleScoreMap<String>> facets) {
/* Iterate over active navigator plugins to let them update the counters */
for (String s : this.navigatorPlugins.keySet()) {
Navigator navi = this.navigatorPlugins.get(s);
for (final String s : this.navigatorPlugins.keySet()) {
final Navigator navi = this.navigatorPlugins.get(s);
if (navi != null && (facets == null || !facets.containsKey(navi.getIndexFieldName()))) {
navi.incDoc(doc);
}
@ -1211,7 +1208,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
if (this.dateNavigator != null) {
if (facets == null || !facets.containsKey(CollectionSchema.dates_in_content_dts.getSolrFieldName())) {
Date[] dates = doc.datesInContent();
final Date[] dates = doc.datesInContent();
if (dates != null) {
for (final Date date : dates) {
if (date != null) {
@ -1234,12 +1231,12 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
// get the vocabulary navigation
if(this.vocabularyNavigator != null) {
Set<String> genericFacets = new LinkedHashSet<>();
for (Tagging v : LibraryProvider.autotagging.getVocabularies()) {
final Set<String> genericFacets = new LinkedHashSet<>();
for (final Tagging v : LibraryProvider.autotagging.getVocabularies()) {
genericFacets.add(v.getName());
}
genericFacets.addAll(ProbabilisticClassifier.getContextNames());
for (String vocName : genericFacets) {
for (final String vocName : genericFacets) {
final String fieldName = CollectionSchema.VOCABULARY_PREFIX + vocName + CollectionSchema.VOCABULARY_TERMS_SUFFIX;
if (facets == null || !facets.containsKey(fieldName)) {
incrementVocNavigator(doc, vocName, fieldName);
@ -1259,7 +1256,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
if(docValue instanceof String) {
ScoreMap<String> vocNav = this.vocabularyNavigator.get(vocName);
if (vocNav == null) {
vocNav = new ConcurrentScoreMap<String>();
vocNav = new ConcurrentScoreMap<>();
this.vocabularyNavigator.put(vocName, vocNav);
}
vocNav.inc((String)docValue);
@ -1267,7 +1264,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
if (!((Collection<?>) docValue).isEmpty()) {
ScoreMap<String> vocNav = this.vocabularyNavigator.get(vocName);
if (vocNav == null) {
vocNav = new ConcurrentScoreMap<String>();
vocNav = new ConcurrentScoreMap<>();
this.vocabularyNavigator.put(vocName, vocNav);
}
for (final Object singleDocValue : (Collection<?>) docValue) {
@ -1306,7 +1303,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
rwi = this.rwiStack.poll();
if (rwi == null) return null;
if (!skipDoubleDom) {
URIMetadataNode node = this.query.getSegment().fulltext().getMetadata(rwi);
final URIMetadataNode node = this.query.getSegment().fulltext().getMetadata(rwi);
if (node == null) {
decrementCounts(rwi.getElement());
continue pollloop;
@ -1322,9 +1319,9 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
m = this.doubleDomCache.get(hosthash);
if (m == null) {
// first appearance of dom. we create an entry to signal that one of that domain was already returned
m = new WeakPriorityBlockingQueue<WordReferenceVars>(max_results_rwi, false);
m = new WeakPriorityBlockingQueue<>(max_results_rwi, false);
this.doubleDomCache.put(hosthash, m);
URIMetadataNode node = this.query.getSegment().fulltext().getMetadata(rwi);
final URIMetadataNode node = this.query.getSegment().fulltext().getMetadata(rwi);
if (node == null) {
decrementCounts(rwi.getElement());
continue pollloop;
@ -1390,7 +1387,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
URIMetadataNode node = null;
try {
node = this.query.getSegment().fulltext().getMetadata(bestEntry);
} catch (Throwable e) {
} catch (final Throwable e) {
ConcurrentLog.logException(e);
}
if (node == null) {
@ -1442,7 +1439,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
}
// check content domain
ContentDomain contentDomain = page.getContentDomain();
final ContentDomain contentDomain = page.getContentDomain();
if (this.query.contentdom.getCode() > 0 && (
(this.query.contentdom == Classification.ContentDomain.IMAGE && contentDomain != Classification.ContentDomain.IMAGE) ||
(this.query.contentdom == Classification.ContentDomain.AUDIO && contentDomain != Classification.ContentDomain.AUDIO) ||
@ -1454,7 +1451,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
}
// filter out media links in text search, if wanted
String ext = MultiProtocolURL.getFileExtension(page.url().getFileName());
final String ext = MultiProtocolURL.getFileExtension(page.url().getFileName());
if (this.query.contentdom == ContentDomain.TEXT && Classification.isImageExtension(ext) && this.excludeintext_image) {
if (log.isFine()) log.fine("dropped RWI: file name domain does not match");
continue;
@ -1480,7 +1477,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
// check modifier constraint collection
// this is not available in pure RWI entries (but in local entries or metadata entries received via Solr query),
if (this.query.modifier.collection != null) {
Collection<Object> docCols = page.getFieldValues(CollectionSchema.collection_sxt.getSolrFieldName()); // get multivalued value
final Collection<Object> docCols = page.getFieldValues(CollectionSchema.collection_sxt.getSolrFieldName()); // get multivalued value
if (docCols == null) { // no collection info
decrementCounts(page.word());
continue;
@ -1504,16 +1501,6 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
continue;
}
// content control
if (Switchboard.getSwitchboard().getConfigBool("contentcontrol.enabled", false)) {
FilterEngine f = ContentControlFilterUpdateThread.getNetworkFilter();
if (f != null && !f.isListed(page.url(), null)) {
if (log.isFine()) log.fine("dropped RWI: url is blacklisted in contentcontrol");
decrementCounts(page.word());
continue;
}
}
final String pageurl = page.url().toNormalform(true);
final String pageauthor = page.dc_creator();
final String pagetitle = page.dc_title().toLowerCase();
@ -1551,9 +1538,9 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
// check geo coordinates
double lat, lon;
if (this.query.radius > 0.0d && this.query.lat != 0.0d && this.query.lon != 0.0d && (lat = page.lat()) != 0.0d && (lon = page.lon()) != 0.0d) {
double latDelta = this.query.lat - lat;
double lonDelta = this.query.lon - lon;
double distance = Math.sqrt(latDelta * latDelta + lonDelta * lonDelta); // pythagoras
final double latDelta = this.query.lat - lat;
final double lonDelta = this.query.lon - lon;
final double distance = Math.sqrt(latDelta * latDelta + lonDelta * lonDelta); // pythagoras
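// Illustrative example with assumed coordinates (not from the commit): for
// query.lat = 50.0, query.lon = 8.0 and a page at lat = 50.3, lon = 8.4,
// latDelta = -0.3 and lonDelta = -0.4, so distance = Math.sqrt(0.09 + 0.16) = 0.5
// degrees; with query.radius = 0.2 the page is dropped by the check below.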
if (distance > this.query.radius) {
if (log.isFine()) log.fine("dropped RWI: radius constraint");
decrementCounts(page.word());
@ -1564,10 +1551,10 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
// check vocabulary terms (metatags) {only available in Solr index as vocabulary_xxyyzzz_sxt field}
// TODO: vocabulary is only valid and available in the local Solr index (consider auto-switching to Searchdom.LOCAL)
if (this.query.metatags != null && !this.query.metatags.isEmpty()) {
tagloop: for (Tagging.Metatag tag : this.query.metatags) {
SolrDocument sdoc = page;
tagloop: for (final Tagging.Metatag tag : this.query.metatags) {
final SolrDocument sdoc = page;
if (sdoc != null) {
Collection<Object> tagvalues = sdoc.getFieldValues(CollectionSchema.VOCABULARY_PREFIX + tag.getVocabularyName() + CollectionSchema.VOCABULARY_TERMS_SUFFIX);
final Collection<Object> tagvalues = sdoc.getFieldValues(CollectionSchema.VOCABULARY_PREFIX + tag.getVocabularyName() + CollectionSchema.VOCABULARY_TERMS_SUFFIX);
if (tagvalues != null && tagvalues.contains(tag.getObject())) {
continue tagloop; // metatag exists check next tag (filter may consist of several tags)
}
@ -1582,8 +1569,8 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
// TODO: it may be a little bit late here to update navigator counters
// iterate over active navigator plugins (the rwi metadata may contain the field the plugin counts)
for (String s : this.navigatorPlugins.keySet()) {
Navigator navi = this.navigatorPlugins.get(s);
for (final String s : this.navigatorPlugins.keySet()) {
final Navigator navi = this.navigatorPlugins.get(s);
if (navi != null) {
navi.incDoc(page);
}
@ -1597,7 +1584,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
}
if(this.dateNavigator != null) {
Date[] dates = page.datesInContent();
final Date[] dates = page.datesInContent();
if (dates != null) {
for (final Date date : dates) {
if (date != null) {
@ -1609,8 +1596,8 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
// handle the vocabulary navigator
if (this.vocabularyNavigator != null) {
Set<String> genericFacets = new LinkedHashSet<>();
for (Tagging v : LibraryProvider.autotagging.getVocabularies()) {
final Set<String> genericFacets = new LinkedHashSet<>();
for (final Tagging v : LibraryProvider.autotagging.getVocabularies()) {
genericFacets.add(v.getName());
}
genericFacets.addAll(ProbabilisticClassifier.getContextNames());
@ -1674,8 +1661,8 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
/* Iterate over active navigator plugins to let them update the counters */
for (String s : this.navigatorPlugins.keySet()) {
Navigator navi = this.navigatorPlugins.get(s);
for (final String s : this.navigatorPlugins.keySet()) {
final Navigator navi = this.navigatorPlugins.get(s);
if (navi != null) {
if (navIncrementedWithFacets) {
fcts = facets.get(navi.getIndexFieldName());
@ -1719,7 +1706,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
} else {
fcts = null;
}
Date[] dates = entry.datesInContent();
final Date[] dates = entry.datesInContent();
if (dates != null) {
for (final Date date : dates) {
if (date != null) {
@ -1752,12 +1739,12 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
// get the vocabulary navigation
if (this.vocabularyNavigator != null) {
Set<String> genericFacets = new LinkedHashSet<>();
for (Tagging v : LibraryProvider.autotagging.getVocabularies()) {
final Set<String> genericFacets = new LinkedHashSet<>();
for (final Tagging v : LibraryProvider.autotagging.getVocabularies()) {
genericFacets.add(v.getName());
}
genericFacets.addAll(ProbabilisticClassifier.getContextNames());
for (String vocName : genericFacets) {
for (final String vocName : genericFacets) {
final String fieldName = CollectionSchema.VOCABULARY_PREFIX + vocName
+ CollectionSchema.VOCABULARY_TERMS_SUFFIX;
if (navIncrementedWithFacets) {
@ -1765,20 +1752,20 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
} else {
fcts = null;
}
Object docValue = entry.getFieldValue(fieldName);
final Object docValue = entry.getFieldValue(fieldName);
if (docValue instanceof String) {
if (navIncrementedEarlier || (fcts != null && fcts.containsKey((String) docValue))) {
ScoreMap<String> vocNav = this.vocabularyNavigator.get(vocName);
final ScoreMap<String> vocNav = this.vocabularyNavigator.get(vocName);
if (vocNav != null && vocNav.get((String) docValue) > 0) {
vocNav.dec((String) docValue);
}
}
} else if(docValue instanceof Collection) {
if (!((Collection<?>) docValue).isEmpty()) {
for (Object singleDocValue : (Collection<?>) docValue) {
for (final Object singleDocValue : (Collection<?>) docValue) {
if (singleDocValue instanceof String) {
if (navIncrementedEarlier || (fcts != null && fcts.containsKey((String) singleDocValue))) {
ScoreMap<String> vocNav = this.vocabularyNavigator.get(vocName);
final ScoreMap<String> vocNav = this.vocabularyNavigator.get(vocName);
if (vocNav != null && vocNav.get((String) singleDocValue) > 0) {
vocNav.dec((String) singleDocValue);
}
@ -1815,10 +1802,10 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
if (this.ref.size() <= ic) { // size matches return map directly
result = this.getTopics(/*ic, 500*/);
} else { // collect top most count topics
result = new ConcurrentScoreMap<String>();
Iterator<String> it = this.getTopics(/*ic, 500*/).keys(false);
result = new ConcurrentScoreMap<>();
final Iterator<String> it = this.getTopics(/*ic, 500*/).keys(false);
while (ic-- > 0 && it.hasNext()) {
String word = it.next();
final String word = it.next();
result.set(word, this.ref.get(word));
}
}
@ -1836,8 +1823,8 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
*/
public boolean drainStacksToResult(boolean concurrentSnippetFetch) {
// we take one entry from both stacks at the same time
boolean solrSuccess = drainSolrStackToResult(concurrentSnippetFetch);
boolean rwiSuccess = drainRWIStackToResult(concurrentSnippetFetch);
final boolean solrSuccess = drainSolrStackToResult(concurrentSnippetFetch);
final boolean rwiSuccess = drainRWIStackToResult(concurrentSnippetFetch);
return solrSuccess || rwiSuccess;
}
@ -1857,7 +1844,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
success = true;
}
} else {
Thread t = new Thread("SearchEvent.drainStacksToResult.oneFilteredFromRWI") {
final Thread t = new Thread("SearchEvent.drainStacksToResult.oneFilteredFromRWI") {
@Override
public void run() {
SearchEvent.this.oneFeederStarted();
@ -1894,7 +1881,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
final Element<URIMetadataNode> localEntryElement = this.nodeStack.sizeQueue() > 0 ? this.nodeStack.poll() : null;
final URIMetadataNode node = localEntryElement == null ? null : localEntryElement.getElement();
if (node != null) {
LinkedHashSet<String> solrsnippetlines = this.snippets.remove(ASCII.String(node.hash())); // we can remove this because it's used only once
final LinkedHashSet<String> solrsnippetlines = this.snippets.remove(ASCII.String(node.hash())); // we can remove this because it's used only once
if (solrsnippetlines != null && solrsnippetlines.size() > 0) {
OpensearchResponseWriter.removeSubsumedTitle(solrsnippetlines, node.dc_title());
final TextSnippet solrsnippet = new TextSnippet(node.url(), OpensearchResponseWriter.getLargestSnippet(solrsnippetlines), true, ResultClass.SOURCE_SOLR, "");
@ -1908,7 +1895,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
false);
final String solrsnippetline = solrsnippet.descriptionline(this.getQuery().getQueryGoal());
final String yacysnippetline = yacysnippet.descriptionline(this.getQuery().getQueryGoal());
URIMetadataNode re = node.makeResultEntry(this.query.getSegment(), this.peers, solrsnippetline.length() > yacysnippetline.length() ? solrsnippet : yacysnippet);
final URIMetadataNode re = node.makeResultEntry(this.query.getSegment(), this.peers, solrsnippetline.length() > yacysnippetline.length() ? solrsnippet : yacysnippet);
addResult(re, localEntryElement.getWeight());
success = true;
} else {
@ -1955,8 +1942,8 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
// final long ranking = ((long) (score * 128.f)) + postRanking(resultEntry, this.getTopicNavigator(MAX_TOPWORDS));
resultEntry.setScore(ranking); // update the score of resultEntry for access by search interface / api
this.resultList.put(new ReverseElement<URIMetadataNode>(resultEntry, ranking)); // remove smallest in case of overflow
if (pollImmediately) this.resultList.poll(); // prevent re-ranking in case there is only a single index source which has already ranked entries.
this.resultList.put(new ReverseElement<>(resultEntry, ranking)); // remove smallest in case of overflow
if (this.pollImmediately) this.resultList.poll(); // prevent re-ranking in case there is only a single index source which has already ranked entries.
this.addTopics(resultEntry);
}
@ -1984,7 +1971,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
// apply citation count
//System.out.println("POSTRANKING CITATION: references = " + rentry.referencesCount() + ", inbound = " + rentry.llocal() + ", outbound = " + rentry.lother());
if (this.query.getSegment().connectedCitation()) {
int referencesCount = this.query.getSegment().urlCitation().count(rentry.hash());
final int referencesCount = this.query.getSegment().urlCitation().count(rentry.hash());
r += (128 * referencesCount / (1 + 2 * rentry.llocal() + rentry.lother())) << this.query.ranking.coeff_citation;
}
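// Illustrative example with assumed values (not from the commit): with
// referencesCount = 10, llocal() = 2 and lother() = 1, the integer term is
// 128 * 10 / (1 + 2 * 2 + 1) = 213; with coeff_citation = 1 it is shifted to 426,
// which is then added to the post-ranking value r.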
// prefer hit with 'prefer' pattern
@ -2002,11 +1989,11 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
// the token map is used (instead of urlcomps/descrcomps) to determine appearance in url/title and eliminate double occurrences
// (example Title="News News News News News News - today is party -- News News News News News News" to add one score instead of 12 * score !)
for (final String urlcomp : urlcompmap) {
int tc = topwords.get(urlcomp);
final int tc = topwords.get(urlcomp);
if (tc > 0) r += tc << this.query.ranking.coeff_urlcompintoplist;
}
for (final String descrcomp : descrcompmap) {
int tc = topwords.get(descrcomp);
final int tc = topwords.get(descrcomp);
if (tc > 0) r += tc << this.query.ranking.coeff_descrcompintoplist;
}
@ -2037,10 +2024,10 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
}
// load snippet
ContentDomain contentDomain = page.getContentDomain();
final ContentDomain contentDomain = page.getContentDomain();
if (contentDomain == Classification.ContentDomain.TEXT || contentDomain == Classification.ContentDomain.ALL) {
// attach text snippet
long startTime = System.currentTimeMillis();
final long startTime = System.currentTimeMillis();
final TextSnippet snippet = new TextSnippet(
this.loader,
page,
@ -2110,7 +2097,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
* For local only search, a new SearchEvent should be created, starting directly at the requested offset,
* thus allowing the last pages of large result sets to be handled
*/
int nextitems = item - this.localsolroffset + this.query.itemsPerPage; // example: suddenly switch to item 60, just 10 had been shown, 20 loaded.
final int nextitems = item - this.localsolroffset + this.query.itemsPerPage; // example: suddenly switch to item 60, just 10 had been shown, 20 loaded.
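// Worked example matching the inline comment above: with itemsPerPage = 10, 20 results
// already loaded (this.localsolroffset = 20) and a jump to item = 60,
// nextitems = 60 - 20 + 10 = 50, i.e. the next local Solr query fetches enough
// items to reach and fill the newly requested page.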
if (this.localsolrsearch != null && this.localsolrsearch.isAlive()) {try {this.localsolrsearch.join();} catch (final InterruptedException e) {}}
if (!Switchboard.getSwitchboard().getConfigBool(SwitchboardConstants.DEBUG_SEARCH_LOCAL_SOLR_OFF, false)) {
// Do not increment again navigators from the local Solr on next local pages retrieval, as facets counts scope is on the total results and should already have been added
@ -2175,43 +2162,43 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
/** Image results counter */
private int imagePageCounter = 0;
private LinkedHashMap<String, ImageResult> imageViewed = new LinkedHashMap<String, ImageResult>();
private LinkedHashMap<String, ImageResult> imageSpareGood = new LinkedHashMap<String, ImageResult>();
private LinkedHashMap<String, ImageResult> imageSpareBad = new LinkedHashMap<String, ImageResult>();
private final LinkedHashMap<String, ImageResult> imageViewed = new LinkedHashMap<>();
private final LinkedHashMap<String, ImageResult> imageSpareGood = new LinkedHashMap<>();
private final LinkedHashMap<String, ImageResult> imageSpareBad = new LinkedHashMap<>();
private ImageResult nthImage(int item) {
Object o = SetTools.nth(this.imageViewed.values(), item);
final Object o = SetTools.nth(this.imageViewed.values(), item);
if (o == null) return null;
return (ImageResult) o;
}
private boolean hasSpare() {
return imageSpareGood.size() > 0 || imageSpareBad.size() > 0;
return this.imageSpareGood.size() > 0 || this.imageSpareBad.size() > 0;
}
private boolean containsSpare(String id) {
return imageSpareGood.containsKey(id) || imageSpareBad.containsKey(id);
return this.imageSpareGood.containsKey(id) || this.imageSpareBad.containsKey(id);
}
private int sizeSpare() {
return imageSpareGood.size() + imageSpareBad.size();
return this.imageSpareGood.size() + this.imageSpareBad.size();
}
private ImageResult nextSpare() {
if (imageSpareGood.size() > 0) {
Map.Entry<String, ImageResult> next = imageSpareGood.entrySet().iterator().next();
imageViewed.put(next.getKey(), next.getValue());
imageSpareGood.remove(next.getKey());
if (this.imageSpareGood.size() > 0) {
final Map.Entry<String, ImageResult> next = this.imageSpareGood.entrySet().iterator().next();
this.imageViewed.put(next.getKey(), next.getValue());
this.imageSpareGood.remove(next.getKey());
return next.getValue();
}
if (imageSpareBad.size() > 0) {
Map.Entry<String, ImageResult> next = imageSpareBad.entrySet().iterator().next();
imageViewed.put(next.getKey(), next.getValue());
imageSpareBad.remove(next.getKey());
if (this.imageSpareBad.size() > 0) {
final Map.Entry<String, ImageResult> next = this.imageSpareBad.entrySet().iterator().next();
this.imageViewed.put(next.getKey(), next.getValue());
this.imageSpareBad.remove(next.getKey());
return next.getValue();
}
return null;
}
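// Note (illustrative, assumed image names): the spare maps form a two-tier fallback.
// If images A and B match the query goal while C does not, A and B are kept in
// imageSpareGood and C in imageSpareBad; nextSpare() therefore serves A and B
// before C, so matching images are always returned first.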
public ImageResult oneImageResult(final int item, final long timeout, final boolean strictContentDom) throws MalformedURLException {
if (item < imageViewed.size()) return nthImage(item);
if (imageSpareGood.size() > 0) return nextSpare(); // first put out all good spare, but no bad spare
URIMetadataNode doc = oneResult(imagePageCounter++, timeout); // we must use a different counter here because the image counter can be higher when one page filled up several spare
if (item < this.imageViewed.size()) return nthImage(item);
if (this.imageSpareGood.size() > 0) return nextSpare(); // first put out all good spare, but no bad spare
final URIMetadataNode doc = oneResult(this.imagePageCounter++, timeout); // we must use a different counter here because the image counter can be higher when one page filled up several spares
// check if the match was made in the url or in the image links
if (doc == null) {
if (hasSpare()) return nextSpare();
@ -2231,45 +2218,45 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
// check image size
final Collection<Object> height = doc.getFieldValues(CollectionSchema.images_height_val.getSolrFieldName());
final Collection<Object> width = doc.getFieldValues(CollectionSchema.images_width_val.getSolrFieldName());
int h = height == null ? 0 : (Integer) height.iterator().next(); // might be -1 for unknown
int w = width == null ? 0 : (Integer) width.iterator().next();
final int h = height == null ? 0 : (Integer) height.iterator().next(); // might be -1 for unknown
final int w = width == null ? 0 : (Integer) width.iterator().next();
if ((h <= 0 || h > 16) && (w <= 0 || w > 16)) { // we don't want too small images (< 16x16)
if (!imageViewed.containsKey(id) && !containsSpare(id)) imageSpareGood.put(id, new ImageResult(doc.url(), doc.url(), doc.mime(), doc.title(), w, h, 0));
if (!this.imageViewed.containsKey(id) && !containsSpare(id)) this.imageSpareGood.put(id, new ImageResult(doc.url(), doc.url(), doc.mime(), doc.title(), w, h, 0));
}
}
} else if(!strictContentDom) {
Collection<Object> altO = doc.getFieldValues(CollectionSchema.images_alt_sxt.getSolrFieldName());
Collection<Object> imgO = doc.getFieldValues(CollectionSchema.images_urlstub_sxt.getSolrFieldName());
final Collection<Object> altO = doc.getFieldValues(CollectionSchema.images_alt_sxt.getSolrFieldName());
final Collection<Object> imgO = doc.getFieldValues(CollectionSchema.images_urlstub_sxt.getSolrFieldName());
if (imgO != null && imgO.size() > 0 && imgO instanceof List<?>) {
List<Object> alt = altO == null ? null : (List<Object>) altO;
List<Object> img = (List<Object>) imgO;
List<String> prt = CollectionConfiguration.indexedList2protocolList(doc.getFieldValues(CollectionSchema.images_protocol_sxt.getSolrFieldName()), img.size());
Collection<Object> heightO = doc.getFieldValues(CollectionSchema.images_height_val.getSolrFieldName());
Collection<Object> widthO = doc.getFieldValues(CollectionSchema.images_width_val.getSolrFieldName());
List<Object> height = heightO == null ? null : (List<Object>) heightO;
List<Object> width = widthO == null ? null : (List<Object>) widthO;
final List<Object> alt = altO == null ? null : (List<Object>) altO;
final List<Object> img = (List<Object>) imgO;
final List<String> prt = CollectionConfiguration.indexedList2protocolList(doc.getFieldValues(CollectionSchema.images_protocol_sxt.getSolrFieldName()), img.size());
final Collection<Object> heightO = doc.getFieldValues(CollectionSchema.images_height_val.getSolrFieldName());
final Collection<Object> widthO = doc.getFieldValues(CollectionSchema.images_width_val.getSolrFieldName());
final List<Object> height = heightO == null ? null : (List<Object>) heightO;
final List<Object> width = widthO == null ? null : (List<Object>) widthO;
for (int c = 0; c < img.size(); c++) {
String image_urlstub = (String) img.get(c);
final String image_urlstub = (String) img.get(c);
/* Icons are not always .ico files and should now be indexed in icons_urlstub_sxt. But this test still makes sense for older indexed documents,
* or documents coming from peers running previous versions */
if (image_urlstub.endsWith(".ico")) continue; // we don't want favicons, makes the result look idiotic
try {
int h = height == null ? 0 : (Integer) height.get(c);
int w = width == null ? 0 : (Integer) width.get(c);
final int h = height == null ? 0 : (Integer) height.get(c);
final int w = width == null ? 0 : (Integer) width.get(c);
// check size good for display (parser may init unknown dimension with -1)
if (h > 0 && h <= 16) continue; // too small for display
if (w > 0 && w <= 16) continue; // too small for display
DigestURL imageUrl = new DigestURL((prt != null && prt.size() > c ? prt.get(c) : "http") + "://" + image_urlstub);
String id = ASCII.String(imageUrl.hash());
if (!imageViewed.containsKey(id) && !containsSpare(id)) {
String image_alt = (alt != null && alt.size() > c) ? (String) alt.get(c) : "";
ImageResult imageResult = new ImageResult(doc.url(), imageUrl, "", image_alt, w, h, 0);
boolean match = (query.getQueryGoal().matches(image_urlstub) || query.getQueryGoal().matches(image_alt));
if (match) imageSpareGood.put(id, imageResult); else imageSpareBad.put(id, imageResult);
final DigestURL imageUrl = new DigestURL((prt != null && prt.size() > c ? prt.get(c) : "http") + "://" + image_urlstub);
final String id = ASCII.String(imageUrl.hash());
if (!this.imageViewed.containsKey(id) && !containsSpare(id)) {
final String image_alt = (alt != null && alt.size() > c) ? (String) alt.get(c) : "";
final ImageResult imageResult = new ImageResult(doc.url(), imageUrl, "", image_alt, w, h, 0);
final boolean match = (this.query.getQueryGoal().matches(image_urlstub) || this.query.getQueryGoal().matches(image_alt));
if (match) this.imageSpareGood.put(id, imageResult); else this.imageSpareBad.put(id, imageResult);
}
} catch (MalformedURLException e) {
} catch (final MalformedURLException e) {
continue;
}
}
@ -2303,7 +2290,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
int i = 0;
while (this.resultList.sizeAvailable() < this.query.neededResults() && System.currentTimeMillis() < timeout) {
URIMetadataNode re = oneResult(i++, timeout - System.currentTimeMillis());
final URIMetadataNode re = oneResult(i++, timeout - System.currentTimeMillis());
if (re == null) break;
}
return this.resultList.list(Math.min(this.query.neededResults(), this.resultList.sizeAvailable()));
@ -2331,7 +2318,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
* because they were not supposed to be here. If really necessary to keep them,
* growing the maxSize of the resultList should be considered here.
*/
WeakPriorityBlockingQueue.Element<URIMetadataNode> initialLastResult = this.resultList.getLastInQueue();
final WeakPriorityBlockingQueue.Element<URIMetadataNode> initialLastResult = this.resultList.getLastInQueue();
/*
* Drain stacks in two steps (Solr, then RWI), because one stack might still
