Merge branch 'master' of git@github.com:yacy/yacy_search_server.git

pull/9/merge
Michael Peter Christen 10 years ago
commit 203df5a750

@@ -40,9 +40,9 @@
<classpathentry kind="lib" path="lib/spatial4j-0.4.1.jar"/>
<classpathentry kind="lib" path="lib/zookeeper-3.4.6.jar"/>
<classpathentry kind="lib" path="lib/org.restlet.jar"/>
<classpathentry kind="lib" path="lib/fontbox-1.8.9.jar"/>
<classpathentry kind="lib" path="lib/jempbox-1.8.9.jar"/>
<classpathentry kind="lib" path="lib/pdfbox-1.8.9.jar"/>
<classpathentry kind="lib" path="lib/fontbox-1.8.10.jar"/>
<classpathentry kind="lib" path="lib/jempbox-1.8.10.jar"/>
<classpathentry kind="lib" path="lib/pdfbox-1.8.10.jar"/>
<classpathentry kind="lib" path="lib/bcmail-jdk15-1.46.jar"/>
<classpathentry kind="lib" path="lib/bcprov-jdk15-1.46.jar"/>
<classpathentry kind="lib" path="lib/poi-3.12-20150511.jar"/>

@@ -172,7 +172,7 @@
<pathelement location="${lib}/commons-jxpath-1.3.jar" />
<pathelement location="${lib}/commons-lang-2.6.jar" />
<pathelement location="${lib}/commons-logging-1.2.jar" />
<pathelement location="${lib}/fontbox-1.8.9.jar" />
<pathelement location="${lib}/fontbox-1.8.10.jar" />
<pathelement location="${lib}/geronimo-stax-api_1.0_spec-1.0.1.jar" />
<pathelement location="${lib}/guava-18.0.jar" />
<pathelement location="${lib}/htmllexer.jar" />
@@ -186,7 +186,7 @@
<pathelement location="${lib}/javax.servlet-api-3.1.0.jar" />
<pathelement location="${lib}/jcifs-1.3.17.jar" />
<pathelement location="${lib}/jcl-over-slf4j-1.7.12.jar" />
<pathelement location="${lib}/jempbox-1.8.9.jar" />
<pathelement location="${lib}/jempbox-1.8.10.jar" />
<pathelement location="${lib}/jetty-client-9.2.11.v20150529.jar" />
<pathelement location="${lib}/jetty-continuation-9.2.11.v20150529.jar" />
<pathelement location="${lib}/jetty-deploy-9.2.11.v20150529.jar" />
@@ -224,7 +224,7 @@
<pathelement location="${lib}/metadata-extractor-2.8.1.jar" />
<pathelement location="${lib}/noggit-0.5.jar" />
<pathelement location="${lib}/org.restlet.jar" />
<pathelement location="${lib}/pdfbox-1.8.9.jar" />
<pathelement location="${lib}/pdfbox-1.8.10.jar" />
<pathelement location="${lib}/poi-3.12-20150511.jar" />
<pathelement location="${lib}/poi-scratchpad-3.12-20150511.jar" />
<pathelement location="${lib}/slf4j-api-1.7.12.jar" />

@@ -77,7 +77,7 @@ div.ys {
.filter {
width: 190px;
margin-bottom: .5em;
padding: 2px 0px 2px 0px /* top right bottom left */
padding: 2px 0px 2px 0px; /* top right bottom left */
border: 1px solid #CCCCCC;
font-family: "Trebuchet MS", Trebuchet, Verdana, Helvetica, Arial, sans-serif;
font-size: 12px;
@@ -228,7 +228,7 @@ p.url a {
margin-bottom:-1em;
}
.bm_input, .bm_select {
padding: 2px 0px 2px 0px /* top right bottom left */
padding: 2px 0px 2px 0px; /* top right bottom left */
border: 1px solid #CCCCCC;
margin: 0px 0px 2px 10px;
}

@@ -83,7 +83,7 @@
<compilation-unit>
<package-root>source</package-root>
<package-root>htroot</package-root>
<classpath mode="compile">lib/J7Zip-modified.jar;lib/apache-mime4j-0.6.jar;lib/bcmail-jdk15-1.46.jar;lib/bcprov-jdk15-1.46.jar;lib/chardet.jar;lib/commons-codec-1.10.jar;lib/commons-compress-1.9.jar;lib/commons-fileupload-1.3.1.jar;lib/commons-io-2.4.jar;lib/commons-jxpath-1.3.jar;lib/commons-lang-2.6.jar;lib/commons-logging-1.2.jar;lib/fontbox-1.8.9.jar;lib/geronimo-stax-api_1.0_spec-1.0.1.jar;lib/guava-18.0.jar;lib/htmllexer.jar;lib/httpclient-4.5.jar;lib/httpcore-4.4.1.jar;lib/httpmime-4.5.jar;lib/icu4j-55_1.jar;lib/jakarta-oro-2.0.8.jar;lib/jaudiotagger-2.0.4-20111207.115108-15.jar;lib/javax.servlet-api-3.1.0.jar;lib/jcifs-1.3.17.jar;lib/jcl-over-slf4j-1.7.12.jar;lib/jempbox-1.8.9.jar;lib/jetty-client-9.2.11.v20150529.jar;lib/jetty-continuation-9.2.11.v20150529.jar;lib/jetty-deploy-9.2.11.v20150529.jar;lib/jetty-http-9.2.11.v20150529.jar;lib/jetty-io-9.2.11.v20150529.jar;lib/jetty-jmx-9.2.11.v20150529.jar;lib/jetty-proxy-9.2.11.v20150529.jar;lib/jetty-security-9.2.11.v20150529.jar;lib/jetty-server-9.2.11.v20150529.jar;lib/jetty-servlet-9.2.11.v20150529.jar;lib/jetty-servlets-9.2.11.v20150529.jar;lib/jetty-util-9.2.11.v20150529.jar;lib/jetty-webapp-9.2.11.v20150529.jar;lib/jetty-xml-9.2.11.v20150529.jar;lib/jsch-0.1.53.jar;lib/json-simple-1.1.1.jar;lib/jsoup-1.8.2.jar;lib/log4j-over-slf4j-1.7.12.jar;lib/lucene-analyzers-common-5.2.1.jar;lib/lucene-analyzers-phonetic-5.2.1.jar;lib/lucene-backward-codecs-5.2.1.jar;lib/lucene-classification-5.2.1.jar;lib/lucene-codecs-5.2.1.jar;lib/lucene-core-5.2.1.jar;lib/lucene-facet-5.2.1.jar;lib/lucene-grouping-5.2.1.jar;lib/lucene-highlighter-5.2.1.jar;lib/lucene-join-5.2.1.jar;lib/lucene-memory-5.2.1.jar;lib/lucene-misc-5.2.1.jar;lib/lucene-queries-5.2.1.jar;lib/lucene-queryparser-5.2.1.jar;lib/lucene-spatial-5.2.1.jar;lib/lucene-suggest-5.2.1.jar;lib/metadata-extractor-2.8.1.jar;lib/noggit-0.6.jar;lib/org.restlet.jar;lib/pdfbox-1.8.9.jar;lib/poi-3.12-20150511.jar;lib/poi-scratchpad-3.12-20150511.jar;lib/slf4j-api-1.7.12.jar;lib/slf4j-jdk14-1.7.12.jar;lib/solr-core-5.2.1.jar;lib/solr-solrj-5.2.1.jar;lib/spatial4j-0.4.1.jar;lib/stax2-api-3.1.4.jar;lib/webcat-0.1-swf.jar;lib/weupnp-0.1.3.jar;lib/woodstox-core-asl-4.4.1.jar;lib/wstx-asl-3.2.9.jar;lib/xercesImpl.jar;lib/xml-apis.jar;lib/xmpcore-5.1.2.jar;lib/zookeeper-3.4.6.jar</classpath>
<classpath mode="compile">lib/J7Zip-modified.jar;lib/apache-mime4j-0.6.jar;lib/bcmail-jdk15-1.46.jar;lib/bcprov-jdk15-1.46.jar;lib/chardet.jar;lib/commons-codec-1.10.jar;lib/commons-compress-1.9.jar;lib/commons-fileupload-1.3.1.jar;lib/commons-io-2.4.jar;lib/commons-jxpath-1.3.jar;lib/commons-lang-2.6.jar;lib/commons-logging-1.2.jar;lib/fontbox-1.8.10.jar;lib/geronimo-stax-api_1.0_spec-1.0.1.jar;lib/guava-18.0.jar;lib/htmllexer.jar;lib/httpclient-4.5.jar;lib/httpcore-4.4.1.jar;lib/httpmime-4.5.jar;lib/icu4j-55_1.jar;lib/jakarta-oro-2.0.8.jar;lib/jaudiotagger-2.0.4-20111207.115108-15.jar;lib/javax.servlet-api-3.1.0.jar;lib/jcifs-1.3.17.jar;lib/jcl-over-slf4j-1.7.12.jar;lib/jempbox-1.8.10.jar;lib/jetty-client-9.2.11.v20150529.jar;lib/jetty-continuation-9.2.11.v20150529.jar;lib/jetty-deploy-9.2.11.v20150529.jar;lib/jetty-http-9.2.11.v20150529.jar;lib/jetty-io-9.2.11.v20150529.jar;lib/jetty-jmx-9.2.11.v20150529.jar;lib/jetty-proxy-9.2.11.v20150529.jar;lib/jetty-security-9.2.11.v20150529.jar;lib/jetty-server-9.2.11.v20150529.jar;lib/jetty-servlet-9.2.11.v20150529.jar;lib/jetty-servlets-9.2.11.v20150529.jar;lib/jetty-util-9.2.11.v20150529.jar;lib/jetty-webapp-9.2.11.v20150529.jar;lib/jetty-xml-9.2.11.v20150529.jar;lib/jsch-0.1.53.jar;lib/json-simple-1.1.1.jar;lib/jsoup-1.8.2.jar;lib/log4j-over-slf4j-1.7.12.jar;lib/lucene-analyzers-common-5.2.1.jar;lib/lucene-analyzers-phonetic-5.2.1.jar;lib/lucene-backward-codecs-5.2.1.jar;lib/lucene-classification-5.2.1.jar;lib/lucene-codecs-5.2.1.jar;lib/lucene-core-5.2.1.jar;lib/lucene-facet-5.2.1.jar;lib/lucene-grouping-5.2.1.jar;lib/lucene-highlighter-5.2.1.jar;lib/lucene-join-5.2.1.jar;lib/lucene-memory-5.2.1.jar;lib/lucene-misc-5.2.1.jar;lib/lucene-queries-5.2.1.jar;lib/lucene-queryparser-5.2.1.jar;lib/lucene-spatial-5.2.1.jar;lib/lucene-suggest-5.2.1.jar;lib/metadata-extractor-2.8.1.jar;lib/noggit-0.6.jar;lib/org.restlet.jar;lib/pdfbox-1.8.10.jar;lib/poi-3.12-20150511.jar;lib/poi-scratchpad-3.12-20150511.jar;lib/slf4j-api-1.7.12.jar;lib/slf4j-jdk14-1.7.12.jar;lib/solr-core-5.2.1.jar;lib/solr-solrj-5.2.1.jar;lib/spatial4j-0.4.1.jar;lib/stax2-api-3.1.4.jar;lib/webcat-0.1-swf.jar;lib/weupnp-0.1.3.jar;lib/woodstox-core-asl-4.4.1.jar;lib/wstx-asl-3.2.9.jar;lib/xercesImpl.jar;lib/xml-apis.jar;lib/xmpcore-5.1.2.jar;lib/zookeeper-3.4.6.jar</classpath>
<built-to>lib/yacycore.jar</built-to>
<source-level>1.7</source-level>
</compilation-unit>

@@ -366,7 +366,7 @@
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>fontbox</artifactId>
<version>1.8.9</version>
<version>1.8.10</version>
</dependency>
<dependency>
<groupId>org.apache.geronimo.specs</groupId>
@@ -426,7 +426,7 @@
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>jempbox</artifactId>
<version>1.8.9</version>
<version>1.8.10</version>
</dependency>
<dependency>
<groupId>com.jcraft</groupId>
@@ -443,66 +443,16 @@
<artifactId>jsoup</artifactId>
<version>1.8.2</version>
</dependency>
<dependency>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
<version>1.2.17</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>log4j-over-slf4j</artifactId>
<version>1.7.12</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-analyzers-common</artifactId>
<version>${solr.version}</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-analyzers-phonetic</artifactId>
<version>${solr.version}</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-core</artifactId>
<version>${solr.version}</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-grouping</artifactId>
<version>${solr.version}</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-highlighter</artifactId>
<version>${solr.version}</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-memory</artifactId>
<version>${solr.version}</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-misc</artifactId>
<version>${solr.version}</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-queries</artifactId>
<version>${solr.version}</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-spatial</artifactId>
<version>${solr.version}</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-suggest</artifactId>
<version>${solr.version}</version>
</dependency>
<dependency>
<groupId>com.drewnoakes</groupId>
<artifactId>metadata-extractor</artifactId>
@@ -516,7 +466,7 @@
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox</artifactId>
<version>1.8.9</version>
<version>1.8.10</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
@@ -552,16 +502,6 @@
<groupId>org.apache.solr</groupId>
<artifactId>solr-core</artifactId>
<version>${solr.version}</version>
<exclusions>
<exclusion>
<artifactId>slf4j-log4j12</artifactId>
<groupId>org.slf4j</groupId>
</exclusion>
<exclusion>
<artifactId>javax.servlet</artifactId>
<groupId>org.eclipse.jetty.orbit</groupId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.eclipse.jetty</groupId>

@@ -105,7 +105,7 @@ public class Ranking {
if (!this.fieldBoosts.containsKey(CollectionSchema.description_txt)) qf.append(CollectionSchema.description_txt.getSolrFieldName()).append(' ');
if (!this.fieldBoosts.containsKey(CollectionSchema.keywords)) qf.append(CollectionSchema.keywords.getSolrFieldName());
this.queryFields = qf.toString(); // doesn't change often, cache it
this.queryFields = qf.toString().trim(); // doesn't change often, cache it
return this.queryFields;
}

@@ -30,6 +30,7 @@ import java.util.Map;
import net.yacy.cora.document.id.MultiProtocolURL;
import net.yacy.cora.protocol.Domains;
import net.yacy.cora.protocol.HeaderFramework;
import net.yacy.cora.util.CommonPattern;
import net.yacy.cora.util.ConcurrentLog;
import net.yacy.kelondro.util.MemoryControl;
@@ -150,7 +151,7 @@ public class RemoteInstance implements SolrInstance {
((org.apache.http.impl.client.DefaultHttpClient) this.client).addRequestInterceptor(new HttpRequestInterceptor() {
@Override
public void process(final HttpRequest request, final HttpContext context) throws IOException {
if (!request.containsHeader("Accept-Encoding")) request.addHeader("Accept-Encoding", "gzip");
if (!request.containsHeader(HeaderFramework.ACCEPT_ENCODING)) request.addHeader(HeaderFramework.ACCEPT_ENCODING, HeaderFramework.CONTENT_ENCODING_GZIP);
if (!request.containsHeader("Connection")) request.addHeader("Connection", "close"); // prevent CLOSE_WAIT
}
@@ -164,7 +165,7 @@ public class RemoteInstance implements SolrInstance {
if (ceheader != null) {
HeaderElement[] codecs = ceheader.getElements();
for (HeaderElement codec : codecs) {
if (codec.getName().equalsIgnoreCase("gzip")) {
if (codec.getName().equalsIgnoreCase(HeaderFramework.CONTENT_ENCODING_GZIP)) {
response.setEntity(new GzipDecompressingEntity(response.getEntity()));
return;
}

@@ -30,6 +30,8 @@ import java.io.OutputStream;
import java.util.zip.Deflater;
import java.util.zip.GZIPOutputStream;
import net.yacy.cora.protocol.HeaderFramework;
import org.apache.http.Header;
import org.apache.http.HttpEntity;
import org.apache.http.entity.HttpEntityWrapper;
@@ -38,17 +40,16 @@ import org.apache.http.protocol.HTTP;
public class GzipCompressingEntity extends HttpEntityWrapper {
private static final String GZIP_CODEC = "gzip";
// private static final int DEFAULT_BUFFER_SIZE = 1024; // this is also the maximum chunk size
public GzipCompressingEntity(final HttpEntity entity) {
super(entity);
}
public GzipCompressingEntity(final HttpEntity entity) {
super(entity);
}
@Override
@Override
public Header getContentEncoding() {
return new BasicHeader(HTTP.CONTENT_ENCODING, GZIP_CODEC);
}
return new BasicHeader(HTTP.CONTENT_ENCODING, HeaderFramework.CONTENT_ENCODING_GZIP);
}
@Override
public long getContentLength() {

@@ -26,21 +26,18 @@
package net.yacy.cora.protocol.http;
import java.io.IOException;
import net.yacy.cora.protocol.HeaderFramework;
import org.apache.http.HttpException;
import org.apache.http.HttpRequest;
import org.apache.http.HttpRequestInterceptor;
import org.apache.http.protocol.HttpContext;
public class GzipRequestInterceptor implements HttpRequestInterceptor {
private static final String ACCEPT_ENCODING = "Accept-Encoding";
private static final String GZIP_CODEC = "gzip";
@Override
public void process(final HttpRequest request, final HttpContext context) throws HttpException, IOException {
if (!request.containsHeader(ACCEPT_ENCODING)) {
request.addHeader(ACCEPT_ENCODING, GZIP_CODEC);
if (!request.containsHeader(HeaderFramework.ACCEPT_ENCODING)) {
request.addHeader(HeaderFramework.ACCEPT_ENCODING, HeaderFramework.CONTENT_ENCODING_GZIP);
}
}

@@ -26,8 +26,7 @@
package net.yacy.cora.protocol.http;
import java.io.IOException;
import net.yacy.cora.protocol.HeaderFramework;
import org.apache.http.Header;
import org.apache.http.HeaderElement;
import org.apache.http.HttpEntity;
@@ -38,8 +37,6 @@ import org.apache.http.client.entity.GzipDecompressingEntity;
import org.apache.http.protocol.HttpContext;
public class GzipResponseInterceptor implements HttpResponseInterceptor {
private static final String GZIP_CODEC = "gzip";
@Override
public void process(final HttpResponse response, final HttpContext context) throws HttpException, IOException {
@@ -52,7 +49,7 @@ public class GzipResponseInterceptor implements HttpResponseInterceptor {
if (ceheader != null) {
HeaderElement[] codecs = ceheader.getElements();
for (int i = 0; i < codecs.length; i++) {
if (codecs[i].getName().equalsIgnoreCase(GZIP_CODEC)) {
if (codecs[i].getName().equalsIgnoreCase(HeaderFramework.CONTENT_ENCODING_GZIP)) {
// response.removeHeader(ceheader);
response.setEntity(new GzipDecompressingEntity(response.getEntity()));
return;

@@ -48,7 +48,7 @@ public class Request extends WorkflowJob
+ Word.commonHashLength
+ ", "
+ // the crawling initiator
"String urlstring-256, "
"String urlstring-2048, "
+ // the url as string
"String refhash-"
+ Word.commonHashLength

@@ -64,15 +64,28 @@ abstract public class AbstractRemoteHandler extends ConnectHandler implements Ha
new Thread() {
@Override
public void run() {
final InetAddress localInetAddress = Domains.myPublicLocalIP();
if (localInetAddress != null) {
if (!localVirtualHostNames.contains(localInetAddress.getHostName())) {
localVirtualHostNames.add(localInetAddress.getHostName());
localVirtualHostNames.add(localInetAddress.getHostAddress()); // same as getServer().getURI().getHost()
for (InetAddress localInetAddress : Domains.myPublicIPv4()) {
if (localInetAddress != null) {
if (!localVirtualHostNames.contains(localInetAddress.getHostName())) {
localVirtualHostNames.add(localInetAddress.getHostName());
localVirtualHostNames.add(localInetAddress.getHostAddress()); // same as getServer().getURI().getHost()
}
if (!localVirtualHostNames.contains(localInetAddress.getCanonicalHostName())) {
localVirtualHostNames.add(localInetAddress.getCanonicalHostName());
}
}
}
for (InetAddress localInetAddress : Domains.myPublicIPv6()) {
if (localInetAddress != null) {
if (!localVirtualHostNames.contains(localInetAddress.getHostName())) {
localVirtualHostNames.add(localInetAddress.getHostName());
localVirtualHostNames.add(localInetAddress.getHostAddress()); // same as getServer().getURI().getHost()
}
if (!localVirtualHostNames.contains(localInetAddress.getCanonicalHostName())) {
localVirtualHostNames.add(localInetAddress.getCanonicalHostName());
if (!localVirtualHostNames.contains(localInetAddress.getCanonicalHostName())) {
localVirtualHostNames.add(localInetAddress.getCanonicalHostName());
}
}
}
if (sb.peers != null) {

@@ -787,32 +787,9 @@ public class URIMetadataNode extends SolrDocument /* implements Comparable<URIMe
}
return this.toString(this.textSnippet.getLineRaw());
}
/*
taken from ResultEntry (should work without)
private int hashCache = Integer.MIN_VALUE; // if this is used in a compare method many times, a cache is useful
@Override
public int hashCode() {
if (this.hashCache == Integer.MIN_VALUE) {
this.hashCache = ByteArray.hashCode(this.hash());
}
return this.hashCache;
}
@Override
public boolean equals(final Object obj) {
if (this == obj) return true;
if (obj == null) return false;
if (!(obj instanceof URIMetadataNode)) return false;
URIMetadataNode other = (URIMetadataNode) obj;
return Base64Order.enhancedCoder.equal(this.hash(), other.hash());
return this.url().hashCode();
}
@Override
public int compareTo(URIMetadataNode o) {
return Base64Order.enhancedCoder.compare(this.hash(), o.hash());
}
@Override
public int compare(URIMetadataNode o1, URIMetadataNode o2) {
return Base64Order.enhancedCoder.compare(o1.hash(), o2.hash());
}*/
}

@@ -47,7 +47,9 @@ import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.net.InetAddress;
import java.net.URLDecoder;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
@@ -937,6 +939,8 @@ public final class Protocol {
final int partitions,
final Blacklist blacklist) {
//try {System.out.println("*** debug-query *** " + URLDecoder.decode(solrQuery.toString(), "UTF-8"));} catch (UnsupportedEncodingException e) {}
if (event.query.getQueryGoal().getQueryString(false) == null || event.query.getQueryGoal().getQueryString(false).length() == 0) {
return -1; // we cannot query solr only with word hashes, there is no clear text string
}
@@ -954,6 +958,7 @@ public final class Protocol {
solrQuery.setHighlightSimplePre("<b>");
solrQuery.setHighlightSnippets(5);
for (CollectionSchema field: snippetFields) solrQuery.addHighlightField(field.getSolrFieldName());
//System.out.println("*** debug-query-highligh ***:" + ConcurrentLog.stackTrace());
} else {
solrQuery.setHighlight(false);
}
@@ -1078,6 +1083,7 @@ public final class Protocol {
docs = new ArrayList<SolrInputDocument>(docList[0].size());
} else docs = null;
for (final SolrDocument doc: docList[0]) {
//System.out.println("***DEBUG*** " + ((String) doc.getFieldValue("sku")));
if ( term-- <= 0 ) {
break; // do not process more than requested (in case that evil peers fill us up with rubbish)
}

@@ -328,7 +328,9 @@ public class RemoteSearch extends Thread {
final Seed targetPeer,
final int partitions,
final Blacklist blacklist) {
//System.out.println("*** debug-remoteSearch ***:" + ConcurrentLog.stackTrace());
assert solrQuery != null;
// check own peer status
if (event.peers.mySeed() == null) { return null; }

@@ -83,13 +83,11 @@ public class Transmission {
* to multiple peers and to ensure that all entries in the indexContainers
* have a reference in the urls
* - a set of yacy seeds which will shrink as the containers are transmitted to them
* - a counter that gives the number of successful and unsuccessful transmissions so far
*/
private final Seed dhtTarget;
private final ReferenceContainerCache<WordReference> containers;
private final HandleSet references;
private final HandleSet badReferences;
private int hit, miss;
/**
* generate a new dispatcher target. such a target is defined with a primary target and
@@ -103,8 +101,6 @@
this.containers = new ReferenceContainerCache<WordReference>(Segment.wordReferenceFactory, Segment.wordOrder, Word.commonHashLength);
this.references = new RowHandleSet(WordReferenceRow.urlEntryRow.primaryKeyLength, WordReferenceRow.urlEntryRow.objectOrder, 0);
this.badReferences = new RowHandleSet(WordReferenceRow.urlEntryRow.primaryKeyLength, WordReferenceRow.urlEntryRow.objectOrder, 0);
this.hit = 0;
this.miss = 0;
}
/*
@@ -211,28 +207,11 @@
return this.dhtTarget;
}
/**
* return the number of successful transmissions
* @return
*/
public int hit() {
return this.hit;
}
/**
* return the number of unsuccessful transmissions
* @return
*/
public int miss() {
return this.miss;
}
public boolean transmit() {
// transferring selected words to remote peer
if (this.dhtTarget == Transmission.this.seeds.mySeed() || this.dhtTarget.hash.equals(Transmission.this.seeds.mySeed().hash)) {
// target is my own peer. This is easy. Just restore the indexContainer
restore();
this.hit++;
Transmission.this.log.info("Transfer of chunk to myself-target");
return true;
}
@@ -256,13 +235,11 @@
// if the peer has set a pause time and we are in flush mode (index transfer)
// then we pause for a while now
Transmission.this.log.info("Transfer finished of chunk to target " + this.dhtTarget.hash + "/" + this.dhtTarget.getName());
this.hit++;
return true;
}
Transmission.this.log.info(
"Index transfer to peer " + this.dhtTarget.getName() + ":" + this.dhtTarget.hash +
" failed: " + error);
this.miss++;
// write information that peer does not receive index transmissions
Transmission.this.log.info("Transfer failed of chunk to target " + this.dhtTarget.hash + "/" + this.dhtTarget.getName() + ": " + error);
// get possibly newer target Info

@@ -315,7 +315,7 @@ public final class Switchboard extends serverSwitch {
// set loglevel and log
setLog(new ConcurrentLog("SWITCHBOARD"));
AccessTracker.setDumpFile(new File("DATA/LOG/queries.log"));
AccessTracker.setDumpFile(new File(dataPath, "DATA/LOG/queries.log"));
// set default peer name
Seed.ANON_PREFIX = getConfig("peernameprefix", "_anon");
@@ -359,7 +359,7 @@ public final class Switchboard extends serverSwitch {
this.workPath = getDataPath(SwitchboardConstants.WORK_PATH, SwitchboardConstants.WORK_PATH_DEFAULT);
this.workPath.mkdirs();
// if default work files exist, copy them (don't overwrite existing!)
File defaultWorkPath = new File("defaults/data/work");
File defaultWorkPath = new File(appPath, "defaults/data/work");
if (defaultWorkPath.list() != null) {
for (String fs : defaultWorkPath.list()) {
File wf = new File(this.workPath, fs);
@@ -730,7 +730,7 @@ public final class Switchboard extends serverSwitch {
// copy opensearch heuristic config (if not exist)
final File osdConfig = new File(getDataPath(), "DATA/SETTINGS/heuristicopensearch.conf");
if (!osdConfig.exists()) {
final File osdDefaultConfig = new File("defaults/heuristicopensearch.conf");
final File osdDefaultConfig = new File(appPath, "defaults/heuristicopensearch.conf");
this.log.info("heuristic.opensearch list Path = " + osdDefaultConfig.getAbsolutePath());
try {
Files.copy(osdDefaultConfig, osdConfig);
@@ -781,7 +781,7 @@ public final class Switchboard extends serverSwitch {
}
// init html parser evaluation scheme
File parserPropertiesPath = new File("defaults/");
File parserPropertiesPath = new File(appPath, "defaults/");
String[] settingsList = parserPropertiesPath.list();
for ( final String l : settingsList ) {
if ( l.startsWith("parser.") && l.endsWith(".properties") ) {
@@ -865,7 +865,7 @@ public final class Switchboard extends serverSwitch {
// load oai tables
final Map<String, File> oaiFriends =
OAIListFriendsLoader.loadListFriendsSources(
new File("defaults/oaiListFriendsSource.xml"),
new File(appPath, "defaults/oaiListFriendsSource.xml"),
getDataPath());
OAIListFriendsLoader.init(this.loader, oaiFriends, ClientIdentification.yacyInternetCrawlerAgent);
this.crawlQueues = new CrawlQueues(this, this.queuesRoot);
@@ -2413,7 +2413,7 @@ public final class Switchboard extends serverSwitch {
final Properties profile = new Properties();
FileInputStream fileIn = null;
try {
fileIn = new FileInputStream(new File("DATA/SETTINGS/profile.txt"));
fileIn = new FileInputStream(new File(this.dataPath, "DATA/SETTINGS/profile.txt"));
profile.load(fileIn);
} catch (final IOException e ) {
} finally {

@@ -331,20 +331,20 @@ public class QueryModifier {
*/
public static String parseCollectionExpression(String collectionDescription) {
String[] s0 = CommonPattern.VERTICALBAR.split(collectionDescription);
ArrayList<String> sites = new ArrayList<String>(2);
ArrayList<String> collections = new ArrayList<String>(2);
for (String s: s0) {
s = s.trim();
if (s.length() > 0) sites.add(s);
if (s.length() > 0) collections.add(s);
}
StringBuilder fq = new StringBuilder(20);
if (sites.size() > 1) {
fq.append('(').append(CollectionSchema.collection_sxt.getSolrFieldName()).append(":\"").append(sites.get(0)).append('\"');
for (int i = 1; i < sites.size(); i++) {
fq.append(" OR ").append(CollectionSchema.collection_sxt.getSolrFieldName()).append(":\"").append(sites.get(i)).append('\"');
if (collections.size() > 1) {
fq.append('(').append(CollectionSchema.collection_sxt.getSolrFieldName()).append(":\"").append(collections.get(0)).append('\"');
for (int i = 1; i < collections.size(); i++) {
fq.append(" OR ").append(CollectionSchema.collection_sxt.getSolrFieldName()).append(":\"").append(collections.get(i)).append('\"');
}
fq.append(')');
} else if (sites.size() == 1) {
fq.append(CollectionSchema.collection_sxt.getSolrFieldName()).append(":\"").append(sites.get(0)).append('\"');
} else if (collections.size() == 1) {
fq.append(CollectionSchema.collection_sxt.getSolrFieldName()).append(":\"").append(collections.get(0)).append('\"');
}
if (fq.length() > 0) fq.insert(0, "{!tag=" + CollectionSchema.collection_sxt.getSolrFieldName() + "}");
return fq.toString();

@@ -448,7 +448,15 @@ public final class QueryParams {
// add site facets
fqs.addAll(getFacetsFilterQueries());
if (fqs.size() > 0) {
params.setFilterQueries(fqs.toArray(new String[fqs.size()]));
StringBuilder fqsb = new StringBuilder();
for (String f: fqs) {
fqsb.append(" AND ");
//boolean wo = f.indexOf(" OR ") >= 0;
//if (wo) fqsb.append('(');
fqsb.append(f);
//if (wo) fqsb.append(')');
}
params.setFilterQueries(new String[]{fqsb.substring(5)});
}
// set facet query attributes

@@ -973,7 +973,8 @@ public final class SearchEvent {
this.urlhashes.putUnique(iEntry.hash());
rankingtryloop: while (true) {
try {
long score = (long) (1000000.0f * iEntry.score());
long score = (long) Math.max(0, (1000000.0f * iEntry.score()) - iEntry.urllength()); // we modify the score here since the solr score is equal in many cases and then the order would simply depend on the url hash which would be silly
//System.out.println("*** debug-score *** " + score + " for entry " + iEntry.urlstring());
this.nodeStack.put(new ReverseElement<URIMetadataNode>(iEntry, score == 0 ? this.order.cardinal(iEntry) : score)); // inserts the element and removes the worst (which is smallest)
break rankingtryloop;
} catch (final ArithmeticException e ) {
@@ -1519,13 +1520,15 @@ public final class SearchEvent {
final URIMetadataNode re = this.resultList.element(item).getElement();
EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.EventSearch(this.query.id(true), SearchEventType.ONERESULT, "fetched, item = " + item + ", available = " + this.getResultCount() + ": " + re.urlstring(), 0, 0), false);
if (this.localsolrsearch == null || !this.localsolrsearch.isAlive() && this.local_solr_stored.get() > this.localsolroffset && (item + 1) % this.query.itemsPerPage == 0) {
/*
if (this.localsolrsearch == null || (!this.localsolrsearch.isAlive() && this.local_solr_stored.get() > this.localsolroffset && (item + 1) % this.query.itemsPerPage == 0)) {
// at the end of a list, trigger a next solr search
if (!Switchboard.getSwitchboard().getConfigBool(SwitchboardConstants.DEBUG_SEARCH_LOCAL_SOLR_OFF, false)) {
this.localsolrsearch = RemoteSearch.solrRemoteSearch(this, this.query.solrQuery(this.query.contentdom, false, this.excludeintext_image), this.localsolroffset, this.query.itemsPerPage, null /*this peer*/, 0, Switchboard.urlBlacklist);
this.localsolrsearch = RemoteSearch.solrRemoteSearch(this, this.query.solrQuery(this.query.contentdom, false, this.excludeintext_image), this.localsolroffset, this.query.itemsPerPage, null, 0, Switchboard.urlBlacklist);
}
this.localsolroffset += this.query.itemsPerPage;
}
*/
return re;
}

@@ -722,8 +722,8 @@ public final class HTTPDProxyHandler {
}
// only gzip-encoding is supported, remove other encodings (e. g. deflate)
if ((requestHeader.get(HeaderFramework.ACCEPT_ENCODING,"")).indexOf("gzip",0) != -1) {
requestHeader.put(HeaderFramework.ACCEPT_ENCODING, "gzip");
if ((requestHeader.get(HeaderFramework.ACCEPT_ENCODING,"")).indexOf(HeaderFramework.CONTENT_ENCODING_GZIP,0) != -1) {
requestHeader.put(HeaderFramework.ACCEPT_ENCODING, HeaderFramework.CONTENT_ENCODING_GZIP);
} else {
requestHeader.put(HeaderFramework.ACCEPT_ENCODING, "");
}
