Merge branch 'master' of ssh://git@gitorious.org/yacy/rc1.git

pull/1/head
Michael Peter Christen 10 years ago
commit 961f06c0b6

@ -71,7 +71,7 @@
<classpathentry kind="con" path="org.eclipse.jdt.junit.JUNIT_CONTAINER/4"/>
<classpathentry kind="lib" path="lib/icu4j-core.jar"/>
<classpathentry kind="lib" path="lib/htmllexer.jar"/>
<classpathentry kind="lib" path="lib/jsoup-1.6.3.jar"/>
<classpathentry kind="lib" path="lib/jsoup-1.8.1.jar"/>
<classpathentry kind="lib" path="lib/jetty-client-9.2.3.v20140905.jar"/>
<classpathentry kind="lib" path="lib/jetty-continuation-9.2.3.v20140905.jar"/>
<classpathentry kind="lib" path="lib/jetty-deploy-9.2.3.v20140905.jar"/>

@ -78,7 +78,7 @@
<string>$JAVAROOT/lib/jetty-xml-9.2.3.v20140905.jar</string>
<string>$JAVAROOT/lib/jsch-0.1.50.jar</string>
<string>$JAVAROOT/lib/json-simple-1.1.1.jar</string>
<string>$JAVAROOT/lib/jsoup-1.6.3.jar</string>
<string>$JAVAROOT/lib/jsoup-1.8.1.jar</string>
<string>$JAVAROOT/lib/log4j-over-slf4j-1.7.2.jar</string>
<string>$JAVAROOT/lib/lucene-analyzers-common-4.9.0.jar</string>
<string>$JAVAROOT/lib/lucene-analyzers-phonetic-4.9.0.jar</string>

@ -202,7 +202,7 @@
<pathelement location="${lib}/jetty-xml-9.2.3.v20140905.jar" />
<pathelement location="${lib}/jsch-0.1.50.jar" />
<pathelement location="${lib}/json-simple-1.1.1.jar" />
<pathelement location="${lib}/jsoup-1.6.3.jar" />
<pathelement location="${lib}/jsoup-1.8.1.jar" />
<pathelement location="${lib}/log4j-over-slf4j-1.7.2.jar" />
<pathelement location="${lib}/lucene-analyzers-common-4.9.0.jar" />
<pathelement location="${lib}/lucene-analyzers-phonetic-4.9.0.jar" />

@ -38,7 +38,7 @@
#{list}#
<tr class="TableCell#(dark)#Light::Dark#(/dark)#">
<td>#[time]#</td>
<td><a href="#[url]#">#[url]#</a></td>
<td><a href="#[url]#" target="_blank">#[url]#</a></td>
<td>#[failreason]#</td>
</tr>
#{/list}#

@ -36,6 +36,7 @@ import net.yacy.cora.document.feed.RSSFeed;
import net.yacy.cora.document.feed.RSSMessage;
import net.yacy.cora.document.feed.RSSReader;
import net.yacy.cora.document.id.DigestURL;
import net.yacy.cora.document.id.MultiProtocolURL;
import net.yacy.cora.federate.yacy.CacheStrategy;
import net.yacy.cora.protocol.ClientIdentification;
import net.yacy.cora.protocol.RequestHeader;
@ -212,7 +213,7 @@ public class Load_RSS_p {
final Date date_next_exec = r.get(WorkTables.TABLE_API_COL_DATE_NEXT_EXEC, (Date) null);
prop.put("showscheduledfeeds_list_" + apic + "_pk", UTF8.String(row.getPK()));
prop.put("showscheduledfeeds_list_" + apic + "_count", apic);
prop.putXML("showscheduledfeeds_list_" + apic + "_rss", messageurl);
prop.put("showscheduledfeeds_list_" + apic + "_rss", MultiProtocolURL.escape(messageurl).toString());
prop.putXML("showscheduledfeeds_list_" + apic + "_title", row.get("title", ""));
prop.putXML("showscheduledfeeds_list_" + apic + "_referrer", referrer == null ? "#" : referrer.toNormalform(true));
prop.put("showscheduledfeeds_list_" + apic + "_recording", DateFormat.getDateTimeInstance().format(row.get("recording_date", new Date())));

@ -56,7 +56,7 @@
<form class="search form-inline" action="yacysearch.html" method="get" id="searchform" accept-charset="UTF-8">
<fieldset class="maininput">
<div class="input-group">
<input name="query" id="search" type="text" size="40" maxlength="80" value="#[former]#" onFocus="this.select()" class="form-control searchinput typeahead" />
<input name="query" id="search" type="text" size="40" maxlength="80" value="#[former]#" #(focus)#::autofocus="autofocus"#(/focus)# onFocus="this.select()" class="form-control searchinput typeahead" />
<div class="input-group-btn">
<button type="submit" id="Enter" class="btn btn-primary">Search</button>
</div>

@ -1,8 +1,8 @@
/******************************************************************************
Name: Highslide JS
Version: 3.3.9 (February 15 2008)
Config: default
Author: Torstein Hønsi
Version: 3.3.9 (February 15 2008)
Config: default
Author: Torstein Hønsi
Support: http://vikjavev.no/highslide/forum
Licence:
@ -1090,7 +1090,7 @@ setSize : function (to) {
try {
this.wrapper.style.width = (to.w + 2*this.offsetBorderW) +'px';
this.content.style.width =
((to.imgW && !isNaN(to.imgW)) ? to.imgW : to.w) +'px';
((to.imgW && !isNaN(to.imgW)) ? to.imgW : to.w) +'px';
if (hs.safari) this.content.style.maxWidth = this.content.style.width;
this.content.style.height = to.h +'px';
@ -1197,7 +1197,7 @@ writeCaption : function() {
null, null, true); // to get height
this.caption.innerHTML = '';
this.caption.appendChild(temp);
height = this.caption.childNodes[0].offsetHeight;
height = this.caption.offsetHeight;
this.caption.innerHTML = this.caption.childNodes[0].innerHTML;
}
hs.setStyles(this.caption, { overflow: 'hidden', height: 0, zIndex: 2, marginTop: 0 });

@ -92,7 +92,7 @@ To see a list of all APIs, please visit the <a href="http://www.yacy-websuche.de
<form class="search small" name="searchform" action="" method="get" accept-charset="UTF-8" style="position:fixed;top:8px;z-index:1052;max-width:500px;">
<div class="input-group">
<input type="text" class="form-control searchinput typeahead" size="40" maxlength="200" placeholder="#[promoteSearchPageGreeting]#" name="query" value="#[former]#" onFocus="this.select()" id="search" onclick="document.getElementById('Enter').innerHTML = 'search'"/>
<input type="text" class="form-control searchinput typeahead" size="40" maxlength="200" placeholder="#[promoteSearchPageGreeting]#" name="query" value="#[former]#" autofocus="autofocus" onFocus="this.select()" id="search" onclick="document.getElementById('Enter').innerHTML = 'search'"/>
<div class="input-group-btn">
<button id="Enter" class="btn btn-default" type="submit">search</button>
</div>

@ -172,7 +172,7 @@ To see a list of all APIs, please visit the <a href="http://www.yacy-websuche.de
<h2>#[promoteSearchPageGreeting]#</h2>
<div class="yacylogo"><a href="#[promoteSearchPageGreeting.homepage]#" class="yacylogo"><img src="#[promoteSearchPageGreeting.smallImage]#" alt="yacysearch" /></a></div>
<fieldset class="yacys">
<input type="text" value="#(initsearch)#::#[query]##(/initsearch)#" id="query" size="42" onFocus="this.select()" />
<input type="text" value="#(initsearch)#::#[query]##(/initsearch)#" id="query" size="42" autofocus="autofocus" onFocus="this.select()" />
<input type="submit" onclick="search(); return false;" value="search" onsubmit="search(); return false;" />
</fieldset>
<div id="resultline"></div>

@ -30,7 +30,7 @@
data-content="You can switch to 'Peer-to-Peer Mode' which will cause that your search is done using the other peers in the YaCy network."
>
<input
data-label-text="Privacy"
data-label-text="Peer-to-Peer"
data-on-text="<span>&nbsp;&nbsp;&nbsp;Peer-to-Peer&nbsp;&nbsp;&nbsp;&nbsp;</span>"
data-off-text="<span>&nbsp;&nbsp;&nbsp;&nbsp;Stealth&nbsp;Mode&nbsp;&nbsp;&nbsp;</span>"
data-on-color="warning" data-off-color="success"

Binary file not shown.

Binary file not shown.

@ -83,7 +83,7 @@
<compilation-unit>
<package-root>source</package-root>
<package-root>htroot</package-root>
<classpath mode="compile">lib/J7Zip-modified.jar;lib/apache-mime4j-0.6.jar;lib/bcmail-jdk15-1.46.jar;lib/bcprov-jdk15-1.46.jar;lib/commons-codec-1.7.jar;lib/commons-compress-1.8.1.jar;lib/commons-fileupload-1.2.2.jar;lib/commons-io-2.3.jar;lib/commons-jxpath-1.3.jar;lib/commons-lang-2.6.jar;lib/commons-logging-1.1.3.jar;lib/fontbox-1.8.6.jar;lib/geronimo-stax-api_1.0_spec-1.0.1.jar;lib/guava-16.0.1.jar;lib/htmllexer.jar;lib/httpclient-4.3.5.jar;lib/httpcore-4.3.2.jar;lib/httpmime-4.3.5.jar;lib/icu4j-core.jar;lib/jakarta-oro-2.0.8.jar;lib/jaudiotagger-2.0.4-20111207.115108-15.jar;lib/javax.servlet-api-3.1.0.jar;lib/jcifs-1.3.17.jar;lib/jcl-over-slf4j-1.7.2.jar;lib/jempbox-1.8.6.jar;lib/jetty-client-9.2.2.v20140723.jar;lib/jetty-continuation-9.2.2.v20140723.jar;lib/jetty-deploy-9.2.2.v20140723.jar;lib/jetty-http-9.2.2.v20140723.jar;lib/jetty-io-9.2.2.v20140723.jar;lib/jetty-jmx-9.2.2.v20140723.jar;lib/jetty-proxy-9.2.2.v20140723.jar;lib/jetty-security-9.2.2.v20140723.jar;lib/jetty-server-9.2.2.v20140723.jar;lib/jetty-servlet-9.2.2.v20140723.jar;lib/jetty-servlets-9.2.2.v20140723.jar;lib/jetty-util-9.2.2.v20140723.jar;lib/jetty-webapp-9.2.2.v20140723.jar;lib/jetty-xml-9.2.2.v20140723.jar;lib/jsch-0.1.50.jar;lib/json-simple-1.1.1.jar;lib/jsoup-1.6.3.jar;lib/log4j-over-slf4j-1.7.2.jar;lib/lucene-analyzers-common-4.9.0.jar;lib/lucene-analyzers-phonetic-4.9.0.jar;lib/lucene-classification-4.9.0.jar;lib/lucene-codecs-4.9.0.jar;lib/lucene-core-4.9.0.jar;lib/lucene-facet-4.9.0.jar;lib/lucene-grouping-4.9.0.jar;lib/lucene-highlighter-4.9.0.jar;lib/lucene-join-4.9.0.jar;lib/lucene-memory-4.9.0.jar;lib/lucene-misc-4.9.0.jar;lib/lucene-queries-4.9.0.jar;lib/lucene-queryparser-4.9.0.jar;lib/lucene-spatial-4.9.0.jar;lib/lucene-suggest-4.9.0.jar;lib/metadata-extractor-2.6.2.jar;lib/noggit-0.5.jar;lib/org.restlet.jar;lib/pdfbox-1.8.6.jar;lib/poi-3.10-FINAL-20140208.jar;lib/poi-scratchpad-3.10-FINAL-20140208.jar;lib/slf4j-api-1.7.6.jar;lib/slf4j-jdk14-1.7.2.jar;lib/solr-core-4.9.0.jar
;lib/solr-solrj-4.9.0.jar;lib/spatial4j-0.4.1.jar;lib/webcat-0.1-swf.jar;lib/weupnp-0.1.2.jar;lib/wstx-asl-3.2.9.jar;lib/xercesImpl.jar;lib/xml-apis.jar;lib/zookeeper-3.4.6.jar</classpath>
<classpath mode="compile">lib/J7Zip-modified.jar;lib/apache-mime4j-0.6.jar;lib/bcmail-jdk15-1.46.jar;lib/bcprov-jdk15-1.46.jar;lib/commons-codec-1.7.jar;lib/commons-compress-1.8.1.jar;lib/commons-fileupload-1.2.2.jar;lib/commons-io-2.3.jar;lib/commons-jxpath-1.3.jar;lib/commons-lang-2.6.jar;lib/commons-logging-1.1.3.jar;lib/fontbox-1.8.7.jar;lib/geronimo-stax-api_1.0_spec-1.0.1.jar;lib/guava-16.0.1.jar;lib/htmllexer.jar;lib/httpclient-4.3.5.jar;lib/httpcore-4.3.2.jar;lib/httpmime-4.3.5.jar;lib/icu4j-core.jar;lib/jakarta-oro-2.0.8.jar;lib/jaudiotagger-2.0.4-20111207.115108-15.jar;lib/javax.servlet-api-3.1.0.jar;lib/jcifs-1.3.17.jar;lib/jcl-over-slf4j-1.7.2.jar;lib/jempbox-1.8.7.jar;lib/jetty-client-9.2.3.v20140905.jar;lib/jetty-continuation-9.2.3.v20140905.jar;lib/jetty-deploy-9.2.3.v20140905.jar;lib/jetty-http-9.2.3.v20140905.jar;lib/jetty-io-9.2.3.v20140905.jar;lib/jetty-jmx-9.2.3.v20140905.jar;lib/jetty-proxy-9.2.3.v20140905.jar;lib/jetty-security-9.2.3.v20140905.jar;lib/jetty-server-9.2.3.v20140905.jar;lib/jetty-servlet-9.2.3.v20140905.jar;lib/jetty-servlets-9.2.3.v20140905.jar;lib/jetty-util-9.2.3.v20140905.jar;lib/jetty-webapp-9.2.3.v20140905.jar;lib/jetty-xml-9.2.3.v20140905.jar;lib/jsch-0.1.50.jar;lib/json-simple-1.1.1.jar;lib/jsoup-1.8.1.jar;lib/log4j-over-slf4j-1.7.2.jar;lib/lucene-analyzers-common-4.9.0.jar;lib/lucene-analyzers-phonetic-4.9.0.jar;lib/lucene-classification-4.9.0.jar;lib/lucene-codecs-4.9.0.jar;lib/lucene-core-4.9.0.jar;lib/lucene-facet-4.9.0.jar;lib/lucene-grouping-4.9.0.jar;lib/lucene-highlighter-4.9.0.jar;lib/lucene-join-4.9.0.jar;lib/lucene-memory-4.9.0.jar;lib/lucene-misc-4.9.0.jar;lib/lucene-queries-4.9.0.jar;lib/lucene-queryparser-4.9.0.jar;lib/lucene-spatial-4.9.0.jar;lib/lucene-suggest-4.9.0.jar;lib/metadata-extractor-2.6.2.jar;lib/noggit-0.5.jar;lib/org.restlet.jar;lib/pdfbox-1.8.7.jar;lib/poi-3.10-FINAL-20140208.jar;lib/poi-scratchpad-3.10-FINAL-20140208.jar;lib/slf4j-api-1.7.6.jar;lib/slf4j-jdk14-1.7.2.jar;lib/solr-core-4.9.0.jar
;lib/solr-solrj-4.9.0.jar;lib/spatial4j-0.4.1.jar;lib/webcat-0.1-swf.jar;lib/weupnp-0.1.2.jar;lib/wstx-asl-3.2.9.jar;lib/xercesImpl.jar;lib/xml-apis.jar;lib/zookeeper-3.4.6.jar</classpath>
<built-to>lib/yacycore.jar</built-to>
<source-level>1.7</source-level>
</compilation-unit>

@ -408,7 +408,7 @@
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.7.3</version>
<version>1.8.1</version>
</dependency>
<dependency>
<groupId>log4j</groupId>

@ -66,8 +66,6 @@ import net.yacy.crawler.retrieval.Response;
*/
public class MultiProtocolURL implements Serializable, Comparable<MultiProtocolURL> {
public static final MultiProtocolURL POISON = new MultiProtocolURL(); // poison pill for concurrent link generators
private static final long serialVersionUID = -1173233022912141884L;
private static final long SMB_TIMEOUT = 5000;
@ -373,6 +371,11 @@ public class MultiProtocolURL implements Serializable, Comparable<MultiProtocolU
escape();
}
/**
* Creates a MultiProtocolURL.
* If the path contains '?', the search part is automatically created by splitting the input into path and search part;
* the same applies to anchors ('#').
*/
public MultiProtocolURL(final String protocol, String host, final int port, final String path) throws MalformedURLException {
if (protocol == null) throw new MalformedURLException("protocol is null");
if (host.indexOf(':') >= 0 && host.charAt(0) != '[') host = '[' + host + ']'; // IPv6 host must be enclosed in square brackets
@ -521,9 +524,8 @@ public class MultiProtocolURL implements Serializable, Comparable<MultiProtocolU
if (i < len - 6 && "amp;".equals(s.substring(i + 1, i + 5).toLowerCase())) {
sbuf.append((char)ch); // leave it that way, it is used the right way
} else {
sbuf.append("&amp;"); // this must be urlencoded
sbuf.append("%26"); // this must be urlencoded
}
sbuf.append((char)ch);
} else if (ch == '#') { // RFC 1738 2.2 unsafe char is _not_ encoded because it may already be used for encoding
sbuf.append((char)ch);
} else if (ch == '!' || ch == ':' // unreserved

@ -28,6 +28,7 @@ package net.yacy.data;
import java.io.File;
import java.io.IOException;
import java.net.MalformedURLException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
@ -42,6 +43,7 @@ import net.yacy.cora.date.GenericFormatter;
import net.yacy.cora.document.encoding.ASCII;
import net.yacy.cora.document.encoding.UTF8;
import net.yacy.cora.document.id.DigestURL;
import net.yacy.cora.document.id.MultiProtocolURL;
import net.yacy.cora.order.Base64Order;
import net.yacy.cora.protocol.ClientIdentification;
import net.yacy.cora.protocol.http.HTTPClient;
@ -222,7 +224,6 @@ public class WorkTables extends Tables {
final HTTPClient client = new HTTPClient(ClientIdentification.yacyInternetCrawlerAgent);
client.setTimout(120000);
Tables.Row row;
String url;
LinkedHashMap<String, Integer> l = new LinkedHashMap<String, Integer>();
for (final String pk: pks) {
row = null;
@ -234,20 +235,25 @@ public class WorkTables extends Tables {
ConcurrentLog.logException(e);
}
if (row == null) continue;
url = "http://" + host + ":" + port + UTF8.String(row.get(WorkTables.TABLE_API_COL_URL));
url += "&" + WorkTables.TABLE_API_COL_APICALL_PK + "=" + UTF8.String(row.getPK());
ConcurrentLog.info("WorkTables", "executing url: " + url);
String theapicall = UTF8.String(row.get(WorkTables.TABLE_API_COL_URL)) + "&" + WorkTables.TABLE_API_COL_APICALL_PK + "=" + UTF8.String(row.getPK());
try {
client.GETbytes(url, username, pass, false);
l.put(url, client.getStatusCode());
} catch (final IOException e) {
ConcurrentLog.logException(e);
l.put(url, -1);
// use 4 param MultiProtocolURL to allow api_row_url with searchpart (like url?p=a&p2=b ) in client.GETbytes()
MultiProtocolURL url = new MultiProtocolURL("http", host, port, theapicall);
ConcurrentLog.info("WorkTables", "executing url: " + url.toString());
try {
client.GETbytes(url, username, pass, false); // use GETbytes(MultiProtocolURL,..) form to allow url in parameter (&url=path%
l.put(url.toString(), client.getStatusCode());
} catch (final IOException e) {
ConcurrentLog.logException(e);
l.put(url.toString(), -1);
}
} catch (MalformedURLException ex) {
ConcurrentLog.warn("APICALL", "wrong url in apicall " + theapicall);
}
}
return l;
}
public static int execAPICall(String host, int port, String path, byte[] pk, final String username, final String pass) {
// now call the api URLs and store the result status
final HTTPClient client = new HTTPClient(ClientIdentification.yacyInternetCrawlerAgent);

@ -157,7 +157,7 @@ public class metadataImageParser extends AbstractParser implements Parser {
return new Document[]{new Document(
location,
mimeType,
"UTF-8",
documentCharset,
this,
new HashSet<String>(0), // languages
keywords == null ? new String[]{} : keywords.split(keywords.indexOf(',') > 0 ? "," : " "), // keywords

@ -206,9 +206,9 @@ public class UrlProxyServlet extends ProxyServlet implements Servlet {
final String servletstub = request.getScheme() + "://" + request.getServerName() + ":" + request.getServerPort() + request.getServletPath() + "?url=";
Document doc;
try {
doc = Jsoup.parse(proxyout, UTF8.charset.name(), proxyurl.toString());
doc = Jsoup.parse(proxyout, proxyResponseHeader.getCharacterEncoding(), proxyurl.toString());
} catch (IOException eio) {
response.sendError(HttpServletResponse.SC_INTERNAL_SERVER_ERROR,"Proxy: parser error on " + proxyurl.toString());
response.sendError(HttpServletResponse.SC_INTERNAL_SERVER_ERROR,"Proxy: parser error on " + proxyurl.toString() +"\n\n"+ eio.getMessage());
return;
}

@ -47,6 +47,7 @@ import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.regex.Pattern;
import net.yacy.cora.document.analysis.Classification;
import net.yacy.cora.document.analysis.EnhancedTextProfileSignature;
import net.yacy.cora.document.encoding.ASCII;
import net.yacy.cora.document.id.AnchorURL;
@ -200,7 +201,7 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
* @param doctype
* @return the normalized url
*/
public String addURIAttributes(final SolrInputDocument doc, final boolean allAttr, final DigestURL digestURL, final char doctype) {
public String addURIAttributes(final SolrInputDocument doc, final boolean allAttr, final DigestURL digestURL) {
add(doc, CollectionSchema.id, ASCII.String(digestURL.hash()));
if (allAttr || contains(CollectionSchema.host_id_s)) add(doc, CollectionSchema.host_id_s, digestURL.hosthash());
String us = digestURL.toNormalform(true);
@ -237,9 +238,7 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
if (allAttr || contains(CollectionSchema.url_file_name_s)) add(doc, CollectionSchema.url_file_name_s, filenameStub);
if (allAttr || contains(CollectionSchema.url_file_name_tokens_t)) add(doc, CollectionSchema.url_file_name_tokens_t, MultiProtocolURL.toTokens(filenameStub));
if (allAttr || contains(CollectionSchema.url_file_ext_s)) add(doc, CollectionSchema.url_file_ext_s, extension);
if (allAttr || contains(CollectionSchema.content_type)) add(doc, CollectionSchema.content_type, Response.doctype2mime(extension, doctype));
Map<String, String> searchpart = digestURL.getSearchpartMap();
if (searchpart == null) {
if (allAttr || contains(CollectionSchema.url_parameter_i)) add(doc, CollectionSchema.url_parameter_i, 0);
@ -253,13 +252,12 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
public SolrInputDocument metadata2solr(final URIMetadataNode md) {
final SolrInputDocument doc = new SolrInputDocument();
boolean allAttr = this.isEmpty();
SolrInputDocument doc = toSolrInputDocument(md); //urimetadatanode stores some values in private fields, add now to sorldocument
addURIAttributes(doc, allAttr, md.url(), md.doctype());
boolean allAttr = this.isEmpty();
addURIAttributes(doc, allAttr, md.url());
String title = md.dc_title();
if (allAttr || contains(CollectionSchema.title)) add(doc, CollectionSchema.title, new String[]{title});
if (allAttr || contains(CollectionSchema.title_count_i)) add(doc, CollectionSchema.title_count_i, 1);
if (allAttr || contains(CollectionSchema.title_chars_val)) {
Integer[] cv = new Integer[]{new Integer(title.length())};
@ -282,10 +280,6 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
add(doc, CollectionSchema.description_words_val, description_exist ? new Integer[]{new Integer(description.length() == 0 ? 0 : CommonPattern.SPACE.split(description).length)} : new Integer[0]);
}
if (allAttr || contains(CollectionSchema.author)) add(doc, CollectionSchema.author, md.dc_creator());
if (allAttr || contains(CollectionSchema.last_modified)) add(doc, CollectionSchema.last_modified, md.moddate());
if (allAttr || contains(CollectionSchema.wordcount_i)) add(doc, CollectionSchema.wordcount_i, md.wordCount());
String keywords = md.dc_subject();
Bitfield flags = md.flags();
if (flags.get(Condenser.flag_cat_indexof)) {
@ -310,13 +304,6 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
if (allAttr || contains(CollectionSchema.httpstatus_i)) add(doc, CollectionSchema.httpstatus_i, 200);
// fields that are in URIMetadataRow additional to yacy2solr basic requirement
if (allAttr || contains(CollectionSchema.load_date_dt)) add(doc, CollectionSchema.load_date_dt, md.loaddate());
if (allAttr || contains(CollectionSchema.fresh_date_dt)) add(doc, CollectionSchema.fresh_date_dt, md.freshdate());
if ((allAttr || contains(CollectionSchema.referrer_id_s)) && md.referrerHash() != null) add(doc, CollectionSchema.referrer_id_s, ASCII.String(md.referrerHash()));
if (allAttr || contains(CollectionSchema.md5_s)) add(doc, CollectionSchema.md5_s, md.md5());
if (allAttr || contains(CollectionSchema.publisher_t)) add(doc, CollectionSchema.publisher_t, md.dc_publisher());
if (allAttr || contains(CollectionSchema.language_s)) add(doc, CollectionSchema.language_s, md.language());
if (allAttr || contains(CollectionSchema.size_i)) add(doc, CollectionSchema.size_i, md.size());
if (allAttr || contains(CollectionSchema.audiolinkscount_i)) add(doc, CollectionSchema.audiolinkscount_i, md.laudio());
if (allAttr || contains(CollectionSchema.videolinkscount_i)) add(doc, CollectionSchema.videolinkscount_i, md.lvideo());
if (allAttr || contains(CollectionSchema.applinkscount_i)) add(doc, CollectionSchema.applinkscount_i, md.lapp());
@ -342,7 +329,7 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
text = text.trim();
if (!text.isEmpty() && text.charAt(text.length() - 1) == '.') sb.append(text); else sb.append(text).append('.');
}
public static class Subgraph {
public final ArrayList<String>[] urlProtocols, urlStubs, urlAnchorTexts;
@SuppressWarnings("unchecked")
@ -404,8 +391,9 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
SolrVector doc = new SolrVector();
final DigestURL digestURL = document.dc_source();
boolean allAttr = this.isEmpty();
String url = addURIAttributes(doc, allAttr, digestURL, Response.docType(digestURL));
String url = addURIAttributes(doc, allAttr, digestURL);
if (allAttr || contains(CollectionSchema.content_type)) add(doc, CollectionSchema.content_type, new String[]{document.dc_format()});
Set<ProcessType> processTypes = new LinkedHashSet<ProcessType>();
String host = digestURL.getHost();
@ -476,7 +464,6 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
if (author == null || author.length() == 0) author = document.dc_publisher();
add(doc, CollectionSchema.author, author);
}
if (allAttr || contains(CollectionSchema.content_type)) add(doc, CollectionSchema.content_type, new String[]{document.dc_format()});
if (allAttr || contains(CollectionSchema.last_modified)) {
Date lastModified = responseHeader == null ? new Date() : responseHeader.lastModified();
if (lastModified == null) lastModified = new Date();
@ -1858,7 +1845,10 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
assert allAttr || configuration.contains(CollectionSchema.failreason_s);
final SolrInputDocument doc = new SolrInputDocument();
String url = configuration.addURIAttributes(doc, allAttr, this.getDigestURL(), Response.docType(this.getDigestURL()));
String url = configuration.addURIAttributes(doc, allAttr, this.getDigestURL());
if (allAttr || configuration.contains(CollectionSchema.content_type)) configuration.add(doc, CollectionSchema.content_type, new String[]{Classification.url2mime(this.digestURL)});
if (allAttr || configuration.contains(CollectionSchema.load_date_dt)) configuration.add(doc, CollectionSchema.load_date_dt, getFailDate());
if (allAttr || configuration.contains(CollectionSchema.crawldepth_i)) configuration.add(doc, CollectionSchema.crawldepth_i, this.crawldepth);

@ -136,7 +136,7 @@ public final class yacy {
try {
"a".isEmpty(); // needs at least Java 1.6
} catch (final NoSuchMethodError e) {
System.err.println("STARTUP: Java Version too low. You need at least Java 1.6 to run YaCy");
System.err.println("STARTUP: Java Version too low. You need at least Java 1.7 to run YaCy");
System.exit(-1);
}

Loading…
Cancel
Save