Merge branch 'master' of ssh://git@gitorious.org/yacy/rc1.git

pull/1/head
Michael Peter Christen 11 years ago
commit 3073c69aee

@ -1,92 +1,92 @@
<?xml version="1.0" encoding="UTF-8"?> <?xml version="1.0" encoding="UTF-8"?>
<classpath> <classpath>
<classpathentry kind="src" path="source"/> <classpathentry kind="src" path="source"/>
<classpathentry excluding="api/|env/|processing/domaingraph/applet/|yacy/|api/bookmarks/|api/ymarks/|api/bookmarks/posts/|api/bookmarks/tags/|api/bookmarks/xbel/|solr/|gsa/|solr/collection1/|api/blacklists/" kind="src" path="htroot"/> <classpathentry excluding="api/|env/|processing/domaingraph/applet/|yacy/|api/bookmarks/|api/ymarks/|api/bookmarks/posts/|api/bookmarks/tags/|api/bookmarks/xbel/|solr/|gsa/|solr/collection1/|api/blacklists/" kind="src" path="htroot"/>
<classpathentry excluding="bookmarks/|ymarks/|bookmarks/posts/|bookmarks/tags/|bookmarks/xbel/|blacklists/" kind="src" path="htroot/api"/> <classpathentry excluding="bookmarks/|ymarks/|bookmarks/posts/|bookmarks/tags/|bookmarks/xbel/|blacklists/" kind="src" path="htroot/api"/>
<classpathentry kind="src" path="htroot/env"/> <classpathentry kind="src" path="htroot/env"/>
<classpathentry kind="src" path="htroot/yacy"/> <classpathentry kind="src" path="htroot/yacy"/>
<classpathentry excluding="posts/|tags/|xbel/" kind="src" path="htroot/api/bookmarks"/> <classpathentry excluding="posts/|tags/|xbel/" kind="src" path="htroot/api/bookmarks"/>
<classpathentry kind="src" path="htroot/api/ymarks"/> <classpathentry kind="src" path="htroot/api/ymarks"/>
<classpathentry kind="src" path="htroot/api/bookmarks/posts"/> <classpathentry kind="src" path="htroot/api/bookmarks/posts"/>
<classpathentry kind="src" path="htroot/api/bookmarks/tags"/> <classpathentry kind="src" path="htroot/api/bookmarks/tags"/>
<classpathentry kind="src" path="htroot/api/bookmarks/xbel"/> <classpathentry kind="src" path="htroot/api/bookmarks/xbel"/>
<classpathentry kind="src" path="htroot/api/blacklists"/> <classpathentry kind="src" path="htroot/api/blacklists"/>
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/> <classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/>
<classpathentry kind="lib" path="lib/commons-logging-1.1.3.jar"/> <classpathentry kind="lib" path="lib/commons-logging-1.1.3.jar"/>
<classpathentry kind="lib" path="lib/J7Zip-modified.jar"/> <classpathentry kind="lib" path="lib/J7Zip-modified.jar"/>
<classpathentry kind="lib" path="lib/webcat-0.1-swf.jar"/> <classpathentry kind="lib" path="lib/webcat-0.1-swf.jar"/>
<classpathentry kind="lib" path="lib/commons-jxpath-1.3.jar"/> <classpathentry kind="lib" path="lib/commons-jxpath-1.3.jar"/>
<classpathentry kind="lib" path="lib/jsch-0.1.50.jar"/> <classpathentry kind="lib" path="lib/jsch-0.1.50.jar"/>
<classpathentry kind="lib" path="lib/jakarta-oro-2.0.8.jar"/> <classpathentry kind="lib" path="lib/jakarta-oro-2.0.8.jar"/>
<classpathentry kind="lib" path="lib/apache-mime4j-0.6.jar"/> <classpathentry kind="lib" path="lib/apache-mime4j-0.6.jar"/>
<classpathentry kind="lib" path="lib/commons-fileupload-1.2.2.jar"/> <classpathentry kind="lib" path="lib/commons-fileupload-1.2.2.jar"/>
<classpathentry kind="lib" path="lib/json-simple-1.1.1.jar"/> <classpathentry kind="lib" path="lib/json-simple-1.1.1.jar"/>
<classpathentry kind="lib" path="lib/xercesImpl.jar"/> <classpathentry kind="lib" path="lib/xercesImpl.jar"/>
<classpathentry kind="lib" path="lib/xml-apis.jar"/> <classpathentry kind="lib" path="lib/xml-apis.jar"/>
<classpathentry kind="lib" path="lib/geronimo-stax-api_1.0_spec-1.0.1.jar"/> <classpathentry kind="lib" path="lib/geronimo-stax-api_1.0_spec-1.0.1.jar"/>
<classpathentry kind="lib" path="lib/commons-compress-1.8.1.jar"/> <classpathentry kind="lib" path="lib/commons-compress-1.8.1.jar"/>
<classpathentry kind="lib" path="lib/commons-lang-2.6.jar"/> <classpathentry kind="lib" path="lib/commons-lang-2.6.jar"/>
<classpathentry kind="lib" path="lib/jaudiotagger-2.0.4-20111207.115108-15.jar"/> <classpathentry kind="lib" path="lib/jaudiotagger-2.0.4-20111207.115108-15.jar"/>
<classpathentry kind="lib" path="lib/commons-codec-1.7.jar"/> <classpathentry kind="lib" path="lib/commons-codec-1.7.jar"/>
<classpathentry kind="lib" path="lib/jcl-over-slf4j-1.7.2.jar"/> <classpathentry kind="lib" path="lib/jcl-over-slf4j-1.7.2.jar"/>
<classpathentry kind="lib" path="lib/wstx-asl-3.2.9.jar"/> <classpathentry kind="lib" path="lib/wstx-asl-3.2.9.jar"/>
<classpathentry kind="lib" path="lib/slf4j-jdk14-1.7.2.jar"/> <classpathentry kind="lib" path="lib/slf4j-jdk14-1.7.2.jar"/>
<classpathentry kind="lib" path="lib/log4j-over-slf4j-1.7.2.jar"/> <classpathentry kind="lib" path="lib/log4j-over-slf4j-1.7.2.jar"/>
<classpathentry kind="lib" path="lib/httpclient-4.3.5.jar"/> <classpathentry kind="lib" path="lib/httpclient-4.3.5.jar"/>
<classpathentry kind="lib" path="lib/httpcore-4.3.2.jar"/> <classpathentry kind="lib" path="lib/httpcore-4.3.2.jar"/>
<classpathentry kind="lib" path="lib/httpmime-4.3.5.jar"/> <classpathentry kind="lib" path="lib/httpmime-4.3.5.jar"/>
<classpathentry kind="lib" path="lib/noggit-0.5.jar"/> <classpathentry kind="lib" path="lib/noggit-0.5.jar"/>
<classpathentry kind="lib" path="lib/metadata-extractor-2.6.2.jar"/> <classpathentry kind="lib" path="lib/metadata-extractor-2.6.2.jar"/>
<classpathentry kind="lib" path="lib/jcifs-1.3.17.jar"/> <classpathentry kind="lib" path="lib/jcifs-1.3.17.jar"/>
<classpathentry kind="lib" path="lib/guava-16.0.1.jar"/> <classpathentry kind="lib" path="lib/guava-16.0.1.jar"/>
<classpathentry kind="lib" path="lib/spatial4j-0.4.1.jar"/> <classpathentry kind="lib" path="lib/spatial4j-0.4.1.jar"/>
<classpathentry kind="lib" path="lib/zookeeper-3.4.6.jar"/> <classpathentry kind="lib" path="lib/zookeeper-3.4.6.jar"/>
<classpathentry kind="lib" path="lib/org.restlet.jar"/> <classpathentry kind="lib" path="lib/org.restlet.jar"/>
<classpathentry kind="lib" path="lib/fontbox-1.8.6.jar"/> <classpathentry kind="lib" path="lib/fontbox-1.8.7.jar"/>
<classpathentry kind="lib" path="lib/jempbox-1.8.6.jar"/> <classpathentry kind="lib" path="lib/jempbox-1.8.7.jar"/>
<classpathentry kind="lib" path="lib/pdfbox-1.8.6.jar"/> <classpathentry kind="lib" path="lib/pdfbox-1.8.7.jar"/>
<classpathentry kind="lib" path="lib/bcmail-jdk15-1.46.jar"/> <classpathentry kind="lib" path="lib/bcmail-jdk15-1.46.jar"/>
<classpathentry kind="lib" path="lib/bcprov-jdk15-1.46.jar"/> <classpathentry kind="lib" path="lib/bcprov-jdk15-1.46.jar"/>
<classpathentry kind="lib" path="lib/poi-3.10-FINAL-20140208.jar"/> <classpathentry kind="lib" path="lib/poi-3.10-FINAL-20140208.jar"/>
<classpathentry kind="lib" path="lib/poi-scratchpad-3.10-FINAL-20140208.jar"/> <classpathentry kind="lib" path="lib/poi-scratchpad-3.10-FINAL-20140208.jar"/>
<classpathentry kind="lib" path="lib/commons-io-2.3.jar"/> <classpathentry kind="lib" path="lib/commons-io-2.3.jar"/>
<classpathentry kind="lib" path="lib/slf4j-api-1.7.6.jar"/> <classpathentry kind="lib" path="lib/slf4j-api-1.7.6.jar"/>
<classpathentry kind="lib" path="lib/lucene-analyzers-common-4.9.0.jar"/> <classpathentry kind="lib" path="lib/lucene-analyzers-common-4.9.0.jar"/>
<classpathentry kind="lib" path="lib/lucene-analyzers-phonetic-4.9.0.jar"/> <classpathentry kind="lib" path="lib/lucene-analyzers-phonetic-4.9.0.jar"/>
<classpathentry kind="lib" path="lib/lucene-classification-4.9.0.jar"/> <classpathentry kind="lib" path="lib/lucene-classification-4.9.0.jar"/>
<classpathentry kind="lib" path="lib/lucene-codecs-4.9.0.jar"/> <classpathentry kind="lib" path="lib/lucene-codecs-4.9.0.jar"/>
<classpathentry kind="lib" path="lib/lucene-core-4.9.0.jar"/> <classpathentry kind="lib" path="lib/lucene-core-4.9.0.jar"/>
<classpathentry kind="lib" path="lib/lucene-facet-4.9.0.jar"/> <classpathentry kind="lib" path="lib/lucene-facet-4.9.0.jar"/>
<classpathentry kind="lib" path="lib/lucene-grouping-4.9.0.jar"/> <classpathentry kind="lib" path="lib/lucene-grouping-4.9.0.jar"/>
<classpathentry kind="lib" path="lib/lucene-highlighter-4.9.0.jar"/> <classpathentry kind="lib" path="lib/lucene-highlighter-4.9.0.jar"/>
<classpathentry kind="lib" path="lib/lucene-join-4.9.0.jar"/> <classpathentry kind="lib" path="lib/lucene-join-4.9.0.jar"/>
<classpathentry kind="lib" path="lib/lucene-memory-4.9.0.jar"/> <classpathentry kind="lib" path="lib/lucene-memory-4.9.0.jar"/>
<classpathentry kind="lib" path="lib/lucene-misc-4.9.0.jar"/> <classpathentry kind="lib" path="lib/lucene-misc-4.9.0.jar"/>
<classpathentry kind="lib" path="lib/lucene-queries-4.9.0.jar"/> <classpathentry kind="lib" path="lib/lucene-queries-4.9.0.jar"/>
<classpathentry kind="lib" path="lib/lucene-queryparser-4.9.0.jar"/> <classpathentry kind="lib" path="lib/lucene-queryparser-4.9.0.jar"/>
<classpathentry kind="lib" path="lib/lucene-spatial-4.9.0.jar"/> <classpathentry kind="lib" path="lib/lucene-spatial-4.9.0.jar"/>
<classpathentry kind="lib" path="lib/lucene-suggest-4.9.0.jar"/> <classpathentry kind="lib" path="lib/lucene-suggest-4.9.0.jar"/>
<classpathentry kind="lib" path="lib/solr-core-4.9.0.jar"/> <classpathentry kind="lib" path="lib/solr-core-4.9.0.jar"/>
<classpathentry kind="lib" path="lib/solr-solrj-4.9.0.jar"/> <classpathentry kind="lib" path="lib/solr-solrj-4.9.0.jar"/>
<classpathentry kind="con" path="org.eclipse.jdt.junit.JUNIT_CONTAINER/4"/> <classpathentry kind="con" path="org.eclipse.jdt.junit.JUNIT_CONTAINER/4"/>
<classpathentry kind="lib" path="lib/icu4j-core.jar"/> <classpathentry kind="lib" path="lib/icu4j-core.jar"/>
<classpathentry kind="lib" path="lib/htmllexer.jar"/> <classpathentry kind="lib" path="lib/htmllexer.jar"/>
<classpathentry kind="lib" path="lib/jsoup-1.6.3.jar"/> <classpathentry kind="lib" path="lib/jsoup-1.6.3.jar"/>
<classpathentry kind="lib" path="lib/jetty-client-9.2.2.v20140723.jar"/> <classpathentry kind="lib" path="lib/jetty-client-9.2.3.v20140905.jar"/>
<classpathentry kind="lib" path="lib/jetty-continuation-9.2.2.v20140723.jar"/> <classpathentry kind="lib" path="lib/jetty-continuation-9.2.3.v20140905.jar"/>
<classpathentry kind="lib" path="lib/jetty-deploy-9.2.2.v20140723.jar"/> <classpathentry kind="lib" path="lib/jetty-deploy-9.2.3.v20140905.jar"/>
<classpathentry kind="lib" path="lib/jetty-http-9.2.2.v20140723.jar"/> <classpathentry kind="lib" path="lib/jetty-http-9.2.3.v20140905.jar"/>
<classpathentry kind="lib" path="lib/jetty-io-9.2.2.v20140723.jar"/> <classpathentry kind="lib" path="lib/jetty-io-9.2.3.v20140905.jar"/>
<classpathentry kind="lib" path="lib/jetty-jmx-9.2.2.v20140723.jar"/> <classpathentry kind="lib" path="lib/jetty-jmx-9.2.3.v20140905.jar"/>
<classpathentry kind="lib" path="lib/jetty-proxy-9.2.2.v20140723.jar"/> <classpathentry kind="lib" path="lib/jetty-proxy-9.2.3.v20140905.jar"/>
<classpathentry kind="lib" path="lib/jetty-security-9.2.2.v20140723.jar"/> <classpathentry kind="lib" path="lib/jetty-security-9.2.3.v20140905.jar"/>
<classpathentry kind="lib" path="lib/jetty-server-9.2.2.v20140723.jar"/> <classpathentry kind="lib" path="lib/jetty-server-9.2.3.v20140905.jar"/>
<classpathentry kind="lib" path="lib/jetty-servlet-9.2.2.v20140723.jar"/> <classpathentry kind="lib" path="lib/jetty-servlet-9.2.3.v20140905.jar"/>
<classpathentry kind="lib" path="lib/jetty-servlets-9.2.2.v20140723.jar"/> <classpathentry kind="lib" path="lib/jetty-servlets-9.2.3.v20140905.jar"/>
<classpathentry kind="lib" path="lib/jetty-util-9.2.2.v20140723.jar"/> <classpathentry kind="lib" path="lib/jetty-util-9.2.3.v20140905.jar"/>
<classpathentry kind="lib" path="lib/jetty-webapp-9.2.2.v20140723.jar"/> <classpathentry kind="lib" path="lib/jetty-webapp-9.2.3.v20140905.jar"/>
<classpathentry kind="lib" path="lib/jetty-xml-9.2.2.v20140723.jar"/> <classpathentry kind="lib" path="lib/jetty-xml-9.2.3.v20140905.jar"/>
<classpathentry kind="lib" path="lib/javax.servlet-api-3.1.0.jar"/> <classpathentry kind="lib" path="lib/javax.servlet-api-3.1.0.jar"/>
<classpathentry kind="lib" path="lib/weupnp-0.1.2.jar"/> <classpathentry kind="lib" path="lib/weupnp-0.1.2.jar"/>
<classpathentry kind="output" path="gen"/> <classpathentry kind="output" path="gen"/>
</classpath> </classpath>

@ -47,7 +47,7 @@
<string>$JAVAROOT/lib/commons-jxpath-1.3.jar</string> <string>$JAVAROOT/lib/commons-jxpath-1.3.jar</string>
<string>$JAVAROOT/lib/commons-lang-2.6.jar</string> <string>$JAVAROOT/lib/commons-lang-2.6.jar</string>
<string>$JAVAROOT/lib/commons-logging-1.1.3.jar</string> <string>$JAVAROOT/lib/commons-logging-1.1.3.jar</string>
<string>$JAVAROOT/lib/fontbox-1.8.6.jar</string> <string>$JAVAROOT/lib/fontbox-1.8.7.jar</string>
<string>$JAVAROOT/lib/geronimo-stax-api_1.0_spec-1.0.1.jar</string> <string>$JAVAROOT/lib/geronimo-stax-api_1.0_spec-1.0.1.jar</string>
<string>$JAVAROOT/lib/guava-16.0.1.jar</string> <string>$JAVAROOT/lib/guava-16.0.1.jar</string>
<string>$JAVAROOT/lib/htmllexer.jar</string> <string>$JAVAROOT/lib/htmllexer.jar</string>
@ -60,22 +60,22 @@
<string>$JAVAROOT/lib/jaudiotagger-2.0.4-20111207.115108-15.jar</string> <string>$JAVAROOT/lib/jaudiotagger-2.0.4-20111207.115108-15.jar</string>
<string>$JAVAROOT/lib/jcifs-1.3.17.jar</string> <string>$JAVAROOT/lib/jcifs-1.3.17.jar</string>
<string>$JAVAROOT/lib/jcl-over-slf4j-1.7.2.jar</string> <string>$JAVAROOT/lib/jcl-over-slf4j-1.7.2.jar</string>
<string>$JAVAROOT/lib/jempbox-1.8.6.jar</string> <string>$JAVAROOT/lib/jempbox-1.8.7.jar</string>
<string>$JAVAROOT/lib/javax.servlet-api-3.1.0.jar</string> <string>$JAVAROOT/lib/javax.servlet-api-3.1.0.jar</string>
<string>$JAVAROOT/lib/jetty-client-9.2.2.v20140723.jar</string> <string>$JAVAROOT/lib/jetty-client-9.2.3.v20140905.jar</string>
<string>$JAVAROOT/lib/jetty-continuation-9.2.2.v20140723.jar</string> <string>$JAVAROOT/lib/jetty-continuation-9.2.3.v20140905.jar</string>
<string>$JAVAROOT/lib/jetty-deploy-9.2.2.v20140723.jar</string> <string>$JAVAROOT/lib/jetty-deploy-9.2.3.v20140905.jar</string>
<string>$JAVAROOT/lib/jetty-http-9.2.2.v20140723.jar</string> <string>$JAVAROOT/lib/jetty-http-9.2.3.v20140905.jar</string>
<string>$JAVAROOT/lib/jetty-io-9.2.2.v20140723.jar</string> <string>$JAVAROOT/lib/jetty-io-9.2.3.v20140905.jar</string>
<string>$JAVAROOT/lib/jetty-jmx-9.2.2.v20140723.jar</string> <string>$JAVAROOT/lib/jetty-jmx-9.2.3.v20140905.jar</string>
<string>$JAVAROOT/lib/jetty-proxy-9.2.2.v20140723.jar</string> <string>$JAVAROOT/lib/jetty-proxy-9.2.3.v20140905.jar</string>
<string>$JAVAROOT/lib/jetty-security-9.2.2.v20140723.jar</string> <string>$JAVAROOT/lib/jetty-security-9.2.3.v20140905.jar</string>
<string>$JAVAROOT/lib/jetty-server-9.2.2.v20140723.jar</string> <string>$JAVAROOT/lib/jetty-server-9.2.3.v20140905.jar</string>
<string>$JAVAROOT/lib/jetty-servlet-9.2.2.v20140723.jar</string> <string>$JAVAROOT/lib/jetty-servlet-9.2.3.v20140905.jar</string>
<string>$JAVAROOT/lib/jetty-servlets-9.2.2.v20140723.jar</string> <string>$JAVAROOT/lib/jetty-servlets-9.2.3.v20140905.jar</string>
<string>$JAVAROOT/lib/jetty-util-9.2.2.v20140723.jar</string> <string>$JAVAROOT/lib/jetty-util-9.2.3.v20140905.jar</string>
<string>$JAVAROOT/lib/jetty-webapp-9.2.2.v20140723.jar</string> <string>$JAVAROOT/lib/jetty-webapp-9.2.3.v20140905.jar</string>
<string>$JAVAROOT/lib/jetty-xml-9.2.2.v20140723.jar</string> <string>$JAVAROOT/lib/jetty-xml-9.2.3.v20140905.jar</string>
<string>$JAVAROOT/lib/jsch-0.1.50.jar</string> <string>$JAVAROOT/lib/jsch-0.1.50.jar</string>
<string>$JAVAROOT/lib/json-simple-1.1.1.jar</string> <string>$JAVAROOT/lib/json-simple-1.1.1.jar</string>
<string>$JAVAROOT/lib/jsoup-1.6.3.jar</string> <string>$JAVAROOT/lib/jsoup-1.6.3.jar</string>
@ -98,7 +98,7 @@
<string>$JAVAROOT/lib/metadata-extractor-2.6.2.jar</string> <string>$JAVAROOT/lib/metadata-extractor-2.6.2.jar</string>
<string>$JAVAROOT/lib/noggit-0.5.jar</string> <string>$JAVAROOT/lib/noggit-0.5.jar</string>
<string>$JAVAROOT/lib/org.restlet.jar</string> <string>$JAVAROOT/lib/org.restlet.jar</string>
<string>$JAVAROOT/lib/pdfbox-1.8.6.jar</string> <string>$JAVAROOT/lib/pdfbox-1.8.7.jar</string>
<string>$JAVAROOT/lib/poi-3.10-FINAL-20140208.jar</string> <string>$JAVAROOT/lib/poi-3.10-FINAL-20140208.jar</string>
<string>$JAVAROOT/lib/poi-scratchpad-3.10-FINAL-20140208.jar</string> <string>$JAVAROOT/lib/poi-scratchpad-3.10-FINAL-20140208.jar</string>
<string>$JAVAROOT/lib/slf4j-api-1.7.6.jar</string> <string>$JAVAROOT/lib/slf4j-api-1.7.6.jar</string>

@ -171,7 +171,7 @@
<pathelement location="${lib}/commons-jxpath-1.3.jar" /> <pathelement location="${lib}/commons-jxpath-1.3.jar" />
<pathelement location="${lib}/commons-lang-2.6.jar" /> <pathelement location="${lib}/commons-lang-2.6.jar" />
<pathelement location="${lib}/commons-logging-1.1.3.jar" /> <pathelement location="${lib}/commons-logging-1.1.3.jar" />
<pathelement location="${lib}/fontbox-1.8.6.jar" /> <pathelement location="${lib}/fontbox-1.8.7.jar" />
<pathelement location="${lib}/geronimo-stax-api_1.0_spec-1.0.1.jar" /> <pathelement location="${lib}/geronimo-stax-api_1.0_spec-1.0.1.jar" />
<pathelement location="${lib}/guava-16.0.1.jar" /> <pathelement location="${lib}/guava-16.0.1.jar" />
<pathelement location="${lib}/htmllexer.jar" /> <pathelement location="${lib}/htmllexer.jar" />
@ -184,22 +184,22 @@
<pathelement location="${lib}/jaudiotagger-2.0.4-20111207.115108-15.jar" /> <pathelement location="${lib}/jaudiotagger-2.0.4-20111207.115108-15.jar" />
<pathelement location="${lib}/jcifs-1.3.17.jar" /> <pathelement location="${lib}/jcifs-1.3.17.jar" />
<pathelement location="${lib}/jcl-over-slf4j-1.7.2.jar" /> <pathelement location="${lib}/jcl-over-slf4j-1.7.2.jar" />
<pathelement location="${lib}/jempbox-1.8.6" /> <pathelement location="${lib}/jempbox-1.8.7.jar" />
<pathelement location="${lib}/javax.servlet-api-3.1.0.jar" /> <pathelement location="${lib}/javax.servlet-api-3.1.0.jar" />
<pathelement location="${lib}/jetty-client-9.2.2.v20140723.jar" /> <pathelement location="${lib}/jetty-client-9.2.3.v20140905.jar" />
<pathelement location="${lib}/jetty-continuation-9.2.2.v20140723.jar" /> <pathelement location="${lib}/jetty-continuation-9.2.3.v20140905.jar" />
<pathelement location="${lib}/jetty-deploy-9.2.2.v20140723.jar" /> <pathelement location="${lib}/jetty-deploy-9.2.3.v20140905.jar" />
<pathelement location="${lib}/jetty-http-9.2.2.v20140723.jar" /> <pathelement location="${lib}/jetty-http-9.2.3.v20140905.jar" />
<pathelement location="${lib}/jetty-io-9.2.2.v20140723.jar" /> <pathelement location="${lib}/jetty-io-9.2.3.v20140905.jar" />
<pathelement location="${lib}/jetty-jmx-9.2.2.v20140723.jar" /> <pathelement location="${lib}/jetty-jmx-9.2.3.v20140905.jar" />
<pathelement location="${lib}/jetty-proxy-9.2.2.v20140723.jar" /> <pathelement location="${lib}/jetty-proxy-9.2.3.v20140905.jar" />
<pathelement location="${lib}/jetty-security-9.2.2.v20140723.jar" /> <pathelement location="${lib}/jetty-security-9.2.3.v20140905.jar" />
<pathelement location="${lib}/jetty-server-9.2.2.v20140723.jar" /> <pathelement location="${lib}/jetty-server-9.2.3.v20140905.jar" />
<pathelement location="${lib}/jetty-servlet-9.2.2.v20140723.jar" /> <pathelement location="${lib}/jetty-servlet-9.2.3.v20140905.jar" />
<pathelement location="${lib}/jetty-servlets-9.2.2.v20140723.jar" /> <pathelement location="${lib}/jetty-servlets-9.2.3.v20140905.jar" />
<pathelement location="${lib}/jetty-util-9.2.2.v20140723.jar" /> <pathelement location="${lib}/jetty-util-9.2.3.v20140905.jar" />
<pathelement location="${lib}/jetty-webapp-9.2.2.v20140723.jar" /> <pathelement location="${lib}/jetty-webapp-9.2.3.v20140905.jar" />
<pathelement location="${lib}/jetty-xml-9.2.2.v20140723.jar" /> <pathelement location="${lib}/jetty-xml-9.2.3.v20140905.jar" />
<pathelement location="${lib}/jsch-0.1.50.jar" /> <pathelement location="${lib}/jsch-0.1.50.jar" />
<pathelement location="${lib}/json-simple-1.1.1.jar" /> <pathelement location="${lib}/json-simple-1.1.1.jar" />
<pathelement location="${lib}/jsoup-1.6.3.jar" /> <pathelement location="${lib}/jsoup-1.6.3.jar" />
@ -222,7 +222,7 @@
<pathelement location="${lib}/metadata-extractor-2.6.2.jar" /> <pathelement location="${lib}/metadata-extractor-2.6.2.jar" />
<pathelement location="${lib}/noggit-0.5.jar" /> <pathelement location="${lib}/noggit-0.5.jar" />
<pathelement location="${lib}/org.restlet.jar" /> <pathelement location="${lib}/org.restlet.jar" />
<pathelement location="${lib}/pdfbox-1.8.6.jar" /> <pathelement location="${lib}/pdfbox-1.8.7.jar" />
<pathelement location="${lib}/poi-3.10-FINAL-20140208.jar" /> <pathelement location="${lib}/poi-3.10-FINAL-20140208.jar" />
<pathelement location="${lib}/poi-scratchpad-3.10-FINAL-20140208.jar" /> <pathelement location="${lib}/poi-scratchpad-3.10-FINAL-20140208.jar" />
<pathelement location="${lib}/slf4j-api-1.7.6.jar" /> <pathelement location="${lib}/slf4j-api-1.7.6.jar" />

@ -12,7 +12,7 @@
company's own web pages.<br /> company's own web pages.<br />
Filter have to be entered as IP, IP range or first part of allowed IP's separated by comma (e.g. 10.100.0-100.0-100, 127. ) Filter have to be entered as IP, IP range or first part of allowed IP's separated by comma (e.g. 10.100.0-100.0-100, 127. )
further details on format see Jetty further details on format see Jetty
<a href="http://download.eclipse.org/jetty/8.1.14.v20131031/apidocs/org/eclipse/jetty/server/handler/IPAccessHandler.html" target="_blank">IPAccessHandler</a> docu. <a href="http://download.eclipse.org/jetty/stable-9/apidocs/org/eclipse/jetty/server/handler/IPAccessHandler.html" target="_blank">IPAccessHandler</a> docu.
</td> </td>
</tr> </tr>
<!-- <!--

@ -1579,6 +1579,7 @@ only the local index==Nur lokaler Index
Query Operators==Such-Operatoren Query Operators==Such-Operatoren
restrictions==Restriktionen restrictions==Restriktionen
only urls with the &lt;phrase&gt; in the url==Nur URLs, welche &lt;phrase&gt; enthalten only urls with the &lt;phrase&gt; in the url==Nur URLs, welche &lt;phrase&gt; enthalten
only urls with the &lt;phrase&gt; within outbound links of the document==Nur URLs, die &lt;phrase&gt; in einem Link enthalten
only urls with extension==Nur URLs mit der Dateinamenserweiterung only urls with extension==Nur URLs mit der Dateinamenserweiterung
only urls from host==Nur URLs vom Server only urls from host==Nur URLs vom Server
only pages with as-author-anotated==Nur Seiten mit dem angegebenen Autor only pages with as-author-anotated==Nur Seiten mit dem angegebenen Autor
@ -1598,7 +1599,6 @@ doublequotes==Anführungszeichen
prefer given language==Angegebene Sprache bevorzugen prefer given language==Angegebene Sprache bevorzugen
an ISO639 2-letter code==2-Buchstaben-Ländercode nach ISO639 an ISO639 2-letter code==2-Buchstaben-Ländercode nach ISO639
heuristics==Heuristiken heuristics==Heuristiken
#add search results from scroogle==Benutze zusätzliche Ergebnisse von Scroogle
add search results from blekko==Benutze zusätzliche Ergebnisse von Blekko add search results from blekko==Benutze zusätzliche Ergebnisse von Blekko
Search Navigation==Such-Navigation Search Navigation==Such-Navigation
keyboard shotcuts==Tastatur-Kürzel keyboard shotcuts==Tastatur-Kürzel
@ -1610,7 +1610,8 @@ automatic result retrieval==Automatische Ergebnis-Abfrage
browser integration==Browserintegration browser integration==Browserintegration
after searching, click-open on the default search engine in the upper right search field of your browser and select 'Add "YaCy Search.."'==Nach der Suche klicken Sie auf das Suchfeld Ihres Browsers und wählen Sie '"YaCy" hinzufügen' after searching, click-open on the default search engine in the upper right search field of your browser and select 'Add "YaCy Search.."'==Nach der Suche klicken Sie auf das Suchfeld Ihres Browsers und wählen Sie '"YaCy" hinzufügen'
search as rss feed==Suche als RSS-Feed search as rss feed==Suche als RSS-Feed
#click on the red icon in the upper right after a search. this works good in combination with the '/date' ranking modifier. See an==Klicken Sie nach der Suche auf das rote Icon in der rechten oberen Ecke. Dies funktioniert gut mit dem '/date' Ranking-Modifizierer. Hier ein click on the red icon in the upper right after a search. this works good in combination with the==Klicken Sie nach der Suche auf das rote Icon in der rechten oberen Ecke. Dies funktioniert gut mit dem
See an==siehe
>example==>Beispiel >example==>Beispiel
json search results==JSON-Suchergebnisse json search results==JSON-Suchergebnisse
for ajax developers: get the search rss feed and replace the '.rss' extension in the search result url with '.json'==Für AJAX-Entwickler: Rufen Sie den RSS-Feed auf und ersetzen Sie '.rss' durch '.json' for ajax developers: get the search rss feed and replace the '.rss' extension in the search result url with '.json'==Für AJAX-Entwickler: Rufen Sie den RSS-Feed auf und ersetzen Sie '.rss' durch '.json'

@ -338,7 +338,7 @@
<dependency> <dependency>
<groupId>org.apache.pdfbox</groupId> <groupId>org.apache.pdfbox</groupId>
<artifactId>fontbox</artifactId> <artifactId>fontbox</artifactId>
<version>1.8.6</version> <version>1.8.7</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.apache.geronimo.specs</groupId> <groupId>org.apache.geronimo.specs</groupId>
@ -393,7 +393,7 @@
<dependency> <dependency>
<groupId>org.apache.pdfbox</groupId> <groupId>org.apache.pdfbox</groupId>
<artifactId>jempbox</artifactId> <artifactId>jempbox</artifactId>
<version>1.8.6</version> <version>1.8.7</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>com.jcraft</groupId> <groupId>com.jcraft</groupId>
@ -483,7 +483,7 @@
<dependency> <dependency>
<groupId>org.apache.pdfbox</groupId> <groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox</artifactId> <artifactId>pdfbox</artifactId>
<version>1.8.6</version> <version>1.8.7</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.apache.poi</groupId> <groupId>org.apache.poi</groupId>
@ -533,68 +533,68 @@
<dependency> <dependency>
<groupId>org.eclipse.jetty</groupId> <groupId>org.eclipse.jetty</groupId>
<artifactId>jetty-server</artifactId> <artifactId>jetty-server</artifactId>
<version>9.2.2.v20140723</version> <version>9.2.3.v20140905</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.eclipse.jetty</groupId> <groupId>org.eclipse.jetty</groupId>
<artifactId>jetty-servlet</artifactId> <artifactId>jetty-servlet</artifactId>
<version>9.2.2.v20140723</version> <version>9.2.3.v20140905</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.eclipse.jetty</groupId> <groupId>org.eclipse.jetty</groupId>
<artifactId>jetty-servlets</artifactId> <artifactId>jetty-servlets</artifactId>
<version>9.2.2.v20140723</version> <version>9.2.3.v20140905</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.eclipse.jetty</groupId> <groupId>org.eclipse.jetty</groupId>
<artifactId>jetty-webapp</artifactId> <artifactId>jetty-webapp</artifactId>
<version>9.2.2.v20140723</version> <version>9.2.3.v20140905</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.eclipse.jetty</groupId> <groupId>org.eclipse.jetty</groupId>
<artifactId>jetty-util</artifactId> <artifactId>jetty-util</artifactId>
<version>9.2.2.v20140723</version> <version>9.2.3.v20140905</version>
<type>jar</type> <type>jar</type>
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.eclipse.jetty</groupId> <groupId>org.eclipse.jetty</groupId>
<artifactId>jetty-xml</artifactId> <artifactId>jetty-xml</artifactId>
<version>9.2.2.v20140723</version> <version>9.2.3.v20140905</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.eclipse.jetty</groupId> <groupId>org.eclipse.jetty</groupId>
<artifactId>jetty-http</artifactId> <artifactId>jetty-http</artifactId>
<version>9.2.2.v20140723</version> <version>9.2.3.v20140905</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.eclipse.jetty</groupId> <groupId>org.eclipse.jetty</groupId>
<artifactId>jetty-security</artifactId> <artifactId>jetty-security</artifactId>
<version>9.2.2.v20140723</version> <version>9.2.3.v20140905</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.eclipse.jetty</groupId> <groupId>org.eclipse.jetty</groupId>
<artifactId>jetty-io</artifactId> <artifactId>jetty-io</artifactId>
<version>9.2.2.v20140723</version> <version>9.2.3.v20140905</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.eclipse.jetty</groupId> <groupId>org.eclipse.jetty</groupId>
<artifactId>jetty-continuation</artifactId> <artifactId>jetty-continuation</artifactId>
<version>9.2.2.v20140723</version> <version>9.2.3.v20140905</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.eclipse.jetty</groupId> <groupId>org.eclipse.jetty</groupId>
<artifactId>jetty-jmx</artifactId> <artifactId>jetty-jmx</artifactId>
<version>9.2.2.v20140723</version> <version>9.2.3.v20140905</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.eclipse.jetty</groupId> <groupId>org.eclipse.jetty</groupId>
<artifactId>jetty-proxy</artifactId> <artifactId>jetty-proxy</artifactId>
<version>9.2.2.v20140723</version> <version>9.2.3.v20140905</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.eclipse.jetty</groupId> <groupId>org.eclipse.jetty</groupId>
<artifactId>jetty-deploy</artifactId> <artifactId>jetty-deploy</artifactId>
<version>9.2.2.v20140723</version> <version>9.2.3.v20140905</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.bitlet</groupId> <groupId>org.bitlet</groupId>

@ -65,6 +65,7 @@ import net.yacy.document.parser.xlsParser;
import net.yacy.document.parser.zipParser; import net.yacy.document.parser.zipParser;
import net.yacy.document.parser.augment.AugmentParser; import net.yacy.document.parser.augment.AugmentParser;
import net.yacy.document.parser.images.genericImageParser; import net.yacy.document.parser.images.genericImageParser;
import net.yacy.document.parser.images.metadataImageParser;
import net.yacy.document.parser.rdfa.impl.RDFaParser; import net.yacy.document.parser.rdfa.impl.RDFaParser;
import net.yacy.kelondro.util.FileUtils; import net.yacy.kelondro.util.FileUtils;
import net.yacy.kelondro.util.MemoryControl; import net.yacy.kelondro.util.MemoryControl;
@ -94,6 +95,7 @@ public final class TextParser {
if (Switchboard.getSwitchboard().getConfigBool("parserAugmentation.RDFa", true)) initParser(new RDFaParser()); if (Switchboard.getSwitchboard().getConfigBool("parserAugmentation.RDFa", true)) initParser(new RDFaParser());
initParser(new htmlParser()); // called within rdfa parser initParser(new htmlParser()); // called within rdfa parser
initParser(new genericImageParser()); initParser(new genericImageParser());
initParser(new metadataImageParser());
initParser(new linkScraperParser()); initParser(new linkScraperParser());
initParser(new mmParser()); initParser(new mmParser());
initParser(new odtParser()); initParser(new odtParser());

@ -35,6 +35,7 @@ import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.net.MalformedURLException; import java.net.MalformedURLException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date; import java.util.Date;
import java.util.HashMap; import java.util.HashMap;
import java.util.HashSet; import java.util.HashSet;
@ -76,19 +77,15 @@ public class genericImageParser extends AbstractParser implements Parser {
public static final Set<String> SUPPORTED_MIME_TYPES = new HashSet<String>(); public static final Set<String> SUPPORTED_MIME_TYPES = new HashSet<String>();
public static final Set<String> SUPPORTED_EXTENSIONS = new HashSet<String>(); public static final Set<String> SUPPORTED_EXTENSIONS = new HashSet<String>();
static { static {
SUPPORTED_EXTENSIONS.add("png");
SUPPORTED_EXTENSIONS.add("gif");
SUPPORTED_EXTENSIONS.add("jpg");
SUPPORTED_EXTENSIONS.add("jpeg");
SUPPORTED_EXTENSIONS.add("jpe");
SUPPORTED_EXTENSIONS.add("bmp"); SUPPORTED_EXTENSIONS.add("bmp");
SUPPORTED_EXTENSIONS.add("tif"); // by default java ImageIO supports bmp, gif, jpg, jpeg, png, wbmp (tif if jai-imageio is in classpath/registered)
SUPPORTED_EXTENSIONS.add("tiff"); // http://download.java.net/media/jai-imageio/javadoc/1.1/overview-summary.html
SUPPORTED_MIME_TYPES.add("image/png"); SUPPORTED_EXTENSIONS.add("jpe"); // not listed in ImageIO extension but sometimes uses for jpeg
SUPPORTED_MIME_TYPES.add("image/gif"); SUPPORTED_EXTENSIONS.addAll(Arrays.asList(ImageIO.getReaderFileSuffixes()));
SUPPORTED_MIME_TYPES.add("image/jpeg");
SUPPORTED_MIME_TYPES.add("image/jpg"); // this is in fact a 'wrong' mime type. We leave it here because that is a common error that occurs in the internet frequently
SUPPORTED_MIME_TYPES.add("image/bmp"); SUPPORTED_MIME_TYPES.add("image/bmp");
SUPPORTED_MIME_TYPES.add("image/jpg"); // this is in fact a 'wrong' mime type. We leave it here because that is a common error that occurs in the internet frequently
SUPPORTED_MIME_TYPES.addAll(Arrays.asList(ImageIO.getReaderMIMETypes()));
} }
public genericImageParser() { public genericImageParser() {
@ -200,7 +197,6 @@ public class genericImageParser extends AbstractParser implements Parser {
} }
final HashSet<String> languages = new HashSet<String>(); final HashSet<String> languages = new HashSet<String>();
final List<AnchorURL> anchors = new ArrayList<AnchorURL>();
final LinkedHashMap<DigestURL, ImageEntry> images = new LinkedHashMap<>(); final LinkedHashMap<DigestURL, ImageEntry> images = new LinkedHashMap<>();
// add this image to the map of images // add this image to the map of images
final String infoString = ii.info.toString(); final String infoString = ii.info.toString();
@ -222,7 +218,7 @@ public class genericImageParser extends AbstractParser implements Parser {
descriptions, // description descriptions, // description
gpslon, gpslat, // location gpslon, gpslat, // location
infoString, // content text infoString, // content text
anchors, // anchors null, // anchors
null, null,
images, images,
false, false,
@ -309,6 +305,22 @@ public class genericImageParser extends AbstractParser implements Parser {
public static void main(final String[] args) { public static void main(final String[] args) {
// list support file extension by java ImageIO
String names[] = ImageIO.getReaderFileSuffixes();
System.out.print("supported file extension:");
for (int i = 0; i < names.length; ++i) {
System.out.print(" " + names[i]);
}
System.out.println();
// list supported mime types of java ImageIO
String mime[] = ImageIO.getReaderMIMETypes();
System.out.print("supported mime types: ");
for (int i = 0; i < mime.length; ++i) {
System.out.print(" " + mime[i]);
}
System.out.println();
final File image = new File(args[0]); final File image = new File(args[0]);
final genericImageParser parser = new genericImageParser(); final genericImageParser parser = new genericImageParser();
AnchorURL uri; AnchorURL uri;

@ -0,0 +1,177 @@
// metadataImageParser.java
// (C) 2014 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
// first published 30.09.2014 on http://yacy.net
//
// This is a part of YaCy, a peer-to-peer based web search engine
//
// $LastChangedDate$
// $LastChangedRevision$
// $LastChangedBy$
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package net.yacy.document.parser.images;
import com.drew.imaging.ImageMetadataReader;
import com.drew.imaging.ImageProcessingException;
import com.drew.lang.GeoLocation;
import com.drew.metadata.Directory;
import com.drew.metadata.Metadata;
import com.drew.metadata.Tag;
import com.drew.metadata.exif.GpsDirectory;
import java.io.IOException;
import java.io.InputStream;
import java.io.BufferedInputStream;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import net.yacy.cora.document.id.AnchorURL;
import net.yacy.cora.document.id.MultiProtocolURL;
import net.yacy.document.AbstractParser;
import net.yacy.document.Document;
import net.yacy.document.Parser;
/**
* Image parser base on drewnoakes.com metadata-extractor which supports
* metadata extraction from bmp, gif, jpeg, png, psd, tiff
* All discovered metadata are added to the parsed document
*
* http://www.drewnoakes.com/drewnoakes.com/code/exif/
*
* (in difference to genericImageParser javax ImageIO is not used,
* to support tiff parsing also if not supported by ImageIO)
*/
public class metadataImageParser extends AbstractParser implements Parser {
public metadataImageParser() {
super("Metadata Image Parser");
SUPPORTED_EXTENSIONS.add("tif");
SUPPORTED_EXTENSIONS.add("psd");
// only used for ext/mime not covered by genericImageParser's default
//SUPPORTED_EXTENSIONS.add("gif");
//SUPPORTED_EXTENSIONS.add("jpg");
//SUPPORTED_EXTENSIONS.add("jpeg");
//SUPPORTED_EXTENSIONS.add("png");
SUPPORTED_MIME_TYPES.add("image/tiff");
SUPPORTED_MIME_TYPES.add("image/vnd.adobe.photoshop");
SUPPORTED_MIME_TYPES.add("image/x-photoshop");
//SUPPORTED_MIME_TYPES.add("image/gif");
//SUPPORTED_MIME_TYPES.add("image/jpeg");
//SUPPORTED_MIME_TYPES.add("image/png");
}
@Override
public Document[] parse(
final AnchorURL location,
final String mimeType,
final String documentCharset,
final InputStream sourceStream) throws Parser.Failure, InterruptedException {
String title = null;
String author = null;
String keywords = null;
List<String> descriptions = new ArrayList<String>();
double gpslat = 0;
double gpslon = 0;
StringBuilder imgInfotxt = new StringBuilder();
try {
final Metadata metadata = ImageMetadataReader.readMetadata(new BufferedInputStream(sourceStream), false);
final Iterator<Directory> directories = metadata.getDirectories().iterator();
final HashMap<String, String> props = new HashMap<String, String>();
while (directories.hasNext()) {
final Directory directory = directories.next();
if (directory instanceof GpsDirectory) { // extracting GPS location
GeoLocation geoloc = ((GpsDirectory) directory).getGeoLocation();
if (geoloc != null) {
gpslat = geoloc.getLatitude();
gpslon = geoloc.getLongitude();
}
} else {
final Iterator<Tag> tags = directory.getTags().iterator();
while (tags.hasNext()) {
final Tag tag = tags.next();
if (!tag.getTagName().startsWith("Unknown")) { // filter out returned TagName of "Unknown tag"
props.put(tag.getTagName(), tag.getDescription());
imgInfotxt.append(tag.getTagName() + ": " + tag.getDescription() + " .\n");
}
}
}
}
title = props.get("Image Description");
if (title == null || title.isEmpty()) title = props.get("Headline");
if (title == null || title.isEmpty()) title = props.get("Object Name");
author = props.get("Artist");
if (author == null || author.isEmpty()) author = props.get("Writer/Editor");
if (author == null || author.isEmpty()) author = props.get("By-line");
if (author == null || author.isEmpty()) author = props.get("Credit");
if (author == null || author.isEmpty()) author = props.get("Make");
keywords = props.get("Keywords");
if (keywords == null || keywords.isEmpty()) keywords = props.get("Category");
if (keywords == null || keywords.isEmpty()) keywords = props.get("Supplemental Category(s)");
String description;
description = props.get("Caption/Abstract");
if (description != null && description.length() > 0) descriptions.add("Abstract: " + description);
description = props.get("Country/Primary Location");
if (description != null && description.length() > 0) descriptions.add("Location: " + description);
description = props.get("Province/State");
if (description != null && description.length() > 0) descriptions.add("State: " + description);
description = props.get("Copyright Notice");
if (description != null && description.length() > 0) descriptions.add("Copyright: " + description);
} catch (ImageProcessingException e) {
throw new Parser.Failure("could not extract image meta data", location);
} catch (IOException ex) {
throw new Parser.Failure("IO-Error reading", location);
}
if (title == null || title.isEmpty()) {
title = MultiProtocolURL.unescape(location.getFileName());
}
return new Document[]{new Document(
location,
mimeType,
"UTF-8",
this,
new HashSet<String>(0), // languages
keywords == null ? new String[]{} : keywords.split(keywords.indexOf(',') > 0 ? "," : " "), // keywords
singleList(title), // title
author == null ? "" : author, // author
location.getHost(), // Publisher
new String[]{}, // sections
descriptions, // description
gpslon, gpslat, // location
imgInfotxt.toString(), // content text
null, // anchors
null, // rss
null, // images
false,
new Date())}; // images
}
}

@ -148,10 +148,22 @@ public final class LoaderDispatcher {
return load(request, cacheStrategy, protocolMaxFileSize(request.url()), blacklistType, agent); return load(request, cacheStrategy, protocolMaxFileSize(request.url()), blacklistType, agent);
} }
/**
* Loads a resource from the cache or from web/ftp/smb/file.
* If a loading process for the same url is already registered, waits at most 5 seconds
* for that loader to fill the cache (except for CacheStrategy.NOCACHE).
*
* @param request the request essentials
* @param cacheStrategy strategy according to NOCACHE, IFFRESH, IFEXIST, CACHEONLY
* @param maxFileSize
* @param blacklistType
* @param agent
* @return the loaded entity in a Response object
* @throws IOException
*/
public Response load(final Request request, final CacheStrategy cacheStrategy, final int maxFileSize, final BlacklistType blacklistType, ClientIdentification.Agent agent) throws IOException { public Response load(final Request request, final CacheStrategy cacheStrategy, final int maxFileSize, final BlacklistType blacklistType, ClientIdentification.Agent agent) throws IOException {
Semaphore check = this.loaderSteering.get(request.url()); Semaphore check = this.loaderSteering.get(request.url());
if (check != null) { if (check != null && cacheStrategy != CacheStrategy.NOCACHE) {
// a loading process may be going on for that url // a loading process is going on for that url
//ConcurrentLog.info("LoaderDispatcher", "waiting for " + request.url().toNormalform(true)); //ConcurrentLog.info("LoaderDispatcher", "waiting for " + request.url().toNormalform(true));
long t = System.currentTimeMillis(); long t = System.currentTimeMillis();
try { check.tryAcquire(5, TimeUnit.SECONDS);} catch (final InterruptedException e) {} try { check.tryAcquire(5, TimeUnit.SECONDS);} catch (final InterruptedException e) {}
@ -163,15 +175,14 @@ public final class LoaderDispatcher {
this.loaderSteering.put(request.url(), new Semaphore(0)); this.loaderSteering.put(request.url(), new Semaphore(0));
try { try {
final Response response = loadInternal(request, cacheStrategy, maxFileSize, blacklistType, agent); final Response response = loadInternal(request, cacheStrategy, maxFileSize, blacklistType, agent);
check = this.loaderSteering.remove(request.url()); // finally block cleans up loaderSteering and semaphore
if (check != null) check.release(1000);
return response; return response;
} catch (final Throwable e) { } catch (final Throwable e) {
throw new IOException(e); throw new IOException(e);
} finally { } finally {
// release the semaphore anyway // release the semaphore anyway
check = this.loaderSteering.remove(request.url()); check = this.loaderSteering.remove(request.url()); // = next caller goes directly to loadInternal (is ok we just wanted to fill cash)
if (check != null) check.release(1000); if (check != null) check.release(1000); // don't block any other
} }
} }
@ -190,8 +201,8 @@ public final class LoaderDispatcher {
final String host = url.getHost(); final String host = url.getHost();
final CrawlProfile crawlProfile = request.profileHandle() == null ? null : this.sb.crawler.get(UTF8.getBytes(request.profileHandle())); final CrawlProfile crawlProfile = request.profileHandle() == null ? null : this.sb.crawler.get(UTF8.getBytes(request.profileHandle()));
// check if url is in blacklist // check if url is in blacklist
if (blacklistType != null && host != null && Switchboard.urlBlacklist.isListed(blacklistType, host.toLowerCase(), url.getFile())) { if (blacklistType != null && host != null && Switchboard.urlBlacklist.isListed(blacklistType, host.toLowerCase(), url.getFile())) {
this.sb.crawlQueues.errorURL.push(request.url(), request.depth(), crawlProfile, FailCategory.FINAL_LOAD_CONTEXT, "url in blacklist", -1); this.sb.crawlQueues.errorURL.push(request.url(), request.depth(), crawlProfile, FailCategory.FINAL_LOAD_CONTEXT, "url in blacklist", -1);
throw new IOException("DISPATCHER Rejecting URL '" + request.url().toString() + "'. URL is in blacklist.$"); throw new IOException("DISPATCHER Rejecting URL '" + request.url().toString() + "'. URL is in blacklist.$");
} }

@ -125,7 +125,6 @@ public final class QueryParams {
public Seed remotepeer; public Seed remotepeer;
public final long starttime; // the time when the query started, how long it should take and the time when the timeout is reached (milliseconds) public final long starttime; // the time when the query started, how long it should take and the time when the timeout is reached (milliseconds)
protected final long maxtime; protected final long maxtime;
private final long timeout;
// values that are set after a search: // values that are set after a search:
public int transmitcount; // number of results that had been shown to the user public int transmitcount; // number of results that had been shown to the user
public long searchtime, urlretrievaltime, snippetcomputationtime; // time to perform the search, to get all the urls, and to compute the snippets public long searchtime, urlretrievaltime, snippetcomputationtime; // time to perform the search, to get all the urls, and to compute the snippets
@ -213,7 +212,6 @@ public final class QueryParams {
this.remotepeer = null; this.remotepeer = null;
this.starttime = Long.valueOf(System.currentTimeMillis()); this.starttime = Long.valueOf(System.currentTimeMillis());
this.maxtime = 10000; this.maxtime = 10000;
this.timeout = this.starttime + this.timeout;
this.indexSegment = indexSegment; this.indexSegment = indexSegment;
this.userAgent = userAgent; this.userAgent = userAgent;
this.transmitcount = 0; this.transmitcount = 0;

Loading…
Cancel
Save