Merge branch 'master' of ssh://git@gitorious.org/yacy/rc1.git

pull/1/head
Michael Peter Christen 10 years ago
commit 003ec43bee

@ -19,7 +19,7 @@
<classpathentry kind="lib" path="lib/jsch-0.1.51.jar"/> <classpathentry kind="lib" path="lib/jsch-0.1.51.jar"/>
<classpathentry kind="lib" path="lib/jakarta-oro-2.0.8.jar"/> <classpathentry kind="lib" path="lib/jakarta-oro-2.0.8.jar"/>
<classpathentry kind="lib" path="lib/apache-mime4j-0.6.jar"/> <classpathentry kind="lib" path="lib/apache-mime4j-0.6.jar"/>
<classpathentry kind="lib" path="lib/commons-fileupload-1.2.2.jar"/> <classpathentry kind="lib" path="lib/commons-fileupload-1.3.1.jar"/>
<classpathentry kind="lib" path="lib/json-simple-1.1.1.jar"/> <classpathentry kind="lib" path="lib/json-simple-1.1.1.jar"/>
<classpathentry kind="lib" path="lib/xercesImpl.jar"/> <classpathentry kind="lib" path="lib/xercesImpl.jar"/>
<classpathentry kind="lib" path="lib/xml-apis.jar"/> <classpathentry kind="lib" path="lib/xml-apis.jar"/>

@ -43,7 +43,7 @@
<string>$JAVAROOT/lib/chardet.jar</string> <string>$JAVAROOT/lib/chardet.jar</string>
<string>$JAVAROOT/lib/commons-codec-1.9.jar</string> <string>$JAVAROOT/lib/commons-codec-1.9.jar</string>
<string>$JAVAROOT/lib/commons-compress-1.8.1.jar</string> <string>$JAVAROOT/lib/commons-compress-1.8.1.jar</string>
<string>$JAVAROOT/lib/commons-fileupload-1.2.2.jar</string> <string>$JAVAROOT/lib/commons-fileupload-1.3.1.jar</string>
<string>$JAVAROOT/lib/commons-io-2.3.jar</string> <string>$JAVAROOT/lib/commons-io-2.3.jar</string>
<string>$JAVAROOT/lib/commons-jxpath-1.3.jar</string> <string>$JAVAROOT/lib/commons-jxpath-1.3.jar</string>
<string>$JAVAROOT/lib/commons-lang-2.6.jar</string> <string>$JAVAROOT/lib/commons-lang-2.6.jar</string>

@ -167,7 +167,7 @@
<pathelement location="${lib}/chardet.jar" /> <pathelement location="${lib}/chardet.jar" />
<pathelement location="${lib}/commons-codec-1.9.jar" /> <pathelement location="${lib}/commons-codec-1.9.jar" />
<pathelement location="${lib}/commons-compress-1.8.1.jar" /> <pathelement location="${lib}/commons-compress-1.8.1.jar" />
<pathelement location="${lib}/commons-fileupload-1.2.2.jar" /> <pathelement location="${lib}/commons-fileupload-1.3.1.jar" />
<pathelement location="${lib}/commons-io-2.3.jar" /> <pathelement location="${lib}/commons-io-2.3.jar" />
<pathelement location="${lib}/commons-jxpath-1.3.jar" /> <pathelement location="${lib}/commons-jxpath-1.3.jar" />
<pathelement location="${lib}/commons-lang-2.6.jar" /> <pathelement location="${lib}/commons-lang-2.6.jar" />

@ -129,7 +129,7 @@
The task is started in the background. It may take some minutes before new entries appear (after refreshing the page). The task is started in the background. It may take some minutes before new entries appear (after refreshing the page).
Alternatively you may <a href="?copydefaultosdconfig=">copy &amp; paste a example config file</a> located in <i>defaults/heuristicopensearch.conf</i> to the DATA/SETTINGS directory. Alternatively you may <a href="?copydefaultosdconfig=">copy &amp; paste a example config file</a> located in <i>defaults/heuristicopensearch.conf</i> to the DATA/SETTINGS directory.
For the discover function the <i>web graph</i> option of the web structure index and the fields <i>target_rel_s, target_protocol_s, target_urlstub_s</i> have to be switched on in the <a href="IndexSchema_p.html?core=webgraph">webgraph Solr schema</a>. For the discover function the <i>web graph</i> option of the web structure index and the fields <i>target_rel_s, target_protocol_s, target_urlstub_s</i> have to be switched on in the <a href="IndexSchema_p.html?core=webgraph">webgraph Solr schema</a>.
#{osdsolrfieldswitch}#<input type="submit" name="switchsolrfieldson" value="switch Solr fields on" class="btn btn-primary" onclick="return confirm('modify Solr Schema')"/>#{/osdsolrfieldswitch}# #{osdsolrfieldswitch}#<input type="submit" name="switchsolrfieldson" value="switch Solr fields on" class="btn btn-primary btn-xs" onclick="return confirm('modify Solr Schema')"/>#{/osdsolrfieldswitch}#
</div> </div>
</form> </form>
</fieldset> </fieldset>

@ -66,7 +66,7 @@
<dt>Greedy Learning Mode</dt> <dt>Greedy Learning Mode</dt>
<dd> <dd>
<input type="checkbox" name="greedylearning.active" value="true" #(greedylearning.active)#::checked="checked"#(/greedylearning.active)# />load documents linked in search results, will be deactivated automatically when index size &gt; #[greedylearning.limit.doccount]# <input type="checkbox" name="greedylearning.active" value="true" #(greedylearning.active)#::checked="checked"#(/greedylearning.active)# />load documents linked in search results, will be deactivated automatically when index size &gt; #[greedylearning.limit.doccount]# <small> (see <a href="ConfigHeuristics_p.html">Heuristics: search-result</a> to use this permanent)</small>
</dd> </dd>
<dt>Default Pop-Up Page</dt> <dt>Default Pop-Up Page</dt>

Binary file not shown.

Binary file not shown.

@ -83,7 +83,7 @@
<compilation-unit> <compilation-unit>
<package-root>source</package-root> <package-root>source</package-root>
<package-root>htroot</package-root> <package-root>htroot</package-root>
<classpath mode="compile">lib/J7Zip-modified.jar;lib/apache-mime4j-0.6.jar;lib/bcmail-jdk15-1.46.jar;lib/bcprov-jdk15-1.46.jar;lib/chardet.jar;lib/commons-codec-1.9.jar;lib/commons-compress-1.8.1.jar;lib/commons-fileupload-1.2.2.jar;lib/commons-io-2.3.jar;lib/commons-jxpath-1.3.jar;lib/commons-lang-2.6.jar;lib/commons-logging-1.2.jar;lib/fontbox-1.8.8.jar;lib/geronimo-stax-api_1.0_spec-1.0.1.jar;lib/guava-18.0.jar;lib/htmllexer.jar;lib/httpclient-4.3.6.jar;lib/httpcore-4.3.3.jar;lib/httpmime-4.3.6.jar;lib/icu4j-core.jar;lib/jakarta-oro-2.0.8.jar;lib/jaudiotagger-2.0.4-20111207.115108-15.jar;lib/javax.servlet-api-3.1.0.jar;lib/jcifs-1.3.17.jar;lib/jcl-over-slf4j-1.7.2.jar;lib/jempbox-1.8.8.jar;lib/jetty-client-9.2.6.v20141205.jar;lib/jetty-continuation-9.2.6.v20141205.jar;lib/jetty-deploy-9.2.6.v20141205.jar;lib/jetty-http-9.2.6.v20141205.jar;lib/jetty-io-9.2.6.v20141205.jar;lib/jetty-jmx-9.2.6.v20141205.jar;lib/jetty-proxy-9.2.6.v20141205.jar;lib/jetty-security-9.2.6.v20141205.jar;lib/jetty-server-9.2.6.v20141205.jar;lib/jetty-servlet-9.2.6.v20141205.jar;lib/jetty-servlets-9.2.6.v20141205.jar;lib/jetty-util-9.2.6.v20141205.jar;lib/jetty-webapp-9.2.6.v20141205.jar;lib/jetty-xml-9.2.6.v20141205.jar;lib/jsch-0.1.51.jar;lib/json-simple-1.1.1.jar;lib/jsoup-1.8.1.jar;lib/log4j-over-slf4j-1.7.2.jar;lib/lucene-analyzers-common-4.10.2.jar;lib/lucene-analyzers-phonetic-4.10.2.jar;lib/lucene-classification-4.10.2.jar;lib/lucene-codecs-4.10.2.jar;lib/lucene-core-4.10.2.jar;lib/lucene-facet-4.10.2.jar;lib/lucene-grouping-4.10.2.jar;lib/lucene-highlighter-4.10.2.jar;lib/lucene-join-4.10.2.jar;lib/lucene-memory-4.10.2.jar;lib/lucene-misc-4.10.2.jar;lib/lucene-queries-4.10.2.jar;lib/lucene-queryparser-4.10.2.jar;lib/lucene-spatial-4.10.2.jar;lib/lucene-suggest-4.10.2.jar;lib/metadata-extractor-2.7.0.jar;lib/noggit-0.5.jar;lib/org.restlet.jar;lib/pdfbox-1.8.8.jar;lib/poi-3.10.1.jar;lib/poi-scratchpad-3.10.1.jar;lib/slf4j-api-1.7.6.jar;lib/slf4j-jdk14-1.7.2.jar;lib/solr-core-4.10.2.jar;lib/solr-solrj-4.10.2.jar;lib/spatial4j-0.4.1.jar;lib/webcat-0.1-swf.jar;lib/weupnp-0.1.2.jar;lib/wstx-asl-3.2.9.jar;lib/xercesImpl.jar;lib/xml-apis.jar;lib/xmpcore-5.1.2.jar;lib/zookeeper-3.4.6.jar</classpath> <classpath mode="compile">lib/J7Zip-modified.jar;lib/apache-mime4j-0.6.jar;lib/bcmail-jdk15-1.46.jar;lib/bcprov-jdk15-1.46.jar;lib/chardet.jar;lib/commons-codec-1.9.jar;lib/commons-compress-1.8.1.jar;lib/commons-fileupload-1.3.1.jar;lib/commons-io-2.3.jar;lib/commons-jxpath-1.3.jar;lib/commons-lang-2.6.jar;lib/commons-logging-1.2.jar;lib/fontbox-1.8.8.jar;lib/geronimo-stax-api_1.0_spec-1.0.1.jar;lib/guava-18.0.jar;lib/htmllexer.jar;lib/httpclient-4.3.6.jar;lib/httpcore-4.3.3.jar;lib/httpmime-4.3.6.jar;lib/icu4j-core.jar;lib/jakarta-oro-2.0.8.jar;lib/jaudiotagger-2.0.4-20111207.115108-15.jar;lib/javax.servlet-api-3.1.0.jar;lib/jcifs-1.3.17.jar;lib/jcl-over-slf4j-1.7.2.jar;lib/jempbox-1.8.8.jar;lib/jetty-client-9.2.6.v20141205.jar;lib/jetty-continuation-9.2.6.v20141205.jar;lib/jetty-deploy-9.2.6.v20141205.jar;lib/jetty-http-9.2.6.v20141205.jar;lib/jetty-io-9.2.6.v20141205.jar;lib/jetty-jmx-9.2.6.v20141205.jar;lib/jetty-proxy-9.2.6.v20141205.jar;lib/jetty-security-9.2.6.v20141205.jar;lib/jetty-server-9.2.6.v20141205.jar;lib/jetty-servlet-9.2.6.v20141205.jar;lib/jetty-servlets-9.2.6.v20141205.jar;lib/jetty-util-9.2.6.v20141205.jar;lib/jetty-webapp-9.2.6.v20141205.jar;lib/jetty-xml-9.2.6.v20141205.jar;lib/jsch-0.1.51.jar;lib/json-simple-1.1.1.jar;lib/jsoup-1.8.1.jar;lib/log4j-over-slf4j-1.7.2.jar;lib/lucene-analyzers-common-4.10.2.jar;lib/lucene-analyzers-phonetic-4.10.2.jar;lib/lucene-classification-4.10.2.jar;lib/lucene-codecs-4.10.2.jar;lib/lucene-core-4.10.2.jar;lib/lucene-facet-4.10.2.jar;lib/lucene-grouping-4.10.2.jar;lib/lucene-highlighter-4.10.2.jar;lib/lucene-join-4.10.2.jar;lib/lucene-memory-4.10.2.jar;lib/lucene-misc-4.10.2.jar;lib/lucene-queries-4.10.2.jar;lib/lucene-queryparser-4.10.2.jar;lib/lucene-spatial-4.10.2.jar;lib/lucene-suggest-4.10.2.jar;lib/metadata-extractor-2.7.0.jar;lib/noggit-0.5.jar;lib/org.restlet.jar;lib/pdfbox-1.8.8.jar;lib/poi-3.10.1.jar;lib/poi-scratchpad-3.10.1.jar;lib/slf4j-api-1.7.6.jar;lib/slf4j-jdk14-1.7.2.jar;lib/solr-core-4.10.2.jar;lib/solr-solrj-4.10.2.jar;lib/spatial4j-0.4.1.jar;lib/webcat-0.1-swf.jar;lib/weupnp-0.1.2.jar;lib/wstx-asl-3.2.9.jar;lib/xercesImpl.jar;lib/xml-apis.jar;lib/xmpcore-5.1.2.jar;lib/zookeeper-3.4.6.jar</classpath>
<built-to>lib/yacycore.jar</built-to> <built-to>lib/yacycore.jar</built-to>
<source-level>1.7</source-level> <source-level>1.7</source-level>
</compilation-unit> </compilation-unit>

@ -329,7 +329,7 @@
<dependency> <dependency>
<groupId>commons-fileupload</groupId> <groupId>commons-fileupload</groupId>
<artifactId>commons-fileupload</artifactId> <artifactId>commons-fileupload</artifactId>
<version>1.3</version> <version>1.3.1</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>commons-jxpath</groupId> <groupId>commons-jxpath</groupId>

@ -746,7 +746,8 @@ public class MultiProtocolURL implements Serializable, Comparable<MultiProtocolU
public static String getFileExtension(final String fileName) { public static String getFileExtension(final String fileName) {
final int p = fileName.lastIndexOf('.'); final int p = fileName.lastIndexOf('.');
if (p < 0) return ""; if (p < 0) return "";
return fileName.substring(p + 1).toLowerCase(); final int q = fileName.lastIndexOf('?');
return q < 0 ? fileName.substring(p + 1).toLowerCase() : fileName.substring(p + 1, q).toLowerCase();
} }
public String getPath() { public String getPath() {

@ -30,7 +30,6 @@ import javax.swing.text.html.HTMLDocument;
import javax.swing.text.html.HTMLEditorKit; import javax.swing.text.html.HTMLEditorKit;
import javax.swing.text.html.ImageView; import javax.swing.text.html.ImageView;
import net.yacy.cora.date.GenericFormatter;
import net.yacy.document.ImageParser; import net.yacy.document.ImageParser;
import net.yacy.kelondro.util.FileUtils; import net.yacy.kelondro.util.FileUtils;
import net.yacy.kelondro.util.OS; import net.yacy.kelondro.util.OS;
@ -45,7 +44,6 @@ import java.beans.PropertyChangeEvent;
import java.beans.PropertyChangeListener; import java.beans.PropertyChangeListener;
import java.io.File; import java.io.File;
import java.io.IOException; import java.io.IOException;
import java.util.Date;
import java.util.List; import java.util.List;
public class Html2Image { public class Html2Image {
@ -81,10 +79,15 @@ public class Html2Image {
* @return * @return
*/ */
public static boolean writeWkhtmltopdf(String url, String proxy, String userAgent, final String acceptLanguage, File destination) { public static boolean writeWkhtmltopdf(String url, String proxy, String userAgent, final String acceptLanguage, File destination) {
boolean success = writeWkhtmltopdfInternal(url, proxy, destination, null, acceptLanguage, true); boolean success = false;
for (boolean ignoreErrors: new boolean[]{false, true}) {
success = writeWkhtmltopdfInternal(url, proxy, destination, null, acceptLanguage, ignoreErrors);
if (success) break;
if (!success && proxy != null) { if (!success && proxy != null) {
ConcurrentLog.warn("Html2Image", "trying to load without proxy: " + url); ConcurrentLog.warn("Html2Image", "trying to load without proxy: " + url);
success = writeWkhtmltopdfInternal(url, null, destination, userAgent, acceptLanguage, true); success = writeWkhtmltopdfInternal(url, null, destination, userAgent, acceptLanguage, ignoreErrors);
if (success) break;
}
} }
if (success) { if (success) {
ConcurrentLog.info("Html2Image", "wrote " + destination.toString() + " for " + url); ConcurrentLog.info("Html2Image", "wrote " + destination.toString() + " for " + url);
@ -101,9 +104,9 @@ public class Html2Image {
//acceptLanguage == null ? "" : "--custom-header 'Accept-Language' '" + acceptLanguage + "' " + //acceptLanguage == null ? "" : "--custom-header 'Accept-Language' '" + acceptLanguage + "' " +
(userAgent == null ? "" : "--custom-header 'User-Agent' '" + userAgent + "' --custom-header-propagation ") + (userAgent == null ? "" : "--custom-header 'User-Agent' '" + userAgent + "' --custom-header-propagation ") +
(proxy == null ? "" : "--proxy " + proxy + " ") + (proxy == null ? "" : "--proxy " + proxy + " ") +
(ignoreErrors ? (OS.isMacArchitecture ? "--load-error-handling ignore " : "--ignore-load-errors ") : "") + (ignoreErrors ? (OS.isMacArchitecture ? "--load-error-handling ignore " : "--ignore-load-errors ") : "") + // some versions do not have that flag and fail if attempting to use it...
//"--footer-font-name 'Courier' --footer-font-size 9 --footer-left [webpage] --footer-right [date]/[time]([page]/[topage]) " + //"--footer-font-name 'Courier' --footer-font-size 9 --footer-left [webpage] --footer-right [date]/[time]([page]/[topage]) " +
"--footer-left [webpage] --footer-right [date]/[time]([page]/[topage]) " + "--footer-left [webpage] --footer-right '[date]/[time]([page]/[topage])' " +
url + " " + destination.getAbsolutePath(); url + " " + destination.getAbsolutePath();
try { try {
ConcurrentLog.info("Html2Pdf", "creating pdf from url " + url + " with command: " + commandline); ConcurrentLog.info("Html2Pdf", "creating pdf from url " + url + " with command: " + commandline);

@ -174,7 +174,7 @@ public class pdfParser extends AbstractParser implements Parser {
stripper.setStartPage(page); stripper.setStartPage(page);
stripper.setEndPage(page); stripper.setEndPage(page);
pages[page - 1] = stripper.getText(pdfDoc); pages[page - 1] = stripper.getText(pdfDoc);
System.out.println("PAGE " + page + ": " + pages[page - 1]); //System.out.println("PAGE " + page + ": " + pages[page - 1]);
} }
// create individual documents for each page // create individual documents for each page

Loading…
Cancel
Save