Merge branch 'master' of ssh://git@gitorious.org/yacy/rc1.git

pull/1/head
Michael Peter Christen 10 years ago
commit 003ec43bee

@ -19,7 +19,7 @@
<classpathentry kind="lib" path="lib/jsch-0.1.51.jar"/>
<classpathentry kind="lib" path="lib/jakarta-oro-2.0.8.jar"/>
<classpathentry kind="lib" path="lib/apache-mime4j-0.6.jar"/>
<classpathentry kind="lib" path="lib/commons-fileupload-1.2.2.jar"/>
<classpathentry kind="lib" path="lib/commons-fileupload-1.3.1.jar"/>
<classpathentry kind="lib" path="lib/json-simple-1.1.1.jar"/>
<classpathentry kind="lib" path="lib/xercesImpl.jar"/>
<classpathentry kind="lib" path="lib/xml-apis.jar"/>

@ -43,7 +43,7 @@
<string>$JAVAROOT/lib/chardet.jar</string>
<string>$JAVAROOT/lib/commons-codec-1.9.jar</string>
<string>$JAVAROOT/lib/commons-compress-1.8.1.jar</string>
<string>$JAVAROOT/lib/commons-fileupload-1.2.2.jar</string>
<string>$JAVAROOT/lib/commons-fileupload-1.3.1.jar</string>
<string>$JAVAROOT/lib/commons-io-2.3.jar</string>
<string>$JAVAROOT/lib/commons-jxpath-1.3.jar</string>
<string>$JAVAROOT/lib/commons-lang-2.6.jar</string>

@ -167,7 +167,7 @@
<pathelement location="${lib}/chardet.jar" />
<pathelement location="${lib}/commons-codec-1.9.jar" />
<pathelement location="${lib}/commons-compress-1.8.1.jar" />
<pathelement location="${lib}/commons-fileupload-1.2.2.jar" />
<pathelement location="${lib}/commons-fileupload-1.3.1.jar" />
<pathelement location="${lib}/commons-io-2.3.jar" />
<pathelement location="${lib}/commons-jxpath-1.3.jar" />
<pathelement location="${lib}/commons-lang-2.6.jar" />

@ -129,7 +129,7 @@
The task is started in the background. It may take some minutes before new entries appear (after refreshing the page).
Alternatively you may <a href="?copydefaultosdconfig=">copy &amp; paste an example config file</a> located in <i>defaults/heuristicopensearch.conf</i> to the DATA/SETTINGS directory.
For the discover function the <i>web graph</i> option of the web structure index and the fields <i>target_rel_s, target_protocol_s, target_urlstub_s</i> have to be switched on in the <a href="IndexSchema_p.html?core=webgraph">webgraph Solr schema</a>.
#{osdsolrfieldswitch}#<input type="submit" name="switchsolrfieldson" value="switch Solr fields on" class="btn btn-primary" onclick="return confirm('modify Solr Schema')"/>#{/osdsolrfieldswitch}#
#{osdsolrfieldswitch}#<input type="submit" name="switchsolrfieldson" value="switch Solr fields on" class="btn btn-primary btn-xs" onclick="return confirm('modify Solr Schema')"/>#{/osdsolrfieldswitch}#
</div>
</form>
</fieldset>

@ -66,7 +66,7 @@
<dt>Greedy Learning Mode</dt>
<dd>
<input type="checkbox" name="greedylearning.active" value="true" #(greedylearning.active)#::checked="checked"#(/greedylearning.active)# />load documents linked in search results, will be deactivated automatically when index size &gt; #[greedylearning.limit.doccount]#
<input type="checkbox" name="greedylearning.active" value="true" #(greedylearning.active)#::checked="checked"#(/greedylearning.active)# />load documents linked in search results, will be deactivated automatically when index size &gt; #[greedylearning.limit.doccount]# <small> (see <a href="ConfigHeuristics_p.html">Heuristics: search-result</a> to use this permanently)</small>
</dd>
<dt>Default Pop-Up Page</dt>

Binary file not shown.

Binary file not shown.

@ -83,7 +83,7 @@
<compilation-unit>
<package-root>source</package-root>
<package-root>htroot</package-root>
<classpath mode="compile">lib/J7Zip-modified.jar;lib/apache-mime4j-0.6.jar;lib/bcmail-jdk15-1.46.jar;lib/bcprov-jdk15-1.46.jar;lib/chardet.jar;lib/commons-codec-1.9.jar;lib/commons-compress-1.8.1.jar;lib/commons-fileupload-1.2.2.jar;lib/commons-io-2.3.jar;lib/commons-jxpath-1.3.jar;lib/commons-lang-2.6.jar;lib/commons-logging-1.2.jar;lib/fontbox-1.8.8.jar;lib/geronimo-stax-api_1.0_spec-1.0.1.jar;lib/guava-18.0.jar;lib/htmllexer.jar;lib/httpclient-4.3.6.jar;lib/httpcore-4.3.3.jar;lib/httpmime-4.3.6.jar;lib/icu4j-core.jar;lib/jakarta-oro-2.0.8.jar;lib/jaudiotagger-2.0.4-20111207.115108-15.jar;lib/javax.servlet-api-3.1.0.jar;lib/jcifs-1.3.17.jar;lib/jcl-over-slf4j-1.7.2.jar;lib/jempbox-1.8.8.jar;lib/jetty-client-9.2.6.v20141205.jar;lib/jetty-continuation-9.2.6.v20141205.jar;lib/jetty-deploy-9.2.6.v20141205.jar;lib/jetty-http-9.2.6.v20141205.jar;lib/jetty-io-9.2.6.v20141205.jar;lib/jetty-jmx-9.2.6.v20141205.jar;lib/jetty-proxy-9.2.6.v20141205.jar;lib/jetty-security-9.2.6.v20141205.jar;lib/jetty-server-9.2.6.v20141205.jar;lib/jetty-servlet-9.2.6.v20141205.jar;lib/jetty-servlets-9.2.6.v20141205.jar;lib/jetty-util-9.2.6.v20141205.jar;lib/jetty-webapp-9.2.6.v20141205.jar;lib/jetty-xml-9.2.6.v20141205.jar;lib/jsch-0.1.51.jar;lib/json-simple-1.1.1.jar;lib/jsoup-1.8.1.jar;lib/log4j-over-slf4j-1.7.2.jar;lib/lucene-analyzers-common-4.10.2.jar;lib/lucene-analyzers-phonetic-4.10.2.jar;lib/lucene-classification-4.10.2.jar;lib/lucene-codecs-4.10.2.jar;lib/lucene-core-4.10.2.jar;lib/lucene-facet-4.10.2.jar;lib/lucene-grouping-4.10.2.jar;lib/lucene-highlighter-4.10.2.jar;lib/lucene-join-4.10.2.jar;lib/lucene-memory-4.10.2.jar;lib/lucene-misc-4.10.2.jar;lib/lucene-queries-4.10.2.jar;lib/lucene-queryparser-4.10.2.jar;lib/lucene-spatial-4.10.2.jar;lib/lucene-suggest-4.10.2.jar;lib/metadata-extractor-2.7.0.jar;lib/noggit-0.5.jar;lib/org.restlet.jar;lib/pdfbox-1.8.8.jar;lib/poi-3.10.1.jar;lib/poi-scratchpad-3.10.1.jar;lib/slf4j-api-1.7.6.jar;lib/slf4j-jdk14-1.7.2.jar;lib/solr-core-4.10.2.j
ar;lib/solr-solrj-4.10.2.jar;lib/spatial4j-0.4.1.jar;lib/webcat-0.1-swf.jar;lib/weupnp-0.1.2.jar;lib/wstx-asl-3.2.9.jar;lib/xercesImpl.jar;lib/xml-apis.jar;lib/xmpcore-5.1.2.jar;lib/zookeeper-3.4.6.jar</classpath>
<classpath mode="compile">lib/J7Zip-modified.jar;lib/apache-mime4j-0.6.jar;lib/bcmail-jdk15-1.46.jar;lib/bcprov-jdk15-1.46.jar;lib/chardet.jar;lib/commons-codec-1.9.jar;lib/commons-compress-1.8.1.jar;lib/commons-fileupload-1.3.1.jar;lib/commons-io-2.3.jar;lib/commons-jxpath-1.3.jar;lib/commons-lang-2.6.jar;lib/commons-logging-1.2.jar;lib/fontbox-1.8.8.jar;lib/geronimo-stax-api_1.0_spec-1.0.1.jar;lib/guava-18.0.jar;lib/htmllexer.jar;lib/httpclient-4.3.6.jar;lib/httpcore-4.3.3.jar;lib/httpmime-4.3.6.jar;lib/icu4j-core.jar;lib/jakarta-oro-2.0.8.jar;lib/jaudiotagger-2.0.4-20111207.115108-15.jar;lib/javax.servlet-api-3.1.0.jar;lib/jcifs-1.3.17.jar;lib/jcl-over-slf4j-1.7.2.jar;lib/jempbox-1.8.8.jar;lib/jetty-client-9.2.6.v20141205.jar;lib/jetty-continuation-9.2.6.v20141205.jar;lib/jetty-deploy-9.2.6.v20141205.jar;lib/jetty-http-9.2.6.v20141205.jar;lib/jetty-io-9.2.6.v20141205.jar;lib/jetty-jmx-9.2.6.v20141205.jar;lib/jetty-proxy-9.2.6.v20141205.jar;lib/jetty-security-9.2.6.v20141205.jar;lib/jetty-server-9.2.6.v20141205.jar;lib/jetty-servlet-9.2.6.v20141205.jar;lib/jetty-servlets-9.2.6.v20141205.jar;lib/jetty-util-9.2.6.v20141205.jar;lib/jetty-webapp-9.2.6.v20141205.jar;lib/jetty-xml-9.2.6.v20141205.jar;lib/jsch-0.1.51.jar;lib/json-simple-1.1.1.jar;lib/jsoup-1.8.1.jar;lib/log4j-over-slf4j-1.7.2.jar;lib/lucene-analyzers-common-4.10.2.jar;lib/lucene-analyzers-phonetic-4.10.2.jar;lib/lucene-classification-4.10.2.jar;lib/lucene-codecs-4.10.2.jar;lib/lucene-core-4.10.2.jar;lib/lucene-facet-4.10.2.jar;lib/lucene-grouping-4.10.2.jar;lib/lucene-highlighter-4.10.2.jar;lib/lucene-join-4.10.2.jar;lib/lucene-memory-4.10.2.jar;lib/lucene-misc-4.10.2.jar;lib/lucene-queries-4.10.2.jar;lib/lucene-queryparser-4.10.2.jar;lib/lucene-spatial-4.10.2.jar;lib/lucene-suggest-4.10.2.jar;lib/metadata-extractor-2.7.0.jar;lib/noggit-0.5.jar;lib/org.restlet.jar;lib/pdfbox-1.8.8.jar;lib/poi-3.10.1.jar;lib/poi-scratchpad-3.10.1.jar;lib/slf4j-api-1.7.6.jar;lib/slf4j-jdk14-1.7.2.jar;lib/solr-core-4.10.2.j
ar;lib/solr-solrj-4.10.2.jar;lib/spatial4j-0.4.1.jar;lib/webcat-0.1-swf.jar;lib/weupnp-0.1.2.jar;lib/wstx-asl-3.2.9.jar;lib/xercesImpl.jar;lib/xml-apis.jar;lib/xmpcore-5.1.2.jar;lib/zookeeper-3.4.6.jar</classpath>
<built-to>lib/yacycore.jar</built-to>
<source-level>1.7</source-level>
</compilation-unit>

@ -329,7 +329,7 @@
<dependency>
<groupId>commons-fileupload</groupId>
<artifactId>commons-fileupload</artifactId>
<version>1.3</version>
<version>1.3.1</version>
</dependency>
<dependency>
<groupId>commons-jxpath</groupId>

@ -746,7 +746,8 @@ public class MultiProtocolURL implements Serializable, Comparable<MultiProtocolU
/**
 * Returns the lower-cased file extension of the given file name.
 * <p>
 * A query part (everything from the first '?' onwards) is removed before the
 * extension is looked up, so that a dot inside the query string is never
 * mistaken for the extension separator and no invalid substring range can occur.
 *
 * @param fileName the file name or path, optionally followed by a query part; must not be null
 * @return the text after the last '.' of the name without its query part, in lower case,
 *         or an empty string if that name contains no '.'
 */
public static String getFileExtension(final String fileName) {
    // strip the query part first; otherwise "file.php?x=a.b" would place the last '.'
    // behind the '?' and substring(p + 1, q) would throw StringIndexOutOfBoundsException
    final int q = fileName.indexOf('?');
    final String name = q < 0 ? fileName : fileName.substring(0, q);
    final int p = name.lastIndexOf('.');
    if (p < 0) return "";
    return name.substring(p + 1).toLowerCase();
}
public String getPath() {

@ -30,7 +30,6 @@ import javax.swing.text.html.HTMLDocument;
import javax.swing.text.html.HTMLEditorKit;
import javax.swing.text.html.ImageView;
import net.yacy.cora.date.GenericFormatter;
import net.yacy.document.ImageParser;
import net.yacy.kelondro.util.FileUtils;
import net.yacy.kelondro.util.OS;
@ -45,7 +44,6 @@ import java.beans.PropertyChangeEvent;
import java.beans.PropertyChangeListener;
import java.io.File;
import java.io.IOException;
import java.util.Date;
import java.util.List;
public class Html2Image {
@ -81,10 +79,15 @@ public class Html2Image {
* @return
*/
public static boolean writeWkhtmltopdf(String url, String proxy, String userAgent, final String acceptLanguage, File destination) {
boolean success = writeWkhtmltopdfInternal(url, proxy, destination, null, acceptLanguage, true);
boolean success = false;
for (boolean ignoreErrors: new boolean[]{false, true}) {
success = writeWkhtmltopdfInternal(url, proxy, destination, null, acceptLanguage, ignoreErrors);
if (success) break;
if (!success && proxy != null) {
ConcurrentLog.warn("Html2Image", "trying to load without proxy: " + url);
success = writeWkhtmltopdfInternal(url, null, destination, userAgent, acceptLanguage, true);
success = writeWkhtmltopdfInternal(url, null, destination, userAgent, acceptLanguage, ignoreErrors);
if (success) break;
}
}
if (success) {
ConcurrentLog.info("Html2Image", "wrote " + destination.toString() + " for " + url);
@ -101,9 +104,9 @@ public class Html2Image {
//acceptLanguage == null ? "" : "--custom-header 'Accept-Language' '" + acceptLanguage + "' " +
(userAgent == null ? "" : "--custom-header 'User-Agent' '" + userAgent + "' --custom-header-propagation ") +
(proxy == null ? "" : "--proxy " + proxy + " ") +
(ignoreErrors ? (OS.isMacArchitecture ? "--load-error-handling ignore " : "--ignore-load-errors ") : "") +
(ignoreErrors ? (OS.isMacArchitecture ? "--load-error-handling ignore " : "--ignore-load-errors ") : "") + // some versions do not have that flag and fail if attempting to use it...
//"--footer-font-name 'Courier' --footer-font-size 9 --footer-left [webpage] --footer-right [date]/[time]([page]/[topage]) " +
"--footer-left [webpage] --footer-right [date]/[time]([page]/[topage]) " +
"--footer-left [webpage] --footer-right '[date]/[time]([page]/[topage])' " +
url + " " + destination.getAbsolutePath();
try {
ConcurrentLog.info("Html2Pdf", "creating pdf from url " + url + " with command: " + commandline);

@ -174,7 +174,7 @@ public class pdfParser extends AbstractParser implements Parser {
stripper.setStartPage(page);
stripper.setEndPage(page);
pages[page - 1] = stripper.getText(pdfDoc);
System.out.println("PAGE " + page + ": " + pages[page - 1]);
//System.out.println("PAGE " + page + ": " + pages[page - 1]);
}
// create individual documents for each page

Loading…
Cancel
Save