diff --git a/source/de/anomic/crawler/Balancer.java b/source/de/anomic/crawler/Balancer.java
index b0b2deca2..85f9d24f2 100644
--- a/source/de/anomic/crawler/Balancer.java
+++ b/source/de/anomic/crawler/Balancer.java
@@ -56,7 +56,7 @@ import de.anomic.crawler.retrieval.Request;
public class Balancer {
- private static final String indexSuffix = "9.db";
+ private static final String indexSuffix = "A.db";
private static final int EcoFSBufferSize = 1000;
private static final int objectIndexBufferSize = 1000;
private static final String localhost = "localhost";
diff --git a/source/de/anomic/crawler/CrawlQueues.java b/source/de/anomic/crawler/CrawlQueues.java
index 6dd7d865a..2eb4231dc 100644
--- a/source/de/anomic/crawler/CrawlQueues.java
+++ b/source/de/anomic/crawler/CrawlQueues.java
@@ -60,8 +60,8 @@ import de.anomic.crawler.retrieval.Response;
public class CrawlQueues {
- private static final String ERROR_DB_FILENAME = "urlError3.db";
- private static final String DELEGATED_DB_FILENAME = "urlDelegated3.db";
+ private static final String ERROR_DB_FILENAME = "urlError4.db";
+ private static final String DELEGATED_DB_FILENAME = "urlDelegated4.db";
private static final Segments.Process PROCESS = Segments.Process.LOCALCRAWLING;
protected Switchboard sb;
diff --git a/source/de/anomic/crawler/retrieval/HTTPLoader.java b/source/de/anomic/crawler/retrieval/HTTPLoader.java
index 1b98dd41d..3e5d81a2f 100644
--- a/source/de/anomic/crawler/retrieval/HTTPLoader.java
+++ b/source/de/anomic/crawler/retrieval/HTTPLoader.java
@@ -159,7 +159,7 @@ public final class HTTPLoader {
// check if the url was already indexed
final String dbname = this.sb.urlExists(Segments.Process.LOCALCRAWLING, redirectionUrl.hash());
- if (dbname != null) {
+ if (dbname != null) { //OTTO
this.sb.crawlQueues.errorURL.push(request, this.sb.peers.mySeed().hash.getBytes(), new Date(), 1, FailCategory.TEMPORARY_NETWORK_FAILURE, "redirection to double content", code);
throw new IOException("CRAWLER Redirection of URL=" + request.url().toString() + " ignored. The url appears already in db " + dbname);
}
diff --git a/source/de/anomic/crawler/retrieval/Request.java b/source/de/anomic/crawler/retrieval/Request.java
index 33eb8e1c9..eb5bf09bd 100644
--- a/source/de/anomic/crawler/retrieval/Request.java
+++ b/source/de/anomic/crawler/retrieval/Request.java
@@ -53,8 +53,8 @@ public class Request extends WorkflowJob
+ Word.commonHashLength
+ ", "
+ // the url's referrer hash
- "String urlname-80, "
- + // the name of the url, from anchor tag name
+ "String urlname-256, "
+ + // the name of the url, from anchor tag name (must be big to transport NOLOAD entries)
"Cardinal appdate-8 {b256}, "
+ // the date of the resource; either file date or first appearance
"String profile-"
@@ -78,6 +78,8 @@ public class Request extends WorkflowJob
"Cardinal size-8 {b256}", // size of resource in bytes (if known) or 0 if not known
Base64Order.enhancedCoder);
+ public final static int descrLength = rowdef.column(4).cellwidth;
+
private byte[] initiator; // the initiator hash, is NULL or "" if it is the own proxy;
// if this is generated by a crawl, the own peer hash in entered
private byte[] refhash; // the url's referrer hash
diff --git a/source/de/anomic/crawler/retrieval/Response.java b/source/de/anomic/crawler/retrieval/Response.java
index aeaa8328e..a3d0eaa8b 100644
--- a/source/de/anomic/crawler/retrieval/Response.java
+++ b/source/de/anomic/crawler/retrieval/Response.java
@@ -162,16 +162,23 @@ public class Response {
this.content = content;
}
+ /**
+ * create a 'virtual' response that is composed using crawl details from the request object
+ * this is used when the NOLOAD queue is processed
+ * @param request
+ * @param profile
+ */
public Response(final Request request, final CrawlProfile profile) {
this.request = request;
// request and response headers may be zero in case that we process surrogates
this.requestHeader = new RequestHeader();
this.responseHeader = new ResponseHeader();
+ this.responseHeader.put(HeaderFramework.CONTENT_TYPE, "text/plain"); // tell parser how to handle the content
if (request.size() > 0) this.responseHeader.put(HeaderFramework.CONTENT_LENGTH, Long.toString(request.size()));
this.responseStatus = "200";
this.profile = profile;
this.status = QUEUE_STATE_FRESH;
- this.content = request.url().toTokens().getBytes();
+ this.content = request.name().length() > 0 ? request.name().getBytes() : request.url().toTokens().getBytes();
}
public Response(
@@ -824,7 +831,7 @@ public class Response {
final String supportError = TextParser.supports(url(), this.responseHeader == null ? null : this.responseHeader.mime());
if (supportError != null) throw new Parser.Failure("no parser support:" + supportError, url());
try {
- return TextParser.parseSource(url(), this.responseHeader == null ? null : this.responseHeader.mime(), this.responseHeader == null ? "UTF-8" : this.responseHeader.getCharacterEncoding(), this.content, false);
+ return TextParser.parseSource(url(), this.responseHeader == null ? null : this.responseHeader.mime(), this.responseHeader == null ? "UTF-8" : this.responseHeader.getCharacterEncoding(), this.content);
} catch (final Exception e) {
return null;
}
diff --git a/source/net/yacy/document/Document.java b/source/net/yacy/document/Document.java
index dc2cb8be8..ce9946269 100644
--- a/source/net/yacy/document/Document.java
+++ b/source/net/yacy/document/Document.java
@@ -60,6 +60,7 @@ import net.yacy.document.parser.html.ImageEntry;
import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.util.ByteBuffer;
import net.yacy.kelondro.util.FileUtils;
+import de.anomic.crawler.retrieval.Request;
public class Document {
@@ -827,7 +828,8 @@ dc_rights
final Map<MultiProtocolURI, String> result = new HashMap<MultiProtocolURI, String>();
for (final Document d: documents) {
for (final ImageEntry imageReference : d.getImages().values()) {
- result.put(imageReference.url(), imageReference.alt());
+ // construct a image name which contains the document title to enhance the search process for images
+ result.put(imageReference.url(), description(d, imageReference.alt()));
}
}
return result;
@@ -835,20 +837,57 @@ dc_rights
public static Map<MultiProtocolURI, String> getAudiolinks(final Document[] documents) {
final Map<MultiProtocolURI, String> result = new HashMap<MultiProtocolURI, String>();
- for (final Document d: documents) result.putAll(d.audiolinks);
+ for (final Document d: documents) {
+ for (Map.Entry<MultiProtocolURI, String> e: d.audiolinks.entrySet()) {
+ result.put(e.getKey(), description(d, e.getValue()));
+ }
+ }
return result;
}
public static Map<MultiProtocolURI, String> getVideolinks(final Document[] documents) {
final Map<MultiProtocolURI, String> result = new HashMap<MultiProtocolURI, String>();
- for (final Document d: documents) result.putAll(d.videolinks);
+ for (final Document d: documents) {
+ for (Map.Entry<MultiProtocolURI, String> e: d.videolinks.entrySet()) {
+ result.put(e.getKey(), description(d, e.getValue()));
+ }
+ }
return result;
}
public static Map<MultiProtocolURI, String> getApplinks(final Document[] documents) {
final Map<MultiProtocolURI, String> result = new HashMap<MultiProtocolURI, String>();
- for (final Document d: documents) result.putAll(d.applinks);
+ for (final Document d: documents) {
+ for (Map.Entry<MultiProtocolURI, String> e: d.applinks.entrySet()) {
+ result.put(e.getKey(), description(d, e.getValue()));
+ }
+ }
return result;
}
+ private static final String description(Document d, String tagname) {
+ if (tagname == null || tagname.length() == 0) {
+ tagname = d.source.toTokens();
+ }
+ StringBuilder sb = new StringBuilder(60);
+ sb.append(d.dc_title());
+ if (!d.dc_description().equals(d.dc_title()) && sb.length() < Request.descrLength - tagname.length()) {
+ sb.append(' ');
+ sb.append(d.dc_description());
+ }
+ if (sb.length() < Request.descrLength - tagname.length()) {
+ sb.append(' ');
+ sb.append(d.dc_subject(','));
+ }
+ if (tagname.length() > 0) {
+ if (sb.length() > Request.descrLength - tagname.length() - 3) {
+ // cut this off because otherwise the tagname is lost.
+ sb.setLength(Request.descrLength - tagname.length() - 3);
+ }
+ sb.append(" - ");
+ sb.append(tagname);
+ }
+ return sb.toString().trim();
+ }
+
}
diff --git a/source/net/yacy/document/TextParser.java b/source/net/yacy/document/TextParser.java
index a6ab4c812..958d7d943 100644
--- a/source/net/yacy/document/TextParser.java
+++ b/source/net/yacy/document/TextParser.java
@@ -31,12 +31,11 @@ import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
-import java.util.Map.Entry;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
-import net.yacy.cora.document.Classification;
import net.yacy.cora.document.MultiProtocolURI;
+import net.yacy.cora.document.UTF8;
import net.yacy.document.parser.bzipParser;
import net.yacy.document.parser.csvParser;
import net.yacy.document.parser.docParser;
@@ -60,7 +59,6 @@ import net.yacy.document.parser.vcfParser;
import net.yacy.document.parser.vsdParser;
import net.yacy.document.parser.xlsParser;
import net.yacy.document.parser.zipParser;
-import net.yacy.document.parser.html.ImageEntry;
import net.yacy.document.parser.images.genericImageParser;
import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.util.FileUtils;
@@ -144,8 +142,7 @@ public final class TextParser {
final MultiProtocolURI location,
final String mimeType,
final String charset,
- final File sourceFile,
- final boolean multipleVirtualDocs
+ final File sourceFile
) throws InterruptedException, Parser.Failure {
BufferedInputStream sourceStream = null;
@@ -158,7 +155,7 @@ public final class TextParser {
throw new Parser.Failure(errorMsg, location);
}
sourceStream = new BufferedInputStream(new FileInputStream(sourceFile));
- docs = parseSource(location, mimeType, charset, sourceFile.length(), sourceStream, multipleVirtualDocs);
+ docs = parseSource(location, mimeType, charset, sourceFile.length(), sourceStream);
} catch (final Exception e) {
if (e instanceof InterruptedException) throw (InterruptedException) e;
if (e instanceof Parser.Failure) throw (Parser.Failure) e;
@@ -176,8 +173,7 @@ public final class TextParser {
final MultiProtocolURI location,
String mimeType,
final String charset,
- final byte[] content,
- final boolean multipleVirtualDocs
+ final byte[] content
) throws Parser.Failure {
if (log.isFine()) log.logFine("Parsing '" + location + "' from byte-array");
mimeType = normalizeMimeType(mimeType);
@@ -193,9 +189,6 @@ public final class TextParser {
Document[] docs = parseSource(location, mimeType, idioms, charset, content);
- // finally enrich the docs set with virtual docs from the enclosed documents
- if (multipleVirtualDocs && docs.length == 1) docs = virtualDocs(docs[0]);
-
return docs;
}
@@ -204,8 +197,7 @@ public final class TextParser {
String mimeType,
final String charset,
final long contentLength,
- final InputStream sourceStream,
- final boolean multipleVirtualDocs
+ final InputStream sourceStream
) throws Parser.Failure {
if (log.isFine()) log.logFine("Parsing '" + location + "' from stream");
mimeType = normalizeMimeType(mimeType);
@@ -236,9 +228,6 @@ public final class TextParser {
}
Document[] docs = parseSource(location, mimeType, idioms, charset, b);
- // finally enrich the docs set with virtual docs from the enclosed documents
- if (multipleVirtualDocs && docs.length == 1) docs = virtualDocs(docs[0]);
-
return docs;
}
@@ -281,7 +270,13 @@ public final class TextParser {
final HashMap<Parser, Parser.Failure> failedParser = new HashMap<Parser, Parser.Failure>();
if (MemoryControl.request(sourceArray.length * 6, false)) {
for (final Parser parser: parsers) {
- ByteArrayInputStream bis = new ByteArrayInputStream(sourceArray);
+ ByteArrayInputStream bis;
+ if (mimeType.equals("text/plain") && parser.getName().equals("HTML Parser")) {
+ // a hack to simulate html files .. is needed for NOLOAD queues. This throws their data into virtual text/plain messages.
+ bis = new ByteArrayInputStream(UTF8.getBytes("<html><body>" + UTF8.String(sourceArray) + "</body></html>"));
+ } else {
+ bis = new ByteArrayInputStream(sourceArray);
+ }
try {
docs = parser.parse(location, mimeType, documentCharset, bis);
} catch (final Parser.Failure e) {
@@ -477,73 +472,4 @@ public final class TextParser {
if (grant) denyExtensionx.remove(ext); else denyExtensionx.put(ext, v);
}
- /**
- * produce virtual documents for each of the link that is contained in the document
- * @param document
- * @return
- */
- public static Document[] virtualDocs(final Document document) {
-
- final ArrayList<Document> docs = new ArrayList<Document>();
- docs.add(document);
- for (final Map.Entry<MultiProtocolURI, String> link: document.getApplinks().entrySet()) {
- docs.add(genLinkDocs("application", link.getKey(), link.getValue(), document.getContentLanguages()));
- }
- for (final Map.Entry<MultiProtocolURI, String> link: document.getAudiolinks().entrySet()) {
- docs.add(genLinkDocs("audio", link.getKey(), link.getValue(), document.getContentLanguages()));
- }
- for (final Map.Entry<MultiProtocolURI, String> link: document.getVideolinks().entrySet()) {
- docs.add(genLinkDocs("video", link.getKey(), link.getValue(), document.getContentLanguages()));
- }
- for (final Entry<MultiProtocolURI, ImageEntry> link: document.getImages().entrySet()) {
- docs.add(genImageDocs(link.getValue()));
- }
-
- // finally return the list of documents
- return docs.toArray(new Document[docs.size()]);
- }
-
- private final static Document genLinkDocs(final String type, final MultiProtocolURI uri, final String descr, final Set<String> contentLanguages) {
- //System.out.println("HTMLPARSER-LINK " + type + ": " + uri.toNormalform(true, false) + " / " + descr);
- return new Document(
- uri,
- Classification.ext2mime(uri.getFileExtension()),
- "UTF-8",
- null,
- contentLanguages,
- null,
- descr,
- "",
- "",
- new String[]{descr},
- type,
- 0.0f, 0.0f,
- uri.toNormalform(false, false),
- null,
- null,
- null,
- false);
- }
-
- private final static Document genImageDocs(final ImageEntry img) {
- //System.out.println("HTMLPARSER-LINK image: " + img.url().toNormalform(true, false) + " / " + img.alt());
- return new Document(
- img.url(),
- Classification.ext2mime(img.url().getFileExtension()),
- "UTF-8",
- null,
- null,
- null,
- img.alt(),
- "",
- "",
- new String[]{img.alt()},
- "image",
- 0.0f, 0.0f,
- img.url().toNormalform(false, false),
- null,
- null,
- null,
- false);
- }
}
diff --git a/source/net/yacy/document/importer/MediawikiImporter.java b/source/net/yacy/document/importer/MediawikiImporter.java
index 432df2181..d45657a19 100644
--- a/source/net/yacy/document/importer/MediawikiImporter.java
+++ b/source/net/yacy/document/importer/MediawikiImporter.java
@@ -101,14 +101,17 @@ public class MediawikiImporter extends Thread implements Importer {
this.urlStub = null;
}
+ @Override
public int count() {
return this.count;
}
+ @Override
public String source() {
return this.sourcefile.getAbsolutePath();
}
+ @Override
public String status() {
return "";
}
@@ -117,6 +120,7 @@ public class MediawikiImporter extends Thread implements Importer {
* return the number of articles per second
* @return
*/
+ @Override
public int speed() {
if (this.count == 0) return 0;
return (int) (this.count / Math.max(1L, runningTime() ));
@@ -126,14 +130,17 @@ public class MediawikiImporter extends Thread implements Importer {
* return the remaining seconds for the completion of all records in milliseconds
* @return
*/
+ @Override
public long remainingTime() {
return Math.max(0, this.approxdocs - this.count) / Math.max(1, speed() );
}
+ @Override
public long runningTime() {
return (System.currentTimeMillis() - this.start) / 1000L;
}
+ @Override
public void run() {
this.start = System.currentTimeMillis();
try {
@@ -287,6 +294,7 @@ public class MediawikiImporter extends Thread implements Importer {
this.mediawikixml = mediawikixml;
}
+ @Override
public void run() {
try {
createIndex(this.mediawikixml);
@@ -365,6 +373,7 @@ public class MediawikiImporter extends Thread implements Importer {
}
}
+ @Override
public Integer call() {
wikisourcerecord r;
try {
@@ -412,6 +421,7 @@ public class MediawikiImporter extends Thread implements Importer {
}
}
+ @Override
public Integer call() {
wikisourcerecord r;
wikiraw c;
@@ -505,7 +515,7 @@ public class MediawikiImporter extends Thread implements Importer {
public void genDocument() throws Parser.Failure {
try {
this.url = new DigestURI(this.urlStub + this.title);
- final Document[] parsed = TextParser.parseSource(this.url, "text/html", "UTF-8", UTF8.getBytes(this.html), false);
+ final Document[] parsed = TextParser.parseSource(this.url, "text/html", "UTF-8", UTF8.getBytes(this.html));
this.document = Document.mergeDocuments(this.url, "text/html", parsed);
// the wiki parser is not able to find the proper title in the source text, so it must be set here
this.document.setTitle(this.title);
@@ -626,6 +636,7 @@ public class MediawikiImporter extends Thread implements Importer {
this.out = out;
}
+ @Override
public Integer call() {
wikiparserrecord record;
try {
@@ -682,6 +693,7 @@ public class MediawikiImporter extends Thread implements Importer {
this.outputfilename = null;
}
+ @Override
public Integer call() {
wikiparserrecord record;
try {
diff --git a/source/net/yacy/document/parser/bzipParser.java b/source/net/yacy/document/parser/bzipParser.java
index e2dba1b6c..8e6d3216d 100644
--- a/source/net/yacy/document/parser/bzipParser.java
+++ b/source/net/yacy/document/parser/bzipParser.java
@@ -55,6 +55,7 @@ public class bzipParser extends AbstractParser implements Parser {
this.SUPPORTED_MIME_TYPES.add("application/x-stuffit");
}
+ @Override
public Document[] parse(final MultiProtocolURI location, final String mimeType,
final String charset, final InputStream source)
throws Parser.Failure, InterruptedException {
@@ -93,7 +94,7 @@ public class bzipParser extends AbstractParser implements Parser {
out.close();
// creating a new parser class to parse the unzipped content
- docs = TextParser.parseSource(location, null, null, tempFile, false);
+ docs = TextParser.parseSource(location, null, null, tempFile);
} catch (final Exception e) {
if (e instanceof InterruptedException) throw (InterruptedException) e;
if (e instanceof Parser.Failure) throw (Parser.Failure) e;
diff --git a/source/net/yacy/document/parser/gzipParser.java b/source/net/yacy/document/parser/gzipParser.java
index 0680b9e22..db4097c5f 100644
--- a/source/net/yacy/document/parser/gzipParser.java
+++ b/source/net/yacy/document/parser/gzipParser.java
@@ -54,6 +54,7 @@ public class gzipParser extends AbstractParser implements Parser {
this.SUPPORTED_MIME_TYPES.add("gzip/document");
}
+ @Override
public Document[] parse(final MultiProtocolURI location, final String mimeType, final String charset, final InputStream source) throws Parser.Failure, InterruptedException {
File tempFile = null;
@@ -78,7 +79,7 @@ public class gzipParser extends AbstractParser implements Parser {
out.close();
// creating a new parser class to parse the unzipped content
- docs = TextParser.parseSource(location,null,null,tempFile, false);
+ docs = TextParser.parseSource(location,null,null,tempFile);
} catch (final Exception e) {
if (e instanceof InterruptedException) throw (InterruptedException) e;
if (e instanceof Parser.Failure) throw (Parser.Failure) e;
diff --git a/source/net/yacy/document/parser/sevenzipParser.java b/source/net/yacy/document/parser/sevenzipParser.java
index d42c625c9..041b428f6 100644
--- a/source/net/yacy/document/parser/sevenzipParser.java
+++ b/source/net/yacy/document/parser/sevenzipParser.java
@@ -99,6 +99,7 @@ public class sevenzipParser extends AbstractParser implements Parser {
}
}
+ @Override
public Document[] parse(final MultiProtocolURI location, final String mimeType, final String charset,
final InputStream source) throws Parser.Failure, InterruptedException {
try {
@@ -166,7 +167,7 @@ public class sevenzipParser extends AbstractParser implements Parser {
// below for reversion of the effects
final MultiProtocolURI url = MultiProtocolURI.newURL(this.doc.dc_source(), this.prefix + "/" + super.filePath);
final String mime = TextParser.mimeOf(super.filePath.substring(super.filePath.lastIndexOf('.') + 1));
- theDocs = TextParser.parseSource(url, mime, null, this.cfos.toByteArray(), false);
+ theDocs = TextParser.parseSource(url, mime, null, this.cfos.toByteArray());
this.doc.addSubDocuments(theDocs);
}
diff --git a/source/net/yacy/document/parser/tarParser.java b/source/net/yacy/document/parser/tarParser.java
index df7f58d12..7deb195e4 100644
--- a/source/net/yacy/document/parser/tarParser.java
+++ b/source/net/yacy/document/parser/tarParser.java
@@ -49,7 +49,7 @@ import org.apache.tools.tar.TarInputStream;
public class tarParser extends AbstractParser implements Parser {
private final static String MAGIC = "ustar"; // A magic for a tar archive, may appear at #101h-#105
-
+
public tarParser() {
super("Tape Archive File Parser");
this.SUPPORTED_EXTENSIONS.add("tar");
@@ -59,6 +59,7 @@ public class tarParser extends AbstractParser implements Parser {
this.SUPPORTED_MIME_TYPES.add("multipart/x-tar");
}
+ @Override
public Document[] parse(final MultiProtocolURI url, final String mimeType, final String charset, InputStream source) throws Parser.Failure, InterruptedException {
final List<Document> docacc = new ArrayList<Document>();
@@ -88,7 +89,7 @@ public class tarParser extends AbstractParser implements Parser {
try {
tmp = FileUtils.createTempFile(this.getClass(), name);
FileUtils.copy(tis, tmp, entry.getSize());
- subDocs = TextParser.parseSource(MultiProtocolURI.newURL(url,"#" + name), mime, null, tmp, false);
+ subDocs = TextParser.parseSource(MultiProtocolURI.newURL(url,"#" + name), mime, null, tmp);
if (subDocs == null) continue;
for (final Document d: subDocs) docacc.add(d);
} catch (final Parser.Failure e) {
@@ -103,7 +104,7 @@ public class tarParser extends AbstractParser implements Parser {
}
return docacc.toArray(new Document[docacc.size()]);
}
-
+
public final static boolean isTar(File f) {
if (!f.exists() || f.length() < 0x105) return false;
try {
diff --git a/source/net/yacy/document/parser/zipParser.java b/source/net/yacy/document/parser/zipParser.java
index b216fe099..d1d6277cf 100644
--- a/source/net/yacy/document/parser/zipParser.java
+++ b/source/net/yacy/document/parser/zipParser.java
@@ -59,6 +59,7 @@ public class zipParser extends AbstractParser implements Parser {
this.SUPPORTED_MIME_TYPES.add("application/vnd.android.package-archive");
}
+ @Override
public Document[] parse(final MultiProtocolURI url, final String mimeType,
final String charset, final InputStream source)
throws Parser.Failure, InterruptedException {
@@ -87,7 +88,7 @@ public class zipParser extends AbstractParser implements Parser {
FileUtils.copy(zis, tmp, entry.getSize());
final MultiProtocolURI virtualURL = MultiProtocolURI.newURL(url, "#" + name);
//this.log.logInfo("ZIP file parser: " + virtualURL.toNormalform(false, false));
- docs = TextParser.parseSource(virtualURL, mime, null, tmp, false);
+ docs = TextParser.parseSource(virtualURL, mime, null, tmp);
if (docs == null) continue;
for (final Document d: docs) docacc.add(d);
} catch (final Parser.Failure e) {
diff --git a/source/net/yacy/repository/LoaderDispatcher.java b/source/net/yacy/repository/LoaderDispatcher.java
index 0a1e1a975..254f0c66a 100644
--- a/source/net/yacy/repository/LoaderDispatcher.java
+++ b/source/net/yacy/repository/LoaderDispatcher.java
@@ -386,7 +386,7 @@ public final class LoaderDispatcher {
final String supportError = TextParser.supports(url, responseHeader.mime());
if (supportError != null) throw new IOException("no parser support: " + supportError);
try {
- documents = TextParser.parseSource(url, responseHeader.mime(), responseHeader.getCharacterEncoding(), response.getContent(), false);
+ documents = TextParser.parseSource(url, responseHeader.mime(), responseHeader.getCharacterEncoding(), response.getContent());
if (documents == null) throw new IOException("document == null");
} catch (final Exception e) {
throw new IOException("parser error: " + e.getMessage());
diff --git a/source/net/yacy/search/Switchboard.java b/source/net/yacy/search/Switchboard.java
index ce4252d9b..7acda2996 100644
--- a/source/net/yacy/search/Switchboard.java
+++ b/source/net/yacy/search/Switchboard.java
@@ -2329,8 +2329,7 @@ public final class Switchboard extends serverSwitch
response.url(),
response.getMimeType(),
response.getCharacterEncoding(),
- response.getContent(),
- response.profile().directDocByURL());
+ response.getContent());
if ( documents == null ) {
throw new Parser.Failure("Parser returned null.", response.url());
}
diff --git a/source/net/yacy/search/index/DocumentIndex.java b/source/net/yacy/search/index/DocumentIndex.java
index e74df82a8..e29397e4f 100644
--- a/source/net/yacy/search/index/DocumentIndex.java
+++ b/source/net/yacy/search/index/DocumentIndex.java
@@ -150,7 +150,7 @@ public class DocumentIndex extends Segment
length = -1;
}
try {
- documents = TextParser.parseSource(url, null, null, length, url.getInputStream(null, -1), true);
+ documents = TextParser.parseSource(url, null, null, length, url.getInputStream(null, -1));
} catch ( final Exception e ) {
throw new IOException("cannot parse " + url.toString() + ": " + e.getMessage());
}