From 87087f12fe9544b28ac07198112e454ccc736aa6 Mon Sep 17 00:00:00 2001
From: orbiter
Date: Thu, 17 Jun 2010 11:59:40 +0000
Subject: [PATCH] - scanned the remote search process and enhanced some data
 structures and synchronization here and there

- removed the concurrency overhead for small numbers of index normalizations,
  such as those that occur during remote search
- removed the 'load only parseable' constraint for the snippet fetch: some
  resources carry no URL file extension and were therefore treated as neither
  parseable nor searchable, even though they may turn out to be parseable
  after loading, once their MIME type is known
- this partly fixes some of the problems described in
  http://forum.yacy-websuche.de/viewtopic.php?p=20300#p20300 but more changes
  are necessary to get all expected search results

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6926 6c8d7289-2bf4-0310-a012-ef5d649a1542
---
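Notes:

The removed 'load only parseable' pre-filter rejected a URL before any HTTP
request was made whenever TextParser.supportsExtension() failed, so a resource
without a URL file extension could never become a snippet source. The decision
is now deferred until the response header, and with it the MIME type, is
available. A minimal sketch of such a deferred check follows; it assumes that
LoaderDispatcher.request() is accessible to the caller and that Response
exposes the header MIME type as getMimeType(). The class SnippetSource itself
is hypothetical and not part of this patch:

    import java.io.IOException;

    import de.anomic.crawler.retrieval.Response;
    import net.yacy.document.TextParser;
    import net.yacy.kelondro.data.meta.DigestURI;
    import net.yacy.repository.LoaderDispatcher;

    public class SnippetSource {

        public static Response fetch(final LoaderDispatcher loader, final DigestURI url, final long maxFileSize) throws IOException {
            // load unconditionally: an extension-less URL is not rejected any more
            final Response response = loader.load(loader.request(url, true, false), maxFileSize);
            if (response == null) return null; // no content, e.g. caused by cache policy
            // the same TextParser check that this patch removes from HTTPLoader,
            // but applied only after the MIME type is known
            final String supportError = TextParser.supports(url, response.getMimeType());
            if (supportError != null) return null; // really not parseable, no snippet
            return response;
        }
    }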
 source/de/anomic/crawler/CrawlQueues.java     |  2 +-
 .../anomic/crawler/retrieval/HTTPLoader.java  | 27 +------
 source/de/anomic/search/Switchboard.java      |  4 +-
 source/de/anomic/yacy/yacyClient.java         | 78 +++++++++----------
 source/de/anomic/yacy/yacySearch.java         | 21 +++--
 .../net/yacy/document/parser/htmlParser.java  |  2 +
 .../kelondro/data/word/WordReferenceVars.java | 10 +++
 .../net/yacy/repository/LoaderDispatcher.java | 14 ++--
 8 files changed, 74 insertions(+), 84 deletions(-)

diff --git a/source/de/anomic/crawler/CrawlQueues.java b/source/de/anomic/crawler/CrawlQueues.java
index aa1abf100..827db3285 100644
--- a/source/de/anomic/crawler/CrawlQueues.java
+++ b/source/de/anomic/crawler/CrawlQueues.java
@@ -563,7 +563,7 @@ public class CrawlQueues {
             try {
                 request.setStatus("loading", WorkflowJob.STATUS_RUNNING);
                 final long maxFileSize = sb.getConfigLong("crawler.http.maxFileSize", HTTPLoader.DEFAULT_MAXFILESIZE);
-                Response response = sb.loader.load(request, true, maxFileSize);
+                Response response = sb.loader.load(request, maxFileSize);
                 if (response == null) {
                     request.setStatus("error", WorkflowJob.STATUS_FINISHED);
                     if (log.isFine()) log.logFine("problem loading " + request.url().toString() + ": no content (possibly caused by cache policy)");
diff --git a/source/de/anomic/crawler/retrieval/HTTPLoader.java b/source/de/anomic/crawler/retrieval/HTTPLoader.java
index 5d2917e0e..32e21bb88 100644
--- a/source/de/anomic/crawler/retrieval/HTTPLoader.java
+++ b/source/de/anomic/crawler/retrieval/HTTPLoader.java
@@ -28,7 +28,6 @@ import java.io.IOException;
 import java.util.Date;
 
 import net.yacy.cora.document.MultiProtocolURI;
-import net.yacy.document.TextParser;
 import net.yacy.kelondro.data.meta.DigestURI;
 import net.yacy.kelondro.logging.Log;
 import net.yacy.repository.Blacklist;
@@ -75,14 +74,14 @@ public final class HTTPLoader {
         this.socketTimeout = (int) sb.getConfigLong("crawler.clientTimeout", 10000);
     }
 
-    public Response load(final Request entry, final boolean acceptOnlyParseable, long maxFileSize) throws IOException {
+    public Response load(final Request entry, long maxFileSize) throws IOException {
         long start = System.currentTimeMillis();
-        Response doc = load(entry, acceptOnlyParseable, DEFAULT_CRAWLING_RETRY_COUNT, maxFileSize);
+        Response doc = load(entry, DEFAULT_CRAWLING_RETRY_COUNT, maxFileSize);
         Latency.update(entry.url(), System.currentTimeMillis() - start);
         return doc;
     }
 
-    private Response load(final Request request, boolean acceptOnlyParseable, final int retryCount, final long maxFileSize) throws IOException {
+    private Response load(final Request request, final int retryCount, final long maxFileSize) throws IOException {
         
         if (retryCount < 0) {
             sb.crawlQueues.errorURL.push(request, sb.peers.mySeed().hash.getBytes(), new Date(), 1, "redirection counter exceeded");
@@ -96,15 +95,6 @@ public final class HTTPLoader {
         final boolean ssl = request.url().getProtocol().equals("https");
         if (port < 0) port = (ssl) ? 443 : 80;
         
-        // if not the right file type then reject file
-        if (acceptOnlyParseable) {
-            String supportError = TextParser.supportsExtension(request.url());
-            if (supportError != null) {
-                sb.crawlQueues.errorURL.push(request, sb.peers.mySeed().hash.getBytes(), new Date(), 1, supportError);
-                throw new IOException("REJECTED WRONG EXTENSION TYPE: " + supportError);
-            }
-        }
-        
         // check if url is in blacklist
         final String hostlow = host.toLowerCase();
         if (Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_CRAWLER, hostlow, path)) {
@@ -138,15 +128,6 @@ public final class HTTPLoader {
         if (res.getStatusCode() == 200 || res.getStatusCode() == 203) {
             // the transfer is ok
             
-            if (acceptOnlyParseable) {
-                // if the response has not the right file type then reject file
-                String supportError = TextParser.supports(request.url(), res.getResponseHeader().mime());
-                if (supportError != null) {
-                    sb.crawlQueues.errorURL.push(request, sb.peers.mySeed().hash.getBytes(), new Date(), 1, supportError);
-                    throw new IOException("REJECTED WRONG MIME TYPE, mime = " + res.getResponseHeader().mime() + ": " + supportError);
-                }
-            }
-            
             // we write the new cache entry to file system directly
             res.setAccountingName("CRAWLER");
             final byte[] responseBody = res.getData();
@@ -202,7 +183,7 @@ public final class HTTPLoader {
                 
                 // retry crawling with new url
                 request.redirectURL(redirectionUrl);
-                return load(request, acceptOnlyParseable, retryCount - 1, maxFileSize);
+                return load(request, retryCount - 1, maxFileSize);
             }
         } else {
             // if the response has not the right response type then reject file
diff --git a/source/de/anomic/search/Switchboard.java b/source/de/anomic/search/Switchboard.java
index 08a48caf0..5341253b0 100644
--- a/source/de/anomic/search/Switchboard.java
+++ b/source/de/anomic/search/Switchboard.java
@@ -367,7 +367,7 @@ public final class Switchboard extends serverSwitch {
                 indexSegments.segment(Segments.Process.LOCALCRAWLING),
                 peers,
                 true,
-                30000);
+                10000);
         
         // set up local robots.txt
         this.robotstxtConfig = RobotsTxtConfig.init(this);
@@ -894,7 +894,7 @@ public final class Switchboard extends serverSwitch {
                 indexSegments.segment(Segments.Process.LOCALCRAWLING),
                 peers,
                 true,
-                30000);
+                10000);
         
         // create new web structure
         this.webStructure = new WebStructureGraph(log, rankingPath, "LOCAL/010_cr/", getConfig("CRDist0Path", CRDistribution.CR_OWN), new File(queuesRoot, "webStructure.map"));
diff --git a/source/de/anomic/yacy/yacyClient.java b/source/de/anomic/yacy/yacyClient.java
index 14a3e6c26..70b46b367 100644
--- a/source/de/anomic/yacy/yacyClient.java
+++ b/source/de/anomic/yacy/yacyClient.java
@@ -368,7 +368,7 @@ public final class yacyClient {
     }
 
     @SuppressWarnings("unchecked")
-    public static String[] search(
+    public static int search(
             final yacySeed mySeed,
             final String wordhashes,
             final String excludehashes,
@@ -392,7 +392,6 @@ public final class yacyClient {
             final Bitfield constraint
     ) {
         // send a search request to peer with remote Hash
-        // this mainly converts the words into word hashes
 
         // INPUT:
         // iam        : complete seed of the requesting peer
@@ -437,7 +436,7 @@ public final class yacyClient {
        } catch (final IOException e) {
            yacyCore.log.logInfo("SEARCH failed, Peer: " + target.hash + ":" + target.getName() + " (" + e.getMessage() + "), score=" + target.selectscore);
            //yacyCore.peerActions.peerDeparture(target, "search request to peer created io exception: " + e.getMessage());
-           return null;
+           return -1;
        }
 
        if (result == null || result.isEmpty()) {
@@ -447,7 +446,7 @@ public final class yacyClient {
                    + target.getName()
                    + " (zero response), score="
                    + target.selectscore);
-           return null;
+           return -1;
        }
 
        // compute all computation times
@@ -468,14 +467,14 @@ public final class yacyClient {
 
        // now create a plasmaIndex out of this result
        // System.out.println("yacyClient: " + ((urlhashes.length() == 0) ? "primary" : "secondary")+ " search result = " + result.toString()); // debug
-       int results = 0, joincount = 0;
+       int urlcount = 0, joincount = 0;
        try {
-           results = Integer.parseInt(result.get("count"));
-           joincount = Integer.parseInt(result.get("joincount"));
+           joincount = Integer.parseInt(result.get("joincount")); // the complete number of hits at remote site
+           urlcount = Integer.parseInt(result.get("count")); // the number of hits that are returned in the result list
        } catch (final NumberFormatException e) {
            yacyCore.log.logInfo("SEARCH failed FROM " + target.hash + ":" + target.getName() + ", wrong output format: " + e.getMessage());
            //yacyCore.peerActions.peerDeparture(target, "search request to peer created number format exception");
-           return null;
+           return -1;
        }
 
        // System.out.println("***result count " + results);
@@ -488,14 +487,13 @@ public final class yacyClient {
                container[i] = ReferenceContainer.emptyContainer(Segment.wordReferenceFactory, wordhashes.substring(i * Word.commonHashLength, (i + 1) * Word.commonHashLength).getBytes(), count);
            } catch (RowSpaceExceededException e) {
                Log.logException(e);
-               return null;
+               return -1;
            }
        }
 
        // insert results to containers
        URIMetadataRow urlEntry;
-       final String[] urls = new String[results];
-       for (int n = 0; n < results; n++) {
+       for (int n = 0; n < urlcount; n++) {
            // get one single search result
            urlEntry = URIMetadataRow.importEntry(result.get("resource" + n));
            if (urlEntry == null) continue;
@@ -504,26 +502,25 @@ public final class yacyClient {
            final URIMetadataRow.Components metadata = urlEntry.metadata();
            if (metadata == null) continue;
            if (blacklist.isListed(Blacklist.BLACKLIST_SEARCH, metadata.url())) {
-               if (yacyCore.log.isInfo()) yacyCore.log.logInfo("remote search (client): filtered blacklisted url " + metadata.url() + " from peer " + target.getName());
+               if (yacyCore.log.isInfo()) yacyCore.log.logInfo("remote search: filtered blacklisted url " + metadata.url() + " from peer " + target.getName());
                continue; // block with backlist
            }
 
            final String urlRejectReason = Switchboard.getSwitchboard().crawlStacker.urlInAcceptedDomain(metadata.url());
            if (urlRejectReason != null) {
-               if (yacyCore.log.isInfo()) yacyCore.log.logInfo("remote search (client): rejected url '" + metadata.url() + "' (" + urlRejectReason + ") from peer " + target.getName());
+               if (yacyCore.log.isInfo()) yacyCore.log.logInfo("remote search: rejected url '" + metadata.url() + "' (" + urlRejectReason + ") from peer " + target.getName());
                continue; // reject url outside of our domain
            }
 
            // save the url entry
-           Reference entry;
-           if (urlEntry.word() == null) {
-               if (yacyCore.log.isWarning()) yacyCore.log.logWarning("remote search (client): no word attached from peer " + target.getName() + ", version " + target.getVersion());
+           Reference entry = urlEntry.word();
+           if (entry == null) {
+               if (yacyCore.log.isWarning()) yacyCore.log.logWarning("remote search: no word attached from peer " + target.getName() + ", version " + target.getVersion());
                continue; // no word attached
            }
 
            // the search-result-url transports all the attributes of word indexes
-           entry = urlEntry.word();
            if (!Base64Order.enhancedCoder.equal(entry.metadataHash(), urlEntry.hash())) {
-               if (yacyCore.log.isInfo()) yacyCore.log.logInfo("remote search (client): url-hash " + new String(urlEntry.hash()) + " does not belong to word-attached-hash " + new String(entry.metadataHash()) + "; url = " + metadata.url() + " from peer " + target.getName());
+               yacyCore.log.logInfo("remote search: url-hash " + new String(urlEntry.hash()) + " does not belong to word-attached-hash " + new String(entry.metadataHash()) + "; url = " + metadata.url() + " from peer " + target.getName());
                continue; // spammed
            }
@@ -554,23 +551,30 @@ public final class yacyClient {
                    break;
                }
            }
-           
-           // store url hash for statistics
-           urls[n] = new String(urlEntry.hash());
        }
 
        // store remote result to local result container
        synchronized (containerCache) {
            // insert one container into the search result buffer
-           containerCache.add(container[0], false, joincount); // one is enough
-           
-           // integrate remote topwords
-           final String references = result.get("references");
-           yacyCore.log.logInfo("remote search (client): peer " + target.getName() + " sent references " + references);
-           if (references != null) {
-               // add references twice, so they can be counted (must have at least 2 entries)
-               containerCache.addTopic(references.split(","));
-               containerCache.addTopic(references.split(","));
+           containerCache.add(container[0], false, joincount); // one is enough, only the references are used, not the word
+       }
+       // insert the containers to the index
+       for (ReferenceContainer<WordReference> c: container) try {
+           indexSegment.termIndex().add(c);
+       } catch (Exception e) {
+           Log.logException(e);
+       }
+       yacyCore.log.logInfo("remote search: peer " + target.getName() + " sent " + container[0].size() + "/" + joincount + " references for joined word queries");
+
+       // integrate remote top-words/topics
+       final String references = result.get("references");
+       if (references != null && references.length() > 0) {
+           yacyCore.log.logInfo("remote search: peer " + target.getName() + " sent topics: " + references);
+           // add references twice, so they can be counted (must have at least 2 entries)
+           String[] rs = references.split(",");
+           synchronized (containerCache) {
+               containerCache.addTopic(rs);
+               containerCache.addTopic(rs);
            }
        }
 
@@ -592,7 +596,7 @@ public final class yacyClient {
                        ci = new ByteBuffer(entry.getValue().getBytes("UTF-8"));
                    } catch (UnsupportedEncodingException e) {
                        Log.logException(e);
-                       return null;
+                       return -1;
                    }
                    //System.out.println("DEBUG-ABSTRACTFETCH: for word hash " + wordhash + " received " + ci.toString());
                    ReferenceContainer.decompressIndex(singleAbstract, ci, target.hash);
@@ -600,14 +604,8 @@ public final class yacyClient {
                    }
                }
            }
+           if (abstractCache.size() > 0) yacyCore.log.logInfo("remote search: peer " + target.getName() + " sent " + abstractCache.size() + " index abstracts");
        }
-       
-       // insert the containers to the index
-       for (int m = 0; m < words; m++) try {
-           indexSegment.termIndex().add(container[m]);
-       } catch (Exception e) {
-           Log.logException(e);
-       }
 
        // generate statistics
        long searchtime;
@@ -617,7 +615,7 @@ public final class yacyClient {
            searchtime = totalrequesttime;
        }
        if (yacyCore.log.isFine()) yacyCore.log.logFine("SEARCH "
-               + results
+               + urlcount
                + " URLS FROM "
                + target.hash
                + ":"
@@ -627,7 +625,7 @@ public final class yacyClient {
                + ", searchtime=" + searchtime
                + ", netdelay=" + (totalrequesttime - searchtime)
                + ", references=" + result.get("references"));
-       return urls;
+       return urlcount;
    }
 
    public static Map<String, String> permissionMessage(final yacySeedDB seedDB, final String targetHash) {
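Note on the yacyClient.search() change above: the method now returns the
number of URL references accepted from the remote peer, or -1 on failure,
instead of a String[] of URL hashes; the reference containers are added to the
local index and to the containerCache inside search() itself. A minimal sketch
of a caller under the new contract (the argument list is elided; the error
handling mirrors yacySearch.run() in the next diff):

    // >= 0 : number of url references accepted from the remote peer
    //   -1 : request failed (io exception, empty or malformed response)
    final int found = yacyClient.search(/* unchanged argument list */);
    if (found >= 0) {
        peers.mySeed().incRI(found); // received-index statistics, as in yacySearch.run()
        peers.mySeed().incRU(found); // received-url statistics
    } else {
        yacyCore.log.logInfo("REMOTE SEARCH - no answer from remote peer " + targetPeer.hash + ":" + targetPeer.getName());
    }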
diff --git a/source/de/anomic/yacy/yacySearch.java b/source/de/anomic/yacy/yacySearch.java
index 1c764e7ff..19d10ca96 100644
--- a/source/de/anomic/yacy/yacySearch.java
+++ b/source/de/anomic/yacy/yacySearch.java
@@ -56,7 +56,7 @@ public class yacySearch extends Thread {
     final private Map<byte[], Map<byte[], String>> abstractCache;
     final private Blacklist blacklist;
     final private yacySeed targetPeer;
-    private String[] urls;
+    private int urls;
     private final int count, maxDistance;
     final private RankingProfile rankingProfile;
     final private Pattern prefer, filter;
@@ -103,7 +103,7 @@ public class yacySearch extends Thread {
         this.abstractCache = abstractCache;
         this.blacklist = blacklist;
         this.targetPeer = targetPeer;
-        this.urls = null;
+        this.urls = -1;
         this.count = count;
         this.maxDistance = maxDistance;
         this.rankingProfile = rankingProfile;
@@ -119,13 +119,11 @@ public class yacySearch extends Thread {
             urls = yacyClient.search(peers.mySeed(), wordhashes, excludehashes, urlhashes, prefer, filter,
                     count, maxDistance, global, partitions, targetPeer, indexSegment, crawlResults, containerCache, abstractCache,
                     blacklist, rankingProfile, constraint);
-            if (urls != null) {
+            if (urls >= 0) {
                 // urls is an array of url hashes. this is only used for log output
-                final StringBuilder urllist = new StringBuilder(this.urls.length * 13);
-                for (int i = 0; i < this.urls.length; i++) urllist.append(this.urls[i]).append(' ');
-                yacyCore.log.logInfo("REMOTE SEARCH - remote peer " + targetPeer.hash + ":" + targetPeer.getName() + " contributed " + urls.length + " links for word hash " + wordhashes + ": " + new String(urllist));
-                peers.mySeed().incRI(urls.length);
-                peers.mySeed().incRU(urls.length);
+                //yacyCore.log.logInfo("REMOTE SEARCH - remote peer " + targetPeer.hash + ":" + targetPeer.getName() + " contributed " + urls.length + " links for word hash " + wordhashes + ": " + new String(urllist));
+                peers.mySeed().incRI(urls);
+                peers.mySeed().incRU(urls);
             } else {
                 yacyCore.log.logInfo("REMOTE SEARCH - no answer from remote peer " + targetPeer.hash + ":" + targetPeer.getName());
             }
@@ -144,7 +142,7 @@ public class yacySearch extends Thread {
     }
 
     public int links() {
-        return this.urls.length;
+        return this.urls;
     }
 
     public int count() {
@@ -218,7 +216,8 @@ public class yacySearch extends Thread {
             seed = dhtEnum.next();
             if (seed == null) continue;
             if (seed.matchPeerTags(wordhashes)) {
-                Log.logInfo("PLASMA", "selectPeers/PeerTags: " + seed.hash + ":" + seed.getName() + ", is specialized peer for " + seed.getPeerTags().toString());
+                String specialized = seed.getPeerTags().toString();
+                if (!specialized.equals("[*]")) Log.logInfo("PLASMA", "selectPeers/PeerTags: " + seed.hash + ":" + seed.getName() + ", is specialized peer for " + specialized);
                 regularSeeds.remove(seed.hash);
                 ranking.deleteScore(seed.hash);
                 matchingSeeds.put(seed.hash, seed);
@@ -335,7 +334,7 @@ public class yacySearch extends Thread {
     public static int collectedLinks(final yacySearch[] searchThreads) {
         int links = 0;
         for (int i = 0; i < searchThreads.length; i++) {
-            if (!(searchThreads[i].isAlive())) links += searchThreads[i].urls.length;
+            if (!(searchThreads[i].isAlive()) && searchThreads[i].urls > 0) links += searchThreads[i].urls;
        }
        return links;
    }
diff --git a/source/net/yacy/document/parser/htmlParser.java b/source/net/yacy/document/parser/htmlParser.java
index 0e325b3bd..1883aab80 100644
--- a/source/net/yacy/document/parser/htmlParser.java
+++ b/source/net/yacy/document/parser/htmlParser.java
@@ -69,6 +69,7 @@ public class htmlParser extends AbstractParser implements Idiom {
         SUPPORTED_EXTENSIONS.add("cfm");
         SUPPORTED_EXTENSIONS.add("asp");
         SUPPORTED_EXTENSIONS.add("aspx");
+        SUPPORTED_EXTENSIONS.add("tex");
         SUPPORTED_EXTENSIONS.add("txt");
         SUPPORTED_EXTENSIONS.add("jsp");
         SUPPORTED_EXTENSIONS.add("pl");
@@ -77,6 +78,7 @@ public class htmlParser extends AbstractParser implements Idiom {
         SUPPORTED_MIME_TYPES.add("text/xhtml+xml");
         SUPPORTED_MIME_TYPES.add("application/xhtml+xml");
         SUPPORTED_MIME_TYPES.add("application/x-httpd-php");
+        SUPPORTED_MIME_TYPES.add("application/x-tex");
         SUPPORTED_MIME_TYPES.add("text/plain");
         SUPPORTED_MIME_TYPES.add("text/sgml");
         SUPPORTED_MIME_TYPES.add("text/csv");
diff --git a/source/net/yacy/kelondro/data/word/WordReferenceVars.java b/source/net/yacy/kelondro/data/word/WordReferenceVars.java
index dd02996de..6634ec560 100644
--- a/source/net/yacy/kelondro/data/word/WordReferenceVars.java
+++ b/source/net/yacy/kelondro/data/word/WordReferenceVars.java
@@ -393,6 +393,16 @@
 
     public static BlockingQueue<WordReferenceVars> transform(ReferenceContainer<WordReference> container) {
         LinkedBlockingQueue<WordReferenceVars> out = new LinkedBlockingQueue<WordReferenceVars>();
+        if (container.size() <= 100) {
+            // transform without concurrency to omit thread creation overhead
+            for (Row.Entry entry: container) try {
+                out.put(new WordReferenceVars(new WordReferenceRow(entry)));
+            } catch (InterruptedException e) {}
+            try {
+                out.put(WordReferenceVars.poison);
+            } catch (InterruptedException e) {}
+            return out;
+        }
         Thread distributor = new TransformDistributor(container, out);
         distributor.start();
 
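Note on the fast path above: a reference container that arrives during a
remote search is usually small, and for such inputs the cost of creating and
scheduling the TransformDistributor thread exceeds the transformation work
itself. The general pattern, as a self-contained sketch; the threshold of 100
is the heuristic chosen by this patch, and the class and element types here
are illustrative, not part of YaCy:

    import java.util.Collection;
    import java.util.concurrent.BlockingQueue;
    import java.util.concurrent.LinkedBlockingQueue;

    public class SmallInputFastPath {

        public static final String POISON = "POISON"; // terminator for consumers

        public static BlockingQueue<String> transform(final Collection<Integer> input) {
            final BlockingQueue<String> out = new LinkedBlockingQueue<String>();
            if (input.size() <= 100) {
                // small input: transform inline, omit thread creation overhead
                try {
                    for (final Integer i : input) out.put(String.valueOf(i));
                    out.put(POISON); // a consumer stops when it takes the poison object
                } catch (final InterruptedException e) {}
                return out;
            }
            // large input: the thread creation overhead amortizes over the work
            new Thread() {
                @Override
                public void run() {
                    try {
                        for (final Integer i : input) out.put(String.valueOf(i));
                        out.put(POISON);
                    } catch (final InterruptedException e) {}
                }
            }.start();
            return out;
        }
    }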
diff --git a/source/net/yacy/repository/LoaderDispatcher.java b/source/net/yacy/repository/LoaderDispatcher.java
index 77730fd32..a0f8e1148 100644
--- a/source/net/yacy/repository/LoaderDispatcher.java
+++ b/source/net/yacy/repository/LoaderDispatcher.java
@@ -104,7 +104,7 @@ public final class LoaderDispatcher {
             final boolean forText,
             final boolean global,
             final long maxFileSize) throws IOException {
-        return load(request(url, forText, global), forText, maxFileSize);
+        return load(request(url, forText, global), maxFileSize);
     }
 
     /**
@@ -122,12 +122,12 @@ public final class LoaderDispatcher {
             final boolean forText,
             final boolean global,
             CrawlProfile.CacheStrategy cacheStratgy,
             long maxFileSize) throws IOException {
-        return load(request(url, forText, global), forText, cacheStratgy, maxFileSize);
+        return load(request(url, forText, global), cacheStratgy, maxFileSize);
     }
 
     public void load(final DigestURI url, CrawlProfile.CacheStrategy cacheStratgy, long maxFileSize, File targetFile) throws IOException {
-        byte[] b = load(request(url, false, true), false, cacheStratgy, maxFileSize).getContent();
+        byte[] b = load(request(url, false, true), cacheStratgy, maxFileSize).getContent();
         if (b == null) throw new IOException("load == null");
         File tmp = new File(targetFile.getAbsolutePath() + ".tmp");
@@ -169,14 +169,14 @@ public final class LoaderDispatcher {
                 0);
     }
 
-    public Response load(final Request request, final boolean acceptOnlyParseable, long maxFileSize) throws IOException {
+    public Response load(final Request request, long maxFileSize) throws IOException {
         CrawlProfile.entry crawlProfile = sb.crawler.profilesActiveCrawls.getEntry(request.profileHandle());
         CrawlProfile.CacheStrategy cacheStrategy = CrawlProfile.CacheStrategy.IFEXIST;
         if (crawlProfile != null) cacheStrategy = crawlProfile.cacheStrategy();
-        return load(request, acceptOnlyParseable, cacheStrategy, maxFileSize);
+        return load(request, cacheStrategy, maxFileSize);
     }
 
-    public Response load(final Request request, final boolean acceptOnlyParseable, CrawlProfile.CacheStrategy cacheStrategy, long maxFileSize) throws IOException {
+    public Response load(final Request request, CrawlProfile.CacheStrategy cacheStrategy, long maxFileSize) throws IOException {
         // get the protocol of the next URL
         final String protocol = request.url().getProtocol();
         final String host = request.url().getHost();
@@ -258,7 +258,7 @@ public final class LoaderDispatcher {
         
         // load resource from the internet
         Response response = null;
-        if ((protocol.equals("http") || (protocol.equals("https")))) response = httpLoader.load(request, acceptOnlyParseable, maxFileSize);
+        if ((protocol.equals("http") || (protocol.equals("https")))) response = httpLoader.load(request, maxFileSize);
         if (protocol.equals("ftp")) response = ftpLoader.load(request, true);
         if (protocol.equals("smb")) response = smbLoader.load(request, true);
         if (protocol.equals("file")) response = fileLoader.load(request, true);
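Note on the LoaderDispatcher API above: callers no longer pass an
acceptOnlyParseable flag; the cache strategy is derived from the request's
crawl profile and parseability is decided after loading. A minimal usage
sketch matching the updated CrawlQueues call site (sb is the Switchboard, as
elsewhere in this patch):

    final long maxFileSize = sb.getConfigLong("crawler.http.maxFileSize", HTTPLoader.DEFAULT_MAXFILESIZE);
    final Response response = sb.loader.load(request, maxFileSize); // cache strategy comes from the crawl profile
    if (response == null) {
        // no content; possibly caused by the cache policy, as CrawlQueues logs above
    }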