*) Bugfix for Entries with null url in GlobalQueue

See: http://www.yacy-forum.de/viewtopic.php?p=12675#12675

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@1069 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
theli 19 years ago
parent 22da652d4f
commit 444a5a9368

@ -100,10 +100,10 @@ public class IndexCreateWWWGlobalQueue_p {
yacySeed initiator;
String profileHandle;
plasmaCrawlProfile.entry profileEntry;
int i;
int i, showNum = 0;
for (i = 0; i < crawlerList.length; i++) {
urle = crawlerList[i];
if (urle != null) {
if ((urle != null)&&(urle.url()!=null)) {
initiator = yacyCore.seedDB.getConnected(urle.initiator());
profileHandle = urle.profileHandle();
profileEntry = (profileHandle == null) ? null : switchboard.profiles.getEntry(profileHandle);
@ -115,9 +115,10 @@ public class IndexCreateWWWGlobalQueue_p {
prop.put("crawler-queue_list_"+i+"_anchor", wikiCode.replaceHTML(urle.name()));
prop.put("crawler-queue_list_"+i+"_url", wikiCode.replaceHTML(urle.url().toString()));
dark = !dark;
showNum++;
}
}
prop.put("crawler-queue_list", i);
prop.put("crawler-queue_list", showNum);
}
// return rewrite properties

@ -61,7 +61,6 @@ import de.anomic.htmlFilter.htmlFilterContentScraper;
import de.anomic.htmlFilter.htmlFilterOutputStream;
import de.anomic.http.httpHeader;
import de.anomic.plasma.plasmaCrawlProfile;
import de.anomic.plasma.plasmaParser;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.plasma.plasmaURL;
import de.anomic.server.serverFileUtils;

@ -430,25 +430,25 @@ public final class plasmaCrawlLURL extends plasmaURL {
// if the url cannot be found, this returns null
this.urlHash = urlHash;
try {
byte[][] entry = urlHashCache.get(urlHash.getBytes());
if (entry != null) {
this.url = new URL(new String(entry[1]).trim());
this.descr = (entry[2] == null) ? this.url.toString() : new String(entry[2]).trim();
this.moddate = new Date(86400000 * serverCodings.enhancedCoder.decodeBase64Long(new String(entry[3])));
this.loaddate = new Date(86400000 * serverCodings.enhancedCoder.decodeBase64Long(new String(entry[4])));
this.referrerHash = (entry[5]==null)?dummyHash:new String(entry[5]);
this.copyCount = (int) serverCodings.enhancedCoder.decodeBase64Long(new String(entry[6]));
this.flags = new String(entry[7]);
this.quality = (int) serverCodings.enhancedCoder.decodeBase64Long(new String(entry[8]));
this.language = new String(entry[9]);
this.doctype = (char) entry[10][0];
this.size = (long) serverCodings.enhancedCoder.decodeBase64Long(new String(entry[11]));
this.wordCount = (int) serverCodings.enhancedCoder.decodeBase64Long(new String(entry[12]));
this.snippet = null;
return;
}
byte[][] entry = plasmaCrawlLURL.this.urlHashCache.get(urlHash.getBytes());
if (entry != null) {
this.url = new URL(new String(entry[1]).trim());
this.descr = (entry[2] == null) ? this.url.toString() : new String(entry[2]).trim();
this.moddate = new Date(86400000 * serverCodings.enhancedCoder.decodeBase64Long(new String(entry[3])));
this.loaddate = new Date(86400000 * serverCodings.enhancedCoder.decodeBase64Long(new String(entry[4])));
this.referrerHash = (entry[5]==null)?dummyHash:new String(entry[5]);
this.copyCount = (int) serverCodings.enhancedCoder.decodeBase64Long(new String(entry[6]));
this.flags = new String(entry[7]);
this.quality = (int) serverCodings.enhancedCoder.decodeBase64Long(new String(entry[8]));
this.language = new String(entry[9]);
this.doctype = (char) entry[10][0];
this.size = serverCodings.enhancedCoder.decodeBase64Long(new String(entry[11]));
this.wordCount = (int) serverCodings.enhancedCoder.decodeBase64Long(new String(entry[12]));
this.snippet = null;
return;
}
} catch (Exception e) {
serverLog.logSevere("PLASMA", "INTERNAL ERROR in plasmaLURL.entry/1: " + e.toString(), e);
serverLog.logSevere("PLASMA", "INTERNAL ERROR in plasmaLURL.entry/1: " + e.toString(), e);
}
}

Loading…
Cancel
Save