*) Bugfix for entries with a null URL in the GlobalQueue

See: http://www.yacy-forum.de/viewtopic.php?p=12675#12675

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@1069 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
theli 19 years ago
parent 22da652d4f
commit 444a5a9368

@ -100,10 +100,10 @@ public class IndexCreateWWWGlobalQueue_p {
yacySeed initiator; yacySeed initiator;
String profileHandle; String profileHandle;
plasmaCrawlProfile.entry profileEntry; plasmaCrawlProfile.entry profileEntry;
int i; int i, showNum = 0;
for (i = 0; i < crawlerList.length; i++) { for (i = 0; i < crawlerList.length; i++) {
urle = crawlerList[i]; urle = crawlerList[i];
if (urle != null) { if ((urle != null)&&(urle.url()!=null)) {
initiator = yacyCore.seedDB.getConnected(urle.initiator()); initiator = yacyCore.seedDB.getConnected(urle.initiator());
profileHandle = urle.profileHandle(); profileHandle = urle.profileHandle();
profileEntry = (profileHandle == null) ? null : switchboard.profiles.getEntry(profileHandle); profileEntry = (profileHandle == null) ? null : switchboard.profiles.getEntry(profileHandle);
@ -115,9 +115,10 @@ public class IndexCreateWWWGlobalQueue_p {
prop.put("crawler-queue_list_"+i+"_anchor", wikiCode.replaceHTML(urle.name())); prop.put("crawler-queue_list_"+i+"_anchor", wikiCode.replaceHTML(urle.name()));
prop.put("crawler-queue_list_"+i+"_url", wikiCode.replaceHTML(urle.url().toString())); prop.put("crawler-queue_list_"+i+"_url", wikiCode.replaceHTML(urle.url().toString()));
dark = !dark; dark = !dark;
showNum++;
} }
} }
prop.put("crawler-queue_list", i); prop.put("crawler-queue_list", showNum);
} }
// return rewrite properties // return rewrite properties

@ -61,7 +61,6 @@ import de.anomic.htmlFilter.htmlFilterContentScraper;
import de.anomic.htmlFilter.htmlFilterOutputStream; import de.anomic.htmlFilter.htmlFilterOutputStream;
import de.anomic.http.httpHeader; import de.anomic.http.httpHeader;
import de.anomic.plasma.plasmaCrawlProfile; import de.anomic.plasma.plasmaCrawlProfile;
import de.anomic.plasma.plasmaParser;
import de.anomic.plasma.plasmaSwitchboard; import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.plasma.plasmaURL; import de.anomic.plasma.plasmaURL;
import de.anomic.server.serverFileUtils; import de.anomic.server.serverFileUtils;

@ -430,7 +430,7 @@ public final class plasmaCrawlLURL extends plasmaURL {
// if the url cannot be found, this returns null // if the url cannot be found, this returns null
this.urlHash = urlHash; this.urlHash = urlHash;
try { try {
byte[][] entry = urlHashCache.get(urlHash.getBytes()); byte[][] entry = plasmaCrawlLURL.this.urlHashCache.get(urlHash.getBytes());
if (entry != null) { if (entry != null) {
this.url = new URL(new String(entry[1]).trim()); this.url = new URL(new String(entry[1]).trim());
this.descr = (entry[2] == null) ? this.url.toString() : new String(entry[2]).trim(); this.descr = (entry[2] == null) ? this.url.toString() : new String(entry[2]).trim();
@ -442,7 +442,7 @@ public final class plasmaCrawlLURL extends plasmaURL {
this.quality = (int) serverCodings.enhancedCoder.decodeBase64Long(new String(entry[8])); this.quality = (int) serverCodings.enhancedCoder.decodeBase64Long(new String(entry[8]));
this.language = new String(entry[9]); this.language = new String(entry[9]);
this.doctype = (char) entry[10][0]; this.doctype = (char) entry[10][0];
this.size = (long) serverCodings.enhancedCoder.decodeBase64Long(new String(entry[11])); this.size = serverCodings.enhancedCoder.decodeBase64Long(new String(entry[11]));
this.wordCount = (int) serverCodings.enhancedCoder.decodeBase64Long(new String(entry[12])); this.wordCount = (int) serverCodings.enhancedCoder.decodeBase64Long(new String(entry[12]));
this.snippet = null; this.snippet = null;
return; return;

Loading…
Cancel
Save