orbiter 20 years ago
parent da81dcd66a
commit c8a7a85ce2

@ -1047,13 +1047,19 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
String reason = null; // failure reason String reason = null; // failure reason
// strange error // strange errors
if (nexturlString == null) { if (nexturlString == null) {
reason = "denied_(url_null)"; reason = "denied_(url_null)";
log.logError("Wrong URL in stackCrawl: url=null"); log.logError("Wrong URL in stackCrawl: url=null");
return reason; return reason;
} }
/*
if (profile == null) {
reason = "denied_(profile_null)";
log.logError("Wrong Profile for stackCrawl: profile=null");
return reason;
}
*/
URL nexturl = null; URL nexturl = null;
if ((initiatorHash == null) || (initiatorHash.length() == 0)) initiatorHash = plasmaURL.dummyHash; if ((initiatorHash == null) || (initiatorHash.length() == 0)) initiatorHash = plasmaURL.dummyHash;
String referrerHash = plasmaURL.urlHash(referrerString); String referrerHash = plasmaURL.urlHash(referrerString);
@ -1066,7 +1072,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
} }
// filter deny // filter deny
if ((currentdepth > 0) && (!(nexturlString.matches(profile.generalFilter())))) { if ((currentdepth > 0) && (profile != null) && (!(nexturlString.matches(profile.generalFilter())))) {
reason = "denied_(does_not_match_filter)"; reason = "denied_(does_not_match_filter)";
urlPool.errorURL.newEntry(nexturl, referrerHash, initiatorHash, yacyCore.seedDB.mySeed.hash, urlPool.errorURL.newEntry(nexturl, referrerHash, initiatorHash, yacyCore.seedDB.mySeed.hash,
name, reason, new bitfield(plasmaURL.urlFlagLength), false); name, reason, new bitfield(plasmaURL.urlFlagLength), false);
@ -1082,7 +1088,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
} }
// deny post properties // deny post properties
if ((plasmaHTCache.isPOST(nexturlString)) && (!(profile.crawlingQ()))) { if ((plasmaHTCache.isPOST(nexturlString)) && (profile != null) && (!(profile.crawlingQ()))) {
reason = "denied_(post_url)"; reason = "denied_(post_url)";
urlPool.errorURL.newEntry(nexturl, referrerHash, initiatorHash, yacyCore.seedDB.mySeed.hash, urlPool.errorURL.newEntry(nexturl, referrerHash, initiatorHash, yacyCore.seedDB.mySeed.hash,
name, reason, new bitfield(plasmaURL.urlFlagLength), false); name, reason, new bitfield(plasmaURL.urlFlagLength), false);
@ -1102,6 +1108,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
// store information // store information
boolean local = ((initiatorHash.equals(plasmaURL.dummyHash)) || (initiatorHash.equals(yacyCore.seedDB.mySeed.hash))); boolean local = ((initiatorHash.equals(plasmaURL.dummyHash)) || (initiatorHash.equals(yacyCore.seedDB.mySeed.hash)));
boolean global = boolean global =
(profile != null) &&
(profile.remoteIndexing()) /* granted */ && (profile.remoteIndexing()) /* granted */ &&
(currentdepth == profile.generalDepth()) /* leaf node */ && (currentdepth == profile.generalDepth()) /* leaf node */ &&
(initiatorHash.equals(yacyCore.seedDB.mySeed.hash)) /* not proxy */ && (initiatorHash.equals(yacyCore.seedDB.mySeed.hash)) /* not proxy */ &&
@ -1113,7 +1120,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
loadDate, /* load date */ loadDate, /* load date */
referrerHash, /* last url in crawling queue */ referrerHash, /* last url in crawling queue */
name, /* the anchor name */ name, /* the anchor name */
profile.handle(), (profile == null) ? null : profile.handle(),
currentdepth, /*depth so far*/ currentdepth, /*depth so far*/
0, /*anchors, default value */ 0, /*anchors, default value */
0, /*forkfactor, default value */ 0, /*forkfactor, default value */

@ -227,10 +227,10 @@ public class plasmaWordIndexDistribution {
indexEntity.deleteComplete(); indexEntity.deleteComplete();
} else if (indexEntity.size() <= count) { } else if (indexEntity.size() <= count) {
// take the whole entity // take the whole entity
try {
// first check if we know all urls // first check if we know all urls
urlEnum = indexEntity.elements(true); urlEnum = indexEntity.elements(true);
unknownURLEntries = new HashSet(); unknownURLEntries = new HashSet();
try {
while (urlEnum.hasMoreElements()) { while (urlEnum.hasMoreElements()) {
indexEntry = (plasmaWordIndexEntry) urlEnum.nextElement(); indexEntry = (plasmaWordIndexEntry) urlEnum.nextElement();
lurl = urlPool.loadedURL.getEntry(indexEntry.getUrlHash()); lurl = urlPool.loadedURL.getEntry(indexEntry.getUrlHash());
@ -245,11 +245,6 @@ public class plasmaWordIndexDistribution {
} }
} }
} }
} catch (kelondroException e) {
log.logError("plasmaWordIndexDistribution/1: deleted DB for word " + indexEntity.wordHash());
e.printStackTrace();
try {indexEntity.deleteComplete();} catch (IOException ee) {}
}
// now delete all entries that have no url entry // now delete all entries that have no url entry
hashIter = unknownURLEntries.iterator(); hashIter = unknownURLEntries.iterator();
while (hashIter.hasNext()) { while (hashIter.hasNext()) {
@ -259,12 +254,17 @@ public class plasmaWordIndexDistribution {
tmpEntities.add(indexEntity); tmpEntities.add(indexEntity);
log.logDebug("Selected whole index (" + indexEntity.size() + " URLs, " + unknownURLEntries.size() + " not bound) for word " + indexEntity.wordHash()); log.logDebug("Selected whole index (" + indexEntity.size() + " URLs, " + unknownURLEntries.size() + " not bound) for word " + indexEntity.wordHash());
count -= indexEntity.size(); count -= indexEntity.size();
} catch (kelondroException e) {
log.logError("plasmaWordIndexDistribution/1: deleted DB for word " + indexEntity.wordHash());
e.printStackTrace();
try {indexEntity.deleteComplete();} catch (IOException ee) {}
}
} else { } else {
// make an on-the-fly entity and insert values // make an on-the-fly entity and insert values
tmpEntity = new plasmaWordIndexEntity(indexEntity.wordHash()); tmpEntity = new plasmaWordIndexEntity(indexEntity.wordHash());
try {
urlEnum = indexEntity.elements(true); urlEnum = indexEntity.elements(true);
unknownURLEntries = new HashSet(); unknownURLEntries = new HashSet();
try {
while ((urlEnum.hasMoreElements()) && (count > 0)) { while ((urlEnum.hasMoreElements()) && (count > 0)) {
indexEntry = (plasmaWordIndexEntry) urlEnum.nextElement(); indexEntry = (plasmaWordIndexEntry) urlEnum.nextElement();
lurl = urlPool.loadedURL.getEntry(indexEntry.getUrlHash()); lurl = urlPool.loadedURL.getEntry(indexEntry.getUrlHash());
@ -281,11 +281,6 @@ public class plasmaWordIndexDistribution {
} }
} }
} }
} catch (kelondroException e) {
log.logError("plasmaWordIndexDistribution/2: deleted DB for word " + indexEntity.wordHash());
e.printStackTrace();
try {indexEntity.deleteComplete();} catch (IOException ee) {}
}
// now delete all entries that have no url entry // now delete all entries that have no url entry
hashIter = unknownURLEntries.iterator(); hashIter = unknownURLEntries.iterator();
while (hashIter.hasNext()) { while (hashIter.hasNext()) {
@ -294,6 +289,11 @@ public class plasmaWordIndexDistribution {
// use what's remaining // use what's remaining
log.logDebug("Selected partial index (" + tmpEntity.size() + " from " + indexEntity.size() +" URLs, " + unknownURLEntries.size() + " not bound) for word " + tmpEntity.wordHash()); log.logDebug("Selected partial index (" + tmpEntity.size() + " from " + indexEntity.size() +" URLs, " + unknownURLEntries.size() + " not bound) for word " + tmpEntity.wordHash());
tmpEntities.add(tmpEntity); tmpEntities.add(tmpEntity);
} catch (kelondroException e) {
log.logError("plasmaWordIndexDistribution/2: deleted DB for word " + indexEntity.wordHash());
e.printStackTrace();
try {indexEntity.deleteComplete();} catch (IOException ee) {}
}
indexEntity.close(); // important: is not closed elsewhere and cannot be deleted afterwards indexEntity.close(); // important: is not closed elsewhere and cannot be deleted afterwards
indexEntity = null; indexEntity = null;
} }

@ -363,13 +363,16 @@ public class yacySeed {
} }
public String toString() { public String toString() {
String s = null;
synchronized (dna) {
// set hash into seed code structure // set hash into seed code structure
dna.put("Hash", this.hash); dna.put("Hash", this.hash);
// generate string representation // generate string representation
String s = dna.toString(); s = dna.toString();
// reconstruct original: hash is stored external // reconstruct original: hash is stored external
dna.remove("Hash"); dna.remove("Hash");
// return string // return string
}
return s; return s;
} }

Loading…
Cancel
Save