diff --git a/source/de/anomic/plasma/plasmaSwitchboard.java b/source/de/anomic/plasma/plasmaSwitchboard.java index 87dac0c31..47eb2c69c 100644 --- a/source/de/anomic/plasma/plasmaSwitchboard.java +++ b/source/de/anomic/plasma/plasmaSwitchboard.java @@ -1047,13 +1047,19 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser String reason = null; // failure reason - // strange error + // strange errors if (nexturlString == null) { reason = "denied_(url_null)"; log.logError("Wrong URL in stackCrawl: url=null"); return reason; } - + /* + if (profile == null) { + reason = "denied_(profile_null)"; + log.logError("Wrong Profile for stackCrawl: profile=null"); + return reason; + } + */ URL nexturl = null; if ((initiatorHash == null) || (initiatorHash.length() == 0)) initiatorHash = plasmaURL.dummyHash; String referrerHash = plasmaURL.urlHash(referrerString); @@ -1066,7 +1072,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser } // filter deny - if ((currentdepth > 0) && (!(nexturlString.matches(profile.generalFilter())))) { + if ((currentdepth > 0) && (profile != null) && (!(nexturlString.matches(profile.generalFilter())))) { reason = "denied_(does_not_match_filter)"; urlPool.errorURL.newEntry(nexturl, referrerHash, initiatorHash, yacyCore.seedDB.mySeed.hash, name, reason, new bitfield(plasmaURL.urlFlagLength), false); @@ -1082,7 +1088,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser } // deny post properties - if ((plasmaHTCache.isPOST(nexturlString)) && (!(profile.crawlingQ()))) { + if ((plasmaHTCache.isPOST(nexturlString)) && (profile != null) && (!(profile.crawlingQ()))) { reason = "denied_(post_url)"; urlPool.errorURL.newEntry(nexturl, referrerHash, initiatorHash, yacyCore.seedDB.mySeed.hash, name, reason, new bitfield(plasmaURL.urlFlagLength), false); @@ -1102,6 +1108,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser // store information boolean local = ((initiatorHash.equals(plasmaURL.dummyHash)) || (initiatorHash.equals(yacyCore.seedDB.mySeed.hash))); boolean global = + (profile != null) && (profile.remoteIndexing()) /* granted */ && (currentdepth == profile.generalDepth()) /* leaf node */ && (initiatorHash.equals(yacyCore.seedDB.mySeed.hash)) /* not proxy */ && @@ -1113,7 +1120,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser loadDate, /* load date */ referrerHash, /* last url in crawling queue */ name, /* the anchor name */ - profile.handle(), + (profile == null) ? null : profile.handle(), currentdepth, /*depth so far*/ 0, /*anchors, default value */ 0, /*forkfactor, default value */ diff --git a/source/de/anomic/plasma/plasmaWordIndexDistribution.java b/source/de/anomic/plasma/plasmaWordIndexDistribution.java index 749e6b178..ebeb8940c 100644 --- a/source/de/anomic/plasma/plasmaWordIndexDistribution.java +++ b/source/de/anomic/plasma/plasmaWordIndexDistribution.java @@ -227,10 +227,10 @@ public class plasmaWordIndexDistribution { indexEntity.deleteComplete(); } else if (indexEntity.size() <= count) { // take the whole entity - // fist check if we know all urls - urlEnum = indexEntity.elements(true); - unknownURLEntries = new HashSet(); try { + // fist check if we know all urls + urlEnum = indexEntity.elements(true); + unknownURLEntries = new HashSet(); while (urlEnum.hasMoreElements()) { indexEntry = (plasmaWordIndexEntry) urlEnum.nextElement(); lurl = urlPool.loadedURL.getEntry(indexEntry.getUrlHash()); @@ -245,26 +245,26 @@ public class plasmaWordIndexDistribution { } } } + // now delete all entries that have no url entry + hashIter = unknownURLEntries.iterator(); + while (hashIter.hasNext()) { + indexEntity.removeEntry((String) hashIter.next(), false); + } + // use whats remaining + tmpEntities.add(indexEntity); + log.logDebug("Selected whole index (" + indexEntity.size() + " URLs, " + unknownURLEntries.size() + " not bound) for word " + indexEntity.wordHash()); + count -= indexEntity.size(); } catch (kelondroException e) { log.logError("plasmaWordIndexDistribution/1: deleted DB for word " + indexEntity.wordHash()); e.printStackTrace(); try {indexEntity.deleteComplete();} catch (IOException ee) {} } - // now delete all entries that have no url entry - hashIter = unknownURLEntries.iterator(); - while (hashIter.hasNext()) { - indexEntity.removeEntry((String) hashIter.next(), false); - } - // use whats remaining - tmpEntities.add(indexEntity); - log.logDebug("Selected whole index (" + indexEntity.size() + " URLs, " + unknownURLEntries.size() + " not bound) for word " + indexEntity.wordHash()); - count -= indexEntity.size(); } else { // make an on-the-fly entity and insert values tmpEntity = new plasmaWordIndexEntity(indexEntity.wordHash()); - urlEnum = indexEntity.elements(true); - unknownURLEntries = new HashSet(); try { + urlEnum = indexEntity.elements(true); + unknownURLEntries = new HashSet(); while ((urlEnum.hasMoreElements()) && (count > 0)) { indexEntry = (plasmaWordIndexEntry) urlEnum.nextElement(); lurl = urlPool.loadedURL.getEntry(indexEntry.getUrlHash()); @@ -281,19 +281,19 @@ public class plasmaWordIndexDistribution { } } } + // now delete all entries that have no url entry + hashIter = unknownURLEntries.iterator(); + while (hashIter.hasNext()) { + indexEntity.removeEntry((String) hashIter.next(), true); + } + // use whats remaining + log.logDebug("Selected partial index (" + tmpEntity.size() + " from " + indexEntity.size() +" URLs, " + unknownURLEntries.size() + " not bound) for word " + tmpEntity.wordHash()); + tmpEntities.add(tmpEntity); } catch (kelondroException e) { log.logError("plasmaWordIndexDistribution/2: deleted DB for word " + indexEntity.wordHash()); e.printStackTrace(); try {indexEntity.deleteComplete();} catch (IOException ee) {} } - // now delete all entries that have no url entry - hashIter = unknownURLEntries.iterator(); - while (hashIter.hasNext()) { - indexEntity.removeEntry((String) hashIter.next(), true); - } - // use whats remaining - log.logDebug("Selected partial index (" + tmpEntity.size() + " from " + indexEntity.size() +" URLs, " + unknownURLEntries.size() + " not bound) for word " + tmpEntity.wordHash()); - tmpEntities.add(tmpEntity); indexEntity.close(); // important: is not closed elswhere and cannot be deleted afterwards indexEntity = null; } diff --git a/source/de/anomic/yacy/yacySeed.java b/source/de/anomic/yacy/yacySeed.java index 9311e29b4..0304252cf 100644 --- a/source/de/anomic/yacy/yacySeed.java +++ b/source/de/anomic/yacy/yacySeed.java @@ -363,14 +363,17 @@ public class yacySeed { } public String toString() { - // set hash into seed code structure - dna.put("Hash", this.hash); - // generate string representation - String s = dna.toString(); - // reconstruct original: hash is stored external - dna.remove("Hash"); - // return string - return s; + String s = null; + synchronized (dna) { + // set hash into seed code structure + dna.put("Hash", this.hash); + // generate string representation + s = dna.toString(); + // reconstruct original: hash is stored external + dna.remove("Hash"); + // return string + } + return s; } public String genSeedStr(String key) {