bug fixes and code cleaning

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@22 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 20 years ago
parent c13411c198
commit b9203bdb50

@ -307,7 +307,7 @@ public class IndexCreate_p {
prop.put("indexing-queue_list_"+i+"_modified", daydate(pcentry.lastModified)); prop.put("indexing-queue_list_"+i+"_modified", daydate(pcentry.lastModified));
prop.put("indexing-queue_list_"+i+"_href",((pcentry.scraper == null) ? "0" : ("" + pcentry.scraper.getAnchors().size()))); prop.put("indexing-queue_list_"+i+"_href",((pcentry.scraper == null) ? "0" : ("" + pcentry.scraper.getAnchors().size())));
prop.put("indexing-queue_list_"+i+"_anchor", ((pcentry.scraper == null) ? "-" : pcentry.scraper.getHeadline()) ); prop.put("indexing-queue_list_"+i+"_anchor", ((pcentry.scraper == null) ? "-" : pcentry.scraper.getHeadline()) );
prop.put("indexing-queue_list_"+i+"_url", pcentry.urlString); prop.put("indexing-queue_list_"+i+"_url", pcentry.nomalizedURLString);
dark = !dark; dark = !dark;
} }
} }

@ -456,7 +456,7 @@ public class dir {
public static void deletePhrase(plasmaSwitchboard switchboard, String urlstring, String phrase, String descr) { public static void deletePhrase(plasmaSwitchboard switchboard, String urlstring, String phrase, String descr) {
try { try {
String urlhash = plasmaURL.urlHash(new URL(urlstring)); String urlhash = plasmaURL.urlHash(new URL(urlstring));
Set words = plasmaSwitchboard.getWords(("yacyshare " + phrase + " " + descr).getBytes()); Set words = plasmaCondenser.getWords(("yacyshare " + phrase + " " + descr).getBytes());
switchboard.removeReferences(urlhash, words); switchboard.removeReferences(urlhash, words);
switchboard.loadedURL.remove(urlhash); switchboard.loadedURL.remove(urlhash);
} catch (Exception e) { } catch (Exception e) {

@ -122,6 +122,9 @@ public class transferRWI {
Iterator it = unknownURL.iterator(); Iterator it = unknownURL.iterator();
while (it.hasNext()) unknownURLs += "," + (String) it.next(); while (it.hasNext()) unknownURLs += "," + (String) it.next();
if (unknownURLs.length() > 0) unknownURLs = unknownURLs.substring(1); if (unknownURLs.length() > 0) unknownURLs = unknownURLs.substring(1);
if (wordhashes.length == 0)
switchboard.log.logInfo("Received 0 Words from peer " + iam + ", requested " + unknownURL.size() + " URL's");
else
switchboard.log.logInfo("Received " + received + " Words [" + wordhashes[0] + " .. " + wordhashes[wordhashes.length - 1] + "] from peer " + iam + ", requested " + unknownURL.size() + " URL's"); switchboard.log.logInfo("Received " + received + " Words [" + wordhashes[0] + " .. " + wordhashes[wordhashes.length - 1] + "] from peer " + iam + ", requested " + unknownURL.size() + " URL's");
result = "ok"; result = "ok";
} else { } else {

@ -583,6 +583,17 @@ public class plasmaCondenser {
} }
} }
/**
 * Feeds the given bytes to a fresh plasmaCondenser and returns the
 * result of its getWords() call.
 *
 * @param text the raw bytes to condense; may be null
 * @return the condenser's word set, or null if text is null or an
 *         IOException is raised while condensing
 */
public static Set getWords(byte[] text) {
    if (text == null) {
        return null;
    }
    try {
        // the condenser reads its input from an in-memory stream over the bytes
        return new plasmaCondenser(new ByteArrayInputStream(text)).getWords();
    } catch (IOException e) {
        // failure is reported to the caller as null instead of being rethrown
        return null;
    }
}
public static void main(String[] args) { public static void main(String[] args) {
if ((args.length == 0) || (args.length > 3)) System.out.println("wrong number of arguments: plasmaCondenser -text|-html <infile> <outfile>"); else try { if ((args.length == 0) || (args.length > 3)) System.out.println("wrong number of arguments: plasmaCondenser -text|-html <infile> <outfile>"); else try {

@ -238,7 +238,7 @@ public class plasmaHTCache {
if ((entry.status == CACHE_FILL) || if ((entry.status == CACHE_FILL) ||
(entry.status == CACHE_STALE_RELOAD_GOOD) || (entry.status == CACHE_STALE_RELOAD_GOOD) ||
(entry.status == CACHE_STALE_RELOAD_BAD)) { (entry.status == CACHE_STALE_RELOAD_BAD)) {
responseHeaderDB.set(entry.urlHash, entry.responseHeader); responseHeaderDB.set(entry.nomalizedURLHash, entry.responseHeader);
} }
// work off unwritten files and undone parsing // work off unwritten files and undone parsing
@ -254,7 +254,7 @@ public class plasmaHTCache {
} }
entry.cacheFile.getParentFile().mkdirs(); entry.cacheFile.getParentFile().mkdirs();
serverFileUtils.write(entry.cacheArray, entry.cacheFile); serverFileUtils.write(entry.cacheArray, entry.cacheFile);
entry.cacheArray = null; //entry.cacheArray = null;
} catch (FileNotFoundException e) { } catch (FileNotFoundException e) {
// this is the case of a "(Not a directory)" error, which should be prohibited // this is the case of a "(Not a directory)" error, which should be prohibited
// by the shallStoreCache() property. However, sometimes the error still occurs // by the shallStoreCache() property. However, sometimes the error still occurs
@ -444,8 +444,8 @@ public class plasmaHTCache {
public File cacheFile; // the cache file public File cacheFile; // the cache file
public byte[] cacheArray; // or the cache as byte-array public byte[] cacheArray; // or the cache as byte-array
public URL url; public URL url;
public String urlHash; public String nomalizedURLHash;
public String urlString; public String nomalizedURLString;
public int status; // cache load/hit/stale etc status public int status; // cache load/hit/stale etc status
public Date lastModified; public Date lastModified;
public char doctype; public char doctype;
@ -462,15 +462,15 @@ public class plasmaHTCache {
plasmaCrawlProfile.entry profile) { plasmaCrawlProfile.entry profile) {
// normalize url // normalize url
this.urlString = htmlFilterContentScraper.urlNormalform(url); this.nomalizedURLString = htmlFilterContentScraper.urlNormalform(url);
try { try {
this.url = new URL(urlString); this.url = new URL(nomalizedURLString);
} catch (MalformedURLException e) { } catch (MalformedURLException e) {
System.out.println("internal error at httpdProxyCache.Entry: " + e); System.out.println("internal error at httpdProxyCache.Entry: " + e);
System.exit(-1); System.exit(-1);
} }
this.cacheFile = getCachePath(this.url); this.cacheFile = getCachePath(this.url);
this.urlHash = plasmaCrawlLURL.urlHash(urlString); this.nomalizedURLHash = plasmaCrawlLURL.urlHash(nomalizedURLString);
// assigned: // assigned:
this.initDate = initDate; this.initDate = initDate;
@ -496,7 +496,7 @@ public class plasmaHTCache {
lastModified = responseHeader.lastModified(); lastModified = responseHeader.lastModified();
if (lastModified == null) lastModified = new Date(); // does not exist in header if (lastModified == null) lastModified = new Date(); // does not exist in header
} }
this.doctype = plasmaWordIndexEntry.docType(urlString); this.doctype = plasmaWordIndexEntry.docType(nomalizedURLString);
this.language = plasmaWordIndexEntry.language(url); this.language = plasmaWordIndexEntry.language(url);
// to be defined later: // to be defined later:
@ -554,8 +554,8 @@ public class plasmaHTCache {
// -CGI access in request // -CGI access in request
// CGI access makes the page very individual, and therefore not usable in caches // CGI access makes the page very individual, and therefore not usable in caches
if ((isPOST(urlString)) && (!(profile.crawlingQ()))) return "dynamic_post"; if ((isPOST(nomalizedURLString)) && (!(profile.crawlingQ()))) return "dynamic_post";
if (isCGI(urlString)) return "dynamic_cgi"; if (isCGI(nomalizedURLString)) return "dynamic_cgi";
// -authorization cases in request // -authorization cases in request
// authorization makes pages very individual, and therefore we cannot use the // authorization makes pages very individual, and therefore we cannot use the
@ -622,8 +622,8 @@ public class plasmaHTCache {
// -CGI access in request // -CGI access in request
// CGI access makes the page very individual, and therefore not usable in caches // CGI access makes the page very individual, and therefore not usable in caches
if (isPOST(urlString)) return false; if (isPOST(nomalizedURLString)) return false;
if (isCGI(urlString)) return false; if (isCGI(nomalizedURLString)) return false;
// -authorization cases in request // -authorization cases in request
if (requestHeader.containsKey("AUTHORIZATION")) return false; if (requestHeader.containsKey("AUTHORIZATION")) return false;
@ -747,8 +747,8 @@ public class plasmaHTCache {
// -CGI access in request // -CGI access in request
// CGI access makes the page very individual, and therefore not usable in caches // CGI access makes the page very individual, and therefore not usable in caches
if ((isPOST(urlString)) && (!(profile.crawlingQ()))) return "Dynamic_(POST)"; if ((isPOST(nomalizedURLString)) && (!(profile.crawlingQ()))) return "Dynamic_(POST)";
if ((isCGI(urlString)) && (!(profile.crawlingQ()))) return "Dynamic_(CGI)"; if ((isCGI(nomalizedURLString)) && (!(profile.crawlingQ()))) return "Dynamic_(CGI)";
// -authorization cases in request // -authorization cases in request
// we checked that in shallStoreCache // we checked that in shallStoreCache
@ -759,7 +759,7 @@ public class plasmaHTCache {
// a picture cannot be indexed // a picture cannot be indexed
if (isPicture(responseHeader)) return "Media_Content_(Picture)"; if (isPicture(responseHeader)) return "Media_Content_(Picture)";
if (!(isText(responseHeader))) return "Media_Content_(not_text)"; if (!(isText(responseHeader))) return "Media_Content_(not_text)";
if (noIndexingURL(urlString)) return "Media_Content_(forbidden)"; if (noIndexingURL(nomalizedURLString)) return "Media_Content_(forbidden)";
// -if-modified-since in request // -if-modified-since in request
@ -864,8 +864,8 @@ public class plasmaHTCache {
// -CGI access in request // -CGI access in request
// CGI access makes the page very individual, and therefore not usable in caches // CGI access makes the page very individual, and therefore not usable in caches
if ((isPOST(urlString)) && (!(profile.crawlingQ()))) return "Dynamic_(POST)"; if ((isPOST(nomalizedURLString)) && (!(profile.crawlingQ()))) return "Dynamic_(POST)";
if ((isCGI(urlString)) && (!(profile.crawlingQ()))) return "Dynamic_(CGI)"; if ((isCGI(nomalizedURLString)) && (!(profile.crawlingQ()))) return "Dynamic_(CGI)";
// -authorization cases in request // -authorization cases in request
// we checked that in shallStoreCache // we checked that in shallStoreCache
@ -876,7 +876,7 @@ public class plasmaHTCache {
// a picture cannot be indexed // a picture cannot be indexed
if (isPicture(responseHeader)) return "Media_Content_(Picture)"; if (isPicture(responseHeader)) return "Media_Content_(Picture)";
if (!(isText(responseHeader))) return "Media_Content_(not_text)"; if (!(isText(responseHeader))) return "Media_Content_(not_text)";
if (noIndexingURL(urlString)) return "Media_Content_(forbidden)"; if (noIndexingURL(nomalizedURLString)) return "Media_Content_(forbidden)";
// -if-modified-since in request // -if-modified-since in request
// if the page is fresh at the very moment we can index it // if the page is fresh at the very moment we can index it

@ -393,7 +393,6 @@ public class plasmaSwitchboard extends serverAbstractSwitch implements serverSwi
public synchronized void deQueue() { public synchronized void deQueue() {
if (serverJobs < 5) { if (serverJobs < 5) {
if (processStack.size() > 0) { if (processStack.size() > 0) {
log.logDebug("DEQUEUE: dequeueing one step (processStack=" + processStack.size() + ", localStackSize=" + noticeURL.localStackSize() + ", remoteStackSize=" + noticeURL.remoteStackSize() + ")");
processResourceStack((plasmaHTCache.Entry) processStack.removeFirst()); processResourceStack((plasmaHTCache.Entry) processStack.removeFirst());
} }
} else { } else {
@ -469,7 +468,13 @@ public class plasmaSwitchboard extends serverAbstractSwitch implements serverSwi
private synchronized void processResourceStack(plasmaHTCache.Entry entry) { private synchronized void processResourceStack(plasmaHTCache.Entry entry) {
// work off one stack entry with a fresh resource (scraped web page) // work off one stack entry with a fresh resource (scraped web page)
if ((entry.cacheArray != null) || (entry.scraper != null)) try { String stats = "DEQUEUE: dequeueing one step (processStack=" + processStack.size() + ", localStackSize=" + noticeURL.localStackSize() + ", remoteStackSize=" + noticeURL.remoteStackSize() + ")";
if ((entry.cacheArray == null) && (entry.scraper == null)) {
log.logDebug(stats + " entry for " + entry.nomalizedURLString + " has no content -- skipped");
return;
}
try {
// we must distinguish the following cases: resource-load was initiated by // we must distinguish the following cases: resource-load was initiated by
// 1) global crawling: the index is extern, not here (not possible here) // 1) global crawling: the index is extern, not here (not possible here)
// 2) result of search queries, some indexes are here (not possible here) // 2) result of search queries, some indexes are here (not possible here)
@ -492,15 +497,15 @@ public class plasmaSwitchboard extends serverAbstractSwitch implements serverSwi
processCase = 6; processCase = 6;
} }
log.logDebug("processResourceStack: processCase=" + processCase + ", depth=" + entry.depth + ", maxDepth=" + entry.profile.generalDepth() + ", filter=" + entry.profile.generalFilter() + ", initiatorHash=" + initiatorHash + ", status=" + entry.status + ", url=" + entry.url); // DEBUG log.logDebug(stats + " processCase=" + processCase + ", depth=" + entry.depth + ", maxDepth=" + entry.profile.generalDepth() + ", filter=" + entry.profile.generalFilter() + ", initiatorHash=" + initiatorHash + ", status=" + entry.status + ", source=" + ((entry.cacheArray == null) ? "scraper" : "byte[]") + ", url=" + entry.nomalizedURLString); // DEBUG
// parse content // parse content
plasmaParser.document document; plasmaParser.document document;
if (entry.scraper != null) { if (entry.scraper != null) {
log.logDebug("(Parser) '" + entry.urlString + "' is pre-parsed by scraper"); log.logDebug("(Parser) '" + entry.nomalizedURLString + "' is pre-parsed by scraper");
document = parser.transformScraper(entry.url, entry.responseHeader.mime(), entry.scraper); document = parser.transformScraper(entry.url, entry.responseHeader.mime(), entry.scraper);
} else { } else {
log.logDebug("(Parser) '" + entry.urlString + "' is not parsed, parsing now"); log.logDebug("(Parser) '" + entry.nomalizedURLString + "' is not parsed, parsing now");
document = parser.parseSource(entry.url, entry.responseHeader.mime(), entry.cacheArray); document = parser.parseSource(entry.url, entry.responseHeader.mime(), entry.cacheArray);
} }
@ -516,11 +521,11 @@ public class plasmaSwitchboard extends serverAbstractSwitch implements serverSwi
while (i.hasNext()) { while (i.hasNext()) {
e = (Map.Entry) i.next(); e = (Map.Entry) i.next();
nexturlstring = (String) e.getKey(); nexturlstring = (String) e.getKey();
rejectReason = stackCrawl(nexturlstring, entry.urlString, initiatorHash, (String) e.getValue(), entry.lastModified, entry.depth + 1, entry.profile); rejectReason = stackCrawl(nexturlstring, entry.nomalizedURLString, initiatorHash, (String) e.getValue(), entry.lastModified, entry.depth + 1, entry.profile);
if (rejectReason == null) { if (rejectReason == null) {
c++; c++;
} else { } else {
errorURL.newEntry(new URL(nexturlstring), entry.urlString, entry.initiator(), yacyCore.seedDB.mySeed.hash, errorURL.newEntry(new URL(nexturlstring), entry.nomalizedURLString, entry.initiator(), yacyCore.seedDB.mySeed.hash,
(String) e.getValue(), rejectReason, new bitfield(plasmaURL.urlFlagLength), false); (String) e.getValue(), rejectReason, new bitfield(plasmaURL.urlFlagLength), false);
} }
} }
@ -543,12 +548,12 @@ public class plasmaSwitchboard extends serverAbstractSwitch implements serverSwi
} }
if (noIndexReason == null) { if (noIndexReason == null) {
// strip out words // strip out words
log.logDebug("(Profile) Condensing for '" + entry.urlString + "'"); log.logDebug("(Profile) Condensing for '" + entry.nomalizedURLString + "'");
plasmaCondenser condenser = new plasmaCondenser(new ByteArrayInputStream(document.getText())); plasmaCondenser condenser = new plasmaCondenser(new ByteArrayInputStream(document.getText()));
//log.logInfo("INDEXING HEADLINE:" + descr); //log.logInfo("INDEXING HEADLINE:" + descr);
try { try {
log.logDebug("(Profile) Create LURL-Entry for '" + entry.urlString + "'"); log.logDebug("(Profile) Create LURL-Entry for '" + entry.nomalizedURLString + "'");
plasmaCrawlLURL.entry newEntry = loadedURL.newEntry( plasmaCrawlLURL.entry newEntry = loadedURL.newEntry(
entry.url, descr, entry.lastModified, new Date(), entry.url, descr, entry.lastModified, new Date(),
initiatorHash, initiatorHash,
@ -563,28 +568,28 @@ public class plasmaSwitchboard extends serverAbstractSwitch implements serverSwi
); );
String urlHash = newEntry.hash(); String urlHash = newEntry.hash();
log.logDebug("(Profile) Remove NURL for '" + entry.urlString + "'"); log.logDebug("(Profile) Remove NURL for '" + entry.nomalizedURLString + "'");
noticeURL.remove(urlHash); // worked-off noticeURL.remove(urlHash); // worked-off
if (((processCase == 4) || (processCase == 5) || (processCase == 6)) && if (((processCase == 4) || (processCase == 5) || (processCase == 6)) &&
(entry.profile.localIndexing())) { (entry.profile.localIndexing())) {
// remove stopwords // remove stopwords
log.logDebug("(Profile) Exclude Stopwords for '" + entry.urlString + "'"); log.logDebug("(Profile) Exclude Stopwords for '" + entry.nomalizedURLString + "'");
log.logInfo("Excluded " + condenser.excludeWords(stopwords) + " words in URL " + entry.url); log.logInfo("Excluded " + condenser.excludeWords(stopwords) + " words in URL " + entry.url);
//System.out.println("DEBUG: words left to be indexed: " + condenser.getWords()); //System.out.println("DEBUG: words left to be indexed: " + condenser.getWords());
// do indexing // do indexing
log.logDebug("(Profile) Create Index for '" + entry.urlString + "'"); log.logDebug("(Profile) Create Index for '" + entry.nomalizedURLString + "'");
int words = searchManager.addPageIndex(entry.url, urlHash, entry.lastModified, condenser, entry.language, entry.doctype); int words = searchManager.addPageIndex(entry.url, urlHash, entry.lastModified, condenser, entry.language, entry.doctype);
log.logInfo("Indexed " + words + " words in URL " + entry.url + " (" + descr + ")"); log.logInfo("Indexed " + words + " words in URL " + entry.url + " (" + descr + ")");
// if this was performed for a remote crawl request, notify requester // if this was performed for a remote crawl request, notify requester
if ((processCase == 6) && (initiator != null)) { if ((processCase == 6) && (initiator != null)) {
log.logInfo("Sending crawl receipt for '" + entry.urlString + "' to " + initiator.getName()); log.logInfo("Sending crawl receipt for '" + entry.nomalizedURLString + "' to " + initiator.getName());
yacyClient.crawlReceipt(initiator, "crawl", "fill", "indexed", newEntry, ""); yacyClient.crawlReceipt(initiator, "crawl", "fill", "indexed", newEntry, "");
} }
} else { } else {
log.logDebug("Resource '" + entry.urlString + "' not indexed (indexing is off)"); log.logDebug("Resource '" + entry.nomalizedURLString + "' not indexed (indexing is off)");
} }
} catch (Exception ee) { } catch (Exception ee) {
log.logError("Could not index URL " + entry.url + ": " + ee.getMessage()); log.logError("Could not index URL " + entry.url + ": " + ee.getMessage());
@ -831,6 +836,7 @@ public class plasmaSwitchboard extends serverAbstractSwitch implements serverSwi
} }
private static SimpleDateFormat DateFormatter = new SimpleDateFormat("EEE, dd MMM yyyy"); private static SimpleDateFormat DateFormatter = new SimpleDateFormat("EEE, dd MMM yyyy");
public static String dateString(Date date) { public static String dateString(Date date) {
if (date == null) return ""; else return DateFormatter.format(date); if (date == null) return ""; else return DateFormatter.format(date);
@ -911,7 +917,7 @@ public class plasmaSwitchboard extends serverAbstractSwitch implements serverSwi
filename = url.getFile(); filename = url.getFile();
if ((seed == null) || ((address = seed.getAddress()) == null)) { if ((seed == null) || ((address = seed.getAddress()) == null)) {
// seed is not known from here // seed is not known from here
removeReferences(urlentry.hash(), getWords(("yacyshare " + filename.replace('?', ' ') + " " + urlentry.descr()).getBytes())); removeReferences(urlentry.hash(), plasmaCondenser.getWords(("yacyshare " + filename.replace('?', ' ') + " " + urlentry.descr()).getBytes()));
loadedURL.remove(urlentry.hash()); // clean up loadedURL.remove(urlentry.hash()); // clean up
continue; // next result continue; // next result
} }
@ -1063,28 +1069,6 @@ public class plasmaSwitchboard extends serverAbstractSwitch implements serverSwi
return "PROPS: " + super.toString() + "; QUEUE: " + processStack.toString(); return "PROPS: " + super.toString() + "; QUEUE: " + processStack.toString();
} }
/*
private void addScoreForked(kelondroMScoreCluster ref, String no, String[] words) {
String s;
if (words != null) for (int i = 0; i < words.length; i++) {
s = words[i].trim().toLowerCase();
if (s.indexOf(".") >= 0) addScoreForked(ref, no, s.split("\\."));
else if (s.indexOf(",") >= 0) addScoreForked(ref, no, s.split(","));
else if (s.indexOf(":") >= 0) addScoreForked(ref, no, s.split(":"));
else if (s.indexOf("-") >= 0) addScoreForked(ref, no, s.split("-"));
else if (s.indexOf("/") >= 0) addScoreForked(ref, no, s.split("/"));
else if (s.indexOf('"') >= 0) addScoreForked(ref, no, s.split(new String(new byte[] {(char)'"'})));
else addScoreFiltered(ref, no, s);
}
}
private void addScoreFiltered(kelondroMScoreCluster ref, String no, String word) {
if ((word.length() > 2) &&
("http_html_ftp_www_com_org_net_gov_edu_index_home_page_for_usage_the_and_".indexOf(word) < 0) &&
(no.indexOf(word) < 0))
ref.incScore(word);
}
*/
// method for index deletion // method for index deletion
public int removeAllUrlReferences(URL url, boolean fetchOnline) { public int removeAllUrlReferences(URL url, boolean fetchOnline) {
return removeAllUrlReferences(plasmaURL.urlHash(url), fetchOnline); return removeAllUrlReferences(plasmaURL.urlHash(url), fetchOnline);
@ -1099,7 +1083,7 @@ public class plasmaSwitchboard extends serverAbstractSwitch implements serverSwi
URL url = entry.url(); URL url = entry.url();
if (url == null) return 0; if (url == null) return 0;
// get set of words // get set of words
Set words = getWords(getText(getResource(url, fetchOnline))); Set words = plasmaCondenser.getWords(getText(getResource(url, fetchOnline)));
// delete all word references // delete all word references
int count = removeReferences(urlhash, words); int count = removeReferences(urlhash, words);
// finally delete the url entry itself // finally delete the url entry itself
@ -1172,17 +1156,6 @@ public class plasmaSwitchboard extends serverAbstractSwitch implements serverSwi
} }
} }
// Wraps the given bytes in a ByteArrayInputStream, hands that stream to a
// new plasmaCondenser and returns the condenser's getWords() result.
// Returns null if text is null or if an IOException is raised in the try block.
public static Set getWords(byte[] text) {
if (text == null) return null; // no input, no word set
ByteArrayInputStream buffer = new ByteArrayInputStream(text);
try {
plasmaCondenser condenser = new plasmaCondenser(buffer);
return condenser.getWords();
} catch (IOException e) {
// the exception is not propagated; callers only see a null result
return null;
}
}
public class distributeIndex { public class distributeIndex {
// distributes parts of the index to other peers // distributes parts of the index to other peers
// stops as soon as an error occurs // stops as soon as an error occurs
@ -1214,6 +1187,7 @@ public class plasmaSwitchboard extends serverAbstractSwitch implements serverSwi
((transferred = performTransferIndex(indexCount, peerCount, true)) > 0)) { ((transferred = performTransferIndex(indexCount, peerCount, true)) > 0)) {
indexCount = transferred; indexCount = transferred;
if ((System.currentTimeMillis() - starttime) > (maxTime * peerCount)) indexCount--; else indexCount++; if ((System.currentTimeMillis() - starttime) > (maxTime * peerCount)) indexCount--; else indexCount++;
if (indexCount < 30) indexCount = 30;
return true; return true;
} else { } else {
// make a long pause // make a long pause
@ -1230,6 +1204,7 @@ public class plasmaSwitchboard extends serverAbstractSwitch implements serverSwi
public void setCounts(int indexCount, int peerCount, long pause) { public void setCounts(int indexCount, int peerCount, long pause) {
this.indexCount = indexCount; this.indexCount = indexCount;
if (indexCount < 30) indexCount = 30;
this.peerCount = peerCount; this.peerCount = peerCount;
this.pause = pause; this.pause = pause;
} }

@ -63,7 +63,6 @@ public class yacyDHTAction implements yacyPeerAction {
} }
} }
public Enumeration getDHTSeeds(boolean up, String firstHash) { public Enumeration getDHTSeeds(boolean up, String firstHash) {
// enumerates seed-type objects: all seeds with starting point in the middle, rotating at the end/beginning // enumerates seed-type objects: all seeds with starting point in the middle, rotating at the end/beginning
return new seedDHTEnum(up, firstHash); return new seedDHTEnum(up, firstHash);
@ -104,10 +103,8 @@ public class yacyDHTAction implements yacyPeerAction {
return e2.nextElement(); return e2.nextElement();
} }
} }
} }
public Enumeration getAcceptRemoteIndexSeeds(String starthash) { public Enumeration getAcceptRemoteIndexSeeds(String starthash) {
// returns an enumeration of yacySeed-Objects // returns an enumeration of yacySeed-Objects
// that have the AcceptRemoteIndex-Flag set // that have the AcceptRemoteIndex-Flag set
@ -131,11 +128,17 @@ public class yacyDHTAction implements yacyPeerAction {
private yacySeed nextInternal() { private yacySeed nextInternal() {
yacySeed s; yacySeed s;
try {
while (se.hasMoreElements()) { while (se.hasMoreElements()) {
s = (yacySeed) se.nextElement(); s = (yacySeed) se.nextElement();
if (s == null) return null; if (s == null) return null;
if (s.getFlagAcceptRemoteIndex()) return s; if (s.getFlagAcceptRemoteIndex()) return s;
} }
} catch (kelondroException e) {
yacyCore.log.logError("database inconsistency (" + e.getMessage() + "), re-set of db.");
seedDB.resetActiveTable();
return null;
}
return null; return null;
} }
@ -147,7 +150,6 @@ public class yacyDHTAction implements yacyPeerAction {
} }
public Enumeration getAcceptRemoteCrawlSeeds(String starthash, boolean available) { public Enumeration getAcceptRemoteCrawlSeeds(String starthash, boolean available) {
return new acceptRemoteCrawlSeedEnum(starthash, available); return new acceptRemoteCrawlSeedEnum(starthash, available);
} }

@ -142,7 +142,7 @@ public class yacySeedDB {
private synchronized kelondroMap resetSeedTable(kelondroMap seedDB, File seedDBFile) { private synchronized kelondroMap resetSeedTable(kelondroMap seedDB, File seedDBFile) {
// this is an emergency function that should only be used if any problem with the // this is an emergency function that should only be used if any problem with the
// seed.db is detected // seed.db is detected
yacyCore.log.logError("seed-db " + seedDBFile.toString() + " reset (on-the-fly)"); yacyCore.log.logDebug("seed-db " + seedDBFile.toString() + " reset (on-the-fly)");
try { try {
seedDB.close(); seedDB.close();
seedDBFile.delete(); seedDBFile.delete();
@ -154,6 +154,10 @@ public class yacySeedDB {
return seedDB; return seedDB;
} }
/**
 * Passes the active seed table and its file to resetSeedTable and stores
 * the returned table back into seedActiveDB.
 */
public synchronized void resetActiveTable() {
    seedActiveDB = resetSeedTable(
        seedActiveDB,
        seedActiveDBFile
    );
}
/**
 * Passes the passive seed table and its file to resetSeedTable and stores
 * the returned table back into seedPassiveDB.
 */
public synchronized void resetPassiveTable() {
    seedPassiveDB = resetSeedTable(
        seedPassiveDB,
        seedPassiveDBFile
    );
}
/**
 * Passes the potential seed table and its file to resetSeedTable and stores
 * the returned table back into seedPotentialDB.
 */
public synchronized void resetPotentialTable() {
    seedPotentialDB = resetSeedTable(
        seedPotentialDB,
        seedPotentialDBFile
    );
}
public void close() { public void close() {
try { try {
seedActiveDB.close(); seedActiveDB.close();

@ -1 +0,0 @@
testblue
Loading…
Cancel
Save