refactoring (in preparation for new removeHost method)

pull/1/head
Michael Peter Christen 12 years ago
parent 7a5574cd51
commit e137ff4171

@@ -423,7 +423,7 @@ public class HostBrowser {
prop.put("files_list_" + c + "_type_url", entry.getKey());
StoreType type = (StoreType) entry.getValue();
try {uri = new DigestURI(entry.getKey());} catch (final MalformedURLException e) {uri = null;}
- HarvestProcess process = uri == null ? null : sb.crawlQueues.urlExists(uri.hash());
+ HarvestProcess process = uri == null ? null : sb.crawlQueues.exists(uri.hash());
boolean loading = load.equals(entry.getKey()) || (process != null && process != HarvestProcess.ERRORS);
boolean error = process == HarvestProcess.ERRORS || type == StoreType.EXCLUDED || type == StoreType.FAILED;
boolean dc = type != StoreType.INDEX && !error && !loading && list.containsKey(entry.getKey() + "/");

@@ -184,7 +184,7 @@ public final class CrawlStacker {
final byte[] urlhash = url.hash();
if (replace) {
this.indexSegment.fulltext().remove(urlhash);
- this.nextQueue.urlRemove(urlhash);
+ this.nextQueue.removeURL(urlhash);
String u = url.toNormalform(true);
if (u.endsWith("/")) {
u = u + "index.html";
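The trailing-slash branch above rewrites a directory URL to its default document, presumably so the index.html twin of the replaced URL can be purged as well (the continuation lies outside this hunk). A minimal sketch of the effect, with a hypothetical input value, not code from this commit:

    // hypothetical input; illustrates the rewrite only
    String u = "http://example.com/docs/";      // normalized form of the replaced URL
    if (u.endsWith("/")) u = u + "index.html";  // u becomes "http://example.com/docs/index.html"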
@@ -416,7 +416,7 @@ public final class CrawlStacker {
}
// check if the url is double registered
- final HarvestProcess dbocc = this.nextQueue.urlExists(url.hash()); // returns the name of the queue if entry exists
+ final HarvestProcess dbocc = this.nextQueue.exists(url.hash()); // returns the name of the queue if entry exists
final Date oldDate = this.indexSegment.fulltext().getLoadDate(ASCII.String(url.hash()));
if (oldDate == null) {
if (dbocc != null) {

@@ -142,7 +142,7 @@ public class CrawlQueues {
* @param hash
* @return if the hash exists, the name of the database is returned, otherwise null is returned
*/
- public HarvestProcess urlExists(final byte[] hash) {
+ public HarvestProcess exists(final byte[] hash) {
if (this.delegatedURL.exists(hash)) {
return HarvestProcess.DELEGATED;
}
@@ -160,7 +160,7 @@
return null;
}
- public void urlRemove(final byte[] hash) {
+ public void removeURL(final byte[] hash) {
this.noticeURL.removeByURLHash(hash);
this.delegatedURL.remove(hash);
this.errorURL.remove(hash);
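The commit title announces a removeHost method that this rename prepares for; it is not part of the diff. A speculative sketch of how it might sit beside removeURL, assuming a hypothetical helper urlHashes(host) that enumerates all queued hashes of one host (neither the helper nor the method below appear in this commit):

    // hypothetical sketch only; removeHost is announced but not included here
    public void removeHost(final String host) {
        for (final byte[] urlhash : urlHashes(host)) { // urlHashes(host) is an assumed helper
            removeURL(urlhash);                        // delegate to the renamed single-URL removal
        }
    }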

@@ -1565,7 +1565,7 @@ public final class Switchboard extends serverSwitch {
@Deprecated
public HarvestProcess urlExists(final String hash) {
if (this.index.exists(hash)) return HarvestProcess.LOADED;
- return this.crawlQueues.urlExists(ASCII.getBytes(hash));
+ return this.crawlQueues.exists(ASCII.getBytes(hash));
}
/**
@@ -1577,7 +1577,7 @@ public final class Switchboard extends serverSwitch {
Set<String> e = this.index.exists(ids);
Map<String, HarvestProcess> m = new HashMap<String, HarvestProcess>();
for (String id: ids) {
- m.put(id, e.contains(id) ? HarvestProcess.LOADED : this.crawlQueues.urlExists(ASCII.getBytes(id)));
+ m.put(id, e.contains(id) ? HarvestProcess.LOADED : this.crawlQueues.exists(ASCII.getBytes(id)));
}
return m;
}
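This batch variant answers one index existence query for the whole id set and consults the crawl queues only for the misses, instead of probing hash by hash. A hedged caller-side sketch (the enclosing method is assumed to be the Set-based counterpart of urlExists shown above; variable names invented):

    // hypothetical caller; ids holds URL hash strings
    Map<String, HarvestProcess> states = sb.urlExists(ids);
    for (Map.Entry<String, HarvestProcess> s : states.entrySet()) {
        if (s.getValue() == null) {
            // hash is unknown to both the index and every crawl queue
        }
    }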
@@ -1585,7 +1585,7 @@
public void urlRemove(final Segment segment, final byte[] hash) {
segment.fulltext().remove(hash);
ResultURLs.remove(ASCII.String(hash));
- this.crawlQueues.urlRemove(hash);
+ this.crawlQueues.removeURL(hash);
}
public DigestURI getURL(final byte[] urlhash) {
@@ -2836,13 +2836,13 @@ public final class Switchboard extends serverSwitch {
public void remove(final Collection<String> deleteIDs) {
this.index.fulltext().remove(deleteIDs);
for (String id: deleteIDs) {
- this.crawlQueues.urlRemove(ASCII.getBytes(id));
+ this.crawlQueues.removeURL(ASCII.getBytes(id));
}
}
public void remove(final byte[] urlhash) {
this.index.fulltext().remove(urlhash);
- this.crawlQueues.urlRemove(urlhash);
+ this.crawlQueues.removeURL(urlhash);
}
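Both remove overloads enforce the same invariant: a deleted document disappears from the fulltext index and from the notice, delegated and error queues alike, so no queued entry survives the deletion. A hedged usage sketch (hash value invented; real hashes come from DigestURI.hash()):

    // hypothetical call; "AAAAAAAAAAAA" stands in for a real 12-character YaCy URL hash
    sb.remove(ASCII.getBytes("AAAAAAAAAAAA"));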
public void stackURLs(Set<DigestURI> rootURLs, final CrawlProfile profile, final Set<DigestURI> successurls, final Map<DigestURI,String> failurls) {
@@ -2903,7 +2903,7 @@
}
// remove the document from the error-db
- this.crawlQueues.urlRemove(urlhash);
+ this.crawlQueues.removeURL(urlhash);
// get a scraper to get the title
Document scraper;
