refactoring (in preparation for new removeHost method)

pull/1/head
Michael Peter Christen 12 years ago
parent 7a5574cd51
commit e137ff4171

@@ -423,7 +423,7 @@ public class HostBrowser {
                 prop.put("files_list_" + c + "_type_url", entry.getKey());
                 StoreType type = (StoreType) entry.getValue();
                 try {uri = new DigestURI(entry.getKey());} catch (final MalformedURLException e) {uri = null;}
-                HarvestProcess process = uri == null ? null : sb.crawlQueues.urlExists(uri.hash());
+                HarvestProcess process = uri == null ? null : sb.crawlQueues.exists(uri.hash());
                 boolean loading = load.equals(entry.getKey()) || (process != null && process != HarvestProcess.ERRORS);
                 boolean error = process == HarvestProcess.ERRORS || type == StoreType.EXCLUDED || type == StoreType.FAILED;
                 boolean dc = type != StoreType.INDEX && !error && !loading && list.containsKey(entry.getKey() + "/");

@@ -184,7 +184,7 @@ public final class CrawlStacker {
         final byte[] urlhash = url.hash();
         if (replace) {
             this.indexSegment.fulltext().remove(urlhash);
-            this.nextQueue.urlRemove(urlhash);
+            this.nextQueue.removeURL(urlhash);
             String u = url.toNormalform(true);
             if (u.endsWith("/")) {
                 u = u + "index.html";
@@ -416,7 +416,7 @@ public final class CrawlStacker {
         }

         // check if the url is double registered
-        final HarvestProcess dbocc = this.nextQueue.urlExists(url.hash()); // returns the name of the queue if entry exists
+        final HarvestProcess dbocc = this.nextQueue.exists(url.hash()); // returns the name of the queue if entry exists
         final Date oldDate = this.indexSegment.fulltext().getLoadDate(ASCII.String(url.hash()));
         if (oldDate == null) {
             if (dbocc != null) {

@@ -142,7 +142,7 @@ public class CrawlQueues {
      * @param hash
      * @return if the hash exists, the name of the database is returned, otherwise null is returned
      */
-    public HarvestProcess urlExists(final byte[] hash) {
+    public HarvestProcess exists(final byte[] hash) {
         if (this.delegatedURL.exists(hash)) {
             return HarvestProcess.DELEGATED;
         }
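
For orientation, a hedged sketch of how the renamed lookup is called at the sites touched by this commit; HarvestProcess and exists(...) come from the diff, while the surrounding variables (crawlQueues, uri) are illustrative stand-ins:

    // illustrative only: 'crawlQueues' and 'uri' stand in for whatever the caller has in scope
    HarvestProcess process = uri == null ? null : crawlQueues.exists(uri.hash());
    if (process == HarvestProcess.DELEGATED) {
        // the hash was found in the delegated-URL database, the first check in exists() above
    }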
@@ -160,7 +160,7 @@
         return null;
     }

-    public void urlRemove(final byte[] hash) {
+    public void removeURL(final byte[] hash) {
         this.noticeURL.removeByURLHash(hash);
         this.delegatedURL.remove(hash);
         this.errorURL.remove(hash);
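
The commit message says this rename prepares for a new removeHost method. A minimal sketch of how such a method could sit beside removeURL, assuming a hypothetical urlHashesForHost(...) helper that is not part of this commit:

    // hypothetical sketch: urlHashesForHost(...) is an assumed helper, not in this diff
    public void removeHost(final String hostName) {
        for (final byte[] urlhash : urlHashesForHost(hostName)) {
            removeURL(urlhash); // reuse the renamed per-URL removal above
        }
    }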

@@ -1565,7 +1565,7 @@ public final class Switchboard extends serverSwitch {
     @Deprecated
     public HarvestProcess urlExists(final String hash) {
         if (this.index.exists(hash)) return HarvestProcess.LOADED;
-        return this.crawlQueues.urlExists(ASCII.getBytes(hash));
+        return this.crawlQueues.exists(ASCII.getBytes(hash));
     }

     /**
@@ -1577,7 +1577,7 @@ public final class Switchboard extends serverSwitch {
         Set<String> e = this.index.exists(ids);
         Map<String, HarvestProcess> m = new HashMap<String, HarvestProcess>();
         for (String id: ids) {
-            m.put(id, e.contains(id) ? HarvestProcess.LOADED : this.crawlQueues.urlExists(ASCII.getBytes(id)));
+            m.put(id, e.contains(id) ? HarvestProcess.LOADED : this.crawlQueues.exists(ASCII.getBytes(id)));
         }
         return m;
     }
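
The hunk above shows only a method body; its signature lies outside the diff. Assuming it implements a bulk variant of the deprecated single-hash urlExists (name and parameter type inferred, not confirmed by this hunk), a caller would receive one HarvestProcess per id:

    // inferred usage only; the enclosing signature is not visible in this hunk
    Map<String, HarvestProcess> states = sb.urlExists(ids); // assumed bulk check over a Set<String> of URL hashes
    for (String id : ids) {
        if (states.get(id) == HarvestProcess.LOADED) {
            // already in the fulltext index; skip re-crawling
        }
    }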
@@ -1585,7 +1585,7 @@ public final class Switchboard extends serverSwitch {
     public void urlRemove(final Segment segment, final byte[] hash) {
         segment.fulltext().remove(hash);
         ResultURLs.remove(ASCII.String(hash));
-        this.crawlQueues.urlRemove(hash);
+        this.crawlQueues.removeURL(hash);
     }

     public DigestURI getURL(final byte[] urlhash) {
@@ -2836,13 +2836,13 @@ public final class Switchboard extends serverSwitch {
         public void remove(final Collection<String> deleteIDs) {
             this.index.fulltext().remove(deleteIDs);
             for (String id: deleteIDs) {
-                this.crawlQueues.urlRemove(ASCII.getBytes(id));
+                this.crawlQueues.removeURL(ASCII.getBytes(id));
             }
         }

         public void remove(final byte[] urlhash) {
             this.index.fulltext().remove(urlhash);
-            this.crawlQueues.urlRemove(urlhash);
+            this.crawlQueues.removeURL(urlhash);
         }

     public void stackURLs(Set<DigestURI> rootURLs, final CrawlProfile profile, final Set<DigestURI> successurls, final Map<DigestURI,String> failurls) {
@@ -2903,7 +2903,7 @@ public final class Switchboard extends serverSwitch {
         }

         // remove the document from the error-db
-        this.crawlQueues.urlRemove(urlhash);
+        this.crawlQueues.removeURL(urlhash);

         // get a scraper to get the title
         Document scraper;
