several bugfixes

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@3899 6c8d7289-2bf4-0310-a012-ef5d649a1542
orbiter 18 years ago
parent 465145cb6f
commit c7a614830a

@@ -72,7 +72,7 @@ public class AccessTracker_p {
                 prop.put("page_list_" + entCount + "_countHour", access.tailMap(new Long(System.currentTimeMillis() - 1000 * 60 * 60)).size());
                 entCount++;
             }
-        } catch (ConcurrentModificationException e) {} // we dont want to serialize this
+        } catch (ConcurrentModificationException e) {} // we dont want to synchronize this
         prop.put("page_list", entCount);
         prop.put("page_num", entCount);
     }
@@ -84,6 +84,7 @@ public class AccessTracker_p {
         if (host.length() > 0) {
             access = switchboard.accessTrack(host);
             if (access != null) {
+                try {
                 Iterator ii = access.entrySet().iterator();
                 while (ii.hasNext()) {
                     entry = (Map.Entry) ii.next();
@@ -91,13 +92,15 @@ public class AccessTracker_p {
                     prop.put("page_list_" + entCount + "_date", yacyCore.universalDateShortString(new Date(((Long) entry.getKey()).longValue())));
                     prop.put("page_list_" + entCount + "_path", (String) entry.getValue());
                     entCount++;
-                }
+                }} catch (ConcurrentModificationException e) {} // we dont want to synchronize this
             }
         } else {
             Iterator i = switchboard.accessHosts();
             while ((entCount < maxCount) && (i.hasNext())) {
                 host = (String) i.next();
                 access = switchboard.accessTrack(host);
+                try {
                 Iterator ii = access.entrySet().iterator();
                 while (ii.hasNext()) {
                     entry = (Map.Entry) ii.next();
@@ -105,7 +108,8 @@ public class AccessTracker_p {
                     prop.put("page_list_" + entCount + "_date", yacyCore.universalDateShortString(new Date(((Long) entry.getKey()).longValue())));
                     prop.put("page_list_" + entCount + "_path", (String) entry.getValue());
                     entCount++;
-                }
+                }} catch (ConcurrentModificationException e) {} // we dont want to synchronize this
             }
         }
         prop.put("page_list", entCount);
@@ -149,6 +153,7 @@ public class AccessTracker_p {
         TreeSet handles;
         int entCount = 0;
         Map.Entry entry;
+        try {
         while ((entCount < maxCount) && (i.hasNext())) {
             entry = (Map.Entry) i.next();
             host = (String) entry.getKey();
@@ -177,6 +182,7 @@ public class AccessTracker_p {
             // next
             entCount++;
         }
+        } catch (ConcurrentModificationException e) {} // we dont want to synchronize this
         prop.put("page_list", entCount);
         prop.put("page_num", entCount);
         prop.put("page_total", (page == 3) ? switchboard.localSearches.size() : switchboard.remoteSearches.size());

@@ -16,7 +16,7 @@
   You can define URLs as start points for Web page crawling and start crawling here. "Crawling" means that YaCy will download the given website, extract all links in it and then download the content behind these links. This is repeated as long as specified under "Crawling Depth".
 </p>
-<form action="WatchCrawler_p.html" method="post" enctype="multipart/form-data">
+<form action="WatchCrawler_p.html" method="get" enctype="multipart/form-data">
 <table border="0" cellpadding="5" cellspacing="1">
   <tr class="TableHeader">
     <td><strong>Attribut</strong></td>

@@ -48,7 +48,7 @@ public class CrawlStartExpert_p {
         prop.put("crawlingIfOlderUnitHourCheck", 0);
         prop.put("crawlingIfOlderUnitMinuteCheck", 0);
         if ((crawlingIfOlder == -1) || (crawlingIfOlder == Integer.MAX_VALUE)) {
-            prop.put("crawlingIfOlderNumber", -1);
+            prop.put("crawlingIfOlderNumber", 1);
             prop.put("crawlingIfOlderUnitYearCheck", 1);
         } else if (crawlingIfOlder >= 60*24*365) {
             prop.put("crawlingIfOlderNumber", Math.round((float)crawlingIfOlder / (float)(60*24*365)));

@@ -19,7 +19,7 @@
   This is repeated as long as specified under "Crawling Depth".
 </p>
-<form action="WatchCrawler_p.html" method="post" enctype="multipart/form-data">
+<form action="WatchCrawler_p.html" method="get" enctype="multipart/form-data">
 <input type="hidden" name="crawlingFilter" value=".*" />
 <input type="hidden" name="crawlingIfOlderCheck" value="off" />
 <input type="hidden" name="crawlingDomFilterCheck" value="off" />

@@ -137,7 +137,7 @@ public class IndexControl_p {
             }
             if (delurl || delurlref) {
                 for (int i = 0; i < urlx.length; i++) {
-                    switchboard.wordIndex.loadedURL.remove(urlx[i]);
+                    switchboard.urlRemove(urlx[i]);
                 }
             }
             switchboard.wordIndex.deleteContainer(keyhash);
@@ -157,7 +157,7 @@ public class IndexControl_p {
             }
             if (delurl || delurlref) {
                 for (int i = 0; i < urlx.length; i++) {
-                    switchboard.wordIndex.loadedURL.remove(urlx[i]);
+                    switchboard.urlRemove(urlx[i]);
                 }
             }
             Set urlHashes = new HashSet();
@@ -190,7 +190,17 @@ public class IndexControl_p {
             } else {
                 urlstring = entry.comp().url().toNormalform();
                 prop.put("urlstring", "");
-                switchboard.wordIndex.loadedURL.remove(urlhash);
+                switchboard.urlRemove(urlhash);
+                prop.put("result", "Removed URL " + urlstring);
+            }
+        }
+
+        if (post.containsKey("urldelete")) {
+            urlhash = plasmaURL.urlHash(urlstring);
+            if ((urlhash == null) || (urlstring == null)) {
+                prop.put("result", "No input given; nothing deleted.");
+            } else {
+                switchboard.urlRemove(urlhash);
                 prop.put("result", "Removed URL " + urlstring);
             }
         }
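Besides routing the existing delete paths through the new central switchboard.urlRemove() helper (see the plasmaSwitchboard hunk below), this hunk adds a separate "urldelete" action: the URL arrives in string form, is hashed with plasmaURL.urlHash(urlstring), and is then removed through the same helper.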

@@ -346,8 +346,8 @@ public class plasmaCrawlProfile {
         // an antry must have to be re-crawled
         String r = (String) mem.get(RECRAWL_IF_OLDER);
         if (r == null) return Long.MAX_VALUE; else try {
-            long l = Long.parseLong(r) * ((long) 60000);
-            if (l < 0) return Long.MAX_VALUE; else return l;
+            long l = Long.parseLong(r) * 60000L;
+            return (l < 0) ? Long.MAX_VALUE : l;
         } catch (NumberFormatException e) {
             return 0;
         }
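This rewrite is behavior-neutral: ((long) 60000) and 60000L are the same constant, and the ternary replaces the if/else. The important point for the next hunk is the unit: the stored RECRAWL_IF_OLDER value is minutes, so recrawlIfOlder() returns the limit converted to milliseconds.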

@@ -379,8 +379,8 @@ public final class plasmaCrawlStacker {
         String dbocc = this.sb.urlExists(nexturlhash);
         indexURLEntry oldEntry = null;
         oldEntry = this.sb.wordIndex.loadedURL.load(nexturlhash, null);
-        boolean recrawl = (oldEntry != null) && (((System.currentTimeMillis() - oldEntry.loaddate().getTime()) / 60000) > profile.recrawlIfOlder());
-        // FIXME: this does not work correctly?
+        boolean recrawl = (oldEntry != null) && ((System.currentTimeMillis() - oldEntry.loaddate().getTime()) > profile.recrawlIfOlder());
+        // apply recrawl rule
         if ((dbocc != null) && (!(recrawl))) {
             reason = plasmaCrawlEURL.DOUBLE_REGISTERED + dbocc + ")";
             //this.log.logFine("URL '" + nexturlString + "' is double registered in '" + dbocc + "'. " + "Stack processing time: " + (System.currentTimeMillis()-startTime) + "ms");

@@ -1423,6 +1423,13 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
         return null;
     }
 
+    public void urlRemove(String hash) {
+        wordIndex.loadedURL.remove(hash);
+        noticeURL.remove(hash);
+        delegatedURL.remove(hash);
+        errorURL.remove(hash);
+    }
+
     public URL getURL(String urlhash) throws IOException {
         if (urlhash.equals(plasmaURL.dummyHash)) return null;
         plasmaCrawlEntry ne = noticeURL.get(urlhash);
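The new urlRemove() helper is the common thread of the IndexControl_p changes above: deleting a URL hash previously purged only wordIndex.loadedURL, which could leave stale entries for the same hash sitting in the noticeURL, delegatedURL and errorURL queues. Funnelling every deletion through one method removes the hash from all four stores at once.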

@@ -119,17 +119,15 @@ public class serverMemory {
      * @return whether enough memory could be freed (or is free) or not
      */
     public static boolean request(final long size, final boolean force) {
-        long avail;
+        long avail = available();
+        if (avail >= size) return true;
         if (log.isFine()) {
             String t = new Throwable("Stack trace").getStackTrace()[1].toString();
-            avail = available();
             log.logFine(t + " requested " + (size >>> 10) + " KB, got " + (avail >>> 10) + " KB");
-        } else {
-            avail = available();
-        }
-        if (avail >= size) return true;
+        }
         final long avg = getAverageGCFree();
         if (force || avg == 0 || avg + avail >= size) {
+            // this is only called if we expect that an allocation of <size> bytes would cause the jvm to call the GC anyway
             final long freed = runGC(!force);
             avail = available();
             log.logInfo("performed " + ((force) ? "explicit" : "necessary") + " GC, freed " + (freed >>> 10)
