several bugfixes

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@3899 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 18 years ago
parent 465145cb6f
commit c7a614830a

@ -72,7 +72,7 @@ public class AccessTracker_p {
prop.put("page_list_" + entCount + "_countHour", access.tailMap(new Long(System.currentTimeMillis() - 1000 * 60 * 60)).size());
entCount++;
}
} catch (ConcurrentModificationException e) {} // we dont want to serialize this
} catch (ConcurrentModificationException e) {} // we dont want to synchronize this
prop.put("page_list", entCount);
prop.put("page_num", entCount);
}
@ -84,6 +84,7 @@ public class AccessTracker_p {
if (host.length() > 0) {
access = switchboard.accessTrack(host);
if (access != null) {
try {
Iterator ii = access.entrySet().iterator();
while (ii.hasNext()) {
entry = (Map.Entry) ii.next();
@ -91,13 +92,15 @@ public class AccessTracker_p {
prop.put("page_list_" + entCount + "_date", yacyCore.universalDateShortString(new Date(((Long) entry.getKey()).longValue())));
prop.put("page_list_" + entCount + "_path", (String) entry.getValue());
entCount++;
}
}} catch (ConcurrentModificationException e) {} // we dont want to synchronize this
}
} else {
Iterator i = switchboard.accessHosts();
while ((entCount < maxCount) && (i.hasNext())) {
host = (String) i.next();
access = switchboard.accessTrack(host);
try {
Iterator ii = access.entrySet().iterator();
while (ii.hasNext()) {
entry = (Map.Entry) ii.next();
@ -105,7 +108,8 @@ public class AccessTracker_p {
prop.put("page_list_" + entCount + "_date", yacyCore.universalDateShortString(new Date(((Long) entry.getKey()).longValue())));
prop.put("page_list_" + entCount + "_path", (String) entry.getValue());
entCount++;
}
}} catch (ConcurrentModificationException e) {} // we dont want to synchronize this
}
}
prop.put("page_list", entCount);
@ -149,6 +153,7 @@ public class AccessTracker_p {
TreeSet handles;
int entCount = 0;
Map.Entry entry;
try {
while ((entCount < maxCount) && (i.hasNext())) {
entry = (Map.Entry) i.next();
host = (String) entry.getKey();
@ -177,6 +182,7 @@ public class AccessTracker_p {
// next
entCount++;
}
} catch (ConcurrentModificationException e) {} // we dont want to synchronize this
prop.put("page_list", entCount);
prop.put("page_num", entCount);
prop.put("page_total", (page == 3) ? switchboard.localSearches.size() : switchboard.remoteSearches.size());

@ -16,7 +16,7 @@
You can define URLs as start points for Web page crawling and start crawling here. "Crawling" means that YaCy will download the given website, extract all links in it and then download the content behind these links. This is repeated as long as specified under "Crawling Depth".
</p>
<form action="WatchCrawler_p.html" method="post" enctype="multipart/form-data">
<form action="WatchCrawler_p.html" method="get" enctype="multipart/form-data">
<table border="0" cellpadding="5" cellspacing="1">
<tr class="TableHeader">
<td><strong>Attribut</strong></td>

@ -48,7 +48,7 @@ public class CrawlStartExpert_p {
prop.put("crawlingIfOlderUnitHourCheck", 0);
prop.put("crawlingIfOlderUnitMinuteCheck", 0);
if ((crawlingIfOlder == -1) || (crawlingIfOlder == Integer.MAX_VALUE)) {
prop.put("crawlingIfOlderNumber", -1);
prop.put("crawlingIfOlderNumber", 1);
prop.put("crawlingIfOlderUnitYearCheck", 1);
} else if (crawlingIfOlder >= 60*24*365) {
prop.put("crawlingIfOlderNumber", Math.round((float)crawlingIfOlder / (float)(60*24*365)));

@ -19,7 +19,7 @@
This is repeated as long as specified under "Crawling Depth".
</p>
<form action="WatchCrawler_p.html" method="post" enctype="multipart/form-data">
<form action="WatchCrawler_p.html" method="get" enctype="multipart/form-data">
<input type="hidden" name="crawlingFilter" value=".*" />
<input type="hidden" name="crawlingIfOlderCheck" value="off" />
<input type="hidden" name="crawlingDomFilterCheck" value="off" />

@ -137,7 +137,7 @@ public class IndexControl_p {
}
if (delurl || delurlref) {
for (int i = 0; i < urlx.length; i++) {
switchboard.wordIndex.loadedURL.remove(urlx[i]);
switchboard.urlRemove(urlx[i]);
}
}
switchboard.wordIndex.deleteContainer(keyhash);
@ -157,7 +157,7 @@ public class IndexControl_p {
}
if (delurl || delurlref) {
for (int i = 0; i < urlx.length; i++) {
switchboard.wordIndex.loadedURL.remove(urlx[i]);
switchboard.urlRemove(urlx[i]);
}
}
Set urlHashes = new HashSet();
@ -190,7 +190,17 @@ public class IndexControl_p {
} else {
urlstring = entry.comp().url().toNormalform();
prop.put("urlstring", "");
switchboard.wordIndex.loadedURL.remove(urlhash);
switchboard.urlRemove(urlhash);
prop.put("result", "Removed URL " + urlstring);
}
}
if (post.containsKey("urldelete")) {
urlhash = plasmaURL.urlHash(urlstring);
if ((urlhash == null) || (urlstring == null)) {
prop.put("result", "No input given; nothing deleted.");
} else {
switchboard.urlRemove(urlhash);
prop.put("result", "Removed URL " + urlstring);
}
}

@ -346,8 +346,8 @@ public class plasmaCrawlProfile {
// an entry must be re-crawled
String r = (String) mem.get(RECRAWL_IF_OLDER);
if (r == null) return Long.MAX_VALUE; else try {
long l = Long.parseLong(r) * ((long) 60000);
if (l < 0) return Long.MAX_VALUE; else return l;
long l = Long.parseLong(r) * 60000L;
return (l < 0) ? Long.MAX_VALUE : l;
} catch (NumberFormatException e) {
return 0;
}

@ -379,8 +379,8 @@ public final class plasmaCrawlStacker {
String dbocc = this.sb.urlExists(nexturlhash);
indexURLEntry oldEntry = null;
oldEntry = this.sb.wordIndex.loadedURL.load(nexturlhash, null);
boolean recrawl = (oldEntry != null) && (((System.currentTimeMillis() - oldEntry.loaddate().getTime()) / 60000) > profile.recrawlIfOlder());
// FIXME: this does not work correctly?
boolean recrawl = (oldEntry != null) && ((System.currentTimeMillis() - oldEntry.loaddate().getTime()) > profile.recrawlIfOlder());
// apply recrawl rule
if ((dbocc != null) && (!(recrawl))) {
reason = plasmaCrawlEURL.DOUBLE_REGISTERED + dbocc + ")";
//this.log.logFine("URL '" + nexturlString + "' is double registered in '" + dbocc + "'. " + "Stack processing time: " + (System.currentTimeMillis()-startTime) + "ms");

@ -1423,6 +1423,13 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
return null;
}
// Removes every trace of the URL identified by the given hash:
// the loaded-URL index plus the notice (crawl), delegated and error
// queues, so no stale reference to the URL survives anywhere.
// NOTE(review): assumes the stores tolerate removal of a hash they
// do not contain — TODO confirm against their remove() contracts.
public void urlRemove(String hash) {
wordIndex.loadedURL.remove(hash); // main index of successfully loaded URLs
noticeURL.remove(hash); // pending crawl queue
delegatedURL.remove(hash); // URLs handed off to remote peers
errorURL.remove(hash); // URLs that previously failed
}
public URL getURL(String urlhash) throws IOException {
if (urlhash.equals(plasmaURL.dummyHash)) return null;
plasmaCrawlEntry ne = noticeURL.get(urlhash);

@ -119,17 +119,15 @@ public class serverMemory {
* @return whether enough memory could be freed (or is free) or not
*/
public static boolean request(final long size, final boolean force) {
long avail;
long avail = available();
if (avail >= size) return true;
if (log.isFine()) {
String t = new Throwable("Stack trace").getStackTrace()[1].toString();
avail = available();
log.logFine(t + " requested " + (size >>> 10) + " KB, got " + (avail >>> 10) + " KB");
} else {
avail = available();
}
if (avail >= size) return true;
}
final long avg = getAverageGCFree();
if (force || avg == 0 || avg + avail >= size) {
// this is only called if we expect that an allocation of <size> bytes would cause the jvm to call the GC anyway
final long freed = runGC(!force);
avail = available();
log.logInfo("performed " + ((force) ? "explicit" : "necessary") + " GC, freed " + (freed >>> 10)

Loading…
Cancel
Save