added a gc call to the cleanup process (once every 10 minutes)

pull/402/head
Michael Peter Christen 4 years ago
parent 22841ffbf1
commit 52228cb6be
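
In substance, this commit adds a System.gc() call at the end of the Switchboard cleanup job, which runs periodically (per the commit message, roughly once every 10 minutes). As a rough, self-contained sketch of the pattern — not YaCy's actual scheduling code, which drives cleanupJob through its own thread machinery — a periodic cleanup with a trailing GC hint could look like this:

```java
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;

public final class PeriodicCleanup {

    public static void main(final String[] args) {
        final ScheduledExecutorService scheduler = Executors.newSingleThreadScheduledExecutor();
        // Run the cleanup every 10 minutes, matching the interval named in the commit message.
        scheduler.scheduleAtFixedRate(PeriodicCleanup::cleanupJob, 10, 10, TimeUnit.MINUTES);
    }

    private static void cleanupJob() {
        // ... housekeeping work (cache trimming, log rotation, etc.) would go here ...

        // clean up: suggest a full collection to the JVM, as the commit does.
        // System.gc() is only a hint; the JVM may ignore it entirely
        // (e.g. when started with -XX:+DisableExplicitGC).
        System.gc();
    }
}
```

Calling System.gc() from a housekeeping task like this is a deliberate trade-off: it accepts a periodic pause in exchange for more predictable heap usage between cleanup runs.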

@@ -422,7 +422,6 @@ public final class Switchboard extends serverSwitch {
        CollectionConfiguration.UNIQUE_HEURISTIC_PREFER_HTTPS = this.getConfigBool("search.ranking.uniqueheuristic.preferhttps", false);
        CollectionConfiguration.UNIQUE_HEURISTIC_PREFER_WWWPREFIX = this.getConfigBool("search.ranking.uniqueheuristic.preferwwwprefix", true);
        // init libraries
        this.log.config("initializing libraries");
        new Thread("LibraryProvider.initialize") {
@@ -994,7 +993,6 @@ public final class Switchboard extends serverSwitch {
            }
        }.start();
        // initializing the stackCrawlThread
        this.crawlStacker =
            new CrawlStacker(
@@ -2951,6 +2949,9 @@ public final class Switchboard extends serverSwitch {
            // show deadlocks if there are any in the log
            if (Memory.deadlocks() > 0) Memory.logDeadlocks();
            // clean up
            System.gc();
            return true;
        } catch (final InterruptedException e) {
            this.log.info("cleanupJob: Shutdown detected");
@@ -3093,7 +3094,6 @@ public final class Switchboard extends serverSwitch {
            }
            final long parsingEndTime = System.currentTimeMillis();
            // put anchors on crawl stack
            final long stackStartTime = System.currentTimeMillis();
            // check if the documents have valid urls; this is not a bug patch; it is possible that
@@ -3151,7 +3151,6 @@ public final class Switchboard extends serverSwitch {
                    || response.profile().isCrawlerAlwaysCheckMediaType() /* the crawler must always load resources to double-check the actual Media Type even on unsupported file extensions */;
            /* Handle media links */
            for (Map.Entry<DigestURL, String> entry : Document.getImagelinks(documents).entrySet()) {
                if (addAllLinksToCrawlStack
                        || (response.profile().indexMedia() && TextParser.supportsExtension(entry.getKey()) == null)) {
