diff --git a/htroot/AccessTracker_p.java b/htroot/AccessTracker_p.java index 6dcb80c71..07f9e422d 100644 --- a/htroot/AccessTracker_p.java +++ b/htroot/AccessTracker_p.java @@ -24,12 +24,10 @@ // along with this program; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -import java.util.AbstractMap; import java.util.Collection; import java.util.ConcurrentModificationException; import java.util.Date; import java.util.Iterator; -import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Map.Entry; @@ -39,9 +37,7 @@ import java.util.concurrent.LinkedBlockingQueue; import net.yacy.cora.date.GenericFormatter; import net.yacy.cora.protocol.Domains; import net.yacy.cora.protocol.RequestHeader; -import net.yacy.cora.sorting.ConcurrentScoreMap; import net.yacy.cora.sorting.OrderedScoreMap; -import net.yacy.cora.sorting.ScoreMap; import net.yacy.cora.util.CommonPattern; import net.yacy.cora.util.ConcurrentLog; import net.yacy.peers.Seed; diff --git a/htroot/CrawlStartExpert.html b/htroot/CrawlStartExpert.html index b07b26aed..0681684ba 100644 --- a/htroot/CrawlStartExpert.html +++ b/htroot/CrawlStartExpert.html @@ -370,10 +370,10 @@ Content Filter

These are limitations on parts of a document. The filter will be applied after a web page was loaded.

-
Filter div class names
+
Filter div or nav class names
- +
set of CSS class names: comma-separated list of <div> element class names which should be filtered out
set of CSS class names: comma-separated list of <div> or <nav> element class names which should be filtered out
diff --git a/source/net/yacy/cora/protocol/Scanner.java b/source/net/yacy/cora/protocol/Scanner.java index 6d11388aa..8c84931a9 100644 --- a/source/net/yacy/cora/protocol/Scanner.java +++ b/source/net/yacy/cora/protocol/Scanner.java @@ -152,6 +152,7 @@ public class Scanner { if (access != Access.unknown) Scanner.this.services.put(this, access); } } catch (final OutOfMemoryError e) { + e.printStackTrace(); } } public long age() { diff --git a/source/net/yacy/document/parser/html/ContentScraper.java b/source/net/yacy/document/parser/html/ContentScraper.java index 1a4d46bab..36fb1e34c 100644 --- a/source/net/yacy/document/parser/html/ContentScraper.java +++ b/source/net/yacy/document/parser/html/ContentScraper.java @@ -129,6 +129,7 @@ public class ContentScraper extends AbstractScraper implements Scraper { script(TagType.pair), span(TagType.pair), div(TagType.pair), + nav(TagType.pair), article(TagType.pair), // html5 time(TagType.pair), // html5