From ad09b786bf85d7fd1962bc4a9616afd62b99a185 Mon Sep 17 00:00:00 2001 From: Michael Peter Christen Date: Tue, 3 Jul 2012 17:20:41 +0200 Subject: [PATCH] clean up parser data --- source/net/yacy/document/Document.java | 11 +++++++++++ .../net/yacy/document/parser/html/ContentScraper.java | 9 +++++++++ source/net/yacy/document/parser/htmlParser.java | 1 - 3 files changed, 20 insertions(+), 1 deletion(-) diff --git a/source/net/yacy/document/Document.java b/source/net/yacy/document/Document.java index 42fb1f190..e10f8b44c 100644 --- a/source/net/yacy/document/Document.java +++ b/source/net/yacy/document/Document.java @@ -821,6 +821,17 @@ dc_rights ContentScraper.addAllImages(images, doc.getImages()); if (doc.lon() != 0.0f && doc.lat() != 0.0f) { lon = doc.lon(); lat = doc.lat(); } } + + // clean up parser data + for (final Document doc: docs) { + Object parserObject = doc.getParserObject(); + if (parserObject instanceof ContentScraper) { + final ContentScraper html = (ContentScraper) parserObject; + html.close(); + } + } + + // return consolidation return new Document( location, globalMime, diff --git a/source/net/yacy/document/parser/html/ContentScraper.java b/source/net/yacy/document/parser/html/ContentScraper.java index e06eabaa2..8c3ec03c0 100644 --- a/source/net/yacy/document/parser/html/ContentScraper.java +++ b/source/net/yacy/document/parser/html/ContentScraper.java @@ -864,9 +864,18 @@ public class ContentScraper extends AbstractScraper implements Scraper { // free resources super.close(); this.anchors.clear(); + this.rss.clear(); + this.css.clear(); + this.script.clear(); + this.frames.clear(); + this.iframes.clear(); + this.embeds.clear(); this.images.clear(); + this.metas.clear(); this.title = null; this.headlines = null; + this.bold.clear(); + this.italic.clear(); this.content.clear(); this.root = null; } diff --git a/source/net/yacy/document/parser/htmlParser.java b/source/net/yacy/document/parser/htmlParser.java index 107329178..2c22f73cf 100644 --- a/source/net/yacy/document/parser/htmlParser.java +++ b/source/net/yacy/document/parser/htmlParser.java @@ -143,7 +143,6 @@ public class htmlParser extends AbstractParser implements Parser { scraper.getRSS(), scraper.getImages(), scraper.indexingDenied()); - //scraper.close(); ppd.setFavicon(scraper.getFavicon()); return ppd;