From 5a634cab23652e8c60ebdba645386a3312fd3c51 Mon Sep 17 00:00:00 2001 From: orbiter Date: Mon, 27 Apr 2009 08:46:11 +0000 Subject: [PATCH] removed generation of anchor link sets in document types that describe container formats. git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5890 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- source/de/anomic/plasma/parser/rpm/rpmParser.java | 2 +- .../plasma/parser/sevenzip/SZParserExtractCallback.java | 5 ++--- source/de/anomic/plasma/parser/tar/tarParser.java | 2 +- source/de/anomic/plasma/parser/zip/zipParser.java | 2 +- 4 files changed, 5 insertions(+), 6 deletions(-) diff --git a/source/de/anomic/plasma/parser/rpm/rpmParser.java b/source/de/anomic/plasma/parser/rpm/rpmParser.java index 401a67ae6..af3f15806 100644 --- a/source/de/anomic/plasma/parser/rpm/rpmParser.java +++ b/source/de/anomic/plasma/parser/rpm/rpmParser.java @@ -121,7 +121,7 @@ public class rpmParser extends AbstractParser implements Parser { else if (headerNames[i].equalsIgnoreCase("SUMMARY")) summary = tag.toString(); else if (headerNames[i].equalsIgnoreCase("DESCRIPTION")) description = tag.toString(); else if (headerNames[i].equalsIgnoreCase("PACKAGER")) packager = tag.toString(); - else if (headerNames[i].equalsIgnoreCase("URL")) anchors.put(new yacyURL(tag.toString(), null), tag.toString()); + //else if (headerNames[i].equalsIgnoreCase("URL")) anchors.put(new yacyURL(tag.toString(), null), tag.toString()); } // closing the rpm file diff --git a/source/de/anomic/plasma/parser/sevenzip/SZParserExtractCallback.java b/source/de/anomic/plasma/parser/sevenzip/SZParserExtractCallback.java index 1db9164cf..9897f8687 100644 --- a/source/de/anomic/plasma/parser/sevenzip/SZParserExtractCallback.java +++ b/source/de/anomic/plasma/parser/sevenzip/SZParserExtractCallback.java @@ -25,9 +25,6 @@ package de.anomic.plasma.parser.sevenzip; import java.io.IOException; import java.io.OutputStream; -import java.util.HashMap; -import java.util.Iterator; -import java.util.Map; import SevenZip.ArchiveExtractCallback; import SevenZip.Archive.IInArchive; @@ -105,6 +102,7 @@ public class SZParserExtractCallback extends ArchiveExtractCallback { theDoc = this.parser.parseSource(url, mime, null, this.cfos.getContentBAOS()); } + /* // revert the above workaround final Map nanchors = new HashMap(theDoc.getAnchors().size(), 1f); final Iterator> it = theDoc.getAnchors().entrySet().iterator(); @@ -124,6 +122,7 @@ public class SZParserExtractCallback extends ArchiveExtractCallback { } theDoc.getAnchors().clear(); theDoc.getAnchors().putAll(nanchors); + */ this.doc.addSubDocument(theDoc); } } catch (final ParserException e) { diff --git a/source/de/anomic/plasma/parser/tar/tarParser.java b/source/de/anomic/plasma/parser/tar/tarParser.java index 96df00b32..bc0a8b284 100644 --- a/source/de/anomic/plasma/parser/tar/tarParser.java +++ b/source/de/anomic/plasma/parser/tar/tarParser.java @@ -173,7 +173,7 @@ public class tarParser extends AbstractParser implements Parser { docTextLength += FileUtils.copy(subDoc.getText(), docText); } - docAnchors.putAll(subDoc.getAnchors()); + //docAnchors.putAll(subDoc.getAnchors()); htmlFilterContentScraper.addAllImages(docImages, subDoc.getImages()); // release subdocument diff --git a/source/de/anomic/plasma/parser/zip/zipParser.java b/source/de/anomic/plasma/parser/zip/zipParser.java index 72b530f79..246d9da55 100644 --- a/source/de/anomic/plasma/parser/zip/zipParser.java +++ b/source/de/anomic/plasma/parser/zip/zipParser.java @@ -156,7 +156,7 @@ public class zipParser extends AbstractParser implements Parser { docTextLength += FileUtils.copy(subDoc.getText(), docText); } - docAnchors.putAll(subDoc.getAnchors()); + //docAnchors.putAll(subDoc.getAnchors()); htmlFilterContentScraper.addAllImages(docImages, subDoc.getImages()); // release subdocument