Removed generation of anchor link sets in the parsers for document types that describe container formats (rpm, 7-zip, tar, zip); the affected code is commented out rather than deleted.

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5890 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 16 years ago
parent 50e96ee894
commit 5a634cab23

@ -121,7 +121,7 @@ public class rpmParser extends AbstractParser implements Parser {
else if (headerNames[i].equalsIgnoreCase("SUMMARY")) summary = tag.toString(); else if (headerNames[i].equalsIgnoreCase("SUMMARY")) summary = tag.toString();
else if (headerNames[i].equalsIgnoreCase("DESCRIPTION")) description = tag.toString(); else if (headerNames[i].equalsIgnoreCase("DESCRIPTION")) description = tag.toString();
else if (headerNames[i].equalsIgnoreCase("PACKAGER")) packager = tag.toString(); else if (headerNames[i].equalsIgnoreCase("PACKAGER")) packager = tag.toString();
else if (headerNames[i].equalsIgnoreCase("URL")) anchors.put(new yacyURL(tag.toString(), null), tag.toString()); //else if (headerNames[i].equalsIgnoreCase("URL")) anchors.put(new yacyURL(tag.toString(), null), tag.toString());
} }
// closing the rpm file // closing the rpm file

@ -25,9 +25,6 @@ package de.anomic.plasma.parser.sevenzip;
import java.io.IOException; import java.io.IOException;
import java.io.OutputStream; import java.io.OutputStream;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import SevenZip.ArchiveExtractCallback; import SevenZip.ArchiveExtractCallback;
import SevenZip.Archive.IInArchive; import SevenZip.Archive.IInArchive;
@ -105,6 +102,7 @@ public class SZParserExtractCallback extends ArchiveExtractCallback {
theDoc = this.parser.parseSource(url, mime, null, this.cfos.getContentBAOS()); theDoc = this.parser.parseSource(url, mime, null, this.cfos.getContentBAOS());
} }
/*
// revert the above workaround // revert the above workaround
final Map<yacyURL, String> nanchors = new HashMap<yacyURL, String>(theDoc.getAnchors().size(), 1f); final Map<yacyURL, String> nanchors = new HashMap<yacyURL, String>(theDoc.getAnchors().size(), 1f);
final Iterator<Map.Entry<yacyURL, String>> it = theDoc.getAnchors().entrySet().iterator(); final Iterator<Map.Entry<yacyURL, String>> it = theDoc.getAnchors().entrySet().iterator();
@ -124,6 +122,7 @@ public class SZParserExtractCallback extends ArchiveExtractCallback {
} }
theDoc.getAnchors().clear(); theDoc.getAnchors().clear();
theDoc.getAnchors().putAll(nanchors); theDoc.getAnchors().putAll(nanchors);
*/
this.doc.addSubDocument(theDoc); this.doc.addSubDocument(theDoc);
} }
} catch (final ParserException e) { } catch (final ParserException e) {

@ -173,7 +173,7 @@ public class tarParser extends AbstractParser implements Parser {
docTextLength += FileUtils.copy(subDoc.getText(), docText); docTextLength += FileUtils.copy(subDoc.getText(), docText);
} }
docAnchors.putAll(subDoc.getAnchors()); //docAnchors.putAll(subDoc.getAnchors());
htmlFilterContentScraper.addAllImages(docImages, subDoc.getImages()); htmlFilterContentScraper.addAllImages(docImages, subDoc.getImages());
// release subdocument // release subdocument

@ -156,7 +156,7 @@ public class zipParser extends AbstractParser implements Parser {
docTextLength += FileUtils.copy(subDoc.getText(), docText); docTextLength += FileUtils.copy(subDoc.getText(), docText);
} }
docAnchors.putAll(subDoc.getAnchors()); //docAnchors.putAll(subDoc.getAnchors());
htmlFilterContentScraper.addAllImages(docImages, subDoc.getImages()); htmlFilterContentScraper.addAllImages(docImages, subDoc.getImages());
// release subdocument // release subdocument

Loading…
Cancel
Save