Removed indexing of anchor-link descriptions and the tagging of such words as part of URLs (that behavior was wrong)

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5219 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 17 years ago
parent ce57de6cb3
commit ce4715e305

@ -139,7 +139,7 @@ public final class plasmaCondenser {
// phrase 4 is the Document Author // phrase 4 is the Document Author
// phrase 5 are the tags specified in document // phrase 5 are the tags specified in document
// phrase 10 and above are the section headlines/titles (88 possible) // phrase 10 and above are the section headlines/titles (88 possible)
// phrase 98 is taken from the embedded anchor/hyperlinks description // phrase 98 is taken from the embedded anchor/hyperlinks description (REMOVED!)
// phrase 99 is taken from the media Link url and anchor description // phrase 99 is taken from the media Link url and anchor description
// phrase 100 and above are lines from the text // phrase 100 and above are lines from the text
@ -153,12 +153,19 @@ public final class plasmaCondenser {
} }
// anchors: for text indexing we add only the anchor description // anchors: for text indexing we add only the anchor description
// REMOVED! Reason:
// Words from the anchor description should appear as normal text in the output from the parser.
// Flagging these words as appearing in dc_description would be confusing, since the user expects such words to be titles of
// pages that are shown in the search result. The words from the URLs should also not appear as part of the index, because they
// are not visible in the text and could be used to create fake content.
/*
final Iterator<Map.Entry<yacyURL, String>> i = document.getAnchors().entrySet().iterator(); final Iterator<Map.Entry<yacyURL, String>> i = document.getAnchors().entrySet().iterator();
while (i.hasNext()) { while (i.hasNext()) {
entry = i.next(); entry = i.next();
if ((entry == null) || (entry.getKey() == null)) continue; if ((entry == null) || (entry.getKey() == null)) continue;
insertTextToWords(entry.getValue(), 98, indexRWIEntry.flag_app_dc_description, RESULT_FLAGS, true); insertTextToWords(entry.getValue(), 98, indexRWIEntry.flag_app_dc_description, RESULT_FLAGS, true);
} }
*/
} else { } else {
this.RESULT_NUMB_WORDS = 0; this.RESULT_NUMB_WORDS = 0;
this.RESULT_DIFF_WORDS = 0; this.RESULT_DIFF_WORDS = 0;

@ -1554,6 +1554,8 @@ public final class plasmaSwitchboard extends serverAbstractSwitch<IndexingStack.
document = parseDocument(in.queueEntry); document = parseDocument(in.queueEntry);
} catch (final InterruptedException e) { } catch (final InterruptedException e) {
document = null; document = null;
} catch (final Exception e) {
document = null;
} }
if (document == null) { if (document == null) {
in.queueEntry.close(); in.queueEntry.close();

Loading…
Cancel
Save