encapsulate potentially expensive objects in TextSnippet so they can be GCed asap

this reduces the chance of OOMs during massive search & snippet-fetching

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7865 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
sixcooler 13 years ago
parent 3aa6528ed0
commit eb14111200
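
The pattern behind this change, as a minimal standalone sketch (the class and method names below are illustrative, not YaCy code): the heavyweight parsed document is confined to its own block, so its reference goes out of scope as soon as the small extract (the sentences) has been taken from it, and the garbage collector may reclaim it before the remaining, potentially long-running snippet work starts. Whether the VM actually frees it that early depends on its liveness analysis, so this is a best-effort hint rather than a guarantee.

import java.util.ArrayList;
import java.util.List;

public class ScopeForGcSketch {

    // Stand-in for a memory-heavy parse result (illustrative only).
    static final class ExpensiveDocument {
        final byte[] payload = new byte[16 * 1024 * 1024]; // pretend parser output
        List<String> sentences() {
            final List<String> s = new ArrayList<String>();
            s.add("first sentence");
            s.add("second sentence");
            return s;
        }
        void close() { /* release underlying resources */ }
    }

    public static void main(final String[] args) {
        final List<String> sentences;
        { // encapsulate the expensive object: only the cheap extract survives the block
            final ExpensiveDocument document = new ExpensiveDocument();
            sentences = document.sentences();
            document.close();
        } // 'document' is out of scope here and may be garbage-collected
        for (final String s : sentences) {
            System.out.println(s); // stand-in for the expensive snippet computation
        }
    }
}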

@@ -165,39 +165,46 @@ public class TextSnippet implements Comparable<TextSnippet>, Comparator<TextSnip
             init(url.hash(), snippetLine, source, null);
             return;
         }
-        Document document = loadDocument(loader, comp, queryhashes, cacheStrategy, url, reindexing, source);
-        if (document == null) {
-            if (this.error == null) {
-                init(url.hash(), null, ResultClass.ERROR_PARSER_FAILED, "parser error/failed"); // cannot be parsed
-            }
-            return;
-        }
-        /* ===========================================================================
-         * COMPUTE SNIPPET
-         * =========================================================================== */
-        // we have found a parseable non-empty file: use the lines
-        // compute snippet from text
-        final Collection<StringBuilder> sentences = document.getSentences(pre);
-        if (sentences == null) {
-            init(url.hash(), null, ResultClass.ERROR_PARSER_NO_LINES, "parser returned no sentences");
-            return;
-        }
-        final SnippetExtractor tsr;
-        String textline = null;
-        HandleSet remainingHashes = queryhashes;
-        try {
-            tsr = new SnippetExtractor(sentences, queryhashes, snippetMaxLength);
-            textline = tsr.getSnippet();
-            remainingHashes = tsr.getRemainingWords();
-        } catch (final UnsupportedOperationException e) {
-            init(url.hash(), null, ResultClass.ERROR_NO_MATCH, "no matching snippet found");
-            return;
-        }
-        // compute snippet from media
+        { //encapsulate potential expensive sentences
+            final Collection<StringBuilder> sentences;
+            { //encapsulate potential expensive document
+                final Document document = loadDocument(loader, comp, queryhashes, cacheStrategy, url, reindexing, source);
+                if (document == null) {
+                    if (this.error == null) {
+                        init(url.hash(), null, ResultClass.ERROR_PARSER_FAILED, "parser error/failed"); // cannot be parsed
+                    }
+                    return;
+                }
+                /* ===========================================================================
+                 * COMPUTE SNIPPET
+                 * =========================================================================== */
+                // we have found a parseable non-empty file: use the lines
+                // compute snippet from text
+                sentences = document.getSentences(pre);
+                document.close();
+            } //encapsulate potential expensive document END
+            if (sentences == null) {
+                init(url.hash(), null, ResultClass.ERROR_PARSER_NO_LINES, "parser returned no sentences");
+                return;
+            }
+            try {
+                final SnippetExtractor tsr = new SnippetExtractor(sentences, queryhashes, snippetMaxLength);
+                textline = tsr.getSnippet();
+                remainingHashes = tsr.getRemainingWords();
+            } catch (final UnsupportedOperationException e) {
+                init(url.hash(), null, ResultClass.ERROR_NO_MATCH, "no matching snippet found");
+                return;
+            }
+        } //encapsulate potential expensive sentences END
+        // compute snippet from media - attention document closed above!
         //String audioline = computeMediaSnippet(document.getAudiolinks(), queryhashes);
         //String videoline = computeMediaSnippet(document.getVideolinks(), queryhashes);
         //String appline = computeMediaSnippet(document.getApplinks(), queryhashes);
@@ -220,7 +227,7 @@ public class TextSnippet implements Comparable<TextSnippet>, Comparator<TextSnip
         // finally store this snippet in our own cache
         snippetsCache.put(wordhashes, urls, snippetLine);
-        document.close();
+        // document.close();
         init(url.hash(), snippetLine, source, null);
     }

@@ -172,8 +172,7 @@ public class WordTokenizer implements Enumeration<String> {
      */
     public static SortedMap<byte[], Integer> hashSentence(final String sentence, final WordCache meaningLib) {
         final SortedMap<byte[], Integer> map = new TreeMap<byte[], Integer>(Base64Order.enhancedCoder);
-        Enumeration<String> words = null;
-        words = new WordTokenizer(new ByteArrayInputStream(UTF8.getBytes(sentence)), meaningLib);
+        final Enumeration<String> words = new WordTokenizer(new ByteArrayInputStream(UTF8.getBytes(sentence)), meaningLib);
         int pos = 0;
         String word;
         byte[] hash;
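
For reference, a hedged usage sketch of the method touched above. Only the hashSentence signature is taken from the diff; the caller class, the printing loop, and the way the WordCache is obtained are assumptions, and the meaning of the map's Integer value (presumably the word's position counter, per the pos variable above) would be confirmed by the omitted javadoc.

// Hypothetical caller, assumed to live in the same package as WordTokenizer so
// that no YaCy-specific imports are needed.
import java.util.Map;
import java.util.SortedMap;

public class HashSentenceSketch {

    // meaningLib may be a shared WordCache; whether null is tolerated is not
    // visible in this diff (assumption).
    public static void printWordHashes(final String sentence, final WordCache meaningLib) {
        final SortedMap<byte[], Integer> hashes = WordTokenizer.hashSentence(sentence, meaningLib);
        for (final Map.Entry<byte[], Integer> entry : hashes.entrySet()) {
            // key: word hash, sorted by Base64Order.enhancedCoder
            // value: presumably the word's position in the sentence (see 'pos' above)
            System.out.println(new String(entry.getKey()) + " -> " + entry.getValue());
        }
    }
}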
