|
|
|
@ -47,7 +47,7 @@ public class SnippetExtractor {
|
|
|
|
|
int linenumber = 0;
|
|
|
|
|
int fullmatchcounter = 0;
|
|
|
|
|
lookup: for (final StringBuilder sentence: sentences) {
|
|
|
|
|
hs = WordTokenizer.hashSentence(sentence.toString(), null);
|
|
|
|
|
hs = WordTokenizer.hashSentence(sentence.toString(), null, 100);
|
|
|
|
|
positions = new TreeSet<Integer>();
|
|
|
|
|
for (final byte[] word: queryhashes) {
|
|
|
|
|
pos = hs.get(word);
|
|
|
|
@ -79,27 +79,27 @@ public class SnippetExtractor {
|
|
|
|
|
} catch (UnsupportedOperationException e) {
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
snippetString = tsr.snippetString;
|
|
|
|
|
if (snippetString != null && snippetString.length() > 0) {
|
|
|
|
|
remainingHashes = tsr.remainingHashes;
|
|
|
|
|
if (remainingHashes.isEmpty()) {
|
|
|
|
|
this.snippetString = tsr.snippetString;
|
|
|
|
|
if (this.snippetString != null && this.snippetString.length() > 0) {
|
|
|
|
|
this.remainingHashes = tsr.remainingHashes;
|
|
|
|
|
if (this.remainingHashes.isEmpty()) {
|
|
|
|
|
// we have found the snippet
|
|
|
|
|
return; // finished!
|
|
|
|
|
} else if (remainingHashes.size() < queryhashes.size()) {
|
|
|
|
|
} else if (this.remainingHashes.size() < queryhashes.size()) {
|
|
|
|
|
// the result has not all words in it.
|
|
|
|
|
// find another sentence that represents the missing other words
|
|
|
|
|
// and find recursively more sentences
|
|
|
|
|
maxLength = maxLength - snippetString.length();
|
|
|
|
|
maxLength = maxLength - this.snippetString.length();
|
|
|
|
|
if (maxLength < 20) maxLength = 20;
|
|
|
|
|
try {
|
|
|
|
|
tsr = new SnippetExtractor(order.values(), remainingHashes, maxLength);
|
|
|
|
|
tsr = new SnippetExtractor(order.values(), this.remainingHashes, maxLength);
|
|
|
|
|
} catch (UnsupportedOperationException e) {
|
|
|
|
|
throw e;
|
|
|
|
|
}
|
|
|
|
|
final String nextSnippet = tsr.snippetString;
|
|
|
|
|
if (nextSnippet == null) return;
|
|
|
|
|
snippetString = snippetString + (" / " + nextSnippet);
|
|
|
|
|
remainingHashes = tsr.remainingHashes;
|
|
|
|
|
this.snippetString = this.snippetString + (" / " + nextSnippet);
|
|
|
|
|
this.remainingHashes = tsr.remainingHashes;
|
|
|
|
|
return;
|
|
|
|
|
} else {
|
|
|
|
|
// error
|
|
|
|
@ -126,7 +126,7 @@ public class SnippetExtractor {
|
|
|
|
|
byte[] hash;
|
|
|
|
|
|
|
|
|
|
// find all hashes that appear in the sentence
|
|
|
|
|
final Map<byte[], Integer> hs = WordTokenizer.hashSentence(sentence, null);
|
|
|
|
|
final Map<byte[], Integer> hs = WordTokenizer.hashSentence(sentence, null, 100);
|
|
|
|
|
final Iterator<byte[]> j = queryhashes.iterator();
|
|
|
|
|
Integer pos;
|
|
|
|
|
int p, minpos = sentence.length(), maxpos = -1;
|
|
|
|
|