skip unused call parameter for hashSentence()

pull/1/head
reger 10 years ago
parent 221f86dd5e
commit 0c97cc2440

@ -48,7 +48,7 @@ public class SnippetExtractor {
int linenumber = 0;
int fullmatchcounter = 0;
lookup: for (final StringBuilder sentence: sentences) {
hs = WordTokenizer.hashSentence(sentence.toString(), null, 100);
hs = WordTokenizer.hashSentence(sentence.toString(), 100);
positions = new TreeSet<Integer>();
for (final byte[] word: queryhashes) {
pos = hs.get(word);
@ -127,7 +127,7 @@ public class SnippetExtractor {
byte[] hash;
// find all hashes that appear in the sentence
final Map<byte[], Integer> hs = WordTokenizer.hashSentence(sentence, null, 100);
final Map<byte[], Integer> hs = WordTokenizer.hashSentence(sentence, 100);
final Iterator<byte[]> j = queryhashes.iterator();
Integer pos;
int p, minpos = sentence.length(), maxpos = -1;

@ -178,9 +178,9 @@ public class WordTokenizer implements Enumeration<StringBuilder> {
* @param sentence the sentence to be tokenized
* @return an ordered map containing word hashes as keys and positions as values. The map is ordered by the hash ordering
*/
public static SortedMap<byte[], Integer> hashSentence(final String sentence, final WordCache meaningLib, int maxlength) {
public static SortedMap<byte[], Integer> hashSentence(final String sentence, int maxlength) {
final SortedMap<byte[], Integer> map = new TreeMap<byte[], Integer>(Base64Order.enhancedCoder);
WordTokenizer words = new WordTokenizer(new SentenceReader(sentence), meaningLib);
WordTokenizer words = new WordTokenizer(new SentenceReader(sentence), null);
try {
int pos = 0;
StringBuilder word;

@ -177,8 +177,8 @@ public class MediaSnippet implements Comparable<MediaSnippet>, Comparator<MediaS
while (i.hasNext()) {
entry = i.next();
url = entry.getKey();
desc = entry.getValue();
if (isUrlBlacklisted(BlacklistType.SEARCH, url)) continue;
desc = entry.getValue();
if (isUrlBlacklisted(BlacklistType.SEARCH, url)) continue;
final int ranking = removeAppearanceHashes(url.toNormalform(true), queryhashes).size() +
removeAppearanceHashes(desc, queryhashes).size();
if (ranking < 2 * queryhashes.size()) {
@ -202,8 +202,8 @@ public class MediaSnippet implements Comparable<MediaSnippet>, Comparator<MediaS
while (i.hasNext()) {
ientry = i.next();
url = ientry.url();
final String u = url.toNormalform(false);
if (isUrlBlacklisted(BlacklistType.SEARCH, url)) continue;
final String u = url.toNormalform(false);
if (isUrlBlacklisted(BlacklistType.SEARCH, url)) continue;
if (u.indexOf(".ico",0) >= 0 || u.indexOf("favicon",0) >= 0) continue;
if (ientry.height() > 0 && ientry.height() < 32) continue;
if (ientry.width() > 0 && ientry.width() < 32) continue;
@ -226,7 +226,7 @@ public class MediaSnippet implements Comparable<MediaSnippet>, Comparator<MediaS
private static HandleSet removeAppearanceHashes(final String sentence, final HandleSet queryhashes) {
// remove all hashes that appear in the sentence
if (sentence == null) return queryhashes;
final SortedMap<byte[], Integer> hs = WordTokenizer.hashSentence(sentence, null, 100);
final SortedMap<byte[], Integer> hs = WordTokenizer.hashSentence(sentence, 100);
final Iterator<byte[]> j = queryhashes.iterator();
byte[] hash;
Integer pos;
@ -254,8 +254,8 @@ public class MediaSnippet implements Comparable<MediaSnippet>, Comparator<MediaS
* @param blacklistType
* Type of blacklist (see class Blacklist, BLACKLIST_FOO)
* @return isBlacklisted Whether the given URL is blacklisted
*/
private static boolean isUrlBlacklisted (final BlacklistType blacklistType, final DigestURL url) {
*/
private static boolean isUrlBlacklisted (final BlacklistType blacklistType, final DigestURL url) {
final boolean isBlacklisted = Switchboard.urlBlacklist.isListed(blacklistType, url.getHost().toLowerCase(), url.getFile());
@ -269,4 +269,4 @@ public class MediaSnippet implements Comparable<MediaSnippet>, Comparator<MediaS
}
}

@ -535,7 +535,7 @@ public class TextSnippet implements Comparable<TextSnippet>, Comparator<TextSnip
private static void removeMatchingHashes(final String sentence, final HandleSet queryhashes) {
if (queryhashes.size() == 0) return;
final Set<byte[]> m = WordTokenizer.hashSentence(sentence, null, 100).keySet();
final Set<byte[]> m = WordTokenizer.hashSentence(sentence, 100).keySet();
//for (byte[] b: m) System.out.println("sentence hash: " + ASCII.String(b));
//for (byte[] b: queryhashes) System.out.println("queryhash: " + ASCII.String(b));
ArrayList<byte[]> o = new ArrayList<byte[]>(queryhashes.size());

Loading…
Cancel
Save