skip unused call parameter for hashSentence()

pull/1/head
reger 10 years ago
parent 221f86dd5e
commit 0c97cc2440

@ -48,7 +48,7 @@ public class SnippetExtractor {
int linenumber = 0; int linenumber = 0;
int fullmatchcounter = 0; int fullmatchcounter = 0;
lookup: for (final StringBuilder sentence: sentences) { lookup: for (final StringBuilder sentence: sentences) {
hs = WordTokenizer.hashSentence(sentence.toString(), null, 100); hs = WordTokenizer.hashSentence(sentence.toString(), 100);
positions = new TreeSet<Integer>(); positions = new TreeSet<Integer>();
for (final byte[] word: queryhashes) { for (final byte[] word: queryhashes) {
pos = hs.get(word); pos = hs.get(word);
@ -127,7 +127,7 @@ public class SnippetExtractor {
byte[] hash; byte[] hash;
// find all hashes that appear in the sentence // find all hashes that appear in the sentence
final Map<byte[], Integer> hs = WordTokenizer.hashSentence(sentence, null, 100); final Map<byte[], Integer> hs = WordTokenizer.hashSentence(sentence, 100);
final Iterator<byte[]> j = queryhashes.iterator(); final Iterator<byte[]> j = queryhashes.iterator();
Integer pos; Integer pos;
int p, minpos = sentence.length(), maxpos = -1; int p, minpos = sentence.length(), maxpos = -1;

@ -178,9 +178,9 @@ public class WordTokenizer implements Enumeration<StringBuilder> {
* @param sentence the sentence to be tokenized * @param sentence the sentence to be tokenized
* @return an ordered map containing word hashes as key and positions as value. The map is ordered by the hash ordering * @return an ordered map containing word hashes as key and positions as value. The map is ordered by the hash ordering
*/ */
public static SortedMap<byte[], Integer> hashSentence(final String sentence, final WordCache meaningLib, int maxlength) { public static SortedMap<byte[], Integer> hashSentence(final String sentence, int maxlength) {
final SortedMap<byte[], Integer> map = new TreeMap<byte[], Integer>(Base64Order.enhancedCoder); final SortedMap<byte[], Integer> map = new TreeMap<byte[], Integer>(Base64Order.enhancedCoder);
WordTokenizer words = new WordTokenizer(new SentenceReader(sentence), meaningLib); WordTokenizer words = new WordTokenizer(new SentenceReader(sentence), null);
try { try {
int pos = 0; int pos = 0;
StringBuilder word; StringBuilder word;

@ -177,8 +177,8 @@ public class MediaSnippet implements Comparable<MediaSnippet>, Comparator<MediaS
while (i.hasNext()) { while (i.hasNext()) {
entry = i.next(); entry = i.next();
url = entry.getKey(); url = entry.getKey();
desc = entry.getValue(); desc = entry.getValue();
if (isUrlBlacklisted(BlacklistType.SEARCH, url)) continue; if (isUrlBlacklisted(BlacklistType.SEARCH, url)) continue;
final int ranking = removeAppearanceHashes(url.toNormalform(true), queryhashes).size() + final int ranking = removeAppearanceHashes(url.toNormalform(true), queryhashes).size() +
removeAppearanceHashes(desc, queryhashes).size(); removeAppearanceHashes(desc, queryhashes).size();
if (ranking < 2 * queryhashes.size()) { if (ranking < 2 * queryhashes.size()) {
@ -202,8 +202,8 @@ public class MediaSnippet implements Comparable<MediaSnippet>, Comparator<MediaS
while (i.hasNext()) { while (i.hasNext()) {
ientry = i.next(); ientry = i.next();
url = ientry.url(); url = ientry.url();
final String u = url.toNormalform(false); final String u = url.toNormalform(false);
if (isUrlBlacklisted(BlacklistType.SEARCH, url)) continue; if (isUrlBlacklisted(BlacklistType.SEARCH, url)) continue;
if (u.indexOf(".ico",0) >= 0 || u.indexOf("favicon",0) >= 0) continue; if (u.indexOf(".ico",0) >= 0 || u.indexOf("favicon",0) >= 0) continue;
if (ientry.height() > 0 && ientry.height() < 32) continue; if (ientry.height() > 0 && ientry.height() < 32) continue;
if (ientry.width() > 0 && ientry.width() < 32) continue; if (ientry.width() > 0 && ientry.width() < 32) continue;
@ -226,7 +226,7 @@ public class MediaSnippet implements Comparable<MediaSnippet>, Comparator<MediaS
private static HandleSet removeAppearanceHashes(final String sentence, final HandleSet queryhashes) { private static HandleSet removeAppearanceHashes(final String sentence, final HandleSet queryhashes) {
// remove all hashes that appear in the sentence // remove all hashes that appear in the sentence
if (sentence == null) return queryhashes; if (sentence == null) return queryhashes;
final SortedMap<byte[], Integer> hs = WordTokenizer.hashSentence(sentence, null, 100); final SortedMap<byte[], Integer> hs = WordTokenizer.hashSentence(sentence, 100);
final Iterator<byte[]> j = queryhashes.iterator(); final Iterator<byte[]> j = queryhashes.iterator();
byte[] hash; byte[] hash;
Integer pos; Integer pos;
@ -254,8 +254,8 @@ public class MediaSnippet implements Comparable<MediaSnippet>, Comparator<MediaS
* @param blacklistType * @param blacklistType
* Type of blacklist (see class Blacklist, BLACKLIST_FOO) * Type of blacklist (see class Blacklist, BLACKLIST_FOO)
* @return isBlacklisted Whether the given URL is blacklisted * @return isBlacklisted Whether the given URL is blacklisted
*/ */
private static boolean isUrlBlacklisted (final BlacklistType blacklistType, final DigestURL url) { private static boolean isUrlBlacklisted (final BlacklistType blacklistType, final DigestURL url) {
final boolean isBlacklisted = Switchboard.urlBlacklist.isListed(blacklistType, url.getHost().toLowerCase(), url.getFile()); final boolean isBlacklisted = Switchboard.urlBlacklist.isListed(blacklistType, url.getHost().toLowerCase(), url.getFile());
@ -269,4 +269,4 @@ public class MediaSnippet implements Comparable<MediaSnippet>, Comparator<MediaS
} }
} }

@ -535,7 +535,7 @@ public class TextSnippet implements Comparable<TextSnippet>, Comparator<TextSnip
private static void removeMatchingHashes(final String sentence, final HandleSet queryhashes) { private static void removeMatchingHashes(final String sentence, final HandleSet queryhashes) {
if (queryhashes.size() == 0) return; if (queryhashes.size() == 0) return;
final Set<byte[]> m = WordTokenizer.hashSentence(sentence, null, 100).keySet(); final Set<byte[]> m = WordTokenizer.hashSentence(sentence, 100).keySet();
//for (byte[] b: m) System.out.println("sentence hash: " + ASCII.String(b)); //for (byte[] b: m) System.out.println("sentence hash: " + ASCII.String(b));
//for (byte[] b: queryhashes) System.out.println("queryhash: " + ASCII.String(b)); //for (byte[] b: queryhashes) System.out.println("queryhash: " + ASCII.String(b));
ArrayList<byte[]> o = new ArrayList<byte[]>(queryhashes.size()); ArrayList<byte[]> o = new ArrayList<byte[]>(queryhashes.size());

Loading…
Cancel
Save