|
|
@ -177,8 +177,8 @@ public class MediaSnippet implements Comparable<MediaSnippet>, Comparator<MediaS
|
|
|
|
while (i.hasNext()) {
|
|
|
|
while (i.hasNext()) {
|
|
|
|
entry = i.next();
|
|
|
|
entry = i.next();
|
|
|
|
url = entry.getKey();
|
|
|
|
url = entry.getKey();
|
|
|
|
desc = entry.getValue();
|
|
|
|
desc = entry.getValue();
|
|
|
|
if (isUrlBlacklisted(BlacklistType.SEARCH, url)) continue;
|
|
|
|
if (isUrlBlacklisted(BlacklistType.SEARCH, url)) continue;
|
|
|
|
final int ranking = removeAppearanceHashes(url.toNormalform(true), queryhashes).size() +
|
|
|
|
final int ranking = removeAppearanceHashes(url.toNormalform(true), queryhashes).size() +
|
|
|
|
removeAppearanceHashes(desc, queryhashes).size();
|
|
|
|
removeAppearanceHashes(desc, queryhashes).size();
|
|
|
|
if (ranking < 2 * queryhashes.size()) {
|
|
|
|
if (ranking < 2 * queryhashes.size()) {
|
|
|
@ -202,8 +202,8 @@ public class MediaSnippet implements Comparable<MediaSnippet>, Comparator<MediaS
|
|
|
|
while (i.hasNext()) {
|
|
|
|
while (i.hasNext()) {
|
|
|
|
ientry = i.next();
|
|
|
|
ientry = i.next();
|
|
|
|
url = ientry.url();
|
|
|
|
url = ientry.url();
|
|
|
|
final String u = url.toNormalform(false);
|
|
|
|
final String u = url.toNormalform(false);
|
|
|
|
if (isUrlBlacklisted(BlacklistType.SEARCH, url)) continue;
|
|
|
|
if (isUrlBlacklisted(BlacklistType.SEARCH, url)) continue;
|
|
|
|
if (u.indexOf(".ico",0) >= 0 || u.indexOf("favicon",0) >= 0) continue;
|
|
|
|
if (u.indexOf(".ico",0) >= 0 || u.indexOf("favicon",0) >= 0) continue;
|
|
|
|
if (ientry.height() > 0 && ientry.height() < 32) continue;
|
|
|
|
if (ientry.height() > 0 && ientry.height() < 32) continue;
|
|
|
|
if (ientry.width() > 0 && ientry.width() < 32) continue;
|
|
|
|
if (ientry.width() > 0 && ientry.width() < 32) continue;
|
|
|
@ -226,7 +226,7 @@ public class MediaSnippet implements Comparable<MediaSnippet>, Comparator<MediaS
|
|
|
|
private static HandleSet removeAppearanceHashes(final String sentence, final HandleSet queryhashes) {
|
|
|
|
private static HandleSet removeAppearanceHashes(final String sentence, final HandleSet queryhashes) {
|
|
|
|
// remove all hashes that appear in the sentence
|
|
|
|
// remove all hashes that appear in the sentence
|
|
|
|
if (sentence == null) return queryhashes;
|
|
|
|
if (sentence == null) return queryhashes;
|
|
|
|
final SortedMap<byte[], Integer> hs = WordTokenizer.hashSentence(sentence, null, 100);
|
|
|
|
final SortedMap<byte[], Integer> hs = WordTokenizer.hashSentence(sentence, 100);
|
|
|
|
final Iterator<byte[]> j = queryhashes.iterator();
|
|
|
|
final Iterator<byte[]> j = queryhashes.iterator();
|
|
|
|
byte[] hash;
|
|
|
|
byte[] hash;
|
|
|
|
Integer pos;
|
|
|
|
Integer pos;
|
|
|
@ -254,8 +254,8 @@ public class MediaSnippet implements Comparable<MediaSnippet>, Comparator<MediaS
|
|
|
|
* @param blacklistType
|
|
|
|
* @param blacklistType
|
|
|
|
* Type of blacklist (see class Blacklist, BLACKLIST_FOO)
|
|
|
|
* Type of blacklist (see class Blacklist, BLACKLIST_FOO)
|
|
|
|
* @return isBlacklisted Whether the given URL is blacklisted
|
|
|
|
* @return isBlacklisted Whether the given URL is blacklisted
|
|
|
|
*/
|
|
|
|
*/
|
|
|
|
private static boolean isUrlBlacklisted (final BlacklistType blacklistType, final DigestURL url) {
|
|
|
|
private static boolean isUrlBlacklisted (final BlacklistType blacklistType, final DigestURL url) {
|
|
|
|
|
|
|
|
|
|
|
|
final boolean isBlacklisted = Switchboard.urlBlacklist.isListed(blacklistType, url.getHost().toLowerCase(), url.getFile());
|
|
|
|
final boolean isBlacklisted = Switchboard.urlBlacklist.isListed(blacklistType, url.getHost().toLowerCase(), url.getFile());
|
|
|
|
|
|
|
|
|
|
|
@ -269,4 +269,4 @@ public class MediaSnippet implements Comparable<MediaSnippet>, Comparator<MediaS
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|