From 7b1f5b04305208428d4d55e8f1ad07b5f2468fe8 Mon Sep 17 00:00:00 2001 From: orbiter Date: Fri, 20 Nov 2009 13:19:12 +0000 Subject: [PATCH] - better media search ranking - better concurrency with enhanced synchronization in sort stack git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6496 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- source/de/anomic/search/MediaSnippet.java | 27 ++---- source/net/yacy/kelondro/util/SortStack.java | 92 +++++++++++--------- source/net/yacy/kelondro/util/SortStore.java | 2 +- 3 files changed, 62 insertions(+), 59 deletions(-) diff --git a/source/de/anomic/search/MediaSnippet.java b/source/de/anomic/search/MediaSnippet.java index c74df64a5..64ab6f781 100644 --- a/source/de/anomic/search/MediaSnippet.java +++ b/source/de/anomic/search/MediaSnippet.java @@ -110,21 +110,15 @@ public class MediaSnippet implements Comparable, Comparator entry; DigestURI url; String desc; - TreeSet s; final ArrayList result = new ArrayList(); while (i.hasNext()) { entry = i.next(); url = entry.getKey(); desc = entry.getValue(); - s = TextSnippet.removeAppearanceHashes(url.toNormalform(false, false), queryhashes); - if (s.size() == 0) { - result.add(new MediaSnippet(mediatype, url, desc, null, 0, document.dc_source())); - continue; - } - s = TextSnippet.removeAppearanceHashes(desc, s); - if (s.size() == 0) { - result.add(new MediaSnippet(mediatype, url, desc, null, 0, document.dc_source())); - continue; + int ranking = TextSnippet.removeAppearanceHashes(url.toNormalform(false, false), queryhashes).size() + + TextSnippet.removeAppearanceHashes(desc, queryhashes).size(); + if (ranking < 2 * queryhashes.size()) { + result.add(new MediaSnippet(mediatype, url, desc, null, ranking, document.dc_source())); } } return result; @@ -140,7 +134,6 @@ public class MediaSnippet implements Comparable, Comparator s; final ArrayList result = new ArrayList(); while (i.hasNext()) { ientry = i.next(); @@ -150,14 +143,10 @@ public class MediaSnippet implements Comparable, Comparator 0 && ientry.height() < 64) continue; if (ientry.width() > 0 && ientry.width() < 64) continue; desc = ientry.alt(); - int appcount = 0; - s = TextSnippet.removeAppearanceHashes(url.toNormalform(false, false), queryhashes); - appcount += queryhashes.size() - s.size(); - // if the resulting set is empty, then _all_ words from the query appeared in the url - s = TextSnippet.removeAppearanceHashes(desc, s); - appcount += queryhashes.size() - s.size(); - // if the resulting set is empty, then _all_ search words appeared in the description - final int ranking = /*(ientry.hashCode() / queryhashes.size() / 2) */ ientry.height() * ientry.width() * appcount * 10000 /* 0x7FFF0000)*/; + int appcount = queryhashes.size() * 2 - + TextSnippet.removeAppearanceHashes(url.toNormalform(false, false), queryhashes).size() - + TextSnippet.removeAppearanceHashes(desc, queryhashes).size(); + final int ranking = Integer.MAX_VALUE - (ientry.height() + 1) * (ientry.width() + 1) * (appcount + 1); result.add(new MediaSnippet(ContentDomain.IMAGE, url, desc, ientry.width() + " x " + ientry.height(), ranking, document.dc_source())); } return result; diff --git a/source/net/yacy/kelondro/util/SortStack.java b/source/net/yacy/kelondro/util/SortStack.java index f06397829..ab1b83eee 100644 --- a/source/net/yacy/kelondro/util/SortStack.java +++ b/source/net/yacy/kelondro/util/SortStack.java @@ -60,26 +60,27 @@ public class SortStack { * @param element * @param weight */ - public synchronized void push(final E element, Long weight) { - if (this.instack.contains(element)) return; + public void push(final E element, Long weight) { + if (!this.instack.add(element)) return; // put the element on the stack - List l = this.onstack.get(weight); - if (l == null) { - l = new LinkedList(); - l.add(element); - this.onstack.put(weight, l); - } else { - l.add(element); + synchronized (this.onstack) { + List l = this.onstack.get(weight); + if (l == null) { + l = new LinkedList(); + l.add(element); + this.onstack.put(weight, l); + } else { + l.add(element); + } } - - // register it for double-check - this.instack.add(element); // check maximum size of the stack an remove elements if the stack gets too large if (this.maxsize <= 0) return; - while ((this.onstack.size() > 0) && (this.onstack.size() > this.maxsize)) { - this.onstack.remove(this.onstack.lastKey()); + while ((this.onstack.size() > 0) && (this.onstack.size() > this.maxsize)) synchronized (this.onstack) { + if ((this.onstack.size() > 0) && (this.onstack.size() > this.maxsize)) { + this.onstack.remove(this.onstack.lastKey()); + } } } @@ -87,12 +88,16 @@ public class SortStack { * return the element with the smallest weight * @return */ - public synchronized stackElement top() { + public stackElement top() { // returns the element that is currently on top of the stack - if (this.onstack.isEmpty()) return null; - final Long w = this.onstack.firstKey(); - final List l = this.onstack.get(w); - final E element = l.get(0); + final E element; + final Long w; + synchronized (this.onstack) { + if (this.onstack.isEmpty()) return null; + w = this.onstack.firstKey(); + final List l = this.onstack.get(w); + element = l.get(0); + } return new stackElement(element, w); } @@ -100,44 +105,53 @@ public class SortStack { * return the element with the smallest weight and remove it from the stack * @return */ - public synchronized stackElement pop() { + public stackElement pop() { // returns the element that is currently on top of the stack // it is removed and added to the offstack list - // this is exactly the same as element(offstack.size()) - if (this.onstack.isEmpty()) return null; - final Long w = this.onstack.firstKey(); - final List l = this.onstack.get(w); - final E element = l.remove(0); - if (l.size() == 0) this.onstack.remove(w); - this.instack.remove(element); + final E element; + final Long w; + synchronized (this.onstack) { + if (this.onstack.isEmpty()) return null; + w = this.onstack.firstKey(); + final List l = this.onstack.get(w); + element = l.remove(0); + this.instack.remove(element); + if (l.size() == 0) this.onstack.remove(w); + } return new stackElement(element, w); } - public synchronized boolean exists(final E element) { + public boolean exists(final E element) { // uses the hashCode of the element to find out of the element had been on the list or the stack return this.instack.contains(element); } - public synchronized void remove(final E element) { + public void remove(final E element) { if (!this.instack.contains(element)) return; - for (Map.Entry> entry: this.onstack.entrySet()) { - Iterator i = entry.getValue().iterator(); - while (i.hasNext()) { - if (i.next().equals(element)) { - i.remove(); - if (entry.getValue().size() == 0) { - this.onstack.remove(entry.getKey()); + synchronized (this.onstack) { + for (Map.Entry> entry: this.onstack.entrySet()) { + Iterator i = entry.getValue().iterator(); + while (i.hasNext()) { + if (i.next().equals(element)) { + i.remove(); + if (entry.getValue().size() == 0) { + this.onstack.remove(entry.getKey()); + } + return; } - return; } } } } - public synchronized boolean bottom(final long weight) { + public boolean bottom(final long weight) { // returns true if the element with that weight would be on the bottom of the stack after inserting - return weight > this.onstack.lastKey().longValue(); + Long l; + synchronized (this.onstack) { + l = this.onstack.lastKey(); + } + return weight > l.longValue(); } public class stackElement { diff --git a/source/net/yacy/kelondro/util/SortStore.java b/source/net/yacy/kelondro/util/SortStore.java index 428163c4a..f37bd2606 100644 --- a/source/net/yacy/kelondro/util/SortStore.java +++ b/source/net/yacy/kelondro/util/SortStore.java @@ -140,7 +140,7 @@ public class SortStore extends SortStack { public synchronized boolean bottom(final long weight) { if (super.bottom(weight)) return true; - return weight >= this.largest; + return weight > this.largest; } public static void main(String[] args) {