- better media search ranking

- better concurrency with enhanced synchronization in sort stack

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6496 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 15 years ago
parent 4df88a4e7a
commit 7b1f5b0430

@ -110,21 +110,15 @@ public class MediaSnippet implements Comparable<MediaSnippet>, Comparator<MediaS
Map.Entry<DigestURI, String> entry; Map.Entry<DigestURI, String> entry;
DigestURI url; DigestURI url;
String desc; String desc;
TreeSet<byte[]> s;
final ArrayList<MediaSnippet> result = new ArrayList<MediaSnippet>(); final ArrayList<MediaSnippet> result = new ArrayList<MediaSnippet>();
while (i.hasNext()) { while (i.hasNext()) {
entry = i.next(); entry = i.next();
url = entry.getKey(); url = entry.getKey();
desc = entry.getValue(); desc = entry.getValue();
s = TextSnippet.removeAppearanceHashes(url.toNormalform(false, false), queryhashes); int ranking = TextSnippet.removeAppearanceHashes(url.toNormalform(false, false), queryhashes).size() +
if (s.size() == 0) { TextSnippet.removeAppearanceHashes(desc, queryhashes).size();
result.add(new MediaSnippet(mediatype, url, desc, null, 0, document.dc_source())); if (ranking < 2 * queryhashes.size()) {
continue; result.add(new MediaSnippet(mediatype, url, desc, null, ranking, document.dc_source()));
}
s = TextSnippet.removeAppearanceHashes(desc, s);
if (s.size() == 0) {
result.add(new MediaSnippet(mediatype, url, desc, null, 0, document.dc_source()));
continue;
} }
} }
return result; return result;
@ -140,7 +134,6 @@ public class MediaSnippet implements Comparable<MediaSnippet>, Comparator<MediaS
ImageEntry ientry; ImageEntry ientry;
DigestURI url; DigestURI url;
String desc; String desc;
TreeSet<byte[]> s;
final ArrayList<MediaSnippet> result = new ArrayList<MediaSnippet>(); final ArrayList<MediaSnippet> result = new ArrayList<MediaSnippet>();
while (i.hasNext()) { while (i.hasNext()) {
ientry = i.next(); ientry = i.next();
@ -150,14 +143,10 @@ public class MediaSnippet implements Comparable<MediaSnippet>, Comparator<MediaS
if (ientry.height() > 0 && ientry.height() < 64) continue; if (ientry.height() > 0 && ientry.height() < 64) continue;
if (ientry.width() > 0 && ientry.width() < 64) continue; if (ientry.width() > 0 && ientry.width() < 64) continue;
desc = ientry.alt(); desc = ientry.alt();
int appcount = 0; int appcount = queryhashes.size() * 2 -
s = TextSnippet.removeAppearanceHashes(url.toNormalform(false, false), queryhashes); TextSnippet.removeAppearanceHashes(url.toNormalform(false, false), queryhashes).size() -
appcount += queryhashes.size() - s.size(); TextSnippet.removeAppearanceHashes(desc, queryhashes).size();
// if the resulting set is empty, then _all_ words from the query appeared in the url final int ranking = Integer.MAX_VALUE - (ientry.height() + 1) * (ientry.width() + 1) * (appcount + 1);
s = TextSnippet.removeAppearanceHashes(desc, s);
appcount += queryhashes.size() - s.size();
// if the resulting set is empty, then _all_ search words appeared in the description
final int ranking = /*(ientry.hashCode() / queryhashes.size() / 2) */ ientry.height() * ientry.width() * appcount * 10000 /* 0x7FFF0000)*/;
result.add(new MediaSnippet(ContentDomain.IMAGE, url, desc, ientry.width() + " x " + ientry.height(), ranking, document.dc_source())); result.add(new MediaSnippet(ContentDomain.IMAGE, url, desc, ientry.width() + " x " + ientry.height(), ranking, document.dc_source()));
} }
return result; return result;

@ -60,26 +60,27 @@ public class SortStack<E> {
* @param element * @param element
* @param weight * @param weight
*/ */
public synchronized void push(final E element, Long weight) { public void push(final E element, Long weight) {
if (this.instack.contains(element)) return; if (!this.instack.add(element)) return;
// put the element on the stack // put the element on the stack
List<E> l = this.onstack.get(weight); synchronized (this.onstack) {
if (l == null) { List<E> l = this.onstack.get(weight);
l = new LinkedList<E>(); if (l == null) {
l.add(element); l = new LinkedList<E>();
this.onstack.put(weight, l); l.add(element);
} else { this.onstack.put(weight, l);
l.add(element); } else {
l.add(element);
}
} }
// register it for double-check
this.instack.add(element);
// check maximum size of the stack and remove elements if the stack gets too large // check maximum size of the stack and remove elements if the stack gets too large
if (this.maxsize <= 0) return; if (this.maxsize <= 0) return;
while ((this.onstack.size() > 0) && (this.onstack.size() > this.maxsize)) { while ((this.onstack.size() > 0) && (this.onstack.size() > this.maxsize)) synchronized (this.onstack) {
this.onstack.remove(this.onstack.lastKey()); if ((this.onstack.size() > 0) && (this.onstack.size() > this.maxsize)) {
this.onstack.remove(this.onstack.lastKey());
}
} }
} }
@ -87,12 +88,16 @@ public class SortStack<E> {
* return the element with the smallest weight * return the element with the smallest weight
* @return * @return
*/ */
public synchronized stackElement top() { public stackElement top() {
// returns the element that is currently on top of the stack // returns the element that is currently on top of the stack
if (this.onstack.isEmpty()) return null; final E element;
final Long w = this.onstack.firstKey(); final Long w;
final List<E> l = this.onstack.get(w); synchronized (this.onstack) {
final E element = l.get(0); if (this.onstack.isEmpty()) return null;
w = this.onstack.firstKey();
final List<E> l = this.onstack.get(w);
element = l.get(0);
}
return new stackElement(element, w); return new stackElement(element, w);
} }
@ -100,44 +105,53 @@ public class SortStack<E> {
* return the element with the smallest weight and remove it from the stack * return the element with the smallest weight and remove it from the stack
* @return * @return
*/ */
public synchronized stackElement pop() { public stackElement pop() {
// returns the element that is currently on top of the stack // returns the element that is currently on top of the stack
// it is removed and added to the offstack list // it is removed and added to the offstack list
// this is exactly the same as element(offstack.size()) final E element;
if (this.onstack.isEmpty()) return null; final Long w;
final Long w = this.onstack.firstKey(); synchronized (this.onstack) {
final List<E> l = this.onstack.get(w); if (this.onstack.isEmpty()) return null;
final E element = l.remove(0); w = this.onstack.firstKey();
if (l.size() == 0) this.onstack.remove(w); final List<E> l = this.onstack.get(w);
this.instack.remove(element); element = l.remove(0);
this.instack.remove(element);
if (l.size() == 0) this.onstack.remove(w);
}
return new stackElement(element, w); return new stackElement(element, w);
} }
public synchronized boolean exists(final E element) { public boolean exists(final E element) {
// uses the hashCode of the element to find out if the element had been on the list or the stack // uses the hashCode of the element to find out if the element had been on the list or the stack
return this.instack.contains(element); return this.instack.contains(element);
} }
public synchronized void remove(final E element) { public void remove(final E element) {
if (!this.instack.contains(element)) return; if (!this.instack.contains(element)) return;
for (Map.Entry<Long,List<E>> entry: this.onstack.entrySet()) { synchronized (this.onstack) {
Iterator<E> i = entry.getValue().iterator(); for (Map.Entry<Long,List<E>> entry: this.onstack.entrySet()) {
while (i.hasNext()) { Iterator<E> i = entry.getValue().iterator();
if (i.next().equals(element)) { while (i.hasNext()) {
i.remove(); if (i.next().equals(element)) {
if (entry.getValue().size() == 0) { i.remove();
this.onstack.remove(entry.getKey()); if (entry.getValue().size() == 0) {
this.onstack.remove(entry.getKey());
}
return;
} }
return;
} }
} }
} }
} }
public synchronized boolean bottom(final long weight) { public boolean bottom(final long weight) {
// returns true if the element with that weight would be on the bottom of the stack after inserting // returns true if the element with that weight would be on the bottom of the stack after inserting
return weight > this.onstack.lastKey().longValue(); Long l;
synchronized (this.onstack) {
l = this.onstack.lastKey();
}
return weight > l.longValue();
} }
public class stackElement { public class stackElement {

@ -140,7 +140,7 @@ public class SortStore<E> extends SortStack<E> {
public synchronized boolean bottom(final long weight) { public synchronized boolean bottom(final long weight) {
if (super.bottom(weight)) return true; if (super.bottom(weight)) return true;
return weight >= this.largest; return weight > this.largest;
} }
public static void main(String[] args) { public static void main(String[] args) {

Loading…
Cancel
Save