some performance hacks and fixes after reading the dump in

http://forum.yacy-websuche.de/viewtopic.php?p=19920#p19920

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6837 6c8d7289-2bf4-0310-a012-ef5d649a1542
orbiter 15 years ago
parent 2bc3cba6f1
commit b18a7606a0

@@ -416,9 +416,9 @@ public class IndexControlRWIs_p {
prop.put("genUrlList_urlList_"+i+"_urlExists_urlStringShort", (us.length() > 40) ? (us.substring(0, 20) + "<br>" + us.substring(20, 40) + "...") : ((us.length() > 30) ? (us.substring(0, 20) + "<br>" + us.substring(20)) : us));
prop.putNum("genUrlList_urlList_"+i+"_urlExists_ranking", (entry.ranking() - rn));
prop.putNum("genUrlList_urlList_"+i+"_urlExists_domlength", DigestURI.domLengthEstimation(entry.hash()));
prop.putNum("genUrlList_urlList_"+i+"_urlExists_ybr", RankingProcess.ybr(new String(entry.hash())));
prop.putNum("genUrlList_urlList_"+i+"_urlExists_ybr", RankingProcess.ybr(entry.hash()));
prop.putNum("genUrlList_urlList_"+i+"_urlExists_tf", 1000.0 * entry.word().termFrequency());
prop.putNum("genUrlList_urlList_"+i+"_urlExists_authority", (ranked.getOrder() == null) ? -1 : ranked.getOrder().authority(new String(entry.hash())));
prop.putNum("genUrlList_urlList_"+i+"_urlExists_authority", (ranked.getOrder() == null) ? -1 : ranked.getOrder().authority(entry.hash()));
prop.put("genUrlList_urlList_"+i+"_urlExists_date", DateFormatter.formatShortDay(new Date(entry.word().lastModified())));
prop.putNum("genUrlList_urlList_"+i+"_urlExists_wordsintitle", entry.word().wordsintitle());
prop.putNum("genUrlList_urlList_"+i+"_urlExists_wordsintext", entry.word().wordsintext());

@@ -33,6 +33,7 @@ import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import java.util.TreeSet;
import net.yacy.document.content.RSSMessage;
@@ -224,7 +225,7 @@ public final class search {
final long timer = System.currentTimeMillis();
//final Map<byte[], ReferenceContainer<WordReference>>[] containers = sb.indexSegment.index().searchTerm(theQuery.queryHashes, theQuery.excludeHashes, plasmaSearchQuery.hashes2StringSet(urls));
final HashMap<byte[], ReferenceContainer<WordReference>> incc = indexSegment.termIndex().searchConjunction(theQuery.queryHashes, QueryParams.hashes2Handles(urls));
final TreeMap<byte[], ReferenceContainer<WordReference>> incc = indexSegment.termIndex().searchConjunction(theQuery.queryHashes, QueryParams.hashes2Handles(urls));
EventTracker.update("SEARCH", new ProfilingGraph.searchEvent(theQuery.id(true), SearchEvent.COLLECTION, incc.size(), System.currentTimeMillis() - timer), false, 30000, ProfilingGraph.maxTime);
if (incc != null) {
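Note on the HashMap to TreeMap switch above: byte[] does not override equals()/hashCode(), so a HashMap keyed on raw word hashes can only find entries by object identity, while a TreeMap with a byte-order comparator compares the key contents. A minimal standalone sketch of that difference (not part of the commit; the inline comparator is only a stand-in for Base64Order.enhancedCoder):

import java.util.Comparator;
import java.util.HashMap;
import java.util.TreeMap;

public class ByteArrayKeyDemo {
    public static void main(final String[] args) {
        final byte[] k1 = "0gHa1UOcz-CW".getBytes(); // hypothetical 12-byte word hash
        final byte[] k2 = "0gHa1UOcz-CW".getBytes(); // same content, different array object

        final HashMap<byte[], String> hashed = new HashMap<byte[], String>();
        hashed.put(k1, "container");
        System.out.println(hashed.get(k2)); // null: arrays hash by identity, lookup by content fails

        // plain byte comparator as a stand-in for Base64Order.enhancedCoder
        final Comparator<byte[]> order = new Comparator<byte[]>() {
            public int compare(final byte[] a, final byte[] b) {
                final int n = Math.min(a.length, b.length);
                for (int i = 0; i < n; i++) {
                    final int d = (a[i] & 0xff) - (b[i] & 0xff);
                    if (d != 0) return d;
                }
                return a.length - b.length;
            }
        };
        final TreeMap<byte[], String> sorted = new TreeMap<byte[], String>(order);
        sorted.put(k1, "container");
        System.out.println(sorted.get(k2)); // "container": keys are compared by content
    }
}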

@@ -119,19 +119,19 @@ public class yacysearchitem {
prop.putHTML("content_authorized_recommend_deletelink", "/yacysearch.html?search=" + theQuery.queryString + "&Enter=Search&count=" + theQuery.displayResults() + "&offset=" + (theQuery.neededResults() - theQuery.displayResults()) + "&order=" + crypt.simpleEncode(theQuery.ranking.toExternalString()) + "&resource=local&time=3&deleteref=" + new String(result.hash()) + "&urlmaskfilter=.*");
prop.putHTML("content_authorized_recommend_recommendlink", "/yacysearch.html?search=" + theQuery.queryString + "&Enter=Search&count=" + theQuery.displayResults() + "&offset=" + (theQuery.neededResults() - theQuery.displayResults()) + "&order=" + crypt.simpleEncode(theQuery.ranking.toExternalString()) + "&resource=local&time=3&recommendref=" + new String(result.hash()) + "&urlmaskfilter=.*");
prop.put("content_authorized_urlhash", new String(result.hash()));
String resulthashString = new String(result.hash());
prop.putHTML("content_title", result.title());
prop.putXML("content_title-xml", result.title());
prop.putJSON("content_title-json", result.title());
prop.putHTML("content_link", result.urlstring());
prop.put("content_display", display);
prop.putHTML("content_faviconCode", sb.licensedURLs.aquireLicense(faviconURL)); // aquire license for favicon url loading
prop.put("content_urlhash", new String(result.hash()));
prop.put("content_urlhexhash", yacySeed.b64Hash2hexHash(new String(result.hash())));
prop.put("content_urlhash", resulthashString);
prop.put("content_urlhexhash", yacySeed.b64Hash2hexHash(resulthashString));
prop.putHTML("content_urlname", nxTools.shortenURLString(result.urlname(), urllength));
prop.put("content_date", Switchboard.dateString(result.modified()));
prop.put("content_date822", Switchboard.dateString822(result.modified()));
prop.put("content_ybr", RankingProcess.ybr(new String(result.hash())));
prop.put("content_ybr", RankingProcess.ybr(result.hash()));
prop.putHTML("content_size", Integer.toString(result.filesize())); // we don't use putNUM here because that number shall be usable as sorting key. To print the size, use 'sizename'
prop.putHTML("content_sizename", sizename(result.filesize()));
prop.putHTML("content_host", result.url().getHost());
@@ -140,7 +140,6 @@ public class yacysearchitem {
prop.put("content_nl", (item == 0) ? 0 : 1);
final TreeSet<String>[] query = theQuery.queryWords();
DigestURI wordURL = null;
try {
prop.putHTML("content_words", URLEncoder.encode(query[0].toString(),"UTF-8"));
} catch (final UnsupportedEncodingException e) {}

@@ -31,10 +31,10 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.ConcurrentModificationException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.TimeUnit;
@@ -72,7 +72,7 @@ public final class RankingProcess extends Thread {
private final int[] flagcount; // flag counter
private final HandleSet misses; // contains url-hashes that could not been found in the LURL-DB
//private final int[] domZones;
private HashMap<byte[], ReferenceContainer<WordReference>> localSearchInclusion;
private TreeMap<byte[], ReferenceContainer<WordReference>> localSearchInclusion;
private int remote_resourceSize, remote_indexCount, remote_peerCount;
private int local_resourceSize, local_indexCount;
@@ -683,14 +683,15 @@ public final class RankingProcess extends Thread {
useYBR = usage;
}
public static int ybr(final String urlHash) {
public static int ybr(final byte[] urlHash) {
// returns the YBR value in a range of 0..15, where 0 means best ranking and 15 means worst ranking
if (ybrTables == null) return 15;
if (!(useYBR)) return 15;
final String domHash = urlHash.substring(6);
byte[] domhash = new byte[6];
System.arraycopy(urlHash, 6, domhash, 0, 6);
final int m = Math.min(maxYBR, ybrTables.length);
for (int i = 0; i < m; i++) {
if ((ybrTables[i] != null) && (ybrTables[i].contains(domHash.getBytes()))) {
if ((ybrTables[i] != null) && (ybrTables[i].contains(domhash))) {
//System.out.println("YBR FOUND: " + urlHash + " (" + i + ")");
return i;
}
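The ybr() signature change above avoids building a String and then calling substring().getBytes() for every ranked URL; the 6-byte domain part is copied straight out of the 12-byte hash. A hedged sketch of that slice with a made-up hash value (RankingProcess.ybr is the method from the hunk above; the literal is purely illustrative):

public class DomHashSlice {
    public static void main(final String[] args) {
        final byte[] urlHash = "aaaaaabbbbbb".getBytes(); // hypothetical 12-byte URL hash; last 6 bytes are the domain hash
        final byte[] domHash = new byte[6];
        System.arraycopy(urlHash, 6, domHash, 0, 6);      // same bytes as substring(6), without the String round trip
        System.out.println(new String(domHash));          // prints "bbbbbb"
        // final int rank = RankingProcess.ybr(urlHash);  // per the hunk above: 0 = best, 15 = worst or no YBR table hit
    }
}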

@@ -177,8 +177,8 @@ public class ReferenceOrder {
}
}
public int authority(final String urlHash) {
return (doms.getScore(urlHash.substring(6)) << 8) / (1 + this.maxdomcount);
public int authority(final byte[] urlHash) {
return (doms.getScore(new String(urlHash, 6, 6)) << 8) / (1 + this.maxdomcount);
}
public long cardinal(final WordReferenceVars t) {
@@ -193,10 +193,9 @@ public class ReferenceOrder {
//System.out.println("tf(" + t.urlHash + ") = " + Math.floor(1000 * t.termFrequency()) + ", min = " + Math.floor(1000 * min.termFrequency()) + ", max = " + Math.floor(1000 * max.termFrequency()) + ", tf-normed = " + tf);
int maxmaxpos = max.maxposition();
int minminpos = min.minposition();
String mdhb = new String(t.metadataHash());
final long r =
((256 - DigestURI.domLengthNormalized(t.metadataHash())) << ranking.coeff_domlength)
+ ((ranking.coeff_ybr > 12) ? ((256 - (RankingProcess.ybr(mdhb) << 4)) << ranking.coeff_ybr) : 0)
+ ((ranking.coeff_ybr > 12) ? ((256 - (RankingProcess.ybr(t.metadataHash()) << 4)) << ranking.coeff_ybr) : 0)
+ ((max.urlcomps() == min.urlcomps() ) ? 0 : (256 - (((t.urlcomps() - min.urlcomps() ) << 8) / (max.urlcomps() - min.urlcomps()) )) << ranking.coeff_urlcomps)
+ ((max.urllength() == min.urllength() ) ? 0 : (256 - (((t.urllength() - min.urllength() ) << 8) / (max.urllength() - min.urllength()) )) << ranking.coeff_urllength)
+ ((maxmaxpos == minminpos ) ? 0 : (256 - (((t.minposition() - minminpos ) << 8) / (maxmaxpos - minminpos) )) << ranking.coeff_posintext)
@@ -211,7 +210,7 @@ public class ReferenceOrder {
+ ((max.lother() == min.lother()) ? 0 : (((t.lother() - min.lother() ) << 8) / (max.lother() - min.lother()) ) << ranking.coeff_lother)
+ ((max.hitcount() == min.hitcount()) ? 0 : (((t.hitcount() - min.hitcount() ) << 8) / (max.hitcount() - min.hitcount()) ) << ranking.coeff_hitcount)
+ tf
+ ((ranking.coeff_authority > 12) ? (authority(mdhb) << ranking.coeff_authority) : 0)
+ ((ranking.coeff_authority > 12) ? (authority(t.metadataHash()) << ranking.coeff_authority) : 0)
+ ((flags.get(WordReferenceRow.flag_app_dc_identifier)) ? 255 << ranking.coeff_appurl : 0)
+ ((flags.get(WordReferenceRow.flag_app_dc_title)) ? 255 << ranking.coeff_app_dc_title : 0)
+ ((flags.get(WordReferenceRow.flag_app_dc_creator)) ? 255 << ranking.coeff_app_dc_creator : 0)
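For readers of the cardinal() sum above: each feature is first normalized to 0..255 against the minimum and maximum observed in the result set, then weighted by a left shift with its ranking coefficient. A small worked sketch with made-up numbers (the variable names and values are assumptions, not taken from YaCy's ranking configuration):

public class RankingTermDemo {
    public static void main(final String[] args) {
        final int min = 2, max = 40, t = 12;  // hypothetical urlcomps: best seen, worst seen, current document
        final int coeff = 10;                 // hypothetical ranking coefficient (e.g. coeff_urlcomps)
        final int normalized = ((t - min) << 8) / (max - min);        // scaled into 0..255 relative to the result set
        final long contribution = (long) (256 - normalized) << coeff; // fewer URL components -> larger contribution
        System.out.println(normalized + " -> " + contribution);       // 67 -> 193536
    }
}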

@@ -135,14 +135,7 @@ public class yacySearch extends Thread {
containerCache.oneFeederTerminated();
}
}
/*
public static String set2string(final TreeSet<byte[]> hashes) {
String wh = "";
final Iterator<byte[]> iter = hashes.iterator();
while (iter.hasNext()) { wh = wh + new String(iter.next()); }
return wh;
}
*/
public static String set2string(final HandleSet hashes) {
String wh = "";
final Iterator<byte[]> iter = hashes.iterator();

@@ -227,8 +227,16 @@ public class MapHeap {
private String normalizeKey(String key) {
if (blob == null || key == null) return key;
if (key.length() > blob.keylength()) key = key.substring(0, blob.keylength());
while (key.length() < blob.keylength()) key += fillchar;
if (key.length() > blob.keylength()) {
return key.substring(0, blob.keylength());
}
if (key.length() < blob.keylength()) {
byte[] k = key.getBytes();
byte[] b = new byte[blob.keylength()];
System.arraycopy(k, 0, b, 0, k.length);
for (int i = k.length; i < b.length; i++) b[i] = (byte) fillchar;
return new String(b);
}
return key;
}
@@ -237,13 +245,13 @@ public class MapHeap {
if (key.length > blob.keylength()) {
byte[] b = new byte[blob.keylength()];
System.arraycopy(key, 0, b, 0, blob.keylength());
key = b;
return b;
}
if (key.length < blob.keylength()) {
byte[] b = new byte[blob.keylength()];
System.arraycopy(key, 0, b, 0, key.length);
for (int i = key.length; i < blob.keylength(); i++) b[i] = (byte) fillchar;
key = b;
for (int i = key.length; i < b.length; i++) b[i] = (byte) fillchar;
return b;
}
return key;
}
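The rewritten key normalization above replaces per-character String concatenation with a single byte[] fill. A self-contained sketch of the resulting pad/truncate behaviour, with a hypothetical key length of 12 and '_' as fill character (the real values come from blob.keylength() and MapHeap's fillchar field):

public class KeyPadDemo {
    private static final int KEYLENGTH = 12;  // assumption; MapHeap uses blob.keylength()
    private static final char FILLCHAR = '_'; // assumption; MapHeap uses its configured fillchar

    static byte[] normalize(final byte[] key) {
        if (key.length == KEYLENGTH) return key;
        final byte[] b = new byte[KEYLENGTH];
        System.arraycopy(key, 0, b, 0, Math.min(key.length, KEYLENGTH)); // copy, truncating long keys
        for (int i = key.length; i < KEYLENGTH; i++) b[i] = (byte) FILLCHAR; // pad short keys
        return b;
    }

    public static void main(final String[] args) {
        System.out.println(new String(normalize("abc".getBytes())));              // abc_________
        System.out.println(new String(normalize("abcdefghijklmnop".getBytes()))); // abcdefghijkl
    }
}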

@@ -30,11 +30,13 @@ package net.yacy.kelondro.rwi;
import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.TreeMap;
import java.util.TreeSet;
import net.yacy.kelondro.index.HandleSet;
import net.yacy.kelondro.index.RowSpaceExceededException;
import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.order.Base64Order;
import net.yacy.kelondro.order.Order;
public abstract class AbstractIndex <ReferenceType extends Reference> implements Index<ReferenceType> {
@@ -91,16 +93,16 @@ public abstract class AbstractIndex <ReferenceType extends Reference> implements
* @param urlselection
* @return map of wordhash:indexContainer
*/
public HashMap<byte[], ReferenceContainer<ReferenceType>> searchConjunction(final HandleSet wordHashes, final HandleSet urlselection) {
public TreeMap<byte[], ReferenceContainer<ReferenceType>> searchConjunction(final HandleSet wordHashes, final HandleSet urlselection) {
// first check if there is any entry that has no match; this uses only operations in ram
/*
Iterator<byte[]> i = wordHashes.iterator();
while (i.hasNext()) {
if (!this.has(i.next())); return new HashMap<byte[], ReferenceContainer<ReferenceType>>(0);
if (!this.has(i.next())); return new TreeMap<byte[], ReferenceContainer<ReferenceType>>(0);
}
*/
// retrieve entities that belong to the hashes
final HashMap<byte[], ReferenceContainer<ReferenceType>> containers = new HashMap<byte[], ReferenceContainer<ReferenceType>>(wordHashes.size());
final TreeMap<byte[], ReferenceContainer<ReferenceType>> containers = new TreeMap<byte[], ReferenceContainer<ReferenceType>>(Base64Order.enhancedCoder);
byte[] singleHash;
ReferenceContainer<ReferenceType> singleContainer;
final Iterator<byte[]> i = wordHashes.iterator();
@@ -118,7 +120,7 @@ public abstract class AbstractIndex <ReferenceType extends Reference> implements
}
// check result
if ((singleContainer == null || singleContainer.isEmpty())) return new HashMap<byte[], ReferenceContainer<ReferenceType>>(0);
if ((singleContainer == null || singleContainer.isEmpty())) return new TreeMap<byte[], ReferenceContainer<ReferenceType>>(Base64Order.enhancedCoder);
containers.put(singleHash, singleContainer);
}
@@ -136,7 +138,7 @@ public abstract class AbstractIndex <ReferenceType extends Reference> implements
* @return ReferenceContainer the join result
* @throws RowSpaceExceededException
*/
public ReferenceContainer<ReferenceType> searchJoin(final TreeSet<byte[]> wordHashes, final HandleSet urlselection, final int maxDistance) throws RowSpaceExceededException {
public ReferenceContainer<ReferenceType> searchJoin(final HandleSet wordHashes, final HandleSet urlselection, final int maxDistance) throws RowSpaceExceededException {
// first check if there is any entry that has no match;
// this uses only operations in ram
for (byte[] wordHash: wordHashes) {

@@ -30,6 +30,7 @@ package net.yacy.kelondro.rwi;
import java.io.IOException;
import java.util.HashMap;
import java.util.TreeMap;
import java.util.TreeSet;
import net.yacy.kelondro.index.HandleSet;
@@ -144,7 +145,7 @@ public interface Index <ReferenceType extends Reference> {
* @param urlselection
* @return map of wordhash:indexContainer
*/
public HashMap<byte[], ReferenceContainer<ReferenceType>> searchConjunction(final HandleSet wordHashes, final HandleSet urlselection);
public TreeMap<byte[], ReferenceContainer<ReferenceType>> searchConjunction(final HandleSet wordHashes, final HandleSet urlselection);
/**
* delete all references entries

@@ -27,16 +27,17 @@
package net.yacy.kelondro.rwi;
import java.util.HashMap;
import java.util.TreeMap;
import net.yacy.kelondro.index.HandleSet;
import net.yacy.kelondro.index.RowSpaceExceededException;
import net.yacy.kelondro.order.Base64Order;
public class TermSearch <ReferenceType extends Reference> {
private final ReferenceContainer<ReferenceType> joinResult;
HashMap<byte[], ReferenceContainer<ReferenceType>> inclusionContainers;
TreeMap<byte[], ReferenceContainer<ReferenceType>> inclusionContainers;
public TermSearch(
Index<ReferenceType> base,
@@ -48,16 +49,16 @@ public class TermSearch <ReferenceType extends Reference> {
this.inclusionContainers =
(queryHashes.isEmpty()) ?
new HashMap<byte[], ReferenceContainer<ReferenceType>>(0) :
new TreeMap<byte[], ReferenceContainer<ReferenceType>>(Base64Order.enhancedCoder) :
base.searchConjunction(queryHashes, urlselection);
if (!inclusionContainers.isEmpty() &&
(inclusionContainers.size() < queryHashes.size()))
inclusionContainers = new HashMap<byte[], ReferenceContainer<ReferenceType>>(0); // prevent that only a subset is returned
inclusionContainers = new TreeMap<byte[], ReferenceContainer<ReferenceType>>(Base64Order.enhancedCoder); // prevent that only a subset is returned
HashMap<byte[], ReferenceContainer<ReferenceType>> exclusionContainers =
TreeMap<byte[], ReferenceContainer<ReferenceType>> exclusionContainers =
(inclusionContainers.isEmpty()) ?
new HashMap<byte[], ReferenceContainer<ReferenceType>>(0) :
new TreeMap<byte[], ReferenceContainer<ReferenceType>>(Base64Order.enhancedCoder) :
base.searchConjunction(excludeHashes, urlselection);
// join and exclude the result
@@ -72,7 +73,7 @@ public class TermSearch <ReferenceType extends Reference> {
return this.joinResult;
}
public HashMap<byte[], ReferenceContainer<ReferenceType>> inclusion() {
public TreeMap<byte[], ReferenceContainer<ReferenceType>> inclusion() {
return this.inclusionContainers;
}
