- faster search: using different data structures that avoid multiple calculations

- no more table copy for error-eco table
- optional table copy for lurl-entries
- more abstractions (less single constant strings)
- better logging (using host names instead of ips)

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4459 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 17 years ago
parent 8358652fa9
commit bd63999801

@ -102,7 +102,7 @@ public class BlogComments {
}
String pagename = post.get("page", "blog_default");
String ip = post.get("CLIENTIP", "127.0.0.1");
String ip = post.get(httpHeader.CONNECTION_PROP_CLIENTIP, "127.0.0.1");
String StrAuthor = post.get("author", "anonymous");

@ -105,9 +105,9 @@ public class CrawlProfileEditor_p {
while (it.hasNext()) {
selentry = (entry)it.next();
if (selentry.name().equals(plasmaSwitchboard.CRAWL_PROFILE_PROXY) ||
selentry.name().equals(plasmaSwitchboard.CRAWL_PROFILE_REMOTE) ||
selentry.name().equals(plasmaSwitchboard.CRAWL_PROFILE_REMOTE) /*||
selentry.name().equals(plasmaSwitchboard.CRAWL_PROFILE_SNIPPET_TEXT) ||
selentry.name().equals(plasmaSwitchboard.CRAWL_PROFILE_SNIPPET_MEDIA))
selentry.name().equals(plasmaSwitchboard.CRAWL_PROFILE_SNIPPET_MEDIA)*/)
continue;
prop.put("profiles_" + count + "_name", selentry.name());
prop.put("profiles_" + count + "_handle", selentry.handle());

@ -212,7 +212,7 @@ public final class Settings_p {
}
// clientIP
prop.putHTML("clientIP", (String) header.get("CLIENTIP", "<unknown>"), true); // read an artificial header addendum
prop.putHTML("clientIP", (String) header.get(httpHeader.CONNECTION_PROP_CLIENTIP, "<unknown>"), true); // read an artificial header addendum
/*
* seed upload settings

@ -50,7 +50,7 @@ public class TestApplet {
//File templatefile=filehandler.getOverlayedFile((String)post.get("url"));
File classfile = httpdFileHandler.getOverlayedClass((String)post.get("url"));
httpHeader header2=new httpHeader();
header2.put("CLIENTIP", "127.0.0.1");
header2.put(httpHeader.CONNECTION_PROP_CLIENTIP, "127.0.0.1");
header2.put("PATH", post.get("url"));
serverObjects tp=null;
try {

@ -79,7 +79,7 @@ public class User{
prop.put("logged-in_identified-by", "2");
//try via ip
if(entry == null){
entry=sb.userDB.ipAuth(((String)header.get("CLIENTIP", "xxxxxx")));
entry=sb.userDB.ipAuth(((String)header.get(httpHeader.CONNECTION_PROP_CLIENTIP, "xxxxxx")));
if(entry != null){
prop.put("logged-in_identified-by", "0");
}
@ -108,7 +108,7 @@ public class User{
//identified via form-login
//TODO: this does not work for a static admin, yet.
}else if(post != null && post.containsKey("username") && post.containsKey("password")){
//entry=sb.userDB.passwordAuth((String)post.get("username"), (String)post.get("password"), (String)header.get("CLIENTIP", "xxxxxx"));
//entry=sb.userDB.passwordAuth((String)post.get("username"), (String)post.get("password"), (String)header.get(httpHeader.CONNECTION_PROP_CLIENTIP, "xxxxxx"));
String username=(String)post.get("username");
String password=(String)post.get("password");
@ -163,7 +163,7 @@ public class User{
if(post!=null && post.containsKey("logout")){
prop.put("logged-in", "0");
if(entry != null){
entry.logout(((String)header.get("CLIENTIP", "xxxxxx")), userDB.getLoginToken(header.getHeaderCookies())); //todo: logout cookie
entry.logout(((String)header.get(httpHeader.CONNECTION_PROP_CLIENTIP, "xxxxxx")), userDB.getLoginToken(header.getHeaderCookies())); //todo: logout cookie
}else{
sb.userDB.adminLogout(userDB.getLoginToken(header.getHeaderCookies()));
}

@ -72,7 +72,7 @@ public class ViewImage {
String urlString = post.get("url", "");
String urlLicense = post.get("code", "");
boolean auth = ((String) header.get("CLIENTIP", "")).equals("localhost") || sb.verifyAuthentication(header, true); // handle access rights
boolean auth = ((String) header.get(httpHeader.CONNECTION_PROP_CLIENTIP, "")).equals("localhost") || sb.verifyAuthentication(header, true); // handle access rights
yacyURL url = null;
if ((urlString.length() > 0) && (auth)) try {

@ -88,7 +88,7 @@ public class Wiki {
String access = switchboard.getConfig("WikiAccess", "admin");
String pagename = post.get("page", "start");
String ip = post.get("CLIENTIP", "127.0.0.1");
String ip = post.get(httpHeader.CONNECTION_PROP_CLIENTIP, "127.0.0.1");
String author = post.get("author", "anonymous");
if (author.equals("anonymous")) {
author = wikiBoard.guessAuthor(ip);

@ -78,7 +78,7 @@ public class welcome {
prop.put("hostip", "Unknown Host Exception");
}
prop.put("port", serverCore.getPortNr(env.getConfig("port","8080")));
prop.put("clientip", (String) header.get("CLIENTIP", ""));
prop.put("clientip", (String) header.get(httpHeader.CONNECTION_PROP_CLIENTIP, ""));
final String peertype = (yacyCore.seedDB.mySeed() == null) ? yacySeed.PEERTYPE_JUNIOR : yacyCore.seedDB.mySeed().get(yacySeed.PEERTYPE, yacySeed.PEERTYPE_VIRGIN);
final boolean senior = (peertype.equals(yacySeed.PEERTYPE_SENIOR)) || (peertype.equals(yacySeed.PEERTYPE_PRINCIPAL));

@ -103,7 +103,7 @@ public final class hello {
// if ((properTest != null) && (! properTest.substring(0,1).equals("IP"))) { return null; }
// we easily know the caller's IP:
final String clientip = (String) header.get("CLIENTIP", "<unknown>"); // read an artificial header addendum
final String clientip = (String) header.get(httpHeader.CONNECTION_PROP_CLIENTIP, "<unknown>"); // read an artificial header addendum
InetAddress ias = serverDomains.dnsResolve(clientip);
if (ias == null) {
prop.put("message", "cannot resolve your IP from your reported location " + clientip);

@ -282,7 +282,7 @@ public final class search {
// prepare search statistics
Long trackerHandle = new Long(System.currentTimeMillis());
HashMap<String, Object> searchProfile = theQuery.resultProfile(joincount, System.currentTimeMillis() - timestamp, urlRetrievalAllTime, snippetComputationAllTime);
String client = (String) header.get("CLIENTIP");
String client = (String) header.get(httpHeader.CONNECTION_PROP_CLIENTIP);
searchProfile.put("host", client);
yacySeed remotepeer = yacyCore.seedDB.lookupByIP(natLib.getInetAddress(client), true, false, false);
searchProfile.put("peername", (remotepeer == null) ? "unknown" : remotepeer.getName());

@ -89,14 +89,14 @@ public final class transfer {
final yacySeed opeer = yacyCore.seedDB.get(ohash);
if (opeer == null) {
// reject unknown peers: this does not appear fair, but anonymous senders are dangerous
sb.getLog().logFine("RankingTransmission: rejected unknown peer '" + ohash + "', current IP " + header.get("CLIENTIP", "unknown"));
sb.getLog().logFine("RankingTransmission: rejected unknown peer '" + ohash + "', current IP " + header.get(httpHeader.CONNECTION_PROP_CLIENTIP, "unknown"));
return prop;
}
opeer.setLastSeenUTC();
if (filename.indexOf("..") >= 0) {
// reject paths that contain '..' because they are dangerous
sb.getLog().logFine("RankingTransmission: rejected wrong path '" + filename + "' from peer " + opeer.getName() + "/" + opeer.getPublicAddress()+ ", current IP " + header.get("CLIENTIP", "unknown"));
sb.getLog().logFine("RankingTransmission: rejected wrong path '" + filename + "' from peer " + opeer.getName() + "/" + opeer.getPublicAddress()+ ", current IP " + header.get(httpHeader.CONNECTION_PROP_CLIENTIP, "unknown"));
return prop;
}

@ -257,7 +257,7 @@ public class yacysearch {
constraint,
true);
String client = (String) header.get("CLIENTIP"); // the search client who initiated the search
String client = (String) header.get(httpHeader.CONNECTION_PROP_CLIENTIP); // the search client who initiated the search
// tell all threads to do nothing for a specific time
sb.intermissionAllThreads(10000);

@ -155,7 +155,7 @@ public final class userDB {
return null;
}
public Entry getUser(httpHeader header){
return getUser((String) header.get(httpHeader.AUTHORIZATION), (String)header.get("CLIENTIP"), header.getHeaderCookies());
return getUser((String) header.get(httpHeader.AUTHORIZATION), (String)header.get(httpHeader.CONNECTION_PROP_CLIENTIP), header.getHeaderCookies());
}
public Entry getUser(String auth, String ip, String cookies){
Entry entry=null;

@ -85,7 +85,7 @@ public class httpSSI {
conProp.setProperty(httpHeader.CONNECTION_PROP_PATH, path);
conProp.setProperty(httpHeader.CONNECTION_PROP_ARGS, args);
conProp.setProperty(httpHeader.CONNECTION_PROP_HTTP_VER, httpHeader.HTTP_VERSION_0_9);
conProp.setProperty("CLIENTIP", "127.0.0.1");
conProp.setProperty(httpHeader.CONNECTION_PROP_CLIENTIP, "127.0.0.1");
header.put(httpHeader.AUTHORIZATION, authorization);
httpdFileHandler.doGet(conProp, header, out);
}

@ -193,7 +193,7 @@ public final class httpd implements serverHandler {
public void initSession(serverCore.Session newsession) throws IOException {
this.session = newsession;
this.userAddress = session.userAddress; // client InetAddress
this.clientIP = this.userAddress.getHostAddress();
this.clientIP = this.userAddress.getHostName();
if (this.userAddress.isAnyLocalAddress()) this.clientIP = "localhost";
if (this.clientIP.equals("0:0:0:0:0:0:0:1")) this.clientIP = "localhost";
if (this.clientIP.equals("127.0.0.1")) this.clientIP = "localhost";
@ -1147,7 +1147,7 @@ public final class httpd implements serverHandler {
// tp.put("host", serverCore.publicIP().getHostAddress());
// tp.put("port", switchboard.getConfig("port", "8080"));
String clientIP = conProp.getProperty(httpHeader.CONNECTION_PROP_CLIENTIP,"127.0.0.1");
String clientIP = conProp.getProperty(httpHeader.CONNECTION_PROP_CLIENTIP, "127.0.0.1");
// check if ip is local ip address
InetAddress hostAddress = serverDomains.dnsResolve(clientIP);

@ -303,13 +303,13 @@ public final class httpdFileHandler {
if ((path.substring(0,(pos==-1)?path.length():pos)).endsWith("_p") && (adminAccountBase64MD5.length() != 0)) {
//authentication required
//userDB
if(sb.userDB.hasAdminRight(authorization, conProp.getProperty("CLIENTIP"), requestHeader.getHeaderCookies())){
if(sb.userDB.hasAdminRight(authorization, conProp.getProperty(httpHeader.CONNECTION_PROP_CLIENTIP), requestHeader.getHeaderCookies())){
//Authentication successful. remove brute-force flag
serverCore.bfHost.remove(conProp.getProperty("CLIENTIP"));
serverCore.bfHost.remove(conProp.getProperty(httpHeader.CONNECTION_PROP_CLIENTIP));
//static
}else if(authorization != null && httpd.staticAdminAuthenticated(authorization.trim().substring(6), switchboard)==4){
//Authentication successful. remove brute-force flag
serverCore.bfHost.remove(conProp.getProperty("CLIENTIP"));
serverCore.bfHost.remove(conProp.getProperty(httpHeader.CONNECTION_PROP_CLIENTIP));
//no auth
}else if (authorization == null) {
// no authorization given in response. Ask for that
@ -323,7 +323,7 @@ public final class httpdFileHandler {
return;
} else {
// a wrong authentication was given or the userDB user does not have admin access. Ask again
String clientIP = conProp.getProperty("CLIENTIP", "unknown-host");
String clientIP = conProp.getProperty(httpHeader.CONNECTION_PROP_CLIENTIP, "unknown-host");
serverLog.logInfo("HTTPD", "Wrong log-in for account 'admin' in http file handler for path '" + path + "' from host '" + clientIP + "'");
Integer attempts = (Integer) serverCore.bfHost.get(clientIP);
if (attempts == null)
@ -473,7 +473,7 @@ public final class httpdFileHandler {
// call an image-servlet to produce an on-the-fly - generated image
Object img = null;
try {
requestHeader.put(httpHeader.CONNECTION_PROP_CLIENTIP, conProp.getProperty("CLIENTIP"));
requestHeader.put(httpHeader.CONNECTION_PROP_CLIENTIP, conProp.getProperty(httpHeader.CONNECTION_PROP_CLIENTIP));
requestHeader.put(httpHeader.CONNECTION_PROP_PATH, path);
// in case that there are no args given, args = null or empty hashmap
img = invokeServlet(targetClass, requestHeader, args);
@ -527,7 +527,7 @@ public final class httpdFileHandler {
}
} else if ((targetClass != null) && (path.endsWith(".stream"))) {
// call rewrite-class
requestHeader.put(httpHeader.CONNECTION_PROP_CLIENTIP, conProp.getProperty("CLIENTIP"));
requestHeader.put(httpHeader.CONNECTION_PROP_CLIENTIP, conProp.getProperty(httpHeader.CONNECTION_PROP_CLIENTIP));
requestHeader.put(httpHeader.CONNECTION_PROP_PATH, path);
//requestHeader.put(httpHeader.CONNECTION_PROP_INPUTSTREAM, body);
//requestHeader.put(httpHeader.CONNECTION_PROP_OUTPUTSTREAM, out);
@ -570,7 +570,7 @@ public final class httpdFileHandler {
} else {
// CGI-class: call the class to create a property for rewriting
try {
requestHeader.put(httpHeader.CONNECTION_PROP_CLIENTIP, conProp.getProperty("CLIENTIP"));
requestHeader.put(httpHeader.CONNECTION_PROP_CLIENTIP, conProp.getProperty(httpHeader.CONNECTION_PROP_CLIENTIP));
requestHeader.put(httpHeader.CONNECTION_PROP_PATH, path);
// in case that there are no args given, args = null or empty hashmap
Object tmp = invokeServlet(targetClass, requestHeader, args);
@ -586,7 +586,7 @@ public final class httpdFileHandler {
if (tp.containsKey(servletProperties.ACTION_AUTHENTICATE)) {
// handle brute-force protection
if (authorization != null) {
String clientIP = conProp.getProperty("CLIENTIP", "unknown-host");
String clientIP = conProp.getProperty(httpHeader.CONNECTION_PROP_CLIENTIP, "unknown-host");
serverLog.logInfo("HTTPD", "dynamic log-in for account 'admin' in http file handler for path '" + path + "' from host '" + clientIP + "'");
Integer attempts = (Integer) serverCore.bfHost.get(clientIP);
if (attempts == null)

@ -47,8 +47,6 @@ public interface indexRWIEntry {
public String urlHash();
public int quality();
public int virtualAge();
public long lastModified();

@ -26,6 +26,7 @@
package de.anomic.index;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
@ -55,12 +56,13 @@ public class indexRWIEntryOrder extends kelondroAbstractOrder<indexRWIVarEntry>
this.maxdomcount = 0;
}
public void normalizeWith(indexContainer container) {
public ArrayList<indexRWIVarEntry> normalizeWith(indexContainer container) {
// normalize ranking: find minimum and maxiumum of separate ranking criteria
assert (container != null);
ArrayList<indexRWIVarEntry> result = null;
//long s0 = System.currentTimeMillis();
if ((processors > 1) && (container.size() > 10000)) {
if ((processors > 1) && (container.size() > 600)) {
// run minmax with two threads
int middle = container.size() / 2;
minmaxfinder mmf0 = new minmaxfinder(container, 0, middle);
@ -83,6 +85,8 @@ public class indexRWIEntryOrder extends kelondroAbstractOrder<indexRWIVarEntry>
entry = di.next();
this.doms.addScore(entry.getKey(), ((Integer) entry.getValue()).intValue());
}
result = mmf0.decodedEntries;
result.addAll(mmf1.decodedContainer());
//long s1= System.currentTimeMillis(), sc = Math.max(1, s1 - s0);
//System.out.println("***DEBUG*** indexRWIEntry.Order (2-THREADED): " + sc + " milliseconds for " + container.size() + " entries, " + (container.size() / sc) + " entries/millisecond");
} else if (container.size() > 0) {
@ -97,10 +101,12 @@ public class indexRWIEntryOrder extends kelondroAbstractOrder<indexRWIVarEntry>
entry = di.next();
this.doms.addScore(entry.getKey(), ((Integer) entry.getValue()).intValue());
}
result = mmf.decodedContainer();
//long s1= System.currentTimeMillis(), sc = Math.max(1, s1 - s0);
//System.out.println("***DEBUG*** indexRWIEntry.Order (ONETHREAD): " + sc + " milliseconds for " + container.size() + " entries, " + (container.size() / sc) + " entries/millisecond");
}
if (this.doms.size() > 0) this.maxdomcount = this.doms.getMaxScore();
return result;
}
public kelondroOrder<indexRWIVarEntry> clone() {
@ -179,6 +185,7 @@ public class indexRWIEntryOrder extends kelondroAbstractOrder<indexRWIVarEntry>
private int start, end;
private HashMap<String, Integer> doms;
private Integer int1;
ArrayList<indexRWIVarEntry> decodedEntries;
public minmaxfinder(indexContainer container, int start /*including*/, int end /*excluding*/) {
this.container = container;
@ -186,18 +193,20 @@ public class indexRWIEntryOrder extends kelondroAbstractOrder<indexRWIVarEntry>
this.end = end;
this.doms = new HashMap<String, Integer>();
this.int1 = new Integer(1);
this.decodedEntries = new ArrayList<indexRWIVarEntry>();
}
public void run() {
// find min/max to obtain limits for normalization
this.entryMin = null;
this.entryMax = null;
indexRWIRowEntry iEntry;
indexRWIVarEntry iEntry;
int p = this.start;
String dom;
Integer count;
while (p < this.end) {
iEntry = new indexRWIRowEntry(container.get(p++));
iEntry = new indexRWIVarEntry(new indexRWIRowEntry(container.get(p++)));
this.decodedEntries.add(iEntry);
// find min/max
if (this.entryMin == null) this.entryMin = new indexRWIVarEntry(iEntry); else indexRWIVarEntry.min(this.entryMin, iEntry);
if (this.entryMax == null) this.entryMax = new indexRWIVarEntry(iEntry); else indexRWIVarEntry.max(this.entryMax, iEntry);
@ -212,6 +221,10 @@ public class indexRWIEntryOrder extends kelondroAbstractOrder<indexRWIVarEntry>
}
}
/**
 * Returns the list of entries that {@code run()} decoded and appended to
 * {@code decodedEntries} while scanning its slice of the container.
 * The list is returned directly (no copy); it is empty until {@code run()} has added entries.
 *
 * @return the internal list of decoded {@code indexRWIVarEntry} objects
 */
public ArrayList<indexRWIVarEntry> decodedContainer() {
return this.decodedEntries;
}
/**
 * Returns this finder's {@code doms} map (String key to Integer value).
 * The map is returned directly (no copy).
 * NOTE(review): presumably the keys are domain hashes and the values occurrence counts — confirm against run().
 *
 * @return the internal {@code doms} map
 */
public HashMap<String, Integer> domcount() {
return this.doms;
}

@ -88,6 +88,8 @@ public final class indexRWIRowEntry implements indexRWIEntry {
private static final int col_worddistance = 18; // i 1 initial zero; may be used as reserve: is filled during search
private static final int col_reserve = 19; // k 1 reserve
public double termFrequency;
private kelondroRow.Entry entry;
public indexRWIRowEntry(String urlHash,
@ -101,14 +103,14 @@ public final class indexRWIRowEntry implements indexRWIEntry {
int posinphrase, // position of word in its phrase
int posofphrase, // number of the phrase where word appears
int worddistance, // word distance; this is 0 by default, and set to the difference of posintext from two indexes if these are combined (simultanous search). If stored, this shows that the result was obtained by remote search
int sizeOfPage, // # of bytes of the page TODO: not needed any more
long lastmodified, // last-modified time of the document where word appears
long updatetime, // update time; this is needed to compute a TTL for the word, so it can be removed easily if the TTL is short
String language, // (guessed) language of document
char doctype, // type of document
int outlinksSame, // outlinks to same domain
int outlinksOther, // outlinks to other domain
kelondroBitfield flags // attributes to the url and to the word according the url
kelondroBitfield flags, // attributes to the url and to the word according the url
double termFrequency
) {
assert (urlHash.length() == 12) : "urlhash = " + urlHash;
@ -136,6 +138,7 @@ public final class indexRWIRowEntry implements indexRWIEntry {
this.entry.setCol(col_posofphrase, posofphrase);
this.entry.setCol(col_worddistance, worddistance);
this.entry.setCol(col_reserve, 0);
this.termFrequency = termFrequency;
}
public indexRWIRowEntry(String urlHash, String code) {
@ -183,10 +186,6 @@ public final class indexRWIRowEntry implements indexRWIEntry {
return this.entry.getColString(col_urlhash, null);
}
public int quality() {
return 0; // not used any more
}
public int virtualAge() {
return (int) this.entry.getColLong(col_lastModified); // this is the time in MicoDateDays format
}
@ -256,7 +255,8 @@ public final class indexRWIRowEntry implements indexRWIEntry {
}
public double termFrequency() {
return (((double) this.hitcount()) / ((double) (this.wordsintext() + this.wordsintitle() + 1)));
if (this.termFrequency == 0.0) this.termFrequency = (((double) this.hitcount()) / ((double) (this.wordsintext() + this.wordsintitle() + 1)));
return this.termFrequency;
}
public String toString() {
@ -288,18 +288,12 @@ public final class indexRWIRowEntry implements indexRWIEntry {
public boolean isNewer(indexRWIEntry other) {
if (other == null) return true;
if (this.lastModified() > other.lastModified()) return true;
if (this.lastModified() == other.lastModified()) {
if (this.quality() > other.quality()) return true;
}
return false;
}
public boolean isOlder(indexRWIEntry other) {
if (other == null) return false;
if (this.lastModified() < other.lastModified()) return true;
if (this.lastModified() == other.lastModified()) {
if (this.quality() < other.quality()) return true;
}
return false;
}

@ -37,7 +37,7 @@ public class indexRWIVarEntry implements indexRWIEntry {
public char type;
public int hitcount, llocal, lother, phrasesintext, posintext,
posinphrase, posofphrase,
quality, urlcomps, urllength, virtualAge,
urlcomps, urllength, virtualAge,
worddistance, wordsintext, wordsintitle;
public double termFrequency;
@ -55,7 +55,6 @@ public class indexRWIVarEntry implements indexRWIEntry {
this.posintext = e.posintext();
this.posinphrase = e.posinphrase();
this.posofphrase = e.posofphrase();
this.quality = e.quality();
this.urlcomps = e.urlcomps();
this.urllength = e.urllength();
this.virtualAge = e.virtualAge();
@ -133,9 +132,29 @@ public class indexRWIVarEntry implements indexRWIEntry {
public int posofphrase() {
return posofphrase;
}
public int quality() {
return quality;
/**
 * Builds a new {@code indexRWIRowEntry} from the current field values of this entry.
 * The arguments are passed positionally, so their order must match the
 * {@code indexRWIRowEntry} constructor signature exactly.
 * The update time is not taken from this entry; it is set to the moment of conversion.
 *
 * @return a freshly constructed row entry carrying this entry's values
 */
private indexRWIRowEntry toRowEntry() {
return new indexRWIRowEntry(
urlHash,
urllength, // byte-length of complete URL
urlcomps, // number of path components
wordsintitle, // length of description/length (longer are better?)
hitcount, // how often appears this word in the text
wordsintext, // total number of words
phrasesintext, // total number of phrases
posintext, // position of word in all words
posinphrase, // position of word in its phrase
posofphrase, // number of the phrase where word appears
worddistance, // word distance
lastModified, // last-modified time of the document where word appears
System.currentTimeMillis(), // update time; set to "now", i.e. the time this conversion runs
language, // (guessed) language of document
type, // type of document
llocal, // outlinks to same domain
lother, // outlinks to other domain
flags, // attributes to the url and to the word according the url
termFrequency // value of the termFrequency field, passed through unchanged (may still be 0.0)
);
}
public Entry toKelondroEntry() {
@ -144,8 +163,7 @@ public class indexRWIVarEntry implements indexRWIEntry {
}
public String toPropertyForm() {
assert false; // should not be used
return null;
return toRowEntry().toPropertyForm();
}
public String urlHash() {
@ -177,7 +195,8 @@ public class indexRWIVarEntry implements indexRWIEntry {
}
public double termFrequency() {
return termFrequency;
if (this.termFrequency == 0.0) this.termFrequency = (((double) this.hitcount()) / ((double) (this.wordsintext() + this.wordsintitle() + 1)));
return this.termFrequency;
}
public static final void min(indexRWIVarEntry t, indexRWIEntry other) {
@ -187,7 +206,6 @@ public class indexRWIVarEntry implements indexRWIEntry {
if (t.hitcount() > (v = other.hitcount())) t.hitcount = v;
if (t.llocal() > (v = other.llocal())) t.llocal = v;
if (t.lother() > (v = other.lother())) t.lother = v;
if (t.quality() > (v = other.quality())) t.quality = v;
if (t.virtualAge() > (v = other.virtualAge())) t.virtualAge = v;
if (t.wordsintext() > (v = other.wordsintext())) t.wordsintext = v;
if (t.phrasesintext() > (v = other.phrasesintext())) t.phrasesintext = v;
@ -210,7 +228,6 @@ public class indexRWIVarEntry implements indexRWIEntry {
if (t.hitcount() < (v = other.hitcount())) t.hitcount = v;
if (t.llocal() < (v = other.llocal())) t.llocal = v;
if (t.lother() < (v = other.lother())) t.lother = v;
if (t.quality() < (v = other.quality())) t.quality = v;
if (t.virtualAge() < (v = other.virtualAge())) t.virtualAge = v;
if (t.wordsintext() < (v = other.wordsintext())) t.wordsintext = v;
if (t.phrasesintext() < (v = other.phrasesintext())) t.phrasesintext = v;

@ -115,7 +115,7 @@ public class indexURLEntry {
private kelondroRow.Entry entry;
private String snippet;
private indexRWIRowEntry word; // this is only used if the url is transported via remote search requests
private indexRWIEntry word; // this is only used if the url is transported via remote search requests
private long ranking; // during generation of a search result this value is set
public indexURLEntry(
@ -185,7 +185,7 @@ public class indexURLEntry {
return s.toString().getBytes();
}
public indexURLEntry(kelondroRow.Entry entry, indexRWIRowEntry searchedWord, long ranking) {
public indexURLEntry(kelondroRow.Entry entry, indexRWIEntry searchedWord, long ranking) {
this.entry = entry;
this.snippet = null;
this.word = searchedWord;
@ -391,7 +391,7 @@ public class indexURLEntry {
return snippet;
}
public indexRWIRowEntry word() {
public indexRWIEntry word() {
return word;
}

@ -119,7 +119,7 @@ public class kelondroSplitTable implements kelondroIndex {
// this is a kelonodroFlex table
table = new kelondroCache(new kelondroFlexTable(path, maxf, preloadTime, rowdef, 0, resetOnFail));
} else {
table = new kelondroEcoTable(f, rowdef, kelondroEcoTable.tailCacheDenyUsage, EcoFSBufferSize, 0);
table = new kelondroEcoTable(f, rowdef, kelondroEcoTable.tailCacheUsageAuto, EcoFSBufferSize, 0);
}
tables.put(date, table);
}

@ -66,7 +66,7 @@ import java.util.LinkedList;
import de.anomic.data.htmlTools;
import de.anomic.http.httpc;
import de.anomic.http.httpc.response;
import de.anomic.index.indexRWIRowEntry;
import de.anomic.index.indexRWIEntry;
import de.anomic.index.indexURLEntry;
import de.anomic.kelondro.kelondroBase64Order;
import de.anomic.kelondro.kelondroCache;
@ -153,7 +153,7 @@ public final class plasmaCrawlLURL {
return 0;
}
public synchronized indexURLEntry load(String urlHash, indexRWIRowEntry searchedWord, long ranking) {
public synchronized indexURLEntry load(String urlHash, indexRWIEntry searchedWord, long ranking) {
// generates an plasmaLURLEntry using the url hash
// to speed up the access, the url-hashes are buffered
// in the hash cache.

@ -69,7 +69,7 @@ public class plasmaCrawlZURL {
if (f.isDirectory()) kelondroFlexTable.delete(cachePath, tablename); else f.delete();
}
}
urlIndex = new kelondroEcoTable(f, rowdef, kelondroEcoTable.tailCacheUsageAuto, EcoFSBufferSize, 0);
urlIndex = new kelondroEcoTable(f, rowdef, kelondroEcoTable.tailCacheDenyUsage, EcoFSBufferSize, 0);
//urlIndex = new kelondroFlexTable(cachePath, tablename, -1, rowdef, 0, true);
}

@ -28,6 +28,7 @@ package de.anomic.plasma;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
@ -40,6 +41,7 @@ import de.anomic.index.indexContainer;
import de.anomic.index.indexRWIEntry;
import de.anomic.index.indexRWIEntryOrder;
import de.anomic.index.indexRWIRowEntry;
import de.anomic.index.indexRWIVarEntry;
import de.anomic.index.indexURLEntry;
import de.anomic.kelondro.kelondroBinSearch;
import de.anomic.kelondro.kelondroMScoreCluster;
@ -52,8 +54,8 @@ public final class plasmaSearchRankingProcess {
public static kelondroBinSearch[] ybrTables = null; // block-rank tables
private static boolean useYBR = true;
private TreeMap<Object, indexRWIRowEntry> sortedRWIEntries; // key = ranking (Long); value = indexRWIEntry; if sortorder < 2 then key is instance of String
private HashMap<String, TreeMap<Object, indexRWIRowEntry>> doubleDomCache; // key = domhash (6 bytes); value = TreeMap like sortedRWIEntries
private TreeMap<Object, indexRWIVarEntry> sortedRWIEntries; // key = ranking (Long); value = indexRWIEntry; if sortorder < 2 then key is instance of String
private HashMap<String, TreeMap<Object, indexRWIVarEntry>> doubleDomCache; // key = domhash (6 bytes); value = TreeMap like sortedRWIEntries
private HashMap<String, String> handover; // key = urlhash, value = urlstring; used for double-check of urls that had been handed over to search process
private plasmaSearchQuery query;
private int sortorder;
@ -72,8 +74,8 @@ public final class plasmaSearchRankingProcess {
// attention: if minEntries is too high, this method will not terminate within the maxTime
// sortorder: 0 = hash, 1 = url, 2 = ranking
this.localSearchContainerMaps = null;
this.sortedRWIEntries = new TreeMap<Object, indexRWIRowEntry>();
this.doubleDomCache = new HashMap<String, TreeMap<Object, indexRWIRowEntry>>();
this.sortedRWIEntries = new TreeMap<Object, indexRWIVarEntry>();
this.doubleDomCache = new HashMap<String, TreeMap<Object, indexRWIVarEntry>>();
this.handover = new HashMap<String, String>();
this.order = null;
this.query = query;
@ -132,11 +134,11 @@ public final class plasmaSearchRankingProcess {
this.remote_indexCount += index.size();
}
indexRWIRowEntry ientry;
indexRWIVarEntry ientry;
indexURLEntry uentry;
String u;
loop: while (en.hasNext()) {
ientry = en.next();
ientry = new indexRWIVarEntry(en.next());
// check constraints
if (!testFlags(ientry)) continue loop;
@ -183,13 +185,13 @@ public final class plasmaSearchRankingProcess {
if (this.order == null) {
this.order = new indexRWIEntryOrder(query.ranking);
}
this.order.normalizeWith(index);
ArrayList<indexRWIVarEntry> decodedEntries = this.order.normalizeWith(index);
serverProfiling.update("SEARCH", new plasmaProfiling.searchEvent(query.id(true), plasmaSearchEvent.NORMALIZING, index.size(), System.currentTimeMillis() - timer));
// normalize entries and get ranking
timer = System.currentTimeMillis();
Iterator<indexRWIRowEntry> i = index.entries();
indexRWIRowEntry iEntry, l;
Iterator<indexRWIVarEntry> i = decodedEntries.iterator();
indexRWIVarEntry iEntry, l;
long biggestEntry = 0;
//long s0 = System.currentTimeMillis();
Long r;
@ -272,8 +274,8 @@ public final class plasmaSearchRankingProcess {
private synchronized Object[] /*{Object, indexRWIEntry}*/ bestRWI(boolean skipDoubleDom) {
// returns from the current RWI list the best entry and removed this entry from the list
Object bestEntry;
TreeMap<Object, indexRWIRowEntry> m;
indexRWIRowEntry rwi;
TreeMap<Object, indexRWIVarEntry> m;
indexRWIVarEntry rwi;
while (sortedRWIEntries.size() > 0) {
bestEntry = sortedRWIEntries.firstKey();
rwi = sortedRWIEntries.remove(bestEntry);
@ -283,7 +285,7 @@ public final class plasmaSearchRankingProcess {
m = this.doubleDomCache.get(domhash);
if (m == null) {
// first appearance of dom
m = new TreeMap<Object, indexRWIRowEntry>();
m = new TreeMap<Object, indexRWIVarEntry>();
this.doubleDomCache.put(domhash, m);
return new Object[]{bestEntry, rwi};
}
@ -292,10 +294,10 @@ public final class plasmaSearchRankingProcess {
}
// no more entries in sorted RWI entries. Now take Elements from the doubleDomCache
// find best entry from all caches
Iterator<TreeMap<Object, indexRWIRowEntry>> i = this.doubleDomCache.values().iterator();
Iterator<TreeMap<Object, indexRWIVarEntry>> i = this.doubleDomCache.values().iterator();
bestEntry = null;
Object o;
indexRWIRowEntry bestrwi = null;
indexRWIVarEntry bestrwi = null;
while (i.hasNext()) {
m = i.next();
if (m.size() == 0) continue;
@ -331,7 +333,7 @@ public final class plasmaSearchRankingProcess {
while ((sortedRWIEntries.size() > 0) || (size() > 0)) {
Object[] obrwi = bestRWI(skipDoubleDom);
Object bestEntry = obrwi[0];
indexRWIRowEntry ientry = (indexRWIRowEntry) obrwi[1];
indexRWIVarEntry ientry = (indexRWIVarEntry) obrwi[1];
long ranking = (bestEntry instanceof Long) ? ((Long) bestEntry).longValue() : 0;
indexURLEntry u = wordIndex.loadedURL.load(ientry.urlHash(), ientry, ranking);
if (u != null) {
@ -347,7 +349,7 @@ public final class plasmaSearchRankingProcess {
public synchronized int size() {
//assert sortedRWIEntries.size() == urlhashes.size() : "sortedRWIEntries.size() = " + sortedRWIEntries.size() + ", urlhashes.size() = " + urlhashes.size();
int c = sortedRWIEntries.size();
Iterator<TreeMap<Object, indexRWIRowEntry>> i = this.doubleDomCache.values().iterator();
Iterator<TreeMap<Object, indexRWIVarEntry>> i = this.doubleDomCache.values().iterator();
while (i.hasNext()) c += i.next().size();
return c;
}

@ -414,7 +414,7 @@ public class plasmaSnippetCache {
resInfo = entry.getDocumentInfo();
// read resource body (if it is there)
byte []resourceArray = entry.cacheArray();
byte[] resourceArray = entry.cacheArray();
if (resourceArray != null) {
resContent = new ByteArrayInputStream(resourceArray);
resContentLength = resourceArray.length;

@ -906,7 +906,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
} catch (MalformedURLException e) {
}
} else {
File networkUnitDefinitionFile = new File(rootPath, networkUnitDefinition);
File networkUnitDefinitionFile = (networkUnitDefinition.startsWith("/")) ? new File(networkUnitDefinition) : new File(rootPath, networkUnitDefinition);
if (networkUnitDefinitionFile.exists()) {
initProps = serverFileUtils.loadHashMap(networkUnitDefinitionFile);
this.setConfig(initProps);
@ -2348,14 +2348,14 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
wordStat.posInPhrase,
wordStat.numOfPhrase,
0,
newEntry.size(),
docDate.getTime(),
System.currentTimeMillis(),
language,
doctype,
ioLinks[0].intValue(),
ioLinks[1].intValue(),
condenser.RESULT_FLAGS
condenser.RESULT_FLAGS,
0.0
);
indexContainer wordIdxContainer = plasmaWordIndex.emptyContainer(wordHash, 1);
wordIdxContainer.add(wordIdxEntry);
@ -2573,10 +2573,10 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
if (authorization.length() > 256) return 0;
// authorization by encoded password, only for localhost access
if ((((String) header.get("CLIENTIP", "")).equals("localhost")) && (adminAccountBase64MD5.equals(authorization))) return 3; // soft-authenticated for localhost
if ((((String) header.get(httpHeader.CONNECTION_PROP_CLIENTIP, "")).equals("localhost")) && (adminAccountBase64MD5.equals(authorization))) return 3; // soft-authenticated for localhost
// authorization by hit in userDB
if (userDB.hasAdminRight((String) header.get(httpHeader.AUTHORIZATION, "xxxxxx"), ((String) header.get("CLIENTIP", "")), header.getHeaderCookies())) return 4; //return, because 4=max
if (userDB.hasAdminRight((String) header.get(httpHeader.AUTHORIZATION, "xxxxxx"), ((String) header.get(httpHeader.CONNECTION_PROP_CLIENTIP, "")), header.getHeaderCookies())) return 4; //return, because 4=max
// authorization with admin keyword in configuration
return httpd.staticAdminAuthenticated(authorization, this);

@ -314,13 +314,13 @@ public final class plasmaWordIndex implements indexRI {
wprop.posInPhrase,
wprop.numOfPhrase,
0,
size,
urlModified.getTime(),
System.currentTimeMillis(),
language,
doctype,
outlinksSame, outlinksOther,
wprop.flags);
wprop.flags,
0.0);
addEntry(plasmaCondenser.word2hash(word), ientry, System.currentTimeMillis(), false);
wordCount++;
}

Loading…
Cancel
Save