Prevent the size of the index from being computed too many times.

Because the index size is now provided by Solr, and the only way to
obtain it is a query matching [* TO *], computing the size is quite
complex and time-consuming. Therefore this patch avoids calling the
method wherever possible and, where a call is still necessary, puts a
DoS-preventing barrier in front of it.
pull/1/head
Michael Peter Christen 12 years ago
parent cca19d94d4
commit f7f3e28c5e

@ -77,7 +77,6 @@ public class HostBrowser {
// set default values
prop.put("path", "");
prop.put("result", "");
prop.putNum("ucount", fulltext.collectionSize());
prop.put("hosts", 0);
prop.put("files", 0);
prop.put("admin", admin ? 1 : 0);
@ -90,12 +89,14 @@ public class HostBrowser {
if (!searchAllowed) {
prop.put("result", "You are not allowed to use this page. Please ask an administrator for permission.");
prop.putNum("ucount", 0);
return prop;
}
String path = post == null ? "" : post.get("path", "").trim();
sb.index.fulltext().commit(true);
if (post == null || env == null) {
prop.putNum("ucount", fulltext.collectionSize());
return prop;
}
@ -150,7 +151,8 @@ public class HostBrowser {
fulltext.getDefaultConnector().deleteByQuery(CollectionSchema.failtype_s.getSolrFieldName() + ":\"" + FailType.fail.name() + "\"" );
Log.logInfo ("HostBrowser:", "delete documents with failtype_s = fail");
fulltext.getDefaultConnector().deleteByQuery(CollectionSchema.failtype_s.getSolrFieldName() + ":\"" + FailType.excl.name() + "\"" );
Log.logInfo ("HostBrowser:", "delete documents with failtype_s = excl");
Log.logInfo ("HostBrowser:", "delete documents with failtype_s = excl");
prop.putNum("ucount", fulltext.collectionSize());
return prop;
} catch (IOException ex) {
Log.logException(ex);
@ -499,9 +501,8 @@ public class HostBrowser {
}
}
// insert constants
prop.putNum("ucount", fulltext.collectionSize());
// return rewrite properties
prop.putNum("ucount", fulltext.collectionSize());
return prop;
}

@ -66,7 +66,6 @@ public class IndexControlURLs_p {
prop.put("urlstring", "");
prop.put("urlhash", "");
prop.put("result", "");
prop.putNum("ucount", segment.fulltext().collectionSize());
prop.put("otherHosts", "");
prop.put("genUrlProfile", 0);
prop.put("statistics", 1);
@ -117,6 +116,7 @@ public class IndexControlURLs_p {
}
if (post == null || env == null) {
prop.putNum("ucount", segment.fulltext().collectionSize());
return prop; // nothing to do
}

@ -33,6 +33,9 @@ import net.yacy.visualization.RasterPlotter;
public class PerformanceGraph {
private static long indeSizeCache = 0;
private static long indexSizeTime = 0;
public static RasterPlotter respond(@SuppressWarnings("unused") final RequestHeader header, serverObjects post, final serverSwitch env) {
final Switchboard sb = (Switchboard) env;
@ -42,7 +45,13 @@ public class PerformanceGraph {
final int height = post.getInt("height", 240);
final boolean showMemory = !post.containsKey("nomem");
return ProfilingGraph.performanceGraph(width, height, sb.index.URLCount() + " URLS / " + sb.index.RWICount() + " WORDS IN INDEX / " + sb.index.RWIBufferCount() + " WORDS IN CACHE", showMemory);
long t = System.currentTimeMillis();
if (t - indexSizeTime > 10000) {
indeSizeCache = sb.index.fulltext().collectionSize();
indexSizeTime = t;
}
RasterPlotter graph = ProfilingGraph.performanceGraph(width, height, indeSizeCache + " URLS / " + sb.index.RWICount() + " WORDS IN INDEX / " + sb.index.RWIBufferCount() + " WORDS IN CACHE", showMemory);
return graph;
}
}

@ -139,7 +139,7 @@ public final class hello {
prop.put("yourip", reportedip);
remoteSeed.setIP(reportedip);
time = System.currentTimeMillis();
callback = Protocol.queryUrlCount(remoteSeed);
callback = Protocol.queryRWICount(remoteSeed, "Tq418bNZd6AO");
time_backping = System.currentTimeMillis() - time;
backping_method = "reportedip=" + reportedip;
} else {
@ -163,7 +163,7 @@ public final class hello {
prop.put("yourip", clientip);
remoteSeed.setIP(clientip);
time = System.currentTimeMillis();
callback = Protocol.queryUrlCount(remoteSeed);
callback = Protocol.queryRWICount(remoteSeed, "Tq418bNZd6AO"); // hash for "www"; the actual count is irrelevant, we just want to know if this works
time_backping = System.currentTimeMillis() - time;
backping_method = "clientip=" + clientip;
}

@ -109,7 +109,7 @@ public final class query {
if (obj.equals("lurlcount")) {
// return the number of all available l-url's
prop.put("response", sb.index.fulltext().collectionSize());
prop.put("response", 1 /*sb.index.fulltext().collectionSize()*/); // patched to no longer call collectionSize() because the actual size is not needed; callers should query rwicount instead
return prop;
}

@ -34,6 +34,9 @@ import net.yacy.server.serverSwitch;
public class yacyinteractive {
private static long indeSizeCache = 0;
private static long indexSizeTime = 0;
public static serverObjects respond(@SuppressWarnings("unused") final RequestHeader header, serverObjects post, final serverSwitch env) {
final Switchboard sb = (Switchboard) env;
final serverObjects prop = new serverObjects();
@ -57,7 +60,13 @@ public class yacyinteractive {
prop.putHTML("querys", query.replaceAll(" ", "+"));
prop.put("serverlist", query.isEmpty() ? 1 : 0);
prop.put("focus", focus ? 1 : 0);
prop.put("allowrealtime", sb.index.URLCount() < 100000 ? 1 : 0);
long t = System.currentTimeMillis();
if (t - indexSizeTime > 60000) {
indeSizeCache = sb.index.fulltext().collectionSize();
indexSizeTime = t;
}
prop.put("allowrealtime", indeSizeCache < 100000 ? 1 : 0);
return prop;
}
}

@ -144,7 +144,6 @@ public class Network
if ( (this.sb.peers.lastSeedUpload_myIP.equals(this.sb.peers.mySeed().getIP()))
&& (this.sb.peers.lastSeedUpload_seedDBSize == this.sb.peers.sizeConnected())
&& (canReachMyself())
&& (System.currentTimeMillis() - this.sb.peers.lastSeedUpload_timeStamp < 1000 * 60 * 60 * 24)
&& (this.sb.peers.mySeed().isPrincipal()) ) {
if ( log.isFine() ) {
@ -213,20 +212,6 @@ public class Network
}
}
/**
 * Checks whether this peer can reach itself by sending a url-count query
 * (Protocol.queryUrlCount) to its own seed.
 *
 * @return true if the self-query returned a non-negative count together with
 *         the expected magic value; otherwise the seed is re-published with
 *         publishMySeed(true) and the result is true only if the public
 *         address was non-null before and is still the same afterwards
 */
private boolean canReachMyself() { // TODO: check if this method is necessary - depending on the used router it will not work
// returns true if we can reach ourself under our known peer address
// if we cannot reach ourself, we call publishMySeed(true) and return whether our public address stayed the same
// callback[0] is the reported url count, callback[1] the magic value reported by the answering peer
final long[] callback = Protocol.queryUrlCount(this.sb.peers.mySeed());
// a non-negative count plus a matching magic means the answer came from ourself
// NOTE(review): 'magic' is declared outside this view - confirm what it is compared against
if ( callback[0] >= 0 && callback[1] == magic ) {
this.sb.peers.mySeed().setLastSeenUTC();
return true;
}
log.logInfo("re-connect own seed");
// remember the address before re-publishing so that a change can be detected
final String oldAddress = this.sb.peers.mySeed().getPublicAddress();
/*final int newSeeds =*/publishMySeed(true);
return (oldAddress != null && oldAddress.equals(this.sb.peers.mySeed().getPublicAddress()));
}
// use our own formatter to prevent concurrency locks with other processes
private final static GenericFormatter my_SHORT_SECOND_FORMATTER = new GenericFormatter(
GenericFormatter.FORMAT_SHORT_SECOND,

@ -419,90 +419,72 @@ public final class Protocol {
}
}
public static int queryRWICount(final Seed target, final String wordHash) {
public static long[] queryRWICount(final Seed target, final String wordHash) {
if (target == null) return new long[] {-1, -1};
// prepare request
final String salt = crypt.randomSalt();
// send request
try {
final Map<String, ContentBody> parts =
basicRequestParts(Switchboard.getSwitchboard(), target.hash, salt);
final Map<String, ContentBody> parts = basicRequestParts(Switchboard.getSwitchboard(), target.hash, salt);
parts.put("object", UTF8.StringBody("rwicount"));
parts.put("ttl", UTF8.StringBody("0"));
parts.put("env", UTF8.StringBody(wordHash));
final byte[] content = postToFile(target, "query.html", parts, 6000);
final Map<String, String> result = FileUtils.table(content);
if ( result == null || result.isEmpty() ) {
return -1;
if (result == null || result.isEmpty()) return new long[] {-1, -1};
final String resp = result.get("response");
if (resp == null) return new long[] {-1, -1};
String magic = result.get("magic");
if (magic == null) magic = "0";
try {
return new long[] {Long.parseLong(resp), Long.parseLong(magic)};
} catch ( final NumberFormatException e ) {
return new long[] {-1, -1};
}
return Integer.parseInt(result.get("response"));
} catch ( final Exception e ) {
Network.log.logWarning("yacyClient.queryRWICount error:" + e.getMessage());
return -1;
return new long[] {-1, -1};
}
}
/**
* check the status of a remote peer
*
* This method is effectively deprecated: it is no longer used because computing the url count causes high CPU load.
* @param target
* @return an array of two long: [0] is the count of urls, [1] is a magic
*/
public static long[] queryUrlCount(final Seed target) {
if ( target == null ) {
return new long[] {
-1, -1
};
}
if (target == null) return new long[] {-1, -1};
// prepare request
final String salt = crypt.randomSalt();
// send request
try {
final Map<String, ContentBody> parts =
basicRequestParts(Switchboard.getSwitchboard(), target.hash, salt);
final Map<String, ContentBody> parts = basicRequestParts(Switchboard.getSwitchboard(), target.hash, salt);
parts.put("object", UTF8.StringBody("lurlcount"));
parts.put("ttl", UTF8.StringBody("0"));
parts.put("env", UTF8.StringBody(""));
final byte[] content = postToFile(target, "query.html", parts, 6000);
final Map<String, String> result = FileUtils.table(content);
if ( result == null || result.isEmpty() ) {
return new long[] {
-1, -1
};
}
if (result == null || result.isEmpty()) return new long[] {-1, -1};
final String resp = result.get("response");
if ( resp == null ) {
return new long[] {
-1, -1
};
}
if (resp == null) return new long[] {-1, -1};
String magic = result.get("magic");
if ( magic == null ) {
magic = "0";
}
if (magic == null) magic = "0";
try {
return new long[] {
Long.parseLong(resp), Long.parseLong(magic)
};
return new long[] {Long.parseLong(resp), Long.parseLong(magic)};
} catch ( final NumberFormatException e ) {
return new long[] {
-1, -1
};
return new long[] {-1, -1};
}
} catch ( final IOException e ) {
if ( Network.log.isFine() ) {
Network.log.logFine("yacyClient.queryUrlCount error asking peer '"
+ target.getName()
+ "':"
+ e.toString());
Network.log.logFine("yacyClient.queryUrlCount error asking peer '" + target.getName() + "':" + e.toString());
}
return new long[] {
-1, -1
};
return new long[] {-1, -1};
}
}

@ -3507,6 +3507,8 @@ public final class Switchboard extends serverSwitch {
return (this.searchQueriesRobinsonFromRemote) * 60f / Math.max(uptime, 1f);
}
private static long indeSizeCache = 0;
private static long indexSizeTime = 0;
public void updateMySeed() {
this.peers.mySeed().put(Seed.PORT, Integer.toString(serverCore.getPortNr(getConfig("port", "8090"))));
@ -3515,7 +3517,13 @@ public final class Switchboard extends serverSwitch {
this.peers.mySeed().put(Seed.ISPEED, Integer.toString(currentPPM()));
this.peers.mySeed().put(Seed.RSPEED, Float.toString(averageQPM()));
this.peers.mySeed().put(Seed.UPTIME, Long.toString(uptime / 60)); // the number of minutes that the peer is up in minutes/day (moving average MA30)
this.peers.mySeed().put(Seed.LCOUNT, Long.toString(this.index.URLCount())); // the number of links that the peer has stored (LURL's)
long t = System.currentTimeMillis();
if (t - indexSizeTime > 60000) {
indeSizeCache = sb.index.fulltext().collectionSize();
indexSizeTime = t;
}
this.peers.mySeed().put(Seed.LCOUNT, Long.toString(indeSizeCache)); // the number of links that the peer has stored (LURL's)
this.peers.mySeed().put(Seed.NCOUNT, Integer.toString(this.crawlQueues.noticeURL.size())); // the number of links that the peer has noticed, but not loaded (NURL's)
this.peers.mySeed().put(
Seed.RCOUNT,

@ -265,9 +265,15 @@ public final class Fulltext {
* get the size of the default index
* @return
*/
// timestamp (System.currentTimeMillis()) of the last actual size computation; initially 0
private long collectionSizeLastAccess = 0;
// value produced by the last actual size computation; initially 0
private long collectionSizeLastValue = 0;
/**
 * Returns the size of the index, recomputing it at most once per second.
 *
 * If the previous computation happened less than 1000 ms ago, the value
 * stored by that computation is returned without touching the index.
 * Otherwise the size is computed as the size of urlIndexFile (0 if it is
 * null) plus the size reported by the default connector, and both the
 * timestamp and the value are stored for subsequent calls.
 *
 * NOTE(review): the two cache fields are plain, unsynchronized longs -
 * confirm whether concurrent callers need a stronger guarantee.
 *
 * @return the freshly computed or the cached index size
 */
public long collectionSize() {
long t = System.currentTimeMillis();
// barrier against too frequent (expensive) size computations: serve the cached value
if (t - this.collectionSizeLastAccess < 1000) return this.collectionSizeLastValue;
long size = this.urlIndexFile == null ? 0 : this.urlIndexFile.size();
size += this.getDefaultConnector().getSize();
// remember when and what we computed so that the next call within one second can reuse it
this.collectionSizeLastAccess = t;
this.collectionSizeLastValue = size;
return size;
}

@ -258,10 +258,6 @@ public class Segment {
}
return 999;
}
/**
 * Convenience accessor for the url count of this segment.
 *
 * @return the value of this.fulltext.collectionSize()
 */
public long URLCount() {
return this.fulltext.collectionSize();
}
public long RWICount() {
if (this.termIndex == null) return 0;

Loading…
Cancel
Save