Merge branch 'master' of ssh://git@gitorious.org/yacy/rc1.git

pull/1/head
Michael Peter Christen 11 years ago
commit 42f3733a05

@ -25,7 +25,7 @@
<key>Java</key>
<dict>
<key>VMOptions</key>
<string>-server -d64 -Xmx600m -Xms90m -Djava.net.preferIPv4Stack=true -Dfile.encoding=UTF-8 -Dsolr.directoryFactory=solr.MMapDirectoryFactory</string>
<string>-server -Xmx600m -Xms90m -Djava.net.preferIPv4Stack=true -Dfile.encoding=UTF-8 -Dsolr.directoryFactory=solr.MMapDirectoryFactory</string>
<key>WorkingDirectory</key>
<string>$APP_PACKAGE/Contents/Resources/Java</string>
<key>MainClass</key>

@ -57,7 +57,7 @@ SHUTDOWN_TIMEOUT=50
# Default niceness if not set in config file
NICE_VAL=0
JAVA_ARGS="-server -Djava.net.preferIPv4Stack=true -Djava.awt.headless=true -Dfile.encoding=UTF-8 -d64 -Dsolr.directoryFactory=solr.MMapDirectoryFactory"
JAVA_ARGS="-server -Djava.net.preferIPv4Stack=true -Djava.awt.headless=true -Dfile.encoding=UTF-8 -Dsolr.directoryFactory=solr.MMapDirectoryFactory"
ifdef(`openSUSE', `dnl
. /etc/rc.status

@ -260,7 +260,7 @@ public class Crawler_p {
final int crawlingDomMaxPages = (crawlingDomMaxCheck) ? post.getInt("crawlingDomMaxPages", -1) : -1;
env.setConfig("crawlingDomMaxPages", Integer.toString(crawlingDomMaxPages));
boolean crawlingQ = "on".equals(post.get("crawlingQ", "on"));
boolean crawlingQ = "on".equals(post.get("crawlingQ", "off")); // on unchecked checkbox "crawlingQ" not contained in post
env.setConfig("crawlingQ", crawlingQ);
boolean followFrames = "on".equals(post.get("followFrames", "on"));

@ -46,6 +46,7 @@ import net.yacy.kelondro.data.meta.URIMetadataRow;
import net.yacy.kelondro.data.word.WordReferenceRow;
import net.yacy.kelondro.index.RowHandleSet;
import net.yacy.kelondro.util.FileUtils;
import net.yacy.kelondro.util.MemoryControl;
import net.yacy.peers.EventChannel;
import net.yacy.peers.Network;
import net.yacy.peers.Protocol;
@ -70,20 +71,40 @@ public final class transferRWI {
final serverObjects prop = new serverObjects();
final String contentType = header.getContentType();
prop.put("unknownURL", "");
prop.put("pause", 60000);
String result = "";
if ((post == null) || (env == null)) {
logWarning(contentType, "post or env is null!");
result = "post or env is null!";
logWarning(contentType, result);
prop.put("result", result);
return prop;
}
if (!Protocol.authentifyRequest(post, env)) {
logWarning(contentType, "not authentified");
result = "not authentified";
prop.put("result", result);
return prop;
}
if (!post.containsKey("wordc")) {
logWarning(contentType, "missing wordc");
result = "missing wordc";
prop.put("result", result);
return prop;
}
if (!post.containsKey("entryc")) {
logWarning(contentType, "missing entryc");
result = "missing entryc";
prop.put("result", result);
return prop;
}
if (!post.containsKey("indexes")) {
result = "missing indexes";
prop.put("result", result);
return prop;
}
// load tests
if (Memory.load() > 2.0 || MemoryControl.shortStatus()) {
// check also Protocol.metadataRetrievalRunning.get() > 0 ?
result = "too high load"; // don't tell too much details
prop.put("result", result);
return prop;
}
@ -102,7 +123,7 @@ public final class transferRWI {
// response values
int pause = 0;
String result = "ok";
result = "ok";
final StringBuilder unknownURLs = new StringBuilder(6000);
double load = Memory.load();

@ -97,7 +97,7 @@ abstract public class AbstractRemoteHandler extends AbstractHandler implements H
}
InetAddress resolvedIP = Domains.dnsResolve(hostOnly); // during testing isLocal() failed to resolve domain against publicIP
if (sb.myPublicIP().equals(resolvedIP.getHostAddress())) {
if (resolvedIP != null && sb.myPublicIP().equals(resolvedIP.getHostAddress())) {
localVirtualHostNames.add(resolvedIP.getHostName()); // remember resolved hostname
//localVirtualHostNames.add(resolved.getHostAddress()); // might change ?
return;

@ -104,6 +104,7 @@ public class DHTSelection {
if (seed == null) continue;
if (omit != null && omit.contains(seed)) continue; // sort out peers that are target for DHT
if (seed.isLastSeenTimeout(3600000)) continue; // do not ask peers that had not been seen more than one hour (happens during a startup situation)
if (!seed.getFlagSolrAvailable()) continue; // extra peers always use solr direct, skip if solr interface is not available
if (!seed.getFlagAcceptRemoteIndex() && seed.matchPeerTags(wordhashes)) seedSelection.dec(seed, r.nextInt(10) + 2); // robinson peers with matching peer tags
if (seed.getFlagRootNode()) seedSelection.dec(seed, r.nextInt(30) + 6); // root nodes (fast peers)
if (seed.getAge() < minage) seedSelection.dec(seed, r.nextInt(15) + 3); // young peers (with fresh info)

@ -37,6 +37,7 @@ import net.yacy.cora.federate.yacy.Distribution;
import net.yacy.cora.order.Base64Order;
import net.yacy.cora.storage.HandleSet;
import net.yacy.cora.util.ConcurrentLog;
import net.yacy.cora.util.Memory;
import net.yacy.cora.util.SpaceExceededException;
import net.yacy.kelondro.data.meta.URIMetadataRow;
import net.yacy.kelondro.data.word.Word;
@ -116,7 +117,7 @@ public class Dispatcher {
gzipBody,
timeout);
final int concurrentSender = Math.min(32, WorkflowProcessor.availableCPU * 2);
final int concurrentSender = Math.min(8, WorkflowProcessor.availableCPU);
this.indexingTransmissionProcessor = new WorkflowProcessor<Transmission.Chunk>(
"transferDocumentIndex",
"This is the RWI transmission process",
@ -399,6 +400,10 @@ public class Dispatcher {
*/
public Transmission.Chunk transferDocumentIndex(final Transmission.Chunk chunk) {
// try to keep the system healthy; sleep as long as System load is too high
while (Protocol.metadataRetrievalRunning.get() > 0) try {Thread.sleep(1000);} catch (InterruptedException e) {break;}
while (Memory.load() > 2.0) try {Thread.sleep(10000);} catch (InterruptedException e) {break;}
// do the transmission
final boolean success = chunk.transmit();

@ -56,6 +56,7 @@ import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import java.util.concurrent.atomic.AtomicInteger;
import net.yacy.migration;
import net.yacy.cora.date.GenericFormatter;
@ -1047,6 +1048,10 @@ public final class Protocol {
} else {
try {
final boolean myseed = target == event.peers.mySeed();
if (!myseed && !target.getFlagSolrAvailable()) { // skip if peer.dna has flag that last try resulted in error
Network.log.info("SEARCH skip (solr), remote Solr interface not accessible, peer=" + target.getName());
return -1;
}
final String address = myseed ? "localhost:" + target.getPort() : target.getPublicAddress();
final int solrtimeout = Switchboard.getSwitchboard().getConfigInt(SwitchboardConstants.FEDERATED_SERVICE_SOLR_INDEXING_TIMEOUT, 6000);
Thread remoteRequest = new Thread() {
@ -1072,17 +1077,20 @@ public final class Protocol {
if (remoteRequest.isAlive()) {
try {remoteRequest.interrupt();} catch (Throwable e) {}
Network.log.info("SEARCH failed (solr), remote Peer: " + target.getName() + "/" + target.getPublicAddress() + " does not answer (time-out)");
target.setFlagSolrAvailable(false || myseed);
return -1; // give up, leave remoteRequest abandoned.
}
// no need to close this here because that sends a commit to remote solr which is not wanted here
} catch (final Throwable e) {
Network.log.info("SEARCH failed (solr), remote Peer: " + target.getName() + "/" + target.getPublicAddress() + " (" + e.getMessage() + ")");
target.setFlagSolrAvailable(false || localsearch);
return -1;
}
}
if (rsp[0] == null || docList[0] == null) {
Network.log.info("SEARCH failed (solr), remote Peer: " + target.getName() + "/" + target.getPublicAddress() + " returned null");
target.setFlagSolrAvailable(false || localsearch);
return -1;
}
@ -1327,6 +1335,8 @@ public final class Protocol {
}
}
public static AtomicInteger metadataRetrievalRunning = new AtomicInteger(0);
/**
* transfer the index. If the transmission fails, return a string describing the cause. If everything is
* ok, return null.
@ -1402,22 +1412,19 @@ public final class Protocol {
return null;
} // all url's known
// extract the urlCache from the result
// extract the urlCache from the result; this is io-intensive;
// other transmissions should not be started as long as this is running
final URIMetadataNode[] urls = new URIMetadataNode[uhs.length];
byte[] key;
for ( int i = 0; i < uhs.length; i++ ) {
metadataRetrievalRunning.incrementAndGet();
for (int i = 0; i < uhs.length; i++) {
key = ASCII.getBytes(uhs[i]);
if ( urlRefs.has(key) ) urls[i] = segment.fulltext().getMetadata(key);
if ( urls[i] == null ) {
if ( Network.log.isFine() ) {
Network.log.fine("DEBUG transferIndex: requested url hash '"
+ uhs[i]
+ "', unknownURL='"
+ uhss
+ "'");
}
if (urlRefs.has(key)) urls[i] = segment.fulltext().getMetadata(key);
if (urls[i] == null) {
if (Network.log.isFine()) Network.log.fine("DEBUG transferIndex: requested url hash '" + uhs[i] + "', unknownURL='" + uhss + "'");
}
}
metadataRetrievalRunning.decrementAndGet();
in = transferURL(targetSeed, urls, gzipBody, timeout);

@ -211,10 +211,10 @@ public class RemoteSearch extends Thread {
if (!Switchboard.getSwitchboard().getConfigBool(SwitchboardConstants.DEBUG_SEARCH_REMOTE_SOLR_OFF, false)) {
final SolrQuery solrQuery = event.query.solrQuery(event.getQuery().contentdom, start == 0, event.excludeintext_image);
for (Seed s: robinsonPeers) {
Thread t = solrRemoteSearch(event, solrQuery, start, count, s, targets, blacklist);
event.nodeSearchThreads.add(t);
Thread t = solrRemoteSearch(event, solrQuery, start, count, s, targets, blacklist);
event.nodeSearchThreads.add(t);
}
}
}
// start search to YaCy DHT peers
if (!Switchboard.getSwitchboard().getConfigBool(SwitchboardConstants.DEBUG_SEARCH_REMOTE_DHT_OFF, false)) {
@ -309,7 +309,10 @@ public class RemoteSearch extends Thread {
// check own peer status
if (event.peers.mySeed() == null || event.peers.mySeed().getPublicAddress() == null) { return null; }
// prepare seed targets and threads
if (targetPeer != null && targetPeer.hash != null && event.preselectedPeerHashes != null) targetPeer.setAlternativeAddress(event.preselectedPeerHashes.get(ASCII.getBytes(targetPeer.hash)));
if (targetPeer != null && targetPeer.hash != null && event.preselectedPeerHashes != null) {
if (!targetPeer.getFlagSolrAvailable()) return null; // solr interface not avail.
targetPeer.setAlternativeAddress(event.preselectedPeerHashes.get(ASCII.getBytes(targetPeer.hash)));
}
Thread solr = new Thread() {
@Override
public void run() {

@ -171,6 +171,7 @@ public class Seed implements Cloneable, Comparable<Seed>, Comparator<Seed>
public static final String SEEDLISTURL = "seedURL";
public static final String NEWS = "news"; // news attachment
public static final String DCT = "dct"; // disconnect time
public static final String SOLRAVAILABLE ="SorlAvail"; // field to remember if remotePeer solr interface is avail.
/** zero-value */
private static final String ZERO = "0";
@ -837,6 +838,31 @@ public class Seed implements Cloneable, Comparable<Seed>, Comparator<Seed>
return getFlag(FLAG_SSL_AVAILABLE);
}
/**
* remembers the status of the remote Solr interface dynamically;
* should not be used for the local peer
* @param value
*/
public final void setFlagSolrAvailable(final boolean value) {
    // the state is stored as a textual marker in the seed's dna map:
    // "OK" when the caller reports the interface as available, "NA" otherwise
    final String marker = value ? "OK" : "NA";
    this.dna.put(Seed.SOLRAVAILABLE, marker);
}
/**
* gets the last set result for remote solr status
*
* @return false only if the last recorded status is "NA"; true if it is "OK" or unknown (never checked)
*/
public final boolean getFlagSolrAvailable() {
    // the field is intended to deal with 3 states:
    // null = never checked, "OK" = available, "NA" = not available
    final String status = this.dna.get(Seed.SOLRAVAILABLE);
    // only an explicit "NA" marks the interface as unavailable;
    // a missing entry (null) or any other value counts as available
    if ("NA".equals(status)) {
        return false;
    }
    return true;
}
public final void setUnusedFlags() {
for ( int i = 4; i < 20; i++ ) {
setFlag(i, false);

@ -268,6 +268,9 @@ public class Transmission {
this.hit++;
return true;
}
Transmission.this.log.info(
"Index transfer to peer " + target.getName() + ":" + target.hash +
" failed: " + error);
this.miss++;
// write information that peer does not receive index transmissions
Transmission.this.log.info("Transfer failed of chunk to target " + target.hash + "/" + target.getName() + ": " + error);

@ -133,7 +133,7 @@ then
fi
#turn on MMap for Solr if OS is a 64bit OS
if [ -n "`uname -m | grep 64`" ]; then JAVA_ARGS="$JAVA_ARGS -d64 -Dsolr.directoryFactory=solr.MMapDirectoryFactory"; fi
if [ -n "`uname -m | grep 64`" ]; then JAVA_ARGS="$JAVA_ARGS -Dsolr.directoryFactory=solr.MMapDirectoryFactory"; fi
if [ ! -f $CONFIGFILE -a -f DATA/SETTINGS/httpProxy.conf ]
then

Loading…
Cancel
Save