Merge branch 'master' of ssh://gitorious.org/yacy/rc1

pull/1/head
orbiter 11 years ago
commit f1a395ed46

@ -613,7 +613,7 @@ collection=user
50_localcrawl_idlesleep=2000
50_localcrawl_busysleep=10
50_localcrawl_memprereq=12582912
50_localcrawl_loadprereq=8.0
50_localcrawl_loadprereq=32.0
50_localcrawl_isPaused=false
60_remotecrawlloader_idlesleep=4000
60_remotecrawlloader_busysleep=800

@ -318,7 +318,7 @@ public class Status
}
// memory usage and system attributes
prop.put("usedMemory", Formatter.bytesToString(MemoryControl.total()));
prop.put("usedMemory", Formatter.bytesToString(MemoryControl.used()));
prop.put("maxMemory", Formatter.bytesToString(MemoryControl.maxMemory()));
prop.put("usedDisk", Formatter.bytesToString(sb.observer.getSizeOfDataPath(true)));
prop.put("freeDisk", Formatter.bytesToString(sb.observer.getUsableSpace()));

@ -37,6 +37,7 @@ import net.yacy.cora.document.encoding.UTF8;
import net.yacy.cora.sorting.ClusteredScoreMap;
import net.yacy.cora.sorting.ReversibleScoreMap;
import net.yacy.cora.util.LookAheadIterator;
import net.yacy.kelondro.data.word.Word;
import net.yacy.search.schema.CollectionSchema;
import org.apache.solr.client.solrj.SolrQuery;
@ -336,8 +337,8 @@ public abstract class AbstractSolrConnector implements SolrConnector {
@Override
public SolrDocument getDocumentById(final String id, final String ... fields) throws IOException {
assert id.length() == Word.commonHashLength : "wrong id: " + id;
final SolrQuery query = new SolrQuery();
assert id.length() == 12;
// construct query
StringBuilder sb = new StringBuilder(23);
sb.append("{!raw f=").append(CollectionSchema.id.getSolrFieldName()).append('}').append(id);

@ -27,6 +27,7 @@ import java.util.Map;
import net.yacy.cora.sorting.ReversibleScoreMap;
import net.yacy.cora.storage.ARC;
import net.yacy.cora.storage.ConcurrentARC;
import net.yacy.kelondro.data.word.Word;
import net.yacy.kelondro.util.MemoryControl;
import net.yacy.search.schema.CollectionSchema;
@ -140,6 +141,7 @@ public class CachedSolrConnector extends AbstractSolrConnector implements SolrCo
@Override
public SolrDocument getDocumentById(final String id, final String ... fields) throws IOException {
assert id.length() == Word.commonHashLength : "wrong id: " + id;
String q = idQuery(id);
SolrDocument doc = fields.length == 0 ? this.documentCache.get(q) : null;
if (doc != null) {
@ -209,7 +211,9 @@ public class CachedSolrConnector extends AbstractSolrConnector implements SolrCo
*/
@Override
public SolrDocumentList getDocumentListByQuery(final String querystring, final int offset, final int count, final String ... fields) throws IOException {
if (offset == 0 && count == 1 && querystring.startsWith("id:")) {
if (offset == 0 && count == 1 && querystring.startsWith("id:") &&
((querystring.length() == 17 && querystring.charAt(3) == '"' && querystring.charAt(16) == '"') ||
querystring.length() == 15)) {
final SolrDocumentList list = new SolrDocumentList();
SolrDocument doc = getDocumentById(querystring.charAt(3) == '"' ? querystring.substring(4, querystring.length() - 1) : querystring.substring(3), fields);
list.add(doc);

@ -35,6 +35,7 @@ import net.yacy.cora.storage.ARH;
import net.yacy.cora.storage.ConcurrentARC;
import net.yacy.cora.storage.ConcurrentARH;
import net.yacy.cora.util.ConcurrentLog;
import net.yacy.kelondro.data.word.Word;
import net.yacy.kelondro.util.MemoryControl;
import net.yacy.search.schema.CollectionSchema;
@ -107,6 +108,14 @@ public class ConcurrentUpdateSolrConnector implements SolrConnector {
//ConcurrentLog.info("ConcurrentUpdateSolrConnector", "sending " + docs.size() + " documents to solr");
// Push the collected documents to the wrapped connector. Running out of
// memory is treated as recoverable: the caches are dropped to free heap and
// the very same batch is sent once more. I/O failures are logged and the
// batch is given up (best effort), both on the first and on the second attempt.
try {
    ConcurrentUpdateSolrConnector.this.connector.add(docs);
} catch (final OutOfMemoryError e) {
    // clear and try again...
    clearCaches();
    try {
        ConcurrentUpdateSolrConnector.this.connector.add(docs);
    } catch (final IOException ee) {
        // report the failure of the retry itself; the outer OutOfMemoryError
        // was already handled by clearing the caches
        ConcurrentLog.logException(ee);
    }
} catch (final IOException e) {
    ConcurrentLog.logException(e);
}
@ -411,6 +420,7 @@ public class ConcurrentUpdateSolrConnector implements SolrConnector {
@Override
public SolrDocument getDocumentById(final String id, String... fields) throws IOException {
assert id.length() == Word.commonHashLength : "wrong id: " + id;
if (this.missCache.contains(id)) return null;
if (existIdFromDeleteQueue(id)) return null;
SolrInputDocument idoc = getFromUpdateQueue(id);
@ -443,6 +453,15 @@ public class ConcurrentUpdateSolrConnector implements SolrConnector {
@Override
public SolrDocumentList getDocumentListByQuery(String querystring, int offset, int count, String... fields) throws IOException, SolrException {
if (offset == 0 && count == 1 && querystring.startsWith("id:") &&
((querystring.length() == 17 && querystring.charAt(3) == '"' && querystring.charAt(16) == '"') ||
querystring.length() == 15)) {
final SolrDocumentList list = new SolrDocumentList();
SolrDocument doc = getDocumentById(querystring.charAt(3) == '"' ? querystring.substring(4, querystring.length() - 1) : querystring.substring(3), fields);
list.add(doc);
return list;
}
SolrDocumentList sdl = this.connector.getDocumentListByQuery(querystring, offset, count, AbstractSolrConnector.ensureEssentialFieldsIncluded(fields));
/*
Iterator<SolrDocument> i = sdl.iterator();

@ -28,6 +28,7 @@ import java.util.concurrent.BlockingQueue;
import java.util.concurrent.atomic.AtomicLong;
import net.yacy.cora.sorting.ReversibleScoreMap;
import net.yacy.kelondro.data.word.Word;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocument;
@ -172,6 +173,7 @@ public class MirrorSolrConnector extends AbstractSolrConnector implements SolrCo
@Override
public SolrDocument getDocumentById(final String key, final String ... fields) throws IOException {
assert key.length() == Word.commonHashLength : "wrong id: " + key;
SolrDocument doc;
if ((solr0 != null && ((doc = solr0.getDocumentById(key, fields)) != null)) || (solr1 != null && ((doc = solr1.getDocumentById(key, fields)) != null))) {
return doc;
@ -205,7 +207,9 @@ public class MirrorSolrConnector extends AbstractSolrConnector implements SolrCo
@Override
public SolrDocumentList getDocumentListByQuery(final String querystring, final int offset, final int count, final String ... fields) throws IOException {
if (this.solr0 == null && this.solr1 == null) return new SolrDocumentList();
if (offset == 0 && count == 1 && querystring.startsWith("id:")) {
if (offset == 0 && count == 1 && querystring.startsWith("id:") &&
((querystring.length() == 17 && querystring.charAt(3) == '"' && querystring.charAt(16) == '"') ||
querystring.length() == 15)) {
final SolrDocumentList list = new SolrDocumentList();
SolrDocument doc = getDocumentById(querystring.charAt(3) == '"' ? querystring.substring(4, querystring.length() - 1) : querystring.substring(3), fields);
list.add(doc);

@ -2057,7 +2057,7 @@ public final class Switchboard extends serverSwitch {
// set a random password if no password is configured
if ( getConfigBool(SwitchboardConstants.ADMIN_ACCOUNT_FOR_LOCALHOST, false)
&& getConfig(SwitchboardConstants.ADMIN_ACCOUNT_B64MD5, "").isEmpty() ) {
// make a 'random' password
// make a 'random' password, this will keep the ability to log in from localhost without password
setConfig(SwitchboardConstants.ADMIN_ACCOUNT_B64MD5, "0000" + this.genRandomPassword());
setConfig(SwitchboardConstants.ADMIN_ACCOUNT, "");
}

@ -924,10 +924,11 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
// collect hosts from index which shall take part in citation computation
String query = (harvestkey == null || !segment.fulltext().getDefaultConfiguration().contains(CollectionSchema.harvestkey_s) ? "" : CollectionSchema.harvestkey_s.getSolrFieldName() + ":\"" + harvestkey + "\" AND ") +
CollectionSchema.process_sxt.getSolrFieldName() + ":" + ProcessType.CITATION.toString();
CollectionSchema.process_sxt.getSolrFieldName() + ":[* TO *]";
ReversibleScoreMap<String> hostscore;
try {
hostscore = collectionConnector.getFacets(query, 10000000, CollectionSchema.host_s.getSolrFieldName()).get(CollectionSchema.host_s.getSolrFieldName());
Map<String, ReversibleScoreMap<String>> hostfacet = collectionConnector.getFacets(query, 10000000, CollectionSchema.host_s.getSolrFieldName());
hostscore = hostfacet.get(CollectionSchema.host_s.getSolrFieldName());
} catch (final IOException e2) {
ConcurrentLog.logException(e2);
hostscore = new ClusteredScoreMap<String>();
@ -989,6 +990,7 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
CRHost crh = new CRHost(segment, rrCache, host, 0.85d, 6);
int convergence_attempts = 0;
while (convergence_attempts++ < 30) {
ConcurrentLog.info("CollectionConfiguration", "convergence step " + convergence_attempts + " for host " + host + " ...");
if (crh.convergenceStep()) break;
}
ConcurrentLog.info("CollectionConfiguration", "convergence for host " + host + " after " + convergence_attempts + " steps");
@ -1006,8 +1008,11 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
// process all documents at the webgraph for the outgoing links of this document
SolrDocument doc;
int allcount = 0;
if (segment.fulltext().useWebgraph()) {
try {
int proccount = 0;
long start = System.currentTimeMillis();
for (String host: hostscore.keyList(true)) {
if (hostscore.get(host) <= 0) continue;
// select all webgraph edges and modify their cr value
@ -1042,7 +1047,13 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
ConcurrentLog.logException(e);
}
countcheck++;
proccount++; allcount++;
if (proccount % 1000 == 0) ConcurrentLog.info(
"CollectionConfiguration", "webgraph - postprocessed " + proccount + " from " + count + " documents; " +
(proccount * 1000 / (System.currentTimeMillis() - start)) + " docs/second; " +
((System.currentTimeMillis() - start) * (count - proccount) / proccount / 60000) + " minutes remaining");
}
if (count != countcheck) ConcurrentLog.warn("CollectionConfiguration", "ambiguous webgraph document count for host " + host + ": expected=" + count + ", counted=" + countcheck);
}
} catch (final IOException e2) {
@ -1055,10 +1066,10 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
// process all documents in collection
query = (harvestkey == null || !segment.fulltext().getDefaultConfiguration().contains(CollectionSchema.harvestkey_s) ? "" : CollectionSchema.harvestkey_s.getSolrFieldName() + ":\"" + harvestkey + "\" AND ") +
CollectionSchema.process_sxt.getSolrFieldName() + ":[* TO *]";
int proccount = 0, proccount_clickdepthchange = 0, proccount_referencechange = 0, proccount_citationchange = 0, proccount_uniquechange = 0;
Map<String, Long> hostExtentCache = new HashMap<String, Long>(); // a mapping from the host id to the number of documents which contain this host-id
Set<String> uniqueURLs = new HashSet<String>();
try {
int proccount = 0, proccount_clickdepthchange = 0, proccount_referencechange = 0, proccount_citationchange = 0, proccount_uniquechange = 0;
long count = collectionConnector.getCountByQuery(query);
long start = System.currentTimeMillis();
ConcurrentLog.info("CollectionConfiguration", "collecting " + count + " documents from the collection for harvestkey " + harvestkey);
@ -1120,8 +1131,11 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
//connector.deleteById(ASCII.String(id));
collectionConnector.add(sid);
proccount++;
if (proccount % 100 == 0) ConcurrentLog.info("CollectionConfiguration", "postprocessed " + proccount + " from " + count + " documents; " + (proccount * 1000 / (System.currentTimeMillis() - start)) + " docs/second; " + ((System.currentTimeMillis() - start) * (count - proccount) / proccount / 60000) + " minutes remaining");
proccount++; allcount++;
if (proccount % 100 == 0) ConcurrentLog.info(
"CollectionConfiguration", "collection - postprocessed " + proccount + " from " + count + " documents; " +
(proccount * 1000 / (System.currentTimeMillis() - start)) + " docs/second; " +
((System.currentTimeMillis() - start) * (count - proccount) / proccount / 60000) + " minutes remaining");
} catch (final Throwable e1) {
ConcurrentLog.logException(e1);
failids.add(i);
@ -1143,7 +1157,7 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
} catch (IOException e3) {
ConcurrentLog.warn("CollectionConfiguration", e3.getMessage(), e3);
}
return proccount;
return allcount;
}
private static final class CRV {

Loading…
Cancel
Save