luc 9 years ago
commit 49331dc523

2
.gitignore vendored

@ -4,7 +4,7 @@ lib/yacycore.jar
libbuild/svnRevNr.jar
libbuild/GitRevTask.jar
gen/**
DATA/
/DATA
classes/
RELEASE/
/yacy.pid

@ -90,8 +90,8 @@
<dynamicField name="*_dt" type="date" indexed="true" stored="true"/>
<dynamicField name="*_dts" type="date" indexed="true" stored="true" multiValued="true"/>
<dynamicField name="*_t" type="text_general" indexed="true" stored="true"/>
<dynamicField name="*_p" type="location" indexed="true" stored="true"/>
<dynamicField name="*_coordinate" type="tdouble" indexed="true" stored="true" />
<dynamicField name="*_p" type="location" indexed="true" stored="true"/>
<dynamicField name="*_coordinate" type="tdouble" indexed="true" stored="false" />
<dynamicField name="*_txt" type="text_general" indexed="true" stored="true" multiValued="true"/>
<dynamicField name="*_val" type="int" indexed="true" stored="true" multiValued="true"/> <!-- YaCy special -->
<!-- unused dynamicFields?

@ -838,7 +838,7 @@
of SearchComponents (see below) and supports distributed
queries across multiple shards
-->
<requestHandler name="/select" class="solr.SearchHandler">
<requestHandler name="/select" class="solr.SearchHandler" startup="lazy">
<!-- default values for query parameters can be specified, these
will be overridden by parameters in the request
-->
@ -902,7 +902,7 @@
</requestHandler>
<!-- A request handler that returns indented JSON by default -->
<requestHandler name="/query" class="solr.SearchHandler">
<requestHandler name="/query" class="solr.SearchHandler" startup="lazy">
<lst name="defaults">
<str name="echoParams">explicit</str>
<str name="wt">json</str>
@ -923,7 +923,7 @@
also always fetch the complete index from the leader because a partial
sync will not be possible in the absence of this handler.
-->
<requestHandler name="/get" class="solr.RealTimeGetHandler">
<requestHandler name="/get" class="solr.RealTimeGetHandler" startup="lazy">
<lst name="defaults">
<str name="omitHeader">true</str>
<str name="wt">json</str>
@ -936,7 +936,7 @@
Do not change these defaults.
-->
<requestHandler name="/export" class="solr.SearchHandler">
<requestHandler name="/export" class="solr.SearchHandler" startup="lazy">
<lst name="invariants">
<str name="rq">{!xport}</str>
<str name="wt">xsort</str>
@ -966,7 +966,7 @@
This handler will pick a response format to match the input
if the 'wt' parameter is not explicit
-->
<requestHandler name="/update" class="solr.UpdateRequestHandler">
<requestHandler name="/update" class="solr.UpdateRequestHandler" startup="lazy">
<!-- See below for information on defining
updateRequestProcessorChains that can be used by name
on each Update Request
@ -1101,7 +1101,7 @@
-->
<!-- ping/healthcheck -->
<requestHandler name="/admin/ping" class="solr.PingRequestHandler">
<requestHandler name="/admin/ping" class="solr.PingRequestHandler" startup="lazy">
<lst name="invariants">
<str name="q">solrpingquery</str>
</lst>
@ -1117,7 +1117,7 @@
</requestHandler>
<!-- Echo the request contents back to the client -->
<requestHandler name="/debug/dump" class="solr.DumpRequestHandler" >
<requestHandler name="/debug/dump" class="solr.DumpRequestHandler" startup="lazy">
<lst name="defaults">
<str name="echoParams">explicit</str>
<str name="echoHandler">true</str>
@ -1137,7 +1137,7 @@
https://wiki.apache.org/solr/SolrCloud/
-->
<requestHandler name="/replication" class="solr.ReplicationHandler" >
<requestHandler name="/replication" class="solr.ReplicationHandler" startup="lazy">
<!--
To enable simple master/slave replication, uncomment one of the
sections below, depending on whether this solr instance should be

@ -105,33 +105,31 @@ public class Translator {
* @param translationFile the File, which contains the Lists
* @return a HashMap, which contains for each File a HashMap with translations.
*/
public static Map<String, Map<String, String>> loadTranslationsLists(final File translationFile){
public static Map<String, Map<String, String>> loadTranslationsLists(final File translationFile) {
final Map<String, Map<String, String>> lists = new HashMap<String, Map<String, String>>(); //list of translationLists for different files.
Map<String, String> translationList = new LinkedHashMap<String, String>(); //current Translation Table (maintaining input order)
final List<String> list = FileUtils.getListArray(translationFile);
String forFile = "";
for (final String line : list){
if (line.isEmpty() || line.charAt(0) != '#'){
final String[] split = line.split("==", 2);
if (split.length == 2) {
translationList.put(split[0], split[1]);
//}else{ //Invalid line
}
} else if (line.startsWith("#File: ")) {
if (!forFile.equals("")){
lists.put(forFile, translationList);
}
if (line.charAt(6) == ' ') {
forFile=line.substring(7);
} else {
forFile=line.substring(6);
}
if (lists.containsKey(forFile)) {
translationList = lists.get(forFile);
} else {
translationList = new LinkedHashMap<String, String>();
for (final String line : list) {
if (!line.isEmpty()) {
if (line.charAt(0) != '#') {
final String[] split = line.split("==", 2);
if (split.length == 2) {
translationList.put(split[0], split[1]);
//}else{ //Invalid line
}
} else if (line.startsWith("#File:")) {
if (!forFile.isEmpty()) {
lists.put(forFile, translationList);
}
forFile = line.substring(6).trim(); //skip "#File:"
if (lists.containsKey(forFile)) {
translationList = lists.get(forFile);
} else {
translationList = new LinkedHashMap<String, String>();
}
}
}
}

@ -34,6 +34,7 @@ import java.util.Date;
import net.yacy.cora.document.id.AnchorURL;
import net.yacy.cora.document.id.DigestURL;
import net.yacy.cora.document.id.MultiProtocolURL;
import net.yacy.document.AbstractParser;
import net.yacy.document.Document;
import net.yacy.document.Parser;
@ -92,7 +93,7 @@ public class bzipParser extends AbstractParser implements Parser {
}
zippedContent.close();
out.close();
final String filename = location.getFileName();
// create maindoc for this bzip container, register with supplied url & mime
maindoc = new Document(
location,
@ -101,7 +102,7 @@ public class bzipParser extends AbstractParser implements Parser {
this,
null,
null,
null,
AbstractParser.singleList(filename.isEmpty() ? location.toTokens() : MultiProtocolURL.unescape(filename)), // title
null,
null,
null,

@ -35,6 +35,7 @@ import java.util.zip.GZIPInputStream;
import net.yacy.cora.document.id.AnchorURL;
import net.yacy.cora.document.id.DigestURL;
import net.yacy.cora.document.id.MultiProtocolURL;
import net.yacy.document.AbstractParser;
import net.yacy.document.Document;
import net.yacy.document.Parser;
@ -90,6 +91,7 @@ public class gzipParser extends AbstractParser implements Parser {
}
zippedContent.close();
out.close();
final String filename = location.getFileName();
// create maindoc for this gzip container, register with supplied url & mime
maindoc = new Document(
location,
@ -98,7 +100,7 @@ public class gzipParser extends AbstractParser implements Parser {
this,
null,
null,
null,
AbstractParser.singleList(filename.isEmpty() ? location.toTokens() : MultiProtocolURL.unescape(filename)), // title
null,
null,
null,

@ -35,6 +35,7 @@ import java.io.OutputStream;
import java.util.Date;
import net.yacy.cora.document.id.AnchorURL;
import net.yacy.cora.document.id.MultiProtocolURL;
import net.yacy.cora.util.ConcurrentLog;
import net.yacy.document.AbstractParser;
import net.yacy.document.Document;
@ -62,6 +63,8 @@ public class sevenzipParser extends AbstractParser implements Parser {
final String charset,
final int timezoneOffset,
final IInStream source) throws Parser.Failure, InterruptedException {
final String filename = location.getFileName();
final Document doc = new Document(
location,
mimeType,
@ -69,12 +72,12 @@ public class sevenzipParser extends AbstractParser implements Parser {
this,
null,
null,
AbstractParser.singleList(filename.isEmpty() ? location.toTokens() : MultiProtocolURL.unescape(filename)), // title,
null,
null,
null,
null,
null,
0.0f, 0.0f,
0.0d, 0.0d,
(Object)null,
null,
null,

@ -72,7 +72,8 @@ public class tarParser extends AbstractParser implements Parser {
final int timezoneOffset,
InputStream source) throws Parser.Failure, InterruptedException {
final String ext = MultiProtocolURL.getFileExtension(location.getFileName());
final String filename = location.getFileName();
final String ext = MultiProtocolURL.getFileExtension(filename);
if (ext.equals("gz") || ext.equals("tgz")) {
try {
source = new GZIPInputStream(source);
@ -84,14 +85,14 @@ public class tarParser extends AbstractParser implements Parser {
final TarArchiveInputStream tis = new TarArchiveInputStream(source);
// create maindoc for this tar container
Document maindoc = new Document(
final Document maindoc = new Document(
location,
mimeType,
charset,
this,
null,
null,
null,
AbstractParser.singleList(filename.isEmpty() ? location.toTokens() : MultiProtocolURL.unescape(filename)), // title
null,
null,
null,

@ -33,6 +33,7 @@ import java.util.zip.ZipInputStream;
import net.yacy.cora.document.id.AnchorURL;
import net.yacy.cora.document.id.DigestURL;
import net.yacy.cora.document.id.MultiProtocolURL;
import net.yacy.document.AbstractParser;
import net.yacy.document.Document;
import net.yacy.document.Parser;
@ -79,15 +80,16 @@ public class zipParser extends AbstractParser implements Parser {
ZipEntry entry;
final ZipInputStream zis = new ZipInputStream(source);
final String filename = location.getFileName();
// create maindoc for this zip container with supplied url and mime
Document maindoc = new Document(
final Document maindoc = new Document(
location,
mimeType,
charset,
this,
null,
null,
null,
AbstractParser.singleList(filename.isEmpty() ? location.toTokens() : MultiProtocolURL.unescape(filename)), // title
null,
null,
null,

@ -51,7 +51,7 @@ public class GenerationMemoryStrategy extends MemoryStrategy {
*/
@Override
protected final long free() {
return getUsage(eden, false).getCommitted() - getUsage(eden, false).getUsed();
return youngAvailable();
}
/**
@ -61,7 +61,7 @@ public class GenerationMemoryStrategy extends MemoryStrategy {
*/
@Override
protected final long available() {
return available(true);
return available(false);
}
/**
@ -70,7 +70,7 @@ public class GenerationMemoryStrategy extends MemoryStrategy {
* @return bytes
*/
private final long available(final boolean force) {
return force & properState(force) ? Math.max(youngAvailable(), oldAvailable()) : Math.min(youngAvailable(), Math.max(M, oldAvailable()));
return force & properState(force) ? Math.max(youngAvailable(), oldAvailable()) : oldAvailable();
}
/**
@ -99,6 +99,14 @@ public class GenerationMemoryStrategy extends MemoryStrategy {
protected final long maxMemory() {
return heap.getHeapMemoryUsage().getMax();
}
/**
* get the memory that needs to be available for properState
*
* The value is the "used" figure of the usage record that
* getUsage(eden, true) returns.
* NOTE(review): the meaning of the boolean flag passed to getUsage is not
* visible in this hunk (other call sites here pass false) - confirm against getUsage.
*
* @return the used amount reported for eden (presumably bytes, like the
*         other memory figures of this class - TODO confirm)
*/
protected final long minMemory()
{
// delegate to the usage lookup for eden and report only its "used" part
return getUsage(eden, true).getUsed();
}
/**
* checks if a specified amount of bytes are available
@ -151,7 +159,7 @@ public class GenerationMemoryStrategy extends MemoryStrategy {
* @return if survivor fits into old space
*/
private boolean properState(final boolean force) {
final long surv = force? Math.max(M, getUsage(survivor, false).getUsed()) : getUsage(survivor, false).getCommitted();
final long surv = force? M + getUsage(survivor, false).getUsed() : getUsage(survivor, false).getCommitted();
return surv < oldAvailable();
}

@ -162,7 +162,7 @@ public final class InstantBusyThread extends AbstractBusyThread implements BusyT
thread.setIdleSleep(-1);
thread.setBusySleep(-1);
thread.setMemPreReqisite(0);
thread.setLoadPreReqisite(99); // this is called during initialization phase and some code parts depend on it; therefore we cannot set a prerequisite that prevents the start of that thread
thread.setLoadPreReqisite(Double.MAX_VALUE); // this is called during initialization phase and some code parts depend on it; therefore we cannot set a prerequisite that prevents the start of that thread
thread.start();
return thread;
}

@ -48,7 +48,8 @@ import net.yacy.search.EventTracker;
public class AccessTracker {
private final static long DUMP_PERIOD = 60000L;
private final static long DUMP_PERIOD = 3600000L;
private final static int DUMP_SIZE = 50000;
private static final int minSize = 100;
private static final int maxSize = 1000;
@ -89,6 +90,8 @@ public class AccessTracker {
private static final LinkedList<QueryParams> remoteSearches = new LinkedList<QueryParams>();
private static final ArrayList<String> log = new ArrayList<String>();
private static long lastLogDump = System.currentTimeMillis();
private static long localCount = 0;
private static long remoteCount = 0;
private static File dumpFile = null;
public static void setDumpFile(File f) {
@ -141,9 +144,9 @@ public class AccessTracker {
return null;
}
public static int size(final Location location) {
if (location == Location.local) synchronized (localSearches) {return localSearches.size();}
if (location == Location.remote) synchronized (remoteSearches) {return remoteSearches.size();}
public static long size(final Location location) {
if (location == Location.local) synchronized (localSearches) {return localCount + localSearches.size();}
if (location == Location.remote) synchronized (remoteSearches) {return remoteCount + remoteSearches.size();}
return 0;
}
@ -155,10 +158,6 @@ public class AccessTracker {
public static void addToDump(String querystring, String resultcount) {
addToDump(querystring, resultcount, new Date());
if (lastLogDump + DUMP_PERIOD < System.currentTimeMillis()) {
lastLogDump = System.currentTimeMillis();
dumpLog();
}
}
public static void addToDump(String querystring, String resultcount, Date d) {
@ -173,12 +172,21 @@ public class AccessTracker {
synchronized (log) {
log.add(sb.toString());
}
if (log.size() > DUMP_SIZE || lastLogDump + DUMP_PERIOD < System.currentTimeMillis()) {
dumpLog();
}
}
public static void dumpLog() {
lastLogDump = System.currentTimeMillis();
localCount += localSearches.size();
while (!localSearches.isEmpty()) {
addToDump(localSearches.removeFirst(), 0);
}
remoteCount += remoteSearches.size();
while (!remoteSearches.isEmpty()) {
addToDump(remoteSearches.removeFirst(), 0);
}
Thread t = new Thread() {
@Override
public void run() {

@ -95,7 +95,7 @@ public final class QueryParams {
private static final Map<String, CollectionSchema> defaultfacetfields = new HashMap<String, CollectionSchema>();
static {
// the key shall match with configuration property search.navigation
defaultfacetfields.put("location", CollectionSchema.coordinate_p_0_coordinate); // coordinate_p can't be used for facet (subfields), as value isn't used subfield can be used
// defaultfacetfields.put("location", CollectionSchema.coordinate_p_0_coordinate); // coordinate_p can't be used for facet (subfields), as value isn't used subfield can be used
defaultfacetfields.put("hosts", CollectionSchema.host_s);
defaultfacetfields.put("protocol", CollectionSchema.url_protocol_s);
defaultfacetfields.put("filetype", CollectionSchema.url_file_ext_s);

@ -79,8 +79,8 @@ public enum CollectionSchema implements SchemaDeclaration {
// optional but recommended
coordinate_p(SolrType.location, true, true, false, false, false, "point in degrees of latitude,longitude as declared in WSG84"),
coordinate_p_0_coordinate(SolrType.coordinate, true, true, false, false, false, "automatically created subfield, (latitude)"),
coordinate_p_1_coordinate(SolrType.coordinate, true, true, false, false, false, "automatically created subfield, (longitude)"),
coordinate_p_0_coordinate(SolrType.coordinate, true, false, false, false, false, "automatically created subfield, (latitude)"),
coordinate_p_1_coordinate(SolrType.coordinate, true, false, false, false, false, "automatically created subfield, (longitude)"),
ip_s(SolrType.string, true, true, false, false, false, "ip of host of url (after DNS lookup)"),
author(SolrType.text_general, true, true, false, false, true, "content of author-tag"),
author_sxt(SolrType.string, true, true, true, false, false, "content of author-tag as copy-field from author. This is used for facet generation"),
@ -263,7 +263,7 @@ public enum CollectionSchema implements SchemaDeclaration {
this.omitNorms = omitNorms;
this.searchable = searchable;
this.comment = comment;
this.docValues = (type == SolrType.string || type == SolrType.date);
this.docValues = (type == SolrType.string || type == SolrType.date || type.name().startsWith("num_"));
// verify our naming scheme
String name = this.name();
int p = name.indexOf('_');

@ -117,7 +117,7 @@ public enum WebgraphSchema implements SchemaDeclaration {
this.omitNorms = omitNorms;
this.searchable = searchable;
this.comment = comment;
this.docValues = (type == SolrType.string || type == SolrType.date);
this.docValues = (type == SolrType.string || type == SolrType.date || type.name().startsWith("num_"));
// verify our naming scheme
String name = this.name();
int p = name.indexOf('_');

Loading…
Cancel
Save