luc 9 years ago
commit 49331dc523

2
.gitignore vendored

@ -4,7 +4,7 @@ lib/yacycore.jar
libbuild/svnRevNr.jar libbuild/svnRevNr.jar
libbuild/GitRevTask.jar libbuild/GitRevTask.jar
gen/** gen/**
DATA/ /DATA
classes/ classes/
RELEASE/ RELEASE/
/yacy.pid /yacy.pid

@ -90,8 +90,8 @@
<dynamicField name="*_dt" type="date" indexed="true" stored="true"/> <dynamicField name="*_dt" type="date" indexed="true" stored="true"/>
<dynamicField name="*_dts" type="date" indexed="true" stored="true" multiValued="true"/> <dynamicField name="*_dts" type="date" indexed="true" stored="true" multiValued="true"/>
<dynamicField name="*_t" type="text_general" indexed="true" stored="true"/> <dynamicField name="*_t" type="text_general" indexed="true" stored="true"/>
<dynamicField name="*_p" type="location" indexed="true" stored="true"/> <dynamicField name="*_p" type="location" indexed="true" stored="true"/>
<dynamicField name="*_coordinate" type="tdouble" indexed="true" stored="true" /> <dynamicField name="*_coordinate" type="tdouble" indexed="true" stored="false" />
<dynamicField name="*_txt" type="text_general" indexed="true" stored="true" multiValued="true"/> <dynamicField name="*_txt" type="text_general" indexed="true" stored="true" multiValued="true"/>
<dynamicField name="*_val" type="int" indexed="true" stored="true" multiValued="true"/> <!-- YaCy special --> <dynamicField name="*_val" type="int" indexed="true" stored="true" multiValued="true"/> <!-- YaCy special -->
<!-- unused dynamicFields? <!-- unused dynamicFields?

@ -838,7 +838,7 @@
of SearchComponents (see below) and supports distributed of SearchComponents (see below) and supports distributed
queries across multiple shards queries across multiple shards
--> -->
<requestHandler name="/select" class="solr.SearchHandler"> <requestHandler name="/select" class="solr.SearchHandler" startup="lazy">
<!-- default values for query parameters can be specified, these <!-- default values for query parameters can be specified, these
will be overridden by parameters in the request will be overridden by parameters in the request
--> -->
@ -902,7 +902,7 @@
</requestHandler> </requestHandler>
<!-- A request handler that returns indented JSON by default --> <!-- A request handler that returns indented JSON by default -->
<requestHandler name="/query" class="solr.SearchHandler"> <requestHandler name="/query" class="solr.SearchHandler" startup="lazy">
<lst name="defaults"> <lst name="defaults">
<str name="echoParams">explicit</str> <str name="echoParams">explicit</str>
<str name="wt">json</str> <str name="wt">json</str>
@ -923,7 +923,7 @@
also always fetch the complete index from the leader because a partial also always fetch the complete index from the leader because a partial
sync will not be possible in the absence of this handler. sync will not be possible in the absence of this handler.
--> -->
<requestHandler name="/get" class="solr.RealTimeGetHandler"> <requestHandler name="/get" class="solr.RealTimeGetHandler" startup="lazy">
<lst name="defaults"> <lst name="defaults">
<str name="omitHeader">true</str> <str name="omitHeader">true</str>
<str name="wt">json</str> <str name="wt">json</str>
@ -936,7 +936,7 @@
Do not change these defaults. Do not change these defaults.
--> -->
<requestHandler name="/export" class="solr.SearchHandler"> <requestHandler name="/export" class="solr.SearchHandler" startup="lazy">
<lst name="invariants"> <lst name="invariants">
<str name="rq">{!xport}</str> <str name="rq">{!xport}</str>
<str name="wt">xsort</str> <str name="wt">xsort</str>
@ -966,7 +966,7 @@
This handler will pick a response format to match the input This handler will pick a response format to match the input
if the 'wt' parameter is not explicit if the 'wt' parameter is not explicit
--> -->
<requestHandler name="/update" class="solr.UpdateRequestHandler"> <requestHandler name="/update" class="solr.UpdateRequestHandler" startup="lazy">
<!-- See below for information on defining <!-- See below for information on defining
updateRequestProcessorChains that can be used by name updateRequestProcessorChains that can be used by name
on each Update Request on each Update Request
@ -1101,7 +1101,7 @@
--> -->
<!-- ping/healthcheck --> <!-- ping/healthcheck -->
<requestHandler name="/admin/ping" class="solr.PingRequestHandler"> <requestHandler name="/admin/ping" class="solr.PingRequestHandler" startup="lazy">
<lst name="invariants"> <lst name="invariants">
<str name="q">solrpingquery</str> <str name="q">solrpingquery</str>
</lst> </lst>
@ -1117,7 +1117,7 @@
</requestHandler> </requestHandler>
<!-- Echo the request contents back to the client --> <!-- Echo the request contents back to the client -->
<requestHandler name="/debug/dump" class="solr.DumpRequestHandler" > <requestHandler name="/debug/dump" class="solr.DumpRequestHandler" startup="lazy">
<lst name="defaults"> <lst name="defaults">
<str name="echoParams">explicit</str> <str name="echoParams">explicit</str>
<str name="echoHandler">true</str> <str name="echoHandler">true</str>
@ -1137,7 +1137,7 @@
https://wiki.apache.org/solr/SolrCloud/ https://wiki.apache.org/solr/SolrCloud/
--> -->
<requestHandler name="/replication" class="solr.ReplicationHandler" > <requestHandler name="/replication" class="solr.ReplicationHandler" startup="lazy">
<!-- <!--
To enable simple master/slave replication, uncomment one of the To enable simple master/slave replication, uncomment one of the
sections below, depending on whether this solr instance should be sections below, depending on whether this solr instance should be

@ -105,33 +105,31 @@ public class Translator {
* @param translationFile the File, which contains the Lists * @param translationFile the File, which contains the Lists
* @return a HashMap, which contains for each File a HashMap with translations. * @return a HashMap, which contains for each File a HashMap with translations.
*/ */
public static Map<String, Map<String, String>> loadTranslationsLists(final File translationFile){ public static Map<String, Map<String, String>> loadTranslationsLists(final File translationFile) {
final Map<String, Map<String, String>> lists = new HashMap<String, Map<String, String>>(); //list of translationLists for different files. final Map<String, Map<String, String>> lists = new HashMap<String, Map<String, String>>(); //list of translationLists for different files.
Map<String, String> translationList = new LinkedHashMap<String, String>(); //current Translation Table (maintaining input order) Map<String, String> translationList = new LinkedHashMap<String, String>(); //current Translation Table (maintaining input order)
final List<String> list = FileUtils.getListArray(translationFile); final List<String> list = FileUtils.getListArray(translationFile);
String forFile = ""; String forFile = "";
for (final String line : list){ for (final String line : list) {
if (line.isEmpty() || line.charAt(0) != '#'){ if (!line.isEmpty()) {
final String[] split = line.split("==", 2); if (line.charAt(0) != '#') {
if (split.length == 2) { final String[] split = line.split("==", 2);
translationList.put(split[0], split[1]); if (split.length == 2) {
//}else{ //Invalid line translationList.put(split[0], split[1]);
} //}else{ //Invalid line
} else if (line.startsWith("#File: ")) { }
if (!forFile.equals("")){ } else if (line.startsWith("#File:")) {
lists.put(forFile, translationList); if (!forFile.isEmpty()) {
} lists.put(forFile, translationList);
if (line.charAt(6) == ' ') { }
forFile=line.substring(7); forFile = line.substring(6).trim(); //skip "#File:"
} else { if (lists.containsKey(forFile)) {
forFile=line.substring(6); translationList = lists.get(forFile);
} } else {
if (lists.containsKey(forFile)) { translationList = new LinkedHashMap<String, String>();
translationList = lists.get(forFile); }
} else {
translationList = new LinkedHashMap<String, String>();
} }
} }
} }

@ -34,6 +34,7 @@ import java.util.Date;
import net.yacy.cora.document.id.AnchorURL; import net.yacy.cora.document.id.AnchorURL;
import net.yacy.cora.document.id.DigestURL; import net.yacy.cora.document.id.DigestURL;
import net.yacy.cora.document.id.MultiProtocolURL;
import net.yacy.document.AbstractParser; import net.yacy.document.AbstractParser;
import net.yacy.document.Document; import net.yacy.document.Document;
import net.yacy.document.Parser; import net.yacy.document.Parser;
@ -92,7 +93,7 @@ public class bzipParser extends AbstractParser implements Parser {
} }
zippedContent.close(); zippedContent.close();
out.close(); out.close();
final String filename = location.getFileName();
// create maindoc for this bzip container, register with supplied url & mime // create maindoc for this bzip container, register with supplied url & mime
maindoc = new Document( maindoc = new Document(
location, location,
@ -101,7 +102,7 @@ public class bzipParser extends AbstractParser implements Parser {
this, this,
null, null,
null, null,
null, AbstractParser.singleList(filename.isEmpty() ? location.toTokens() : MultiProtocolURL.unescape(filename)), // title
null, null,
null, null,
null, null,

@ -35,6 +35,7 @@ import java.util.zip.GZIPInputStream;
import net.yacy.cora.document.id.AnchorURL; import net.yacy.cora.document.id.AnchorURL;
import net.yacy.cora.document.id.DigestURL; import net.yacy.cora.document.id.DigestURL;
import net.yacy.cora.document.id.MultiProtocolURL;
import net.yacy.document.AbstractParser; import net.yacy.document.AbstractParser;
import net.yacy.document.Document; import net.yacy.document.Document;
import net.yacy.document.Parser; import net.yacy.document.Parser;
@ -90,6 +91,7 @@ public class gzipParser extends AbstractParser implements Parser {
} }
zippedContent.close(); zippedContent.close();
out.close(); out.close();
final String filename = location.getFileName();
// create maindoc for this gzip container, register with supplied url & mime // create maindoc for this gzip container, register with supplied url & mime
maindoc = new Document( maindoc = new Document(
location, location,
@ -98,7 +100,7 @@ public class gzipParser extends AbstractParser implements Parser {
this, this,
null, null,
null, null,
null, AbstractParser.singleList(filename.isEmpty() ? location.toTokens() : MultiProtocolURL.unescape(filename)), // title
null, null,
null, null,
null, null,

@ -35,6 +35,7 @@ import java.io.OutputStream;
import java.util.Date; import java.util.Date;
import net.yacy.cora.document.id.AnchorURL; import net.yacy.cora.document.id.AnchorURL;
import net.yacy.cora.document.id.MultiProtocolURL;
import net.yacy.cora.util.ConcurrentLog; import net.yacy.cora.util.ConcurrentLog;
import net.yacy.document.AbstractParser; import net.yacy.document.AbstractParser;
import net.yacy.document.Document; import net.yacy.document.Document;
@ -62,6 +63,8 @@ public class sevenzipParser extends AbstractParser implements Parser {
final String charset, final String charset,
final int timezoneOffset, final int timezoneOffset,
final IInStream source) throws Parser.Failure, InterruptedException { final IInStream source) throws Parser.Failure, InterruptedException {
final String filename = location.getFileName();
final Document doc = new Document( final Document doc = new Document(
location, location,
mimeType, mimeType,
@ -69,12 +72,12 @@ public class sevenzipParser extends AbstractParser implements Parser {
this, this,
null, null,
null, null,
AbstractParser.singleList(filename.isEmpty() ? location.toTokens() : MultiProtocolURL.unescape(filename)), // title,
null, null,
null, null,
null, null,
null, null,
null, 0.0d, 0.0d,
0.0f, 0.0f,
(Object)null, (Object)null,
null, null,
null, null,

@ -72,7 +72,8 @@ public class tarParser extends AbstractParser implements Parser {
final int timezoneOffset, final int timezoneOffset,
InputStream source) throws Parser.Failure, InterruptedException { InputStream source) throws Parser.Failure, InterruptedException {
final String ext = MultiProtocolURL.getFileExtension(location.getFileName()); final String filename = location.getFileName();
final String ext = MultiProtocolURL.getFileExtension(filename);
if (ext.equals("gz") || ext.equals("tgz")) { if (ext.equals("gz") || ext.equals("tgz")) {
try { try {
source = new GZIPInputStream(source); source = new GZIPInputStream(source);
@ -84,14 +85,14 @@ public class tarParser extends AbstractParser implements Parser {
final TarArchiveInputStream tis = new TarArchiveInputStream(source); final TarArchiveInputStream tis = new TarArchiveInputStream(source);
// create maindoc for this bzip container // create maindoc for this bzip container
Document maindoc = new Document( final Document maindoc = new Document(
location, location,
mimeType, mimeType,
charset, charset,
this, this,
null, null,
null, null,
null, AbstractParser.singleList(filename.isEmpty() ? location.toTokens() : MultiProtocolURL.unescape(filename)), // title
null, null,
null, null,
null, null,

@ -33,6 +33,7 @@ import java.util.zip.ZipInputStream;
import net.yacy.cora.document.id.AnchorURL; import net.yacy.cora.document.id.AnchorURL;
import net.yacy.cora.document.id.DigestURL; import net.yacy.cora.document.id.DigestURL;
import net.yacy.cora.document.id.MultiProtocolURL;
import net.yacy.document.AbstractParser; import net.yacy.document.AbstractParser;
import net.yacy.document.Document; import net.yacy.document.Document;
import net.yacy.document.Parser; import net.yacy.document.Parser;
@ -79,15 +80,16 @@ public class zipParser extends AbstractParser implements Parser {
ZipEntry entry; ZipEntry entry;
final ZipInputStream zis = new ZipInputStream(source); final ZipInputStream zis = new ZipInputStream(source);
final String filename = location.getFileName();
// create maindoc for this zip container with supplied url and mime // create maindoc for this zip container with supplied url and mime
Document maindoc = new Document( final Document maindoc = new Document(
location, location,
mimeType, mimeType,
charset, charset,
this, this,
null, null,
null, null,
null, AbstractParser.singleList(filename.isEmpty() ? location.toTokens() : MultiProtocolURL.unescape(filename)), // title
null, null,
null, null,
null, null,

@ -51,7 +51,7 @@ public class GenerationMemoryStrategy extends MemoryStrategy {
*/ */
@Override @Override
protected final long free() { protected final long free() {
return getUsage(eden, false).getCommitted() - getUsage(eden, false).getUsed(); return youngAvailable();
} }
/** /**
@ -61,7 +61,7 @@ public class GenerationMemoryStrategy extends MemoryStrategy {
*/ */
@Override @Override
protected final long available() { protected final long available() {
return available(true); return available(false);
} }
/** /**
@ -70,7 +70,7 @@ public class GenerationMemoryStrategy extends MemoryStrategy {
* @return bytes * @return bytes
*/ */
private final long available(final boolean force) { private final long available(final boolean force) {
return force & properState(force) ? Math.max(youngAvailable(), oldAvailable()) : Math.min(youngAvailable(), Math.max(M, oldAvailable())); return force & properState(force) ? Math.max(youngAvailable(), oldAvailable()) : oldAvailable();
} }
/** /**
@ -100,6 +100,14 @@ public class GenerationMemoryStrategy extends MemoryStrategy {
return heap.getHeapMemoryUsage().getMax(); return heap.getHeapMemoryUsage().getMax();
} }
/**
* get the memory that needs to be available for properState
*/
protected final long minMemory()
{
return getUsage(eden, true).getUsed();
}
/** /**
* checks if a specified amount of bytes are available * checks if a specified amount of bytes are available
* after the jvm recycled unused objects * after the jvm recycled unused objects
@ -151,7 +159,7 @@ public class GenerationMemoryStrategy extends MemoryStrategy {
* @return if survivor fits into old space * @return if survivor fits into old space
*/ */
private boolean properState(final boolean force) { private boolean properState(final boolean force) {
final long surv = force? Math.max(M, getUsage(survivor, false).getUsed()) : getUsage(survivor, false).getCommitted(); final long surv = force? M + getUsage(survivor, false).getUsed() : getUsage(survivor, false).getCommitted();
return surv < oldAvailable(); return surv < oldAvailable();
} }

@ -162,7 +162,7 @@ public final class InstantBusyThread extends AbstractBusyThread implements BusyT
thread.setIdleSleep(-1); thread.setIdleSleep(-1);
thread.setBusySleep(-1); thread.setBusySleep(-1);
thread.setMemPreReqisite(0); thread.setMemPreReqisite(0);
thread.setLoadPreReqisite(99); // this is called during initialization phase and some code parts depend on it; therefore we cannot set a prerequisite that prevents the start of that thread thread.setLoadPreReqisite(Double.MAX_VALUE); // this is called during initialization phase and some code parts depend on it; therefore we cannot set a prerequisite that prevents the start of that thread
thread.start(); thread.start();
return thread; return thread;
} }

@ -48,7 +48,8 @@ import net.yacy.search.EventTracker;
public class AccessTracker { public class AccessTracker {
private final static long DUMP_PERIOD = 60000L; private final static long DUMP_PERIOD = 3600000L;
private final static int DUMP_SIZE = 50000;
private static final int minSize = 100; private static final int minSize = 100;
private static final int maxSize = 1000; private static final int maxSize = 1000;
@ -89,6 +90,8 @@ public class AccessTracker {
private static final LinkedList<QueryParams> remoteSearches = new LinkedList<QueryParams>(); private static final LinkedList<QueryParams> remoteSearches = new LinkedList<QueryParams>();
private static final ArrayList<String> log = new ArrayList<String>(); private static final ArrayList<String> log = new ArrayList<String>();
private static long lastLogDump = System.currentTimeMillis(); private static long lastLogDump = System.currentTimeMillis();
private static long localCount = 0;
private static long remoteCount = 0;
private static File dumpFile = null; private static File dumpFile = null;
public static void setDumpFile(File f) { public static void setDumpFile(File f) {
@ -141,9 +144,9 @@ public class AccessTracker {
return null; return null;
} }
public static int size(final Location location) { public static long size(final Location location) {
if (location == Location.local) synchronized (localSearches) {return localSearches.size();} if (location == Location.local) synchronized (localSearches) {return localCount + localSearches.size();}
if (location == Location.remote) synchronized (remoteSearches) {return remoteSearches.size();} if (location == Location.remote) synchronized (remoteSearches) {return remoteCount + remoteSearches.size();}
return 0; return 0;
} }
@ -155,10 +158,6 @@ public class AccessTracker {
public static void addToDump(String querystring, String resultcount) { public static void addToDump(String querystring, String resultcount) {
addToDump(querystring, resultcount, new Date()); addToDump(querystring, resultcount, new Date());
if (lastLogDump + DUMP_PERIOD < System.currentTimeMillis()) {
lastLogDump = System.currentTimeMillis();
dumpLog();
}
} }
public static void addToDump(String querystring, String resultcount, Date d) { public static void addToDump(String querystring, String resultcount, Date d) {
@ -173,12 +172,21 @@ public class AccessTracker {
synchronized (log) { synchronized (log) {
log.add(sb.toString()); log.add(sb.toString());
} }
if (log.size() > DUMP_SIZE || lastLogDump + DUMP_PERIOD < System.currentTimeMillis()) {
dumpLog();
}
} }
public static void dumpLog() { public static void dumpLog() {
lastLogDump = System.currentTimeMillis();
localCount += localSearches.size();
while (!localSearches.isEmpty()) { while (!localSearches.isEmpty()) {
addToDump(localSearches.removeFirst(), 0); addToDump(localSearches.removeFirst(), 0);
} }
remoteCount += remoteSearches.size();
while (!remoteSearches.isEmpty()) {
addToDump(remoteSearches.removeFirst(), 0);
}
Thread t = new Thread() { Thread t = new Thread() {
@Override @Override
public void run() { public void run() {

@ -95,7 +95,7 @@ public final class QueryParams {
private static final Map<String, CollectionSchema> defaultfacetfields = new HashMap<String, CollectionSchema>(); private static final Map<String, CollectionSchema> defaultfacetfields = new HashMap<String, CollectionSchema>();
static { static {
// the key shall match with configuration property search.navigation // the key shall match with configuration property search.navigation
defaultfacetfields.put("location", CollectionSchema.coordinate_p_0_coordinate); // coordinate_p can't be used for facet (subfields), as value isn't used subfield can be used // defaultfacetfields.put("location", CollectionSchema.coordinate_p_0_coordinate); // coordinate_p can't be used for facet (subfields), as value isn't used subfield can be used
defaultfacetfields.put("hosts", CollectionSchema.host_s); defaultfacetfields.put("hosts", CollectionSchema.host_s);
defaultfacetfields.put("protocol", CollectionSchema.url_protocol_s); defaultfacetfields.put("protocol", CollectionSchema.url_protocol_s);
defaultfacetfields.put("filetype", CollectionSchema.url_file_ext_s); defaultfacetfields.put("filetype", CollectionSchema.url_file_ext_s);

@ -79,8 +79,8 @@ public enum CollectionSchema implements SchemaDeclaration {
// optional but recommended // optional but recommended
coordinate_p(SolrType.location, true, true, false, false, false, "point in degrees of latitude,longitude as declared in WSG84"), coordinate_p(SolrType.location, true, true, false, false, false, "point in degrees of latitude,longitude as declared in WSG84"),
coordinate_p_0_coordinate(SolrType.coordinate, true, true, false, false, false, "automatically created subfield, (latitude)"), coordinate_p_0_coordinate(SolrType.coordinate, true, false, false, false, false, "automatically created subfield, (latitude)"),
coordinate_p_1_coordinate(SolrType.coordinate, true, true, false, false, false, "automatically created subfield, (longitude)"), coordinate_p_1_coordinate(SolrType.coordinate, true, false, false, false, false, "automatically created subfield, (longitude)"),
ip_s(SolrType.string, true, true, false, false, false, "ip of host of url (after DNS lookup)"), ip_s(SolrType.string, true, true, false, false, false, "ip of host of url (after DNS lookup)"),
author(SolrType.text_general, true, true, false, false, true, "content of author-tag"), author(SolrType.text_general, true, true, false, false, true, "content of author-tag"),
author_sxt(SolrType.string, true, true, true, false, false, "content of author-tag as copy-field from author. This is used for facet generation"), author_sxt(SolrType.string, true, true, true, false, false, "content of author-tag as copy-field from author. This is used for facet generation"),
@ -263,7 +263,7 @@ public enum CollectionSchema implements SchemaDeclaration {
this.omitNorms = omitNorms; this.omitNorms = omitNorms;
this.searchable = searchable; this.searchable = searchable;
this.comment = comment; this.comment = comment;
this.docValues = (type == SolrType.string || type == SolrType.date); this.docValues = (type == SolrType.string || type == SolrType.date || type.name().startsWith("num_"));
// verify our naming scheme // verify our naming scheme
String name = this.name(); String name = this.name();
int p = name.indexOf('_'); int p = name.indexOf('_');

@ -117,7 +117,7 @@ public enum WebgraphSchema implements SchemaDeclaration {
this.omitNorms = omitNorms; this.omitNorms = omitNorms;
this.searchable = searchable; this.searchable = searchable;
this.comment = comment; this.comment = comment;
this.docValues = (type == SolrType.string || type == SolrType.date); this.docValues = (type == SolrType.string || type == SolrType.date || type.name().startsWith("num_"));
// verify our naming scheme // verify our naming scheme
String name = this.name(); String name = this.name();
int p = name.indexOf('_'); int p = name.indexOf('_');

Loading…
Cancel
Save