diff --git a/defaults/yacy.init b/defaults/yacy.init index e8f2b78c5..d94671b43 100644 --- a/defaults/yacy.init +++ b/defaults/yacy.init @@ -691,7 +691,7 @@ crawlPause.localsearch=50 crawlPause.remotesearch=10 # Some configuration values for the crawler -crawler.clientTimeout=9000 +crawler.clientTimeout=30000 # http crawler specific settings; size in bytes crawler.http.accept=text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8 diff --git a/htroot/DemoServlet.java b/htroot/DemoServlet.java index 98d6ce3e3..42cc2b491 100644 --- a/htroot/DemoServlet.java +++ b/htroot/DemoServlet.java @@ -1,11 +1,6 @@ -import java.util.Iterator; - import net.yacy.yacy; import net.yacy.cora.protocol.RequestHeader; import net.yacy.interaction.Interaction; -import net.yacy.search.Switchboard; -import de.anomic.data.BookmarkHelper; -import de.anomic.data.UserDB; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; @@ -13,17 +8,15 @@ public final class DemoServlet { public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) { - + // return variable that accumulates replacements final serverObjects prop = new serverObjects(); - - final Switchboard sb = Switchboard.getSwitchboard(); - + prop.put("temperature", "-10°C"); - + // Display currently logged on user prop.put("username", Interaction.GetLoggedOnUser(header)); - + //Generate Userlist int numUsers = 0; for (String user : Interaction.GetUsers()) { @@ -31,36 +24,36 @@ public final class DemoServlet { numUsers++; } prop.put("users", numUsers); - - - + + + if (post != null) { - + if (post.containsKey("submit")) { - + prop.put("temperature", post.get("textthing")); - + String filename= post.get("textthing"); - + int counter = 0; - + while (counter < 10) { - + prop.put("localimg_"+counter+"_path","/"+filename); - + prop.put("localimg_"+counter+"_checked", "2"); counter++; } - + prop.put("localimg", counter); - - - + + + prop.put("temperature", yacy.homedir+"/DATA/HTDOCS/"+filename); } - + } - + // return rewrite properties return prop; } diff --git a/htroot/DemoServletInteraction.java b/htroot/DemoServletInteraction.java index 2e974e421..f8d34a4b0 100644 --- a/htroot/DemoServletInteraction.java +++ b/htroot/DemoServletInteraction.java @@ -1,5 +1,4 @@ import net.yacy.cora.protocol.RequestHeader; -import de.anomic.data.BookmarkHelper; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; @@ -9,9 +8,9 @@ public final class DemoServletInteraction { final serverObjects post, final serverSwitch env) { // return variable that accumulates replacements final serverObjects prop = new serverObjects(); - + prop.put("temperature", "-10°C"); - + // return rewrite properties return prop; } diff --git a/htroot/DemoServletRDF.java b/htroot/DemoServletRDF.java index 593d6bcbe..25d2143d1 100644 --- a/htroot/DemoServletRDF.java +++ b/htroot/DemoServletRDF.java @@ -1,6 +1,4 @@ -import net.yacy.yacy; import net.yacy.cora.protocol.RequestHeader; -import de.anomic.data.BookmarkHelper; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; @@ -10,37 +8,37 @@ public final class DemoServletRDF { final serverObjects post, final serverSwitch env) { // return variable that accumulates replacements final serverObjects prop = new serverObjects(); - + // prop.put("temperature", "-10°C"); - + if (post != null) { - + if (post.containsKey("submit")) { prop.put("temperature", post.get("textthing")); - + String filename= post.get("textthing"); - + // prop.put("imglink", filename+".jpg"); - + int counter = 0; - + while (counter < 10) { - + prop.put("localimg_"+counter+"_path","/"+filename); - + prop.put("localimg_"+counter+"_checked", "2"); counter++; } - + prop.put("localimg", counter); - - - + + + // prop.put("temperature",yacy.homedir+"/DATA/HTDOCS/"+filename); } - + } - + // return rewrite properties return prop; } diff --git a/htroot/DictionaryLoader_p.java b/htroot/DictionaryLoader_p.java index 32e60d18c..c5906fdb9 100644 --- a/htroot/DictionaryLoader_p.java +++ b/htroot/DictionaryLoader_p.java @@ -24,8 +24,8 @@ import java.net.MalformedURLException; import net.yacy.cora.protocol.RequestHeader; import net.yacy.cora.services.federated.yacy.CacheStrategy; import net.yacy.document.LibraryProvider; -import net.yacy.document.geolocalization.GeonamesLocation; -import net.yacy.document.geolocalization.OpenGeoDBLocation; +import net.yacy.document.geolocation.GeonamesLocation; +import net.yacy.document.geolocation.OpenGeoDBLocation; import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.util.FileUtils; @@ -68,7 +68,7 @@ public class DictionaryLoader_p { final Response response = sb.loader.load(sb.loader.request(new DigestURI(LibraryProvider.Dictionary.GEON0.url), false, true), CacheStrategy.NOCACHE, Integer.MAX_VALUE, false); final byte[] b = response.getContent(); FileUtils.copy(b, LibraryProvider.Dictionary.GEON0.file()); - LibraryProvider.geoLoc.activateLocalization(LibraryProvider.Dictionary.GEON0.nickname, new GeonamesLocation(LibraryProvider.Dictionary.GEON0.file())); + LibraryProvider.geoLoc.activateLocation(LibraryProvider.Dictionary.GEON0.nickname, new GeonamesLocation(LibraryProvider.Dictionary.GEON0.file(), null)); LibraryProvider.autotagging.addPlaces(LibraryProvider.geoLoc); prop.put("geon0Status", LibraryProvider.Dictionary.GEON0.file().exists() ? 1 : 0); prop.put("geon0ActionLoaded", 1); @@ -98,7 +98,7 @@ public class DictionaryLoader_p { if (post.containsKey("geon0Activate")) { LibraryProvider.Dictionary.GEON0.fileDisabled().renameTo(LibraryProvider.Dictionary.GEON0.file()); - LibraryProvider.geoLoc.activateLocalization(LibraryProvider.Dictionary.GEON0.nickname, new GeonamesLocation(LibraryProvider.Dictionary.GEON0.file())); + LibraryProvider.geoLoc.activateLocation(LibraryProvider.Dictionary.GEON0.nickname, new GeonamesLocation(LibraryProvider.Dictionary.GEON0.file(), null)); LibraryProvider.autotagging.addPlaces(LibraryProvider.geoLoc); prop.put("geon0ActionActivated", 1); } @@ -111,7 +111,7 @@ public class DictionaryLoader_p { final byte[] b = response.getContent(); FileUtils.copy(b, LibraryProvider.Dictionary.GEODB1.file()); LibraryProvider.geoLoc.deactivateLocalization(LibraryProvider.Dictionary.GEODB0.nickname); - LibraryProvider.geoLoc.activateLocalization(LibraryProvider.Dictionary.GEODB1.nickname, new OpenGeoDBLocation(LibraryProvider.Dictionary.GEODB1.file(), false)); + LibraryProvider.geoLoc.activateLocation(LibraryProvider.Dictionary.GEODB1.nickname, new OpenGeoDBLocation(LibraryProvider.Dictionary.GEODB1.file(), null)); LibraryProvider.autotagging.addPlaces(LibraryProvider.geoLoc); prop.put("geo1Status", LibraryProvider.Dictionary.GEODB1.file().exists() ? 1 : 0); prop.put("geo1ActionLoaded", 1); @@ -141,7 +141,7 @@ public class DictionaryLoader_p { if (post.containsKey("geo1Activate")) { LibraryProvider.Dictionary.GEODB1.fileDisabled().renameTo(LibraryProvider.Dictionary.GEODB1.file()); - LibraryProvider.geoLoc.activateLocalization(LibraryProvider.Dictionary.GEODB1.nickname, new OpenGeoDBLocation(LibraryProvider.Dictionary.GEODB1.file(), false)); + LibraryProvider.geoLoc.activateLocation(LibraryProvider.Dictionary.GEODB1.nickname, new OpenGeoDBLocation(LibraryProvider.Dictionary.GEODB1.file(), null)); LibraryProvider.autotagging.addPlaces(LibraryProvider.geoLoc); prop.put("geo1ActionActivated", 1); } @@ -229,7 +229,7 @@ public class DictionaryLoader_p { LibraryProvider.activatePND(); prop.put("pnd0ActionActivated", 1); } - + // check status again for (final LibraryProvider.Dictionary dictionary: LibraryProvider.Dictionary.values()) { prop.put(dictionary.nickname + "Status", dictionary.file().exists() ? 1 : dictionary.fileDisabled().exists() ? 2 : 0); diff --git a/htroot/Vocabulary_p.html b/htroot/Vocabulary_p.html index 7a2fe2bf4..468eab159 100644 --- a/htroot/Vocabulary_p.html +++ b/htroot/Vocabulary_p.html @@ -3,6 +3,59 @@ YaCy '#[clientname]#': Federated Index #%env/templates/metas.template%# + #%env/templates/header.template%# @@ -31,14 +84,17 @@ #(create)#:: -
+ +
Vocabulary Production It is possible to produce a vocabulary out of the existing search index. This is done using a given 'objectspace' which you can enter as a URL Stub. This stub is used to find all matching URLs. If the remaining path from the matching URLs then denotes a single file, the file name is used as vocabulary term. This works best with wikis. Try to use a wiki url as objectspace path.
Vocabulary Name
-
Objectspace
+
Objectspace
+
+
Discover Terms from
object link file name  object page title  object page title (splitted)  object page author
@@ -51,6 +107,7 @@
Vocabulary Name
#[name]#
File
#(editable)#[automatically generated, not stored, cannot be edited]::#[file]##(/editable)#
+
Size
#[size]#
Namespace
#[namespace]#
Predicate
#[predicate]#
Prefix
#[prefix]#
@@ -73,10 +130,10 @@ #{terms}# #(editable)# ::#(/editable)# - #(editable)# ::#(/editable)# + #(editable)# ::#(/editable)# #[term]# - #(editable)##[synonyms]#::#(/editable)# - #(editable)##[objectlink]#::#(/editable)# + #(editable)##[synonyms]#::#(/editable)# + #(editable)##[objectlink]#::#(/editable)# #{/terms}# #(editable)#:: diff --git a/htroot/Vocabulary_p.java b/htroot/Vocabulary_p.java index 5a7eeca93..325fe0e21 100644 --- a/htroot/Vocabulary_p.java +++ b/htroot/Vocabulary_p.java @@ -35,6 +35,7 @@ import net.yacy.cora.lod.vocabulary.YaCyMetadata; import net.yacy.cora.protocol.RequestHeader; import net.yacy.document.LibraryProvider; import net.yacy.kelondro.data.meta.DigestURI; +import net.yacy.kelondro.data.meta.URIMetadataRow; import net.yacy.kelondro.logging.Log; import net.yacy.search.Switchboard; import net.yacy.search.SwitchboardConstants; @@ -50,44 +51,71 @@ public class Vocabulary_p { Collection vocs = LibraryProvider.autotagging.getVocabularies(); String vocabularyName = (post == null) ? null : post.get("vocabulary", null); + String discovername = (post == null) ? null : post.get("discovername", null); Tagging vocabulary = vocabularyName == null ? null : LibraryProvider.autotagging.getVocabulary(vocabularyName); if (vocabulary == null) vocabularyName = null; - int count = 0; - for (Tagging v: vocs) { - prop.put("vocabularyset_" + count + "_name", v.getName()); - prop.put("vocabularyset_" + count + "_selected", (vocabularyName != null && vocabularyName.equals(v.getName())) ? 1 : 0); - count++; - } - prop.put("vocabularyset", count); - if (post != null) { try { if (vocabulary == null) { // create a vocabulary - String discovername = post.get("discovername", ""); - if (discovername.length() > 0) { + if (discovername != null && discovername.length() > 0) { String discoverobjectspace = post.get("discoverobjectspace", ""); MultiProtocolURI discoveruri = null; if (discoverobjectspace.length() > 0) try {discoveruri = new MultiProtocolURI(discoverobjectspace);} catch (MalformedURLException e) {} if (discoveruri == null) discoverobjectspace = ""; Map table = new TreeMap(); File propFile = LibraryProvider.autotagging.getVocabularyFile(discovername); + boolean discoverFromPath = post.get("discovermethod", "").equals("path"); + boolean discoverFromTitle = post.get("discovermethod", "").equals("title"); + boolean discoverFromTitleSplitted = post.get("discovermethod", "").equals("titlesplitted"); + boolean discoverFromAuthor = post.get("discovermethod", "").equals("author"); if (discoveruri != null) { String segmentName = sb.getConfig(SwitchboardConstants.SEGMENT_PUBLIC, "default"); Segment segment = sb.indexSegments.segment(segmentName); Iterator ui = segment.urlSelector(discoveruri); + String t; while (ui.hasNext()) { DigestURI u = ui.next(); String u0 = u.toNormalform(true, false); - String t = u0.substring(discoverobjectspace.length()); - if (t.indexOf('/') >= 0) continue; - int p = t.indexOf('.'); - if (p >= 0) t = t.substring(0, p); - while ((p = t.indexOf(':')) >= 0) t = t.substring(p + 1); - while ((p = t.indexOf('=')) >= 0) t = t.substring(p + 1); - if (p >= 0) t = t.substring(p + 1); + t = ""; + if (discoverFromPath) { + t = u0.substring(discoverobjectspace.length()); + if (t.indexOf('/') >= 0) continue; + int p = t.indexOf('.'); + if (p >= 0) t = t.substring(0, p); + while ((p = t.indexOf(':')) >= 0) t = t.substring(p + 1); + while ((p = t.indexOf('=')) >= 0) t = t.substring(p + 1); + if (p >= 0) t = t.substring(p + 1); + } + if (discoverFromTitle || discoverFromTitleSplitted) { + URIMetadataRow m = segment.urlMetadata().load(u.hash()); + if (m != null) t = m.dc_title(); + if (t.endsWith(".jpg") || t.endsWith(".gif")) continue; + } + if (discoverFromAuthor) { + URIMetadataRow m = segment.urlMetadata().load(u.hash()); + if (m != null) t = m.dc_creator(); + } + t = t.replaceAll("\"", " ").replaceAll("'", " ").replaceAll(",", " ").replaceAll(" ", " ").trim(); if (t.length() == 0) continue; - table.put(t, new Tagging.SOTuple("", u0)); + if (discoverFromTitleSplitted) { + String[] ts = t.split(" "); + for (String s: ts) { + if (s.length() == 0) continue; + if (s.endsWith(".jpg") || s.endsWith(".gif")) continue; + table.put(s, new Tagging.SOTuple("", u0)); + } + } else if (discoverFromAuthor) { + String[] ts = t.split(";"); // author names are often separated by ';' + for (String s: ts) { + if (s.length() == 0) continue; + int p = s.indexOf(','); // check if there is a reversed method to mention the name + if (p >= 0) s = s.substring(p + 1).trim() + " " + s.substring(0, p).trim(); + table.put(s, new Tagging.SOTuple("", u0)); + } + } else { + table.put(t, new Tagging.SOTuple("", u0)); + } } } Tagging newvoc = new Tagging(discovername, propFile, discoverobjectspace, table); @@ -143,6 +171,14 @@ public class Vocabulary_p { } } + int count = 0; + for (Tagging v: vocs) { + prop.put("vocabularyset_" + count + "_name", v.getName()); + prop.put("vocabularyset_" + count + "_selected", ((vocabularyName != null && vocabularyName.equals(v.getName())) || (discovername != null && discovername.equals(v.getName()))) ? 1 : 0); + count++; + } + prop.put("vocabularyset", count); + prop.put("create", vocabularyName == null ? 1 : 0); if (vocabulary == null) { @@ -164,7 +200,9 @@ public class Vocabulary_p { prop.putHTML("edit_tripleN", vocabulary.getObjectspace() == null ? "none - missing objectspace" : "<" + yacyurl + "> <" + DCTerms.references.getPredicate() + "> \"[reference-link]#[tag]\" ."); int c = 0; boolean dark = false; - for (Map.Entry entry: vocabulary.list().entrySet()) { + Map list = vocabulary.list(); + prop.put("edit_size", list.size()); + for (Map.Entry entry: list.entrySet()) { prop.put("edit_terms_" + c + "_editable", editable ? 1 : 0); prop.put("edit_terms_" + c + "_dark", dark ? 1 : 0); dark = !dark; prop.putHTML("edit_terms_" + c + "_term", entry.getKey()); @@ -175,6 +213,7 @@ public class Vocabulary_p { if (c > 3000) break; } prop.put("edit_terms", c); + } // return rewrite properties diff --git a/htroot/api/yacydoc.java b/htroot/api/yacydoc.java index a49d5d2bd..5fbf31794 100644 --- a/htroot/api/yacydoc.java +++ b/htroot/api/yacydoc.java @@ -25,24 +25,12 @@ // along with this program; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -import java.io.ByteArrayOutputStream; import java.net.MalformedURLException; -import java.util.Iterator; -import java.util.Map; - -import com.hp.hpl.jena.rdf.model.Model; -import com.hp.hpl.jena.rdf.model.RDFNode; -import com.hp.hpl.jena.rdf.model.Resource; import net.yacy.cora.date.ISO8601Formatter; import net.yacy.cora.document.ASCII; -import net.yacy.cora.document.UTF8; import net.yacy.cora.lod.JenaTripleStore; -import net.yacy.cora.lod.vocabulary.Tagging; -import net.yacy.cora.lod.vocabulary.YaCyMetadata; import net.yacy.cora.protocol.RequestHeader; -import net.yacy.cora.sorting.ConcurrentScoreMap; -import net.yacy.cora.sorting.ScoreMap; import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.data.meta.URIMetadataRow; import net.yacy.kelondro.data.word.Word; @@ -139,7 +127,7 @@ public class yacydoc { prop.put("yacy_citations", sb.indexSegments.segment(Segments.Process.PUBLIC).urlCitation().count(entry.hash())); prop.put("yacy_inbound", entry.llocal()); prop.put("yacy_outbound", entry.lother()); - + // extract the submodel from the triplestore prop.putXML("triples", JenaTripleStore.getMetadataByURLHash(entry.hash())); diff --git a/htroot/interaction/Triple.java b/htroot/interaction/Triple.java index 92f92c841..58913d028 100644 --- a/htroot/interaction/Triple.java +++ b/htroot/interaction/Triple.java @@ -32,7 +32,6 @@ package interaction; import net.yacy.cora.lod.JenaTripleStore; import net.yacy.cora.protocol.HeaderFramework; import net.yacy.cora.protocol.RequestHeader; -import net.yacy.interaction.Interaction; import net.yacy.search.Switchboard; import de.anomic.data.UserDB; import de.anomic.server.serverObjects; @@ -99,55 +98,37 @@ public class Triple { String s = ""; String p = ""; String o = ""; + String result = ""; Boolean global = false; - if(post != null){ + if (post != null) { - if(post.containsKey("s")){ - s = post.get("s"); - } - - if(post.containsKey("sp")){ - s = post.get("sp") + "#" + s; - } - - if(post.containsKey("p")){ - p = post.get("p"); - } - - if(post.containsKey("pp")){ - p = post.get("pp") + "#" + p; - } + s = post.get("s", ""); + p = post.get("p", ""); + o = post.get("o", ""); - if(post.containsKey("o")){ - o = post.get("o"); - } + if (post.containsKey("sp")) s = post.get("sp") + "#" + s; + if (post.containsKey("pp")) p = post.get("pp") + "#" + p; global = post.containsKey("global"); + if (post.containsKey("load")) { + if (global) { + result = JenaTripleStore.getObject(s, p); + } else { + result = JenaTripleStore.getPrivateObject(s, p, username); + } + } else { + if (global) { + JenaTripleStore.addTriple(s, p, o); + } else { + JenaTripleStore.addTriple(s, p, o, username); + } + } } - if (post.containsKey("load")) { - - if (global) { - o = JenaTripleStore.getObject(s, p); - } else { - o = JenaTripleStore.getPrivateObject(s, p, username); - } - - - } else { - - if (global) { - JenaTripleStore.addTriple(s, p, o); - } else { - JenaTripleStore.addTriple(s, p, o, username); - } - - } - - prop.put("result", o); + prop.put("result", result); return prop; } diff --git a/htroot/interaction_elements/OverlayInteraction.java b/htroot/interaction_elements/OverlayInteraction.java index 34072e410..237cdf8d3 100644 --- a/htroot/interaction_elements/OverlayInteraction.java +++ b/htroot/interaction_elements/OverlayInteraction.java @@ -1,24 +1,21 @@ package interaction_elements; import net.yacy.cora.protocol.RequestHeader; -import net.yacy.search.Switchboard; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; public class OverlayInteraction { - + public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) { - - final Switchboard sb = (Switchboard) env; - + final serverObjects prop = new serverObjects(); prop.put("enabled", env.getConfigBool("interaction.overlayinteraction.enabled", false) ? "1" : "0"); - + prop.put("enabled_url", post.get("url", "")); - - prop.put("enabled_urlhash", post.get("urlhash", "")); - + + prop.put("enabled_urlhash", post.get("urlhash", "")); + return prop; } } diff --git a/htroot/interaction_elements/Tag_part.java b/htroot/interaction_elements/Tag_part.java index fd4154eb5..36f17ab9b 100644 --- a/htroot/interaction_elements/Tag_part.java +++ b/htroot/interaction_elements/Tag_part.java @@ -4,36 +4,31 @@ package interaction_elements; import java.util.Collection; import net.yacy.cora.lod.vocabulary.Tagging; -import net.yacy.cora.protocol.HeaderFramework; import net.yacy.cora.protocol.RequestHeader; import net.yacy.document.LibraryProvider; -import net.yacy.search.Switchboard; -import de.anomic.data.UserDB; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; public class Tag_part { - + public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) { - - final Switchboard sb = (Switchboard) env; - + final serverObjects prop = new serverObjects(); - + prop.put("hash", post.get("hash", "")); - + String vocabularies = ""; - + Collection vocs = LibraryProvider.autotagging.getVocabularies(); - + for (Tagging v: vocs) { vocabularies += v.getName()+","; } - + vocabularies += "manual"; - + prop.put("vocabularies", vocabularies); - + return prop; } } diff --git a/htroot/yacysearch.java b/htroot/yacysearch.java index 0876c324b..65734d6c8 100644 --- a/htroot/yacysearch.java +++ b/htroot/yacysearch.java @@ -55,7 +55,7 @@ import net.yacy.document.Condenser; import net.yacy.document.Document; import net.yacy.document.LibraryProvider; import net.yacy.document.Parser; -import net.yacy.document.geolocalization.GeoLocation; +import net.yacy.document.geolocation.GeoLocation; import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.data.meta.URIMetadataRow; import net.yacy.kelondro.data.word.Word; @@ -480,7 +480,7 @@ public class yacysearch { if (p > 0) { String k = vocabulary.substring(0, p); String v = vocabulary.substring(p + 1); - metatags.add(LibraryProvider.autotagging.metatag(LibraryProvider.autotagging.prefixChar + k + ":" + v)); + metatags.add(LibraryProvider.autotagging.metatag(k, v)); } } diff --git a/htroot/yacysearch_location.java b/htroot/yacysearch_location.java index 68b3ac080..f1c78180b 100644 --- a/htroot/yacysearch_location.java +++ b/htroot/yacysearch_location.java @@ -28,7 +28,7 @@ import net.yacy.cora.protocol.HeaderFramework; import net.yacy.cora.protocol.RequestHeader; import net.yacy.cora.services.federated.opensearch.SRURSSConnector; import net.yacy.document.LibraryProvider; -import net.yacy.document.geolocalization.GeoLocation; +import net.yacy.document.geolocation.GeoLocation; import net.yacy.search.Switchboard; import net.yacy.search.SwitchboardConstants; import de.anomic.server.serverCore; diff --git a/source/de/anomic/crawler/retrieval/HTTPLoader.java b/source/de/anomic/crawler/retrieval/HTTPLoader.java index 2a349a601..6d8726fbe 100644 --- a/source/de/anomic/crawler/retrieval/HTTPLoader.java +++ b/source/de/anomic/crawler/retrieval/HTTPLoader.java @@ -67,7 +67,7 @@ public final class HTTPLoader { this.log = theLog; // refreshing timeout value - this.socketTimeout = (int) sb.getConfigLong("crawler.clientTimeout", 10000); + this.socketTimeout = (int) sb.getConfigLong("crawler.clientTimeout", 30000); } public Response load(final Request entry, final int maxFileSize, final boolean checkBlacklist) throws IOException { diff --git a/source/de/anomic/server/serverSwitch.java b/source/de/anomic/server/serverSwitch.java index 122d8730a..ac175ca20 100644 --- a/source/de/anomic/server/serverSwitch.java +++ b/source/de/anomic/server/serverSwitch.java @@ -61,7 +61,7 @@ public class serverSwitch // configuration management private final File configFile; private final String configComment; - private final File dataPath; + public final File dataPath; public final File appPath; protected boolean firstInit; protected Log log; @@ -167,7 +167,7 @@ public class serverSwitch /** * get my public IP, either set statically or figure out dynamic - * @return + * @return */ public String myPublicIP() { // if a static IP was configured, we have to return it here ... @@ -191,7 +191,7 @@ public class serverSwitch /** * add whole map of key-value pairs to config - * @param otherConfigs + * @param otherConfigs */ public void setConfig(final Map otherConfigs) { final Iterator> i = otherConfigs.entrySet().iterator(); @@ -228,7 +228,7 @@ public class serverSwitch /** * Gets a configuration parameter from the properties. - * + * * @param key name of the configuration parameter * @param dflt default value which will be used in case parameter can not be found or if it is invalid * @return value if the parameter or default value @@ -246,7 +246,7 @@ public class serverSwitch /** * Gets a configuration parameter from the properties. - * + * * @param key name of the configuration parameter * @param dflt default value which will be used in case parameter can not be found or if it is invalid * @return value if the parameter or default value @@ -261,7 +261,7 @@ public class serverSwitch /** * Gets a configuration parameter from the properties. - * + * * @param key name of the configuration parameter * @param dflt default value which will be used in case parameter can not be found or if it is invalid * @return value if the parameter or default value @@ -276,7 +276,7 @@ public class serverSwitch /** * Gets a configuration parameter from the properties. - * + * * @param key name of the configuration parameter * @param dflt default value which will be used in case parameter can not be found or if it is invalid * @return value if the parameter or default value @@ -291,7 +291,7 @@ public class serverSwitch /** * Gets a configuration parameter from the properties. - * + * * @param key name of the configuration parameter * @param dflt default value which will be used in case parameter can not be found or if it is invalid * @return value if the parameter or default value @@ -302,7 +302,7 @@ public class serverSwitch /** * Create a File instance for a configuration setting specifying a path. - * + * * @param key config key * @param dflt default path value, that is used when there is no value key in the * configuration. @@ -311,19 +311,19 @@ public class serverSwitch * the relative path setting. */ public File getDataPath(final String key, final String dflt) { - return getFileByPath(key, dflt, dataPath); + return getFileByPath(key, dflt, this.dataPath); } /** * return file at path from config entry "key", or fallback to default dflt * @param key * @param dflt - * @return + * @return */ public File getAppPath(final String key, final String dflt) { - return getFileByPath(key, dflt, appPath); + return getFileByPath(key, dflt, this.appPath); } - + private File getFileByPath(String key, String dflt, File prefix) { final String path = getConfig(key, dflt).replace('\\', '/'); final File f = new File(path); @@ -345,7 +345,7 @@ public class serverSwitch /** * Gets configuration parameters which have been removed during initialization. - * + * * @return contains parameter name as key and parameter value as value */ public ConcurrentMap getRemoved() { @@ -613,7 +613,7 @@ public class serverSwitch /** * Retrieve text data (e. g. config file) from file file may be an url or a filename with path relative to * rootPath parameter - * + * * @param file url or filename * @param rootPath searchpath for file * @param file file to use when remote fetching fails (null if unused) @@ -665,7 +665,7 @@ public class serverSwitch /** * Generates a random password. - * + * * @return random password which is 20 characters long. */ public String genRandomPassword() { @@ -674,7 +674,7 @@ public class serverSwitch /** * Generates a random password of a given length. - * + * * @param length length o password * @return password of given length */ diff --git a/source/net/yacy/cora/lod/JenaTripleStore.java b/source/net/yacy/cora/lod/JenaTripleStore.java index a1e1405be..40f604f74 100644 --- a/source/net/yacy/cora/lod/JenaTripleStore.java +++ b/source/net/yacy/cora/lod/JenaTripleStore.java @@ -2,12 +2,14 @@ package net.yacy.cora.lod; +import java.io.BufferedOutputStream; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; +import java.io.OutputStream; import java.util.Iterator; import java.util.Map.Entry; import java.util.concurrent.ConcurrentHashMap; @@ -38,7 +40,7 @@ public class JenaTripleStore { public static Model model = ModelFactory.createDefaultModel(); static { init(model); - + } private final static void init(Model model) { model.setNsPrefix(YaCyMetadata.PREFIX, YaCyMetadata.NAMESPACE); @@ -48,7 +50,7 @@ public class JenaTripleStore { model.setNsPrefix("pnd", "http://dbpedia.org/ontology/individualisedPnd"); model.setNsPrefix(DCTerms.PREFIX, DCTerms.NAMESPACE); } - + public static long size() { return model.size(); } @@ -81,7 +83,7 @@ public class JenaTripleStore { InputStream is = FileManager.get().open(fileNameOrUri); LoadNTriples(is); } - + public static void LoadNTriples(InputStream is) throws IOException { Model tmp = ModelFactory.createDefaultModel(); if (is != null) { @@ -105,20 +107,33 @@ public class JenaTripleStore { model = model.union(tmp); } } - - public static void saveFile(String filename) { + + public static void saveFile(String filename) { saveFile(filename, model); } public static void saveFile(String filename, Model model) { + File f = new File(filename); + File ftmp = new File(filename + "." + System.currentTimeMillis()); + if (model.size() == 0 && !f.exists()) { + // we don't store zero-size models if they did not exist before + Log.logInfo("TRIPLESTORE", "NOT saving triplestore with " + model.size() + " triples to " + filename); + return; + } Log.logInfo("TRIPLESTORE", "Saving triplestore with " + model.size() + " triples to " + filename); - FileOutputStream fout; + OutputStream fout; try { - fout = new FileOutputStream(filename); + fout = new BufferedOutputStream(new FileOutputStream(ftmp)); model.write(fout); + fout.close(); + // if something went wrong until here, the original file is not overwritten + // since we are happy here, we can remove the old file and replace it with the new one + f.delete(); + if (!f.exists()) { + ftmp.renameTo(f); + } Log.logInfo("TRIPLESTORE", "Saved triplestore with " + model.size() + " triples to " + filename); } catch (Exception e) { - // TODO Auto-generated catch block Log.logWarning("TRIPLESTORE", "Saving to " + filename+" failed"); } } @@ -145,13 +160,13 @@ public class JenaTripleStore { Property pr = model.getProperty(predicate); JenaTripleStore.model.removeAll(r, pr, (Resource) null); } - + public static void addTriple(String subject, String predicate, String object, String username) { if (privatestorage != null && privatestorage.containsKey(username)) { addTriple (subject, predicate, object, privatestorage.get(username)); } } - + public static void addTriple(String subject, String predicate, String object) { addTriple (subject, predicate, object, model); } @@ -162,43 +177,43 @@ public class JenaTripleStore { r.addProperty(pr, object); Log.logInfo("TRIPLESTORE", "ADD " + subject + " - " + predicate + " - " + object); } - + public static String getObject(final String subject, final String predicate) { - Log.logInfo("TRIPLESTORE", "GET " + subject + " - " + predicate + " ... "); - Iterator ni = JenaTripleStore.getObjects(subject, predicate); - if (!ni.hasNext()) return ""; - return ni.next().toString(); + String object = ""; + if (ni.hasNext()) object = ni.next().toString(); + Log.logInfo("TRIPLESTORE", "GET " + subject + " - " + predicate + " - " + object); + return object; } - public static Iterator getObjects(final String subject, final String predicate) { + public static Iterator getObjects(final String subject, final String predicate) { final Resource r = subject == null ? null : JenaTripleStore.getResource(subject); return getObjects(r, predicate); } - - public static String getPrivateObject(final String subject, final String predicate, final String username) { - Log.logInfo("TRIPLESTORE", "GET " + subject + " - " + predicate + " ... ("+username+")"); - + + public static String getPrivateObject(final String subject, final String predicate, final String username) { Iterator ni = JenaTripleStore.getPrivateObjects(subject, predicate, username); - if (!ni.hasNext()) return ""; - return ni.next().toString(); + String object = ""; + if (ni.hasNext()) object = ni.next().toString(); + Log.logInfo("TRIPLESTORE", "GET (" + username + ") " + subject + " - " + predicate + " - " + object); + return object; } - + private static Iterator getPrivateObjects(final String subject, final String predicate, final String username) { if (privatestorage != null && privatestorage.containsKey(username)) { return getObjects(privatestorage.get(username).getResource(subject), predicate, privatestorage.get(username)); } return null; } - + public static Iterator getObjects(final Resource r, final String predicate) { return getObjects(r, predicate, model); } private static Iterator getObjects(final Resource r, final String predicate, final Model model) { final Property pr = model.getProperty(predicate); - final StmtIterator iter = model.listStatements(r, pr, (Resource) null); - + final StmtIterator iter = model.listStatements(r, pr, (Resource) null); + return new Iterator() { @Override public boolean hasNext() { @@ -214,15 +229,15 @@ public class JenaTripleStore { } }; } - + public static Iterator getSubjects(final String predicate) { return getSubjects(predicate, model); } private static Iterator getSubjects(final String predicate, final Model model) { final Property pr = model.getProperty(predicate); - final ResIterator iter = model.listSubjectsWithProperty(pr); - + final ResIterator iter = model.listSubjectsWithProperty(pr); + return new Iterator() { @Override public boolean hasNext() { @@ -246,7 +261,7 @@ public class JenaTripleStore { m.setNsPrefix(DCTerms.PREFIX, DCTerms.NAMESPACE); return m; } - + public static String getMetadataByURLHash(byte[] urlhash) { String subject = YaCyMetadata.hashURI(urlhash); Model model = JenaTripleStore.getSubmodelBySubject(subject); @@ -254,83 +269,66 @@ public class JenaTripleStore { model.write(baos, "RDF/XML-ABBREV"); return UTF8.String(baos.toByteArray()); } - + public static void initPrivateStores() { - Switchboard switchboard = Switchboard.getSwitchboard(); - Log.logInfo("TRIPLESTORE", "Init private stores"); - if (privatestorage == null) privatestorage = new ConcurrentHashMap(); - if (privatestorage != null) privatestorage.clear(); try { - Iterator it = switchboard.userDB.iterator(true); - while (it.hasNext()) { de.anomic.data.UserDB.Entry e = it.next(); String username = e.getUserName(); - File triplestore = new File(switchboard.getConfig("triplestore", new File(switchboard.getDataPath(), "DATA/TRIPLESTORE").getAbsolutePath())); - File currentuserfile = new File(triplestore, "private_store_"+username+".rdf"); - Log.logInfo("TRIPLESTORE", "Init " + username + " from "+currentuserfile.getAbsolutePath()); - Model tmp = ModelFactory.createDefaultModel(); - - init (tmp); - + init (tmp); + if (currentuserfile.exists()) { - - Log.logInfo("TRIPLESTORE", "Loading from " + currentuserfile.getAbsolutePath()); InputStream is = FileManager.get().open(currentuserfile.getAbsolutePath()); if (is != null) { // read the RDF/XML file tmp.read(is, null); Log.logInfo("TRIPLESTORE", "loaded " + tmp.size() + " triples from " + currentuserfile.getAbsolutePath()); - - } else { throw new IOException("cannot read " + currentuserfile.getAbsolutePath()); } } - + if (tmp != null) { - privatestorage.put(username, tmp); - } - - } - } - catch (Exception anyex) { - + } catch (Exception anyex) { Log.logException(anyex); - } - } - public static void savePrivateStores(Switchboard switchboard) { - + public static void savePrivateStores() { + Switchboard switchboard = Switchboard.getSwitchboard(); Log.logInfo("TRIPLESTORE", "Saving user triplestores"); - if (privatestorage == null) return; - for (Entry s : privatestorage.entrySet()) { - File triplestore = new File(switchboard.getConfig("triplestore", new File(switchboard.getDataPath(), "DATA/TRIPLESTORE").getAbsolutePath())); - File currentuserfile = new File(triplestore, "private_store_"+s.getKey()+".rdf"); - saveFile (currentuserfile.getAbsolutePath(), s.getValue()); - } } + private static long lastModelSizeStored = -1; + + public static void saveAll() { + Switchboard sb = Switchboard.getSwitchboard(); + File triplestore = new File(sb.getConfig("triplestore", new File(sb.dataPath, "DATA/TRIPLESTORE").getAbsolutePath())); + if (model.size() != lastModelSizeStored){ + JenaTripleStore.saveFile(new File(triplestore, "local.rdf").getAbsolutePath()); + lastModelSizeStored = model.size(); + } + JenaTripleStore.savePrivateStores(); + } + } diff --git a/source/net/yacy/cora/lod/vocabulary/Tagging.java b/source/net/yacy/cora/lod/vocabulary/Tagging.java index dac5a628e..57604d0aa 100644 --- a/source/net/yacy/cora/lod/vocabulary/Tagging.java +++ b/source/net/yacy/cora/lod/vocabulary/Tagging.java @@ -36,7 +36,7 @@ import java.util.regex.Pattern; import net.yacy.cora.storage.Files; import net.yacy.document.WordCache.Dictionary; -import net.yacy.document.geolocalization.Locations; +import net.yacy.document.geolocation.Locations; public class Tagging { @@ -95,7 +95,7 @@ public class Tagging { vocloop: for (Map.Entry e: table.entrySet()) { if (e.getValue().getSynonymsCSV() == null || e.getValue().getSynonymsCSV().length() == 0) { term = normalizeKey(e.getKey()); - v = normalizeWord(e.getKey()); + v = normalizeTerm(e.getKey()); this.synonym2term.put(v, term); this.term2synonym.put(term, v); if (e.getValue().getObjectlink() != null && e.getValue().getObjectlink().length() > 0) this.term2objectlink.put(term, e.getValue().getObjectlink()); @@ -108,13 +108,13 @@ public class Tagging { tagloop: for (String synonym: tags) { if (synonym.length() == 0) continue tagloop; synonyms.add(synonym); - synonym = normalizeWord(synonym); + synonym = normalizeTerm(synonym); if (synonym.length() == 0) continue tagloop; synonyms.add(synonym); this.synonym2term.put(synonym, term); this.term2synonym.put(term, synonym); } - String synonym = normalizeWord(term); + String synonym = normalizeTerm(term); this.synonym2term.put(synonym, term); this.term2synonym.put(term, synonym); if (e.getValue().getObjectlink() != null && e.getValue().getObjectlink().length() > 0) this.term2objectlink.put(term, e.getValue().getObjectlink()); @@ -169,9 +169,8 @@ public class Tagging { } - - public void updateTerm(String term, String[] synonyms) { - + public int size() { + return this.term2objectlink.size(); } private File tmpFile() { @@ -402,7 +401,7 @@ public class Tagging { } if (pl[1] == null) { term = normalizeKey(pl[0]); - v = normalizeWord(pl[0]); + v = normalizeTerm(pl[0]); this.synonym2term.put(v, term); this.term2synonym.put(term, v); if (pl[2] != null && pl[2].length() > 0) this.term2objectlink.put(term, pl[2]); @@ -416,13 +415,13 @@ public class Tagging { tagloop: for (String synonym: tags) { if (synonym.length() == 0) continue tagloop; synonyms.add(synonym); - synonym = normalizeWord(synonym); + synonym = normalizeTerm(synonym); if (synonym.length() == 0) continue tagloop; synonyms.add(synonym); this.synonym2term.put(synonym, term); this.term2synonym.put(term, synonym); } - String synonym = normalizeWord(term); + String synonym = normalizeTerm(term); this.synonym2term.put(synonym, term); this.term2synonym.put(term, synonym); if (pl[2] != null && pl[2].length() > 0) this.term2objectlink.put(term, pl[2]); @@ -439,8 +438,9 @@ public class Tagging { this(name); Set locNames = localization.locationNames(); for (String loc: locNames) { - this.synonym2term.put(loc.toLowerCase(), loc); - this.term2synonym.put(loc, loc.toLowerCase()); + String syn = normalizeTerm(loc); + this.synonym2term.put(syn, loc); + this.term2synonym.put(loc, syn); } } @@ -493,14 +493,14 @@ public class Tagging { return this.propFile; } - public Metatag getMetatagFromSynonym(char prefix, final String word) { + public Metatag getMetatagFromSynonym(final String word) { String printname = this.synonym2term.get(word); if (printname == null) return null; - return new Metatag(prefix, printname); + return new Metatag(printname); } - public Metatag getMetatagFromTerm(char prefix, final String word) { - return new Metatag(prefix, word); + public Metatag getMetatagFromTerm(final String word) { + return new Metatag(word); } public Set getSynonyms(String term) { @@ -532,20 +532,23 @@ public class Tagging { private final static Pattern PATTERN_UE = Pattern.compile("\u00FC"); private final static Pattern PATTERN_SZ = Pattern.compile("\u00DF"); - public static final String normalizeWord(String word) { - word = word.trim().toLowerCase(); - word = PATTERN_AE.matcher(word).replaceAll("ae"); - word = PATTERN_OE.matcher(word).replaceAll("oe"); - word = PATTERN_UE.matcher(word).replaceAll("ue"); - word = PATTERN_SZ.matcher(word).replaceAll("ss"); - return word; + public static final String normalizeTerm(String term) { + term = term.trim().toLowerCase(); + term = PATTERN_AE.matcher(term).replaceAll("ae"); + term = PATTERN_OE.matcher(term).replaceAll("oe"); + term = PATTERN_UE.matcher(term).replaceAll("ue"); + term = PATTERN_SZ.matcher(term).replaceAll("ss"); + // remove comma + int p; + while ((p = term.indexOf(',')) >= 0) { + term = term.substring(p + 1).trim() + " " + term.substring(0, p); + } + return term; } public class Metatag { private final String object; - private final char prefix; - public Metatag(char prefix, String object) { - this.prefix = prefix; + public Metatag(String object) { this.object = object; } @@ -563,7 +566,7 @@ public class Tagging { @Override public String toString() { - return this.prefix + Tagging.this.navigatorName + ":" + encodePrintname(this.object); + return Tagging.this.navigatorName + ":" + encodePrintname(this.object); } @Override @@ -589,12 +592,12 @@ public class Tagging { return PATTERN_UL.matcher(maskname).replaceAll(" "); } - public static String cleanTagFromAutotagging(char prefix, final String tagString) { + public static String cleanTagFromAutotagging(final String tagString) { if (tagString == null || tagString.length() == 0) return ""; String[] tags = PATTERN_SP.split(tagString); StringBuilder sb = new StringBuilder(tagString.length()); for (String tag : tags) { - if (tag.length() > 0 && tag.charAt(0) != prefix) { + if (tag.length() > 0) { sb.append(tag).append(' '); } } diff --git a/source/net/yacy/document/Autotagging.java b/source/net/yacy/document/Autotagging.java index 05e4dde32..5e01093bc 100644 --- a/source/net/yacy/document/Autotagging.java +++ b/source/net/yacy/document/Autotagging.java @@ -20,19 +20,15 @@ package net.yacy.document; -import java.io.ByteArrayInputStream; import java.io.File; import java.io.IOException; import java.util.Collection; -import java.util.HashSet; import java.util.Map; import java.util.Set; import java.util.concurrent.ConcurrentHashMap; -import net.yacy.cora.document.UTF8; import net.yacy.cora.lod.vocabulary.Tagging; -import net.yacy.document.WordCache.Dictionary; -import net.yacy.document.geolocalization.Locations; +import net.yacy.document.geolocation.Locations; import net.yacy.kelondro.logging.Log; /** @@ -44,7 +40,6 @@ public class Autotagging { private final static Object PRESENT = new Object(); - public final char prefixChar; private final File autotaggingPath; private final Map vocabularies; // mapping from vocabulary name to the tagging vocabulary private final Map allTags; @@ -58,10 +53,9 @@ public class Autotagging { * properties without values are allowed (the value is then set to the key) * also the value can be used as a tag */ - public Autotagging(final File autotaggingPath, char prefixChar) { + public Autotagging(final File autotaggingPath) { this.vocabularies = new ConcurrentHashMap(); this.autotaggingPath = autotaggingPath; - this.prefixChar = prefixChar; this.allTags = new ConcurrentHashMap(); if (this.autotaggingPath == null || !this.autotaggingPath.exists()) { return; @@ -114,16 +108,6 @@ public class Autotagging { } } - public void addDictionaries(Map dictionaries) { - for (Map.Entry entry: dictionaries.entrySet()) { - Tagging voc = new Tagging(entry.getKey(), entry.getValue()); - this.vocabularies.put(entry.getKey(), voc); - for (String t: voc.tags()) { - this.allTags.put(t, PRESENT); - } - } - } - public void addPlaces(Locations locations) { if (locations.size() == 0) return; // otherwise we get a navigation that does nothing Tagging voc = new Tagging("Locations", locations); @@ -137,28 +121,10 @@ public class Autotagging { } } - /** - * produce a set of tags for a given text. - * The set contains the names of the tags with a prefix character at the front - * @param text - * @return - */ - public Set getPrintTagsFromText(String text) { - Set as = new HashSet(); - if (this.vocabularies.isEmpty()) return as; - final WordTokenizer tokens = new WordTokenizer(new ByteArrayInputStream(UTF8.getBytes(text)), LibraryProvider.dymLib); - String tag; - while (tokens.hasMoreElements()) { - tag = getTagFromTerm(tokens.nextElement().toString()).toString(); - if (tag != null) as.add(tag); - } - return as; - } - public int size() { return this.vocabularies.size(); } - + /** * maximum number of compound tags (number of words in one tag) * @return @@ -171,41 +137,17 @@ public class Autotagging { public Tagging.Metatag getTagFromTerm(String term) { if (this.vocabularies.isEmpty()) return null; Tagging.Metatag tag; - term = Tagging.normalizeWord(term); + term = Tagging.normalizeTerm(term); for (Map.Entry v: this.vocabularies.entrySet()) { - tag = v.getValue().getMetatagFromSynonym(this.prefixChar, term); + tag = v.getValue().getMetatagFromSynonym(term); if (tag != null) return tag; } return null; } - public static boolean metatagAppearIn(final Tagging.Metatag metatag, final String[] tags) { - String tag = metatag.toString(); - for (String s: tags) { - if (tag.equals(s)) return true; - } - return false; - } - - public Tagging.Metatag metatag(String metatag) { - int p = metatag.indexOf(':'); - if (p < 0) throw new RuntimeException("bad metatag: metatag = " + metatag); - String vocName = metatag.substring(1, p); + public Tagging.Metatag metatag(String vocName, String term) { Tagging tagging = this.vocabularies.get(vocName); - return tagging.getMetatagFromTerm(this.prefixChar, Tagging.decodeMaskname(metatag.substring(p + 1))); - } - - public String cleanTagFromAutotagging(String tagString) { - return Tagging.cleanTagFromAutotagging(this.prefixChar, tagString); - } - - public static void main(String[] args) { - Autotagging a = new Autotagging(new File("DATA/DICTIONARIES/" + LibraryProvider.path_to_autotagging_dictionaries), '$'); - for (Map.Entry entry: a.vocabularies.entrySet()) { - System.out.println(entry); - } - Set tags = a.getPrintTagsFromText("In die Tueren und Fluchttueren muessen noch Schloesser eingebaut werden"); - System.out.println(tags); + return tagging.getMetatagFromTerm(Tagging.decodeMaskname(term)); } } diff --git a/source/net/yacy/document/Document.java b/source/net/yacy/document/Document.java index bf2210bb0..9bfecb957 100644 --- a/source/net/yacy/document/Document.java +++ b/source/net/yacy/document/Document.java @@ -227,10 +227,6 @@ dc_rights String objectspace = vocabulary.getObjectspace(); StringBuilder sb = new StringBuilder(e.getValue().size() * 20); for (Tagging.Metatag s: e.getValue()) { - String t = s.toString(); - if (!this.keywords.contains(t)) { - this.keywords.add(t); - } sb.append(',').append(s.getObject()); String objectlink = vocabulary.getObjectlink(s.getObject()); if ((objectspace != null && objectspace.length() > 0) || (objectlink != null && objectlink.length() > 0)) { diff --git a/source/net/yacy/document/LibraryProvider.java b/source/net/yacy/document/LibraryProvider.java index c0f4b940e..2c0acc83a 100644 --- a/source/net/yacy/document/LibraryProvider.java +++ b/source/net/yacy/document/LibraryProvider.java @@ -47,9 +47,9 @@ import net.yacy.cora.lod.JenaTripleStore; import net.yacy.cora.lod.vocabulary.Tagging; import net.yacy.cora.lod.vocabulary.Tagging.SOTuple; import net.yacy.cora.storage.Files; -import net.yacy.document.geolocalization.GeonamesLocation; -import net.yacy.document.geolocalization.OpenGeoDBLocation; -import net.yacy.document.geolocalization.OverarchingLocation; +import net.yacy.document.geolocation.GeonamesLocation; +import net.yacy.document.geolocation.OpenGeoDBLocation; +import net.yacy.document.geolocation.OverarchingLocation; import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.util.FileUtils; @@ -57,7 +57,6 @@ import com.hp.hpl.jena.rdf.model.Resource; public class LibraryProvider { - public static final char tagPrefix = '$'; public static final String path_to_source_dictionaries = "source"; public static final String path_to_did_you_mean_dictionaries = "didyoumean"; public static final String path_to_autotagging_dictionaries = "autotagging"; @@ -116,7 +115,7 @@ public class LibraryProvider { dictRoot = rootPath; // initialize libraries - initAutotagging(tagPrefix); + initAutotagging(); activateDeReWo(); initDidYouMean(); integrateOpenGeoDB(); @@ -136,11 +135,11 @@ public class LibraryProvider { if ( geo0.exists() ) { geo0.renameTo(Dictionary.GEODB0.fileDisabled()); } - geoLoc.activateLocalization(Dictionary.GEODB1.nickname, new OpenGeoDBLocation(geo1, false)); + geoLoc.activateLocation(Dictionary.GEODB1.nickname, new OpenGeoDBLocation(geo1, dymLib)); return; } if ( geo0.exists() ) { - geoLoc.activateLocalization(Dictionary.GEODB0.nickname, new OpenGeoDBLocation(geo0, false)); + geoLoc.activateLocation(Dictionary.GEODB0.nickname, new OpenGeoDBLocation(geo0, dymLib)); return; } } @@ -148,7 +147,7 @@ public class LibraryProvider { public static void integrateGeonames() { final File geon = Dictionary.GEON0.file(); if ( geon.exists() ) { - geoLoc.activateLocalization(Dictionary.GEON0.nickname, new GeonamesLocation(geon)); + geoLoc.activateLocation(Dictionary.GEON0.nickname, new GeonamesLocation(geon, dymLib)); return; } } @@ -161,12 +160,12 @@ public class LibraryProvider { dymLib = new WordCache(dymDict); } - public static void initAutotagging(char prefix) { + public static void initAutotagging() { final File autotaggingPath = new File(dictRoot, path_to_autotagging_dictionaries); if ( !autotaggingPath.exists() ) { autotaggingPath.mkdirs(); } - autotagging = new Autotagging(autotaggingPath, prefix); + autotagging = new Autotagging(autotaggingPath); } public static void activateDeReWo() { @@ -220,7 +219,7 @@ public class LibraryProvider { Resource resource = i.next(); String subject = resource.toString(); - // prepare a propert term from the subject uri + // prepare a proper term from the subject uri int p = subject.lastIndexOf('/'); if (p < 0) continue; String term = subject.substring(p + 1); @@ -229,9 +228,10 @@ public class LibraryProvider { if (p >= 0) term = term.substring(0, p); term = term.replaceAll("_", " ").trim(); if (term.length() == 0) continue; + if (term.indexOf(' ') < 0) continue; // accept only names that have at least two parts // store the term into the vocabulary map - map.put(term, new SOTuple("", subject)); + map.put(term, new SOTuple(Tagging.normalizeTerm(term), subject)); } try { Log.logInfo("LibraryProvider", "adding vocabulary to autotagging"); diff --git a/source/net/yacy/document/geolocalization/GeoLocation.java b/source/net/yacy/document/geolocation/GeoLocation.java similarity index 98% rename from source/net/yacy/document/geolocalization/GeoLocation.java rename to source/net/yacy/document/geolocation/GeoLocation.java index c9b5e21f4..17ead99eb 100644 --- a/source/net/yacy/document/geolocalization/GeoLocation.java +++ b/source/net/yacy/document/geolocation/GeoLocation.java @@ -20,7 +20,7 @@ * If not, see . */ -package net.yacy.document.geolocalization; +package net.yacy.document.geolocation; import java.util.Comparator; diff --git a/source/net/yacy/document/geolocalization/GeoPoint.java b/source/net/yacy/document/geolocation/GeoPoint.java similarity index 98% rename from source/net/yacy/document/geolocalization/GeoPoint.java rename to source/net/yacy/document/geolocation/GeoPoint.java index 51813f8a3..f396beaea 100644 --- a/source/net/yacy/document/geolocalization/GeoPoint.java +++ b/source/net/yacy/document/geolocation/GeoPoint.java @@ -20,7 +20,7 @@ * If not, see . */ -package net.yacy.document.geolocalization; +package net.yacy.document.geolocation; /** * Geolocation storage may vary using different data structures for the points. diff --git a/source/net/yacy/document/geolocalization/GeonamesLocation.java b/source/net/yacy/document/geolocation/GeonamesLocation.java similarity index 96% rename from source/net/yacy/document/geolocalization/GeonamesLocation.java rename to source/net/yacy/document/geolocation/GeonamesLocation.java index 4bf284ccd..f3f2d6ab2 100644 --- a/source/net/yacy/document/geolocalization/GeonamesLocation.java +++ b/source/net/yacy/document/geolocation/GeonamesLocation.java @@ -20,7 +20,7 @@ * If not, see . */ -package net.yacy.document.geolocalization; +package net.yacy.document.geolocation; import java.io.BufferedReader; import java.io.File; @@ -40,6 +40,7 @@ import java.util.zip.ZipEntry; import java.util.zip.ZipFile; import net.yacy.document.StringBuilderComparator; +import net.yacy.document.WordCache; import net.yacy.kelondro.logging.Log; public class GeonamesLocation implements Locations @@ -73,7 +74,7 @@ public class GeonamesLocation implements Locations private final TreeMap> name2ids; private final File file; - public GeonamesLocation(final File file) { + public GeonamesLocation(final File file, WordCache dymLib) { // this is a processing of the cities1000.zip file from http://download.geonames.org/export/dump/ this.file = file; @@ -117,6 +118,8 @@ public class GeonamesLocation implements Locations c.setPopulation((int) Long.parseLong(fields[14])); this.id2loc.put(id, c); for ( final StringBuilder name : locnames ) { + if (dymLib != null && dymLib.contains(name)) continue; + if (name.length() < OverarchingLocation.MINIMUM_NAME_LENGTH) continue; List locs = this.name2ids.get(name); if ( locs == null ) { locs = new ArrayList(1); diff --git a/source/net/yacy/document/geolocalization/IntegerGeoPoint.java b/source/net/yacy/document/geolocation/IntegerGeoPoint.java similarity index 98% rename from source/net/yacy/document/geolocalization/IntegerGeoPoint.java rename to source/net/yacy/document/geolocation/IntegerGeoPoint.java index 1e21f0cdb..34151d142 100644 --- a/source/net/yacy/document/geolocalization/IntegerGeoPoint.java +++ b/source/net/yacy/document/geolocation/IntegerGeoPoint.java @@ -1,4 +1,4 @@ -package net.yacy.document.geolocalization; +package net.yacy.document.geolocation; /** * GeoPoint implementation with Integer accuracy diff --git a/source/net/yacy/document/geolocalization/Locations.java b/source/net/yacy/document/geolocation/Locations.java similarity index 98% rename from source/net/yacy/document/geolocalization/Locations.java rename to source/net/yacy/document/geolocation/Locations.java index 5652ab02b..2a56a4ea8 100644 --- a/source/net/yacy/document/geolocalization/Locations.java +++ b/source/net/yacy/document/geolocation/Locations.java @@ -21,7 +21,7 @@ */ -package net.yacy.document.geolocalization; +package net.yacy.document.geolocation; import java.util.Set; import java.util.TreeSet; diff --git a/source/net/yacy/document/geolocalization/OpenGeoDBLocation.java b/source/net/yacy/document/geolocation/OpenGeoDBLocation.java similarity index 90% rename from source/net/yacy/document/geolocalization/OpenGeoDBLocation.java rename to source/net/yacy/document/geolocation/OpenGeoDBLocation.java index 949bc0b1a..0c09b8ab4 100644 --- a/source/net/yacy/document/geolocalization/OpenGeoDBLocation.java +++ b/source/net/yacy/document/geolocation/OpenGeoDBLocation.java @@ -20,7 +20,7 @@ * If not, see . */ -package net.yacy.document.geolocalization; +package net.yacy.document.geolocation; import java.io.BufferedReader; import java.io.File; @@ -40,6 +40,7 @@ import java.util.TreeSet; import java.util.zip.GZIPInputStream; import net.yacy.document.StringBuilderComparator; +import net.yacy.document.WordCache; import net.yacy.kelondro.logging.Log; /** @@ -59,7 +60,7 @@ public class OpenGeoDBLocation implements Locations private final Map zip2id; private final File file; - public OpenGeoDBLocation(final File file, final boolean lonlat) { + public OpenGeoDBLocation(final File file, WordCache dymLib) { this.file = file; this.id2loc = new HashMap(); @@ -99,21 +100,22 @@ public class OpenGeoDBLocation implements Locations line = line.substring(18 + 7); v = line.split(","); v = line.split(","); - if ( lonlat ) { - lon = Float.parseFloat(v[2]); - lat = Float.parseFloat(v[3]); - } else { - lat = Float.parseFloat(v[2]); - lon = Float.parseFloat(v[3]); - } + lat = Float.parseFloat(v[2]); + lon = Float.parseFloat(v[3]); this.id2loc.put(Integer.parseInt(v[0]), new GeoLocation(lat, lon)); } if ( line.startsWith("geodb_textdata ") ) { line = line.substring(15 + 7); v = line.split(","); if ( v[1].equals("500100000") ) { // Ortsname + if (v.length > 10) { + // a ',' is probably inside the location name + v[2] = v[2] + "," + v[3]; + } id = Integer.parseInt(v[0]); h = removeQuotes(v[2]); + if (h.length() < OverarchingLocation.MINIMUM_NAME_LENGTH) continue; + if (dymLib != null && dymLib.contains(new StringBuilder(h))) continue; List l = this.name2ids.get(new StringBuilder(h)); if ( l == null ) { l = new ArrayList(1); @@ -136,8 +138,8 @@ public class OpenGeoDBLocation implements Locations } else if ( v[1].equals("400300000") ) { // Ortstyp id = Integer.parseInt(v[0]); h = removeQuotes(v[2]); - final Integer hc = h.hashCode(); /* + final Integer hc = h.hashCode(); final byte[] tb = this.locTypeHash2locType.get(hc); if ( tb == null ) { this.locTypeHash2locType.put(hc, UTF8.getBytes(h)); @@ -173,13 +175,12 @@ public class OpenGeoDBLocation implements Locations } private static final String removeQuotes(String s) { - if ( s.length() > 0 && s.charAt(0) != '\'' ) { - return s; + if ( s.length() > 0 && s.charAt(0) == '\'' ) { + s = s.substring(1); } - if ( s.charAt(s.length() - 1) != '\'' ) { - return s; + if ( s.charAt(s.length() - 1) == '\'' ) { + s = s.substring(0, s.length() - 1); } - s = s.substring(1, s.length() - 1); return s; } diff --git a/source/net/yacy/document/geolocalization/OverarchingLocation.java b/source/net/yacy/document/geolocation/OverarchingLocation.java similarity index 96% rename from source/net/yacy/document/geolocalization/OverarchingLocation.java rename to source/net/yacy/document/geolocation/OverarchingLocation.java index 1367b332e..687b3cefe 100644 --- a/source/net/yacy/document/geolocalization/OverarchingLocation.java +++ b/source/net/yacy/document/geolocation/OverarchingLocation.java @@ -20,7 +20,7 @@ * If not, see . */ -package net.yacy.document.geolocalization; +package net.yacy.document.geolocation; import java.util.HashMap; import java.util.HashSet; @@ -30,6 +30,7 @@ import java.util.TreeSet; public class OverarchingLocation implements Locations { + public static int MINIMUM_NAME_LENGTH = 4; private final Map services; /** @@ -44,7 +45,7 @@ public class OverarchingLocation implements Locations { * @param nickname the nickname of the service * @param service the service */ - public void activateLocalization(final String nickname, final Locations service) { + public void activateLocation(final String nickname, final Locations service) { this.services.put(nickname, service); } diff --git a/source/net/yacy/document/importer/OAIPMHLoader.java b/source/net/yacy/document/importer/OAIPMHLoader.java index f23b35149..af7f9db86 100644 --- a/source/net/yacy/document/importer/OAIPMHLoader.java +++ b/source/net/yacy/document/importer/OAIPMHLoader.java @@ -49,7 +49,20 @@ public class OAIPMHLoader { // load the file from the net Log.logInfo("OAIPMHLoader", "loading record from " + source.toNormalform(true, false)); - final Response response = loader.load(loader.request(source, false, true), CacheStrategy.NOCACHE, Integer.MAX_VALUE, true); + Response response = null; + IOException ee = null; + for (int i = 0; i < 5; i++) { + // make some retries if first attempt fails + try { + response = loader.load(loader.request(source, false, true), CacheStrategy.NOCACHE, Integer.MAX_VALUE, true); + break; + } catch (IOException e) { + Log.logWarning("OAIPMHLoader", "loading failed at attempt " + (i + 1) + ": " + source.toNormalform(true, false)); + ee = e; + continue; + } + } + if (response == null) throw ee; final byte[] b = response.getContent(); this.resumptionToken = new ResumptionToken(source, b); //System.out.println("*** ResumptionToken = " + this.resumptionToken.toString()); @@ -59,19 +72,6 @@ public class OAIPMHLoader { // transaction-safe writing FileUtils.copy(b, f0); f0.renameTo(f1); - - /* - SurrogateReader sr = new SurrogateReader(new ByteArrayInputStream(b), 100); - Thread srt = new Thread(sr); - srt.start(); - DCEntry dce; - while ((dce = sr.take()) != DCEntry.poison) { - System.out.println(dce.toString()); - } - try { - srt.join(); - } catch (InterruptedException e) {} - */ } public ResumptionToken getResumptionToken() { @@ -246,4 +246,4 @@ http://nbn-resolving.de/urn:nbn:de:bsz:960-opus-1860 -*/ \ No newline at end of file +*/ diff --git a/source/net/yacy/document/parser/html/ContentScraper.java b/source/net/yacy/document/parser/html/ContentScraper.java index fe2f8c559..386722b59 100644 --- a/source/net/yacy/document/parser/html/ContentScraper.java +++ b/source/net/yacy/document/parser/html/ContentScraper.java @@ -99,7 +99,8 @@ public class ContentScraper extends AbstractScraper implements Scraper { strong(TagType.pair), i(TagType.pair), li(TagType.pair), - script(TagType.pair); + script(TagType.pair), + style(TagType.pair); public TagType type; private Tag(final TagType type) { @@ -201,6 +202,7 @@ public class ContentScraper extends AbstractScraper implements Scraper { @Override public void scrapeText(final char[] newtext, final String insideTag) { // System.out.println("SCRAPE: " + UTF8.String(newtext)); + if (insideTag != null && ("script".equals(insideTag) || "style".equals(insideTag))) return; int p, pl, q, s = 0; // match evaluation pattern @@ -434,7 +436,7 @@ public class ContentScraper extends AbstractScraper implements Scraper { } @Override - public void scrapeTag1(final String tagname, final Properties tagopts, final char[] text) { + public void scrapeTag1(final String tagname, final Properties tagopts, char[] text) { // System.out.println("ScrapeTag1: tagname=" + tagname + ", opts=" + tagopts.toString() + ", text=" + UTF8.String(text)); if (tagname.equalsIgnoreCase("a") && text.length < 2048) { final String href = tagopts.getProperty("href", EMPTY_STRING); diff --git a/source/net/yacy/document/parser/html/TransformerWriter.java b/source/net/yacy/document/parser/html/TransformerWriter.java index e2db4aa77..46c52dcaf 100644 --- a/source/net/yacy/document/parser/html/TransformerWriter.java +++ b/source/net/yacy/document/parser/html/TransformerWriter.java @@ -202,7 +202,7 @@ public final class TransformerWriter extends Writer { if (tag == null) { // case (1): this is not a tag opener/closer - if (this.scraper != null) this.scraper.scrapeText(content, null); + if (this.scraper != null && content.length > 0) this.scraper.scrapeText(content, null); if (this.transformer != null) return this.transformer.transformText(content); return content; } @@ -222,7 +222,9 @@ public final class TransformerWriter extends Writer { // we are collection tag text for the tag 'filterTag' -> case (4) - (7) if (tag == null || tag.equals("!")) { // case (4): getting no tag, go on collecting content - if (this.scraper != null) this.scraper.scrapeText(content, this.filterTag); + if (this.scraper != null) { + this.scraper.scrapeText(content, this.filterTag); + } if (this.transformer != null) { this.filterCont.append(this.transformer.transformText(content)); } else { @@ -330,7 +332,7 @@ public final class TransformerWriter extends Writer { if (in[1] == '/') { // a closing tag tagend = tagEnd(in, 2); - tag = new String(in, 2, tagend - 2); + tag = new String(in, 2, tagend - 2).toLowerCase(); final char[] text = new char[in.length - tagend - 1]; System.arraycopy(in, tagend, text, 0, in.length - tagend - 1); return filterTag(tag, false, text, quotechar); @@ -338,7 +340,7 @@ public final class TransformerWriter extends Writer { // an opening tag tagend = tagEnd(in, 1); - tag = new String(in, 1, tagend - 1); + tag = new String(in, 1, tagend - 1).toLowerCase(); final char[] text = new char[in.length - tagend - 1]; System.arraycopy(in, tagend, text, 0, in.length - tagend - 1); return filterTag(tag, true, text, quotechar); diff --git a/source/net/yacy/interaction/AugmentHtmlStream.java b/source/net/yacy/interaction/AugmentHtmlStream.java index a9605488d..396dcbfe6 100644 --- a/source/net/yacy/interaction/AugmentHtmlStream.java +++ b/source/net/yacy/interaction/AugmentHtmlStream.java @@ -23,7 +23,6 @@ import net.yacy.search.Switchboard; import org.htmlparser.Tag; import org.htmlparser.Text; -import org.htmlparser.tags.LinkTag; import org.htmlparser.util.NodeList; import org.htmlparser.visitors.NodeVisitor; @@ -58,7 +57,7 @@ public class AugmentHtmlStream { // Link Log.logInfo("AUGMENTATION", tag.getAttribute("href")); - LinkTag lt = (LinkTag)tag; + //LinkTag lt = (LinkTag)tag; } @@ -90,7 +89,7 @@ public class AugmentHtmlStream { private int counter; public VisitorText() { - this.setCounter(0); + this.counter = 0; } @Override @@ -377,7 +376,7 @@ public class AugmentHtmlStream { .nextNode()); NodeList bodychildren = bt.getChildren(); - + bodychildren.add(new org.htmlparser.nodes.TextNode(loadInternal("interaction_elements/OverlayInteraction.html?urlhash="+ ASCII.String(url.hash()) +"&url="+url.toNormalform(false, true), requestHeader))); bt.setChildren(bodychildren); diff --git a/source/net/yacy/interaction/Interaction.java b/source/net/yacy/interaction/Interaction.java index ad3089986..b10bf0b86 100644 --- a/source/net/yacy/interaction/Interaction.java +++ b/source/net/yacy/interaction/Interaction.java @@ -9,7 +9,6 @@ import java.util.Map; import java.util.Set; import net.yacy.cora.document.UTF8; -import net.yacy.cora.lod.JenaTripleStore; import net.yacy.cora.protocol.HeaderFramework; import net.yacy.cora.protocol.RequestHeader; import net.yacy.cora.protocol.http.HTTPClient; @@ -22,10 +21,6 @@ import net.yacy.search.Switchboard; import org.apache.http.entity.mime.content.ContentBody; -import com.hp.hpl.jena.rdf.model.Property; -import com.hp.hpl.jena.rdf.model.Resource; -import com.hp.hpl.jena.rdf.model.StmtIterator; - import de.anomic.data.UserDB; @@ -36,7 +31,7 @@ public class Interaction { UserDB.Entry entry = null; - String result = "anonymous"; + //String result = "anonymous"; entry = Switchboard.getSwitchboard().userDB.proxyAuth((requestHeader.get(RequestHeader.AUTHORIZATION, "xxxxxx"))); if(entry != null){ @@ -126,8 +121,8 @@ public class Interaction { } - - + + public static String GetTableentry(String url, String type, String username, String peer) { diff --git a/source/net/yacy/kelondro/data/meta/URIMetadataRow.java b/source/net/yacy/kelondro/data/meta/URIMetadataRow.java index aefff4930..b46feb0b7 100644 --- a/source/net/yacy/kelondro/data/meta/URIMetadataRow.java +++ b/source/net/yacy/kelondro/data/meta/URIMetadataRow.java @@ -37,7 +37,7 @@ import java.util.regex.Pattern; import net.yacy.cora.date.GenericFormatter; import net.yacy.cora.document.ASCII; import net.yacy.cora.document.UTF8; -import net.yacy.document.LibraryProvider; +import net.yacy.cora.lod.vocabulary.Tagging; import net.yacy.kelondro.data.word.WordReferenceRow; import net.yacy.kelondro.data.word.WordReferenceVars; import net.yacy.kelondro.index.Row; @@ -230,7 +230,7 @@ public class URIMetadataRow implements URIMetadata { String descr = crypt.simpleDecode(prop.getProperty("descr", ""), null); if (descr == null) descr = ""; String dc_creator = crypt.simpleDecode(prop.getProperty("author", ""), null); if (dc_creator == null) dc_creator = ""; String tags = crypt.simpleDecode(prop.getProperty("tags", ""), null); if (tags == null) tags = ""; - tags = LibraryProvider.autotagging.cleanTagFromAutotagging(tags); + tags = Tagging.cleanTagFromAutotagging(tags); String dc_publisher = crypt.simpleDecode(prop.getProperty("publisher", ""), null); if (dc_publisher == null) dc_publisher = ""; String lons = crypt.simpleDecode(prop.getProperty("lon", "0.0"), null); if (lons == null) lons = "0.0"; String lats = crypt.simpleDecode(prop.getProperty("lat", "0.0"), null); if (lats == null) lats = "0.0"; @@ -313,7 +313,7 @@ public class URIMetadataRow implements URIMetadata { assert (s.toString().indexOf(0) < 0); s.append(",author=").append(crypt.simpleEncode(metadata.dc_creator())); assert (s.toString().indexOf(0) < 0); - s.append(",tags=").append(crypt.simpleEncode(LibraryProvider.autotagging.cleanTagFromAutotagging(metadata.dc_subject()))); + s.append(",tags=").append(crypt.simpleEncode(Tagging.cleanTagFromAutotagging(metadata.dc_subject()))); assert (s.toString().indexOf(0) < 0); s.append(",publisher=").append(crypt.simpleEncode(metadata.dc_publisher())); assert (s.toString().indexOf(0) < 0); diff --git a/source/net/yacy/search/Switchboard.java b/source/net/yacy/search/Switchboard.java index 4c77aeddf..4fd8657cb 100644 --- a/source/net/yacy/search/Switchboard.java +++ b/source/net/yacy/search/Switchboard.java @@ -632,7 +632,7 @@ public final class Switchboard extends serverSwitch + " entries" + ", " + ppRamString(userDbFile.length() / 1024)); - + // init user triplestores JenaTripleStore.initPrivateStores(); @@ -664,7 +664,7 @@ public final class Switchboard extends serverSwitch } } }.start(); - + // define a realtime parsable mimetype list this.log.logConfig("Parser: Initializing Mime Type deny list"); TextParser.setDenyMime(getConfig(SwitchboardConstants.PARSER_MIME_DENY, "")); @@ -2222,6 +2222,11 @@ public final class Switchboard extends serverSwitch this.tables.cleanFailURLS(getConfigLong("cleanup.failedSearchURLtimeout", -1)); } + // periodically store the triple store + if (getConfigBool("triplestore.persistent", false)) { + JenaTripleStore.saveAll(); + } + return true; } catch ( final InterruptedException e ) { this.log.logInfo("cleanupJob: Shutdown detected"); diff --git a/source/net/yacy/search/query/RWIProcess.java b/source/net/yacy/search/query/RWIProcess.java index a041e7b62..4ea5a9545 100644 --- a/source/net/yacy/search/query/RWIProcess.java +++ b/source/net/yacy/search/query/RWIProcess.java @@ -716,22 +716,6 @@ public final class RWIProcess extends Thread } } - // check vocabulary constraint - /* - final String tags = page.dc_subject(); - final String[] taglist = tags == null || tags.length() == 0 ? new String[0] : SPACE_PATTERN.split(page.dc_subject()); - if (this.query.metatags != null && this.query.metatags.size() > 0) { - // all metatags must appear in the tags list - for (Tagging.Metatag metatag: this.query.metatags) { - if (!Autotagging.metatagAppearIn(metatag, taglist)) { - this.sortout++; - //Log.logInfo("RWIProcess", "sorted out " + page.url()); - continue takeloop; - } - } - } - */ - // evaluate information of metadata for navigation // author navigation: if ( pageauthor != null && pageauthor.length() > 0 ) { @@ -787,24 +771,6 @@ public final class RWIProcess extends Thread this.filetypeNavigator.inc(fileext); } - // vocabulary navigation - /* - tagharvest: for (String tag: taglist) { - if (tag.length() < 1 || tag.charAt(0) != LibraryProvider.tagPrefix) continue tagharvest; - try { - Tagging.Metatag metatag = LibraryProvider.autotagging.metatag(tag); - ScoreMap voc = this.vocabularyNavigator.get(metatag.getVocabularyName()); - if (voc == null) { - voc = new ConcurrentScoreMap(); - this.vocabularyNavigator.put(metatag.getVocabularyName(), voc); - } - voc.inc(metatag.getObject()); - } catch (RuntimeException e) { - // tag may not be well-formed - } - } - */ - // accept url return page; } diff --git a/source/net/yacy/yacy.java b/source/net/yacy/yacy.java index 21fb8e5d9..2d0f92b9b 100644 --- a/source/net/yacy/yacy.java +++ b/source/net/yacy/yacy.java @@ -306,19 +306,26 @@ public final class yacy { HTTPClient.setDefaultUserAgent(ClientIdentification.getUserAgent()); // initial fill of the triplestore - try { - File triplestore = new File(sb.getConfig("triplestore", new File(dataHome, "DATA/TRIPLESTORE").getAbsolutePath())); - mkdirIfNeseccary(triplestore); - for (String s: triplestore.list()) { - if ((s.endsWith(".rdf") || s.endsWith(".nt")) && !s.equals("local.rdf") && !s.endsWith("_triplestore.rdf") && !s.startsWith("private_store_")) JenaTripleStore.load(new File(triplestore, s).getAbsolutePath()); + File triplestore = new File(sb.getConfig("triplestore", new File(dataHome, "DATA/TRIPLESTORE").getAbsolutePath())); + mkdirIfNeseccary(triplestore); + for (String s: triplestore.list()) { + if ((s.endsWith(".rdf") || s.endsWith(".nt")) && !s.equals("local.rdf") && !s.endsWith("_triplestore.rdf") && !s.startsWith("private_store_")) { + try { + JenaTripleStore.load(new File(triplestore, s).getAbsolutePath()); + } catch (IOException e) { + Log.logException(e); + } + } + } + if (sb.getConfigBool("triplestore.persistent", false)) { + File local = new File(triplestore, "local.rdf"); + if (local.exists()) { + try { + JenaTripleStore.load(local.getAbsolutePath()); + } catch (IOException e) { + Log.logException(e); + } } - if (sb.getConfigBool("triplestore.persistent", false)) { - File local = new File(triplestore, "local.rdf"); - if (local.exists()) JenaTripleStore.load(local.getAbsolutePath()); - } - - } catch (IOException e) { - Log.logException(e); } // start main threads @@ -407,26 +414,11 @@ public final class yacy { server.terminate(false); server.interrupt(); server.close(); - /* - if (server.isAlive()) try { - // TODO only send request, don't read response (cause server is already down resulting in error) - final DigestURI u = new DigestURI((server.withSSL()?"https":"http")+"://localhost:" + serverCore.getPortNr(port), null); - Client.wget(u.toString(), null, 10000); // kick server - Log.logConfig("SHUTDOWN", "sent termination signal to server socket"); - } catch (final IOException ee) { - Log.logConfig("SHUTDOWN", "termination signal to server socket missed (server shutdown, ok)"); - } - */ -// Client.closeAllConnections(); -// MultiThreadedHttpConnectionManager.shutdownAll(); // idle until the processes are down if (server.isAlive()) { - //Thread.sleep(2000); // wait a while server.interrupt(); -// MultiThreadedHttpConnectionManager.shutdownAll(); } -// MultiThreadedHttpConnectionManager.shutdownAll(); Log.logConfig("SHUTDOWN", "server has terminated"); sb.close(); } catch (final Exception e) { @@ -440,11 +432,9 @@ public final class yacy { } finally { } + // save the triple store if (sb.getConfigBool("triplestore.persistent", false)) { - File triplestore = new File(sb.getConfig("triplestore", new File(dataHome, "DATA/TRIPLESTORE").getAbsolutePath())); - JenaTripleStore.saveFile(new File(triplestore, "local.rdf").getAbsolutePath()); - - JenaTripleStore.savePrivateStores(sb); + JenaTripleStore.saveAll(); } Log.logConfig("SHUTDOWN", "goodbye. (this is the last line)");