Merged master into postprocessing branch

pull/71/head
luccioman 9 years ago
commit 06d4f93d03

@ -19,7 +19,7 @@
<key>CFBundleAllowMixedLocalizations</key>
<string>true</string>
<key>CFBundleExecutable</key>
<string>startYACY.sh</string>
<string>startYACYMacOS.sh</string>
<key>CFBundleDevelopmentRegion</key>
<string>English</string>
<key>CFBundlePackageType</key>

@ -0,0 +1,8 @@
#!/usr/bin/env sh
# Launcher for YaCy in a MacOS bundle :
# relies on the generic startYACY.sh, but specifies the user home relative path for YaCy data
# This data directory is set in conformance with the OS X File System Programming Guide
# see : https://developer.apple.com/library/ios/documentation/FileManagement/Conceptual/FileSystemProgrammingGuide/MacOSXDirectories/MacOSXDirectories.html
"`dirname $0`"/startYACY.sh -s "'Library/Application Support/net.yacy.YaCy'"

@ -764,7 +764,8 @@
</copy>
<copy file="${addon}/YaCy.app/Contents/Info.plist" tofile="${release_mac}/YaCy.app/Contents/Info.plist" filtering="true" overwrite="true" />
<move file="${release_main}" tofile="${release_mac}/YaCy.app/Contents/MacOS" verbose="false" />
<!-- startYACY.sh will be the main entry point : we set permissions to make it an executable file -->
<!-- startYACY.sh and startYACYMacOS.sh will be the main entry points : we set permissions to make them executable files -->
<chmod file="${release_mac}/YaCy.app/Contents/MacOS/startYACYMacOS.sh" perm="755"/>
<chmod file="${release_mac}/YaCy.app/Contents/MacOS/startYACY.sh" perm="755"/>
<exec executable="hdiutil">
<arg line="create -srcfolder ${release_mac}/YaCy.app ${release}/yacy_v${releaseVersion}_${DSTAMP}_${releaseNr}.dmg"/>

@ -18,6 +18,10 @@ WORKDIR /opt
# - Compile with ant
# - remove unnecessary and size consuming .git directory
# - remove ant and git packages
# Possible alternative : copy your current sources directly and remove the git clone command from the following RUN
# COPY . /opt/yacy_search_server/
RUN apt-get update && \
apt-get install -yq ant git && \
git clone https://github.com/yacy/yacy_search_server.git && \
@ -30,14 +34,17 @@ RUN apt-get update && \
# Set initial admin password : "docker" (encoded with custom yacy md5 function net.yacy.cora.order.Digest.encodeMD5Hex())
RUN sed -i "/adminAccountBase64MD5=/c\adminAccountBase64MD5=MD5:e672161ffdce91be4678605f4f4e6786" /opt/yacy_search_server/defaults/yacy.init
# Initially enable HTTPS : this is the most secure option for remote administrator authentication
RUN sed -i "/server.https=false/c\server.https=true" /opt/yacy_search_server/defaults/yacy.init
# Create user and group yacy : this user will be used to run YaCy main process
RUN adduser --system --group --no-create-home --disabled-password yacy
# Set ownership of yacy install directory to yacy user/group
RUN chown yacy:yacy -R /opt/yacy_search_server
# Expose port 8090
EXPOSE 8090
# Expose HTTP and HTTPS default ports
EXPOSE 8090 8443
# Set data volume : yacy data and configuration will persist even after container stop or destruction
VOLUME ["/opt/yacy_search_server/DATA"]

@ -51,6 +51,10 @@ WORKDIR /opt
# - compile with apache ant
# - remove unnecessary and size consuming .git directory
# - delete git package and ant binary install
# Possible alternative : copy your current sources directly and remove the git clone command from the following RUN
# COPY . /opt/yacy_search_server/
RUN apk update && \
apk add --no-cache git && \
git clone https://github.com/yacy/yacy_search_server.git && \
@ -62,14 +66,17 @@ RUN apk update && \
# Set initial admin password : "docker" (encoded with custom yacy md5 function net.yacy.cora.order.Digest.encodeMD5Hex())
RUN sed -i "/adminAccountBase64MD5=/c\adminAccountBase64MD5=MD5:e672161ffdce91be4678605f4f4e6786" /opt/yacy_search_server/defaults/yacy.init
# Initially enable HTTPS : this is the most secure option for remote administrator authentication
RUN sed -i "/server.https=false/c\server.https=true" /opt/yacy_search_server/defaults/yacy.init
# Create user and group yacy : this user will be used to run YaCy main process
RUN addgroup yacy && adduser -S -G yacy -H -D yacy
# Set ownership of yacy install directory to yacy user/group
RUN chown yacy:yacy -R /opt/yacy_search_server
# Expose port 8090
EXPOSE 8090
# Expose HTTP and HTTPS default ports
EXPOSE 8090 8443
# Set data volume : yacy data and configuration will persist even after container stop or destruction
VOLUME ["/opt/yacy_search_server/DATA"]

@ -18,6 +18,11 @@ Using yacy_search_server/docker/Dockerfile :
cd yacy_search_server/docker
docker build .
To build the Alpine variant :
cd yacy_search_server/docker
docker build -f Dockerfile.alpine .
## Image variants
`luccioman/yacy:latest`
@ -49,12 +54,12 @@ You can retrieve the container IP address with `docker inspect`.
#### Easier to handle
docker run --name yacy -p 8090:8090 --log-opt max-size=100m --log-opt max-file=2 luccioman/yacy
docker run --name yacy -p 8090:8090 -p 8443:8443 --log-opt max-size=200m --log-opt max-file=2 luccioman/yacy
##### Options detail
* --name : allows easier management of your container (without it, docker automatically generates a new name at each startup).
* -p : map host port and container port, allowing web interface access through the usual http://localhost:8090.
* -p 8090:8090 -p 8443:8443 : map host ports to YaCy container ports, allowing web interface access through the usual http://localhost:8090 and https://localhost:8443 (you can set a different mapping, for example -p 443:8443 if you prefer to use the default HTTPS port on your host)
* --log-opt max-size : limit maximum docker log file size for this container
* --log-opt max-file : limit number of docker rotated log files for this container
@ -76,9 +81,47 @@ Note that you can list all docker volumes with :
docker volume ls
#### As background process
#### Start as background process
docker run -d luccioman/yacy
### HTTPS support
These images are configured by default with HTTPS enabled, and use a default certificate stored in defaults/freeworldKeystore. You should use your own certificate. To do so, you can proceed as follows.
#### Self-signed certificate
A self-signed certificate will provide encrypted communications with your YaCy server, but browsers will still complain about an invalid security certificate with the error "SEC_ERROR_UNKNOWN_ISSUER". If this is sufficient for you, you can permanently add an exception to your browser.
This kind of certificate can be generated and added to your YaCy Docker container with the following :
keytool -keystore /var/lib/docker/volumes/[your_yacy_volume]/_data/SETTINGS/yacykeystore -genkey -keyalg RSA -alias yacycert
Then edit the YaCy config file. For example with the nano text editor :
nano /var/lib/docker/volumes/[your_yacy_volume]/_data/SETTINGS/yacy.conf
And configure the keyStoreXXXX properties accordingly :
keyStore=/opt/yacy_search_server/DATA/SETTINGS/yacykeystore
keyStorePassword=yourpassword
#### Import an existing certificate:
Importing a certificate validated by a certification authority (CA) will ensure you have full HTTPS support with no security errors when accessing your YaCy peer. You can import an existing certificate in pkcs12 format.
First copy it to the YaCy Docker container volume :
cp [yourStore].pkcs12 /var/lib/docker/volumes/[your_yacy_volume]/_data/SETTINGS/[yourStore].pkcs12
Then edit YaCy config file. For example with the nano text editor :
nano /var/lib/docker/volumes/[your_yacy_volume]/_data/SETTINGS/yacy.conf
And configure the pkcs12XXX properties accordingly :
pkcs12ImportFile=/opt/yacy_search_server/DATA/SETTINGS/[yourStore].pkcs12
pkcs12ImportPwd=yourpassword
### Next starts
@ -109,7 +152,7 @@ OR
Create new container based on pulled image, using volume data from old container :
docker create --name [tmp-container_name] -p 8090:8090 --volumes-from=[container_name] --log-opt max-size=100m --log-opt max-file=2 luccioman/yacy:latest
docker create --name [tmp-container_name] -p 8090:8090 -p 8443:8443 --volumes-from=[container_name] --log-opt max-size=100m --log-opt max-file=2 luccioman/yacy:latest
Stop old container :

@ -2,5 +2,6 @@ yacy:
image: 'luccioman/yacy:latest'
ports:
- '8090:8090'
- '8443:8443'
restart: on-failure
autoredeploy: true

@ -1,5 +1,4 @@
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.crawler.data.CrawlProfile.CrawlAttribute;
import net.yacy.search.Switchboard;
import net.yacy.search.SwitchboardConstants;
import net.yacy.server.serverObjects;

@ -43,6 +43,10 @@
A change in the personal profile will create a news entry. You can see recently made changes of
profile entries on the Network page, where that profile change is visualized with a '*' beside the 'P' (profile) - selector.
</li>
<li>
Publishing of added or modified translation for the user interface. Other peers may include it in their local translation list.
To publish a translation, use the integrated <a href="Translator_p.html">translation editor</a> to add a translation and publish it afterwards.
</li>
</ul>
<p>
More news services will follow.

@ -6,7 +6,7 @@
</head>
<body>
#%env/templates/header.template%#
#%env/templates/submenuComputation.template%#
<h2 class="yacy">Translation News for Language #[currentlang]#</h2>
<form method="post" enctype="multipart/form-data">

@ -42,7 +42,6 @@ import net.yacy.server.serverObjects;
import net.yacy.server.serverSwitch;
import net.yacy.utils.crypt;
import net.yacy.utils.translation.TranslationManager;
import net.yacy.utils.translation.TranslatorXliff;
public class TransNews_p {
@ -90,32 +89,13 @@ public class TransNews_p {
continue;
}
if (NewsPool.CATEGORY_TRANSLATION_ADD.equals(rtmp.category())) {
//String tmplng = rtmp.attribute("language", null);
String tmplng = rtmp.attribute("language", null);
String tmpfile = rtmp.attribute("file", null);
String tmpsource = rtmp.attribute("source", null);
String tmptarget = rtmp.attribute("target", null);
//String tmptarget = rtmp.attribute("target", null);
if (sb.peers.mySeed().hash.equals(rtmp.originator())) {
/*
if (tmplng != null && tmplng.equals(currentlang)) {
sendit = false;
break;
}*/
if (tmpfile != null && tmpfile.equals(file)) {
sendit = false;
break;
}
if (tmpsource != null && tmpsource.equals(sourcetxt)) {
sendit = false;
break;
}
if (tmptarget != null && tmptarget.equals(targettxt)) {
sendit = false;
break;
}
}
// if news with file and source exist (maybe from other peer) - skip sending another msg (to avoid confusion)
if ((tmpfile != null && tmpfile.equals(file))
if ((tmplng != null && tmplng.equals(currentlang)) && (tmpfile != null && tmpfile.equals(file))
&& (tmpsource != null && tmpsource.equals(sourcetxt))) {
sendit = false;
break;
@ -189,8 +169,8 @@ public class TransNews_p {
final HashMap<String, Integer> positiveHashes = new HashMap<String, Integer>(); // a mapping from an url hash to Integer (count of votes)
accumulateVotes(sb, negativeHashes, positiveHashes, NewsPool.INCOMING_DB);
final ScoreMap<String> ranking = new ConcurrentScoreMap<String>(); // score cluster for url hashes
final HashMap<String, NewsDB.Record> Translation = new HashMap<String, NewsDB.Record>(); // a mapping from an url hash to a kelondroRow.Entry with display properties
accumulateTranslations(sb, Translation, ranking, negativeHashes, positiveHashes, NewsPool.INCOMING_DB);
final HashMap<String, NewsDB.Record> translation = new HashMap<String, NewsDB.Record>(); // a mapping from an url hash to a kelondroRow.Entry with display properties
accumulateTranslations(sb, translation, ranking, negativeHashes, positiveHashes, NewsPool.INCOMING_DB);
// read out translation-news array and create property entries
final Iterator<String> k = ranking.keys(false);
@ -207,7 +187,7 @@ public class TransNews_p {
continue;
}
row = Translation.get(refid);
row = translation.get(refid);
if (row == null) {
continue;
}
@ -220,6 +200,7 @@ public class TransNews_p {
continue;
}
if (!lang.equals(currentlang)) continue;
String existingtarget = null; //transMgr.getTranslation(filename, source);
Map<String, String> tmpMap = localTrans.get(filename);

@ -39,7 +39,6 @@ import net.yacy.cora.order.Base64Order;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.cora.sorting.ClusteredScoreMap;
import net.yacy.cora.util.CommonPattern;
import net.yacy.cora.util.ConcurrentLog;
import net.yacy.peers.graphics.WebStructureGraph;
import net.yacy.search.Switchboard;
import net.yacy.server.serverObjects;

@ -33,6 +33,7 @@
<ul class="SubMenu">
<li><a href="Surftips.html" class="MenuItemLink">Surftips</a></li>
<li><a href="Wiki.html?display=1" class="MenuItemLink">Local Peer Wiki</a></li>
<li><a href="TransNews_p.html" class="MenuItemLink">UI Translations</a></li>
</ul>
</div>
</div>

@ -45,7 +45,6 @@ import net.yacy.cora.lod.vocabulary.Tagging;
import net.yacy.cora.protocol.Domains;
import net.yacy.cora.protocol.HeaderFramework;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.cora.sorting.ConcurrentScoreMap;
import net.yacy.cora.sorting.ScoreMap;
import net.yacy.cora.sorting.WeakPriorityBlockingQueue;
import net.yacy.cora.storage.HandleSet;
@ -380,7 +379,9 @@ public final class search {
// prepare reference hints
final long timer = System.currentTimeMillis();
final ScoreMap<String> topicNavigator = sb.index.connectedRWI() ? theSearch.getTopics(5, 100) : new ConcurrentScoreMap<String>();
//final ScoreMap<String> topicNavigator = sb.index.connectedRWI() ? theSearch.getTopics(5, 100) : new ConcurrentScoreMap<String>();
final ScoreMap<String> topicNavigator = theSearch.getTopics(); // as there is currently no index interaction in getTopics(), we can use it by default
final StringBuilder refstr = new StringBuilder(6000);
final Iterator<String> navigatorIterator = topicNavigator.keys(false);
int i = 0;

@ -24,6 +24,7 @@ import java.util.Map;
import java.util.Set;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.cora.util.JSONObject;
import net.yacy.peers.Seed;
import net.yacy.search.Switchboard;
import net.yacy.server.serverCore;
@ -91,14 +92,14 @@ public final class seedlist {
Set<String> ips = seed.getIPs();
if (ips == null || ips.size() == 0) continue;
prop.putJSON("peers_" + count + "_map_0_k", Seed.HASH);
prop.put("peers_" + count + "_map_0_v", '"' + serverObjects.toJSON(seed.hash) + '"');
prop.put("peers_" + count + "_map_0_v", JSONObject.quote(seed.hash));
prop.put("peers_" + count + "_map_0_c", 1);
Map<String, String> map = seed.getMap();
int c = 1;
if (!addressonly) {
for (Map.Entry<String, String> m: map.entrySet()) {
prop.putJSON("peers_" + count + "_map_" + c + "_k", m.getKey());
prop.put("peers_" + count + "_map_" + c + "_v", '"' + serverObjects.toJSON(m.getValue()) + '"');
prop.put("peers_" + count + "_map_" + c + "_v", JSONObject.quote(m.getValue()));
prop.put("peers_" + count + "_map_" + c + "_c", 1);
c++;
}
@ -106,7 +107,7 @@ public final class seedlist {
// construct a list of ips
StringBuilder a = new StringBuilder();
a.append('[');
for (String ip: ips) a.append('"').append(serverObjects.toJSON(seed.getPublicAddress(ip))).append('"').append(',');
for (String ip: ips) a.append(JSONObject.quote(seed.getPublicAddress(ip))).append(',');
a.setCharAt(a.length()-1, ']');
prop.putJSON("peers_" + count + "_map_" + c + "_k", "Address");
prop.put("peers_" + count + "_map_" + c + "_v", a.toString());

@ -1212,8 +1212,14 @@ Category==Categorie
Received==Reçu
Distributed==Distribu&eacute;
Attributes==Attributs
"#(page)#::Process Selected News::Delete Selected News::Abort Publication of Selected News::Delete Selected News#(/page)#"==#(page)#::Traiter les nouvelles s&eacute;lectionn&eacute;es::Supprimer les nouvelles s&eacute;lectionn&eacute;es::Annuler la publication des nouvelles s&eacute;lectionn&eacute;es::Supprimer les nouvelles s&eacute;lection&eacute;es#(/page)#
"#(page)#::Process All News::Delete All News::Abort Publication of All News::Delete All News#(/page)#"==#(page)#::Traiter toutes les nouvelles::Supprimer toutes les nouvelles::Annuler la publication de toutes les nouvelles::Supprimer toutes les nouvelles#(/page)#
Process Selected News==Traiter les nouvelles s&eacute;lectionn&eacute;es
Delete Selected News==Supprimer les nouvelles s&eacute;lectionn&eacute;es
Abort Publication of Selected News==Annuler la publication des nouvelles s&eacute;lectionn&eacute;es
Delete Selected News==Supprimer les nouvelles s&eacute;lectionn&eacute;es
Process All News==Traiter toutes les nouvelles
Delete All News==Supprimer toutes les nouvelles
Abort Publication of All News==Annuler la publication de toutes les nouvelles
Delete All News==Supprimer toutes les nouvelles
#-----------------------------
#File: Performance_p.html

@ -5966,6 +5966,21 @@
<trans-unit id="49006831" xml:space="preserve" approved="no" translate="yes">
<source>profile entries on the Network page, where that profile change is visualized with a '*' beside the 'P' (profile) - selector.</source>
</trans-unit>
<trans-unit id="80e8551f" xml:space="preserve" approved="no" translate="yes">
<source>Publishing of added or modified translation for the user interface.</source>
</trans-unit>
<trans-unit id="aff2f3ce" xml:space="preserve" approved="no" translate="yes">
<source>Other peers may include it in their local translation list.</source>
</trans-unit>
<trans-unit id="304f12eb" xml:space="preserve" approved="no" translate="yes">
<source>To publish a translation, use the integrated</source>
</trans-unit>
<trans-unit id="f7855dbc" xml:space="preserve" approved="no" translate="yes">
<source>translation editor</source>
</trans-unit>
<trans-unit id="7e95942a" xml:space="preserve" approved="no" translate="yes">
<source>to add a translation and publish it afterwards.</source>
</trans-unit>
<trans-unit id="ac7ecc34" xml:space="preserve" approved="no" translate="yes">
<source>Above you can see four menues:</source>
</trans-unit>
@ -6008,6 +6023,24 @@
<trans-unit id="8eff8577" xml:space="preserve" approved="no" translate="yes">
<source>Attributes</source>
</trans-unit>
<trans-unit id="8ae5c927" xml:space="preserve" approved="no" translate="yes">
<source>Process Selected News</source>
</trans-unit>
<trans-unit id="d40fbba3" xml:space="preserve" approved="no" translate="yes">
<source>Delete Selected News</source>
</trans-unit>
<trans-unit id="43ac9953" xml:space="preserve" approved="no" translate="yes">
<source>Abort Publication of Selected News</source>
</trans-unit>
<trans-unit id="49ff88e3" xml:space="preserve" approved="no" translate="yes">
<source>Process All News</source>
</trans-unit>
<trans-unit id="6fcef8e7" xml:space="preserve" approved="no" translate="yes">
<source>Delete All News</source>
</trans-unit>
<trans-unit id="30aa4d37" xml:space="preserve" approved="no" translate="yes">
<source>Abort Publication of All News</source>
</trans-unit>
<trans-unit id="8c39153f" xml:space="preserve" approved="no" translate="yes">
<source>"#(page)#::Process Selected News::Delete Selected News::Abort Publication of Selected News::Delete Selected News#(/page)#"</source>
</trans-unit>
@ -6821,6 +6854,30 @@
<trans-unit id="1ba97b50" xml:space="preserve" approved="no" translate="yes">
<source>"Re-Set to default"</source>
</trans-unit>
<trans-unit id="8410369e" xml:space="preserve" approved="no" translate="yes">
<source>&gt;Filter Query&lt;</source>
</trans-unit>
<trans-unit id="78033f39" xml:space="preserve" approved="no" translate="yes">
<source>The Filter Query is attached to every query.</source>
</trans-unit>
<trans-unit id="70b334ee" xml:space="preserve" approved="no" translate="yes">
<source>Use this to statically add a selection criteria to reduce the set of results.</source>
</trans-unit>
<trans-unit id="c227ca00" xml:space="preserve" approved="no" translate="yes">
<source>Example: "http_unique_b:true AND www_unique_b:true" will filter out all results where urls appear also with/without http(s) and/or with/without 'www.' prefix.</source>
</trans-unit>
<trans-unit id="4112ab05" xml:space="preserve" approved="no" translate="yes">
<source>To find appropriate fields for this query, see the</source>
</trans-unit>
<trans-unit id="7838433d" xml:space="preserve" approved="no" translate="yes">
<source>YaCy Solr Schema</source>
</trans-unit>
<trans-unit id="849166a5" xml:space="preserve" approved="no" translate="yes">
<source>Warning: bad expressions here will cause that you don't have any search result!</source>
</trans-unit>
<trans-unit id="47e46c2" xml:space="preserve" approved="no" translate="yes">
<source>"Set Filter Query"</source>
</trans-unit>
<trans-unit id="14f04b0f" xml:space="preserve" approved="no" translate="yes">
<source>&gt;Boost Query&lt;</source>
</trans-unit>
@ -10100,6 +10157,9 @@
<trans-unit id="a66de681" xml:space="preserve" approved="no" translate="yes">
<source>&gt;Local Peer Wiki&lt;</source>
</trans-unit>
<trans-unit id="cf394e0e" xml:space="preserve" approved="no" translate="yes">
<source>UI Translations</source>
</trans-unit>
</body>
</file>

@ -349,7 +349,7 @@ public class OpensearchResponseWriter implements QueryResponseWriter {
for (String s: snippets) {
if ((l == null || s.length() > l.length()) && s.indexOf(' ') > 0) l = s;
}
return l;
return l.replaceAll("\"", "'");
}
public static void openTag(final Writer writer, final String tag) throws IOException {

@ -34,9 +34,10 @@ import net.yacy.cora.document.analysis.Classification;
import net.yacy.cora.document.id.MultiProtocolURL;
import net.yacy.cora.federate.solr.responsewriter.OpensearchResponseWriter.ResHead;
import net.yacy.cora.protocol.HeaderFramework;
import net.yacy.cora.util.ConcurrentLog;
import net.yacy.cora.util.JSONObject;
import net.yacy.data.URLLicense;
import net.yacy.search.schema.CollectionSchema;
import net.yacy.server.serverObjects;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexableField;
@ -54,6 +55,9 @@ import org.apache.solr.search.SolrIndexSearcher;
/**
* Write the opensearch result in YaCy's special way, including as much information as opensearch does.
* This will also include YaCy facets.
*
* example:
* http://localhost:8090/solr/select?hl=false&wt=yjson&facet=true&facet.mincount=1&facet.field=host_s&facet.field=url_file_ext_s&facet.field=url_protocol_s&facet.field=author_sxt&facet.field=collection_sxt&start=0&rows=10&query=www
*/
public class YJsonResponseWriter implements QueryResponseWriter {
@ -135,7 +139,6 @@ public class YJsonResponseWriter implements QueryResponseWriter {
Document doc = searcher.doc(id, OpensearchResponseWriter.SOLR_FIELDS);
List<IndexableField> fields = doc.getFields();
int fieldc = fields.size();
List<String> texts = new ArrayList<String>();
MultiProtocolURL url = null;
String urlhash = null;
List<String> descriptions = new ArrayList<String>();
@ -166,13 +169,11 @@ public class YJsonResponseWriter implements QueryResponseWriter {
}
if (CollectionSchema.title.getSolrFieldName().equals(fieldName)) {
title = value.stringValue();
texts.add(title);
continue;
}
if (CollectionSchema.description_txt.getSolrFieldName().equals(fieldName)) {
String description = value.stringValue();
descriptions.add(description);
texts.add(description);
continue;
}
if (CollectionSchema.id.getSolrFieldName().equals(fieldName)) {
@ -197,31 +198,33 @@ public class YJsonResponseWriter implements QueryResponseWriter {
solitaireTag(writer, "sizename", sizemb > 0 ? (Integer.toString(sizemb) + " mbyte") : sizekb > 0 ? (Integer.toString(sizekb) + " kbyte") : (Integer.toString(size) + " byte"));
continue;
}
if (CollectionSchema.text_t.getSolrFieldName().equals(fieldName)) {
texts.add(value.stringValue());
continue;
}
if (CollectionSchema.h1_txt.getSolrFieldName().equals(fieldName) || CollectionSchema.h2_txt.getSolrFieldName().equals(fieldName) ||
CollectionSchema.h3_txt.getSolrFieldName().equals(fieldName) || CollectionSchema.h4_txt.getSolrFieldName().equals(fieldName) ||
CollectionSchema.h5_txt.getSolrFieldName().equals(fieldName) || CollectionSchema.h6_txt.getSolrFieldName().equals(fieldName)) {
// because these are multi-valued fields, there can be several of each
texts.add(value.stringValue());
continue;
}
//missing: "code","faviconCode"
}
// compute snippet from texts
solitaireTag(writer, "path", path.toString());
solitaireTag(writer, "title", title.length() == 0 ? (texts.size() == 0 ? path.toString() : texts.get(0)) : title);
solitaireTag(writer, "title", title.length() == 0 ? path.toString() : title.replaceAll("\"", "'"));
LinkedHashSet<String> snippet = urlhash == null ? null : snippets.get(urlhash);
if (snippet == null) {snippet = new LinkedHashSet<>(); snippet.addAll(descriptions);}
OpensearchResponseWriter.removeSubsumedTitle(snippet, title);
writer.write("\"description\":\""); writer.write(serverObjects.toJSON(snippet == null || snippet.size() == 0 ? (descriptions.size() > 0 ? descriptions.get(0) : "") : OpensearchResponseWriter.getLargestSnippet(snippet))); writer.write("\"\n}\n");
String snippetstring = snippet == null || snippet.size() == 0 ? (descriptions.size() > 0 ? descriptions.get(0) : "") : OpensearchResponseWriter.getLargestSnippet(snippet);
if (snippetstring.length() > 140) {
snippetstring = snippetstring.substring(0, 140);
int sp = snippetstring.lastIndexOf(' ');
if (sp >= 0) snippetstring = snippetstring.substring(0, sp) + " ..."; else snippetstring = snippetstring + "...";
}
writer.write("\"description\":"); writer.write(JSONObject.quote(snippetstring)); writer.write("\n}\n");
if (i < responseCount - 1) {
writer.write(",\n".toCharArray());
}
} catch (final Throwable ee) {}
} catch (final Throwable ee) {
ConcurrentLog.logException(ee);
writer.write("\"description\":\"\"\n}\n");
if (i < responseCount - 1) {
writer.write(",\n".toCharArray());
}
}
}
writer.write("],\n".toCharArray());
@ -240,16 +243,20 @@ public class YJsonResponseWriter implements QueryResponseWriter {
@SuppressWarnings("unchecked")
NamedList<Integer> collections = facetFields == null ? null : (NamedList<Integer>) facetFields.get(CollectionSchema.collection_sxt.getSolrFieldName());
int facetcount = 0;
if (domains != null) {
writer.write(facetcount > 0 ? ",\n" : "\n");
writer.write("{\"facetname\":\"domains\",\"displayname\":\"Provider\",\"type\":\"String\",\"min\":\"0\",\"max\":\"0\",\"mean\":\"0\",\"elements\":[\n".toCharArray());
for (int i = 0; i < domains.size(); i++) {
facetEntry(writer, "site", domains.getName(i), Integer.toString(domains.getVal(i)));
if (i < domains.size() - 1) writer.write(',');
writer.write("\n");
}
writer.write("]},\n".toCharArray());
writer.write("]}".toCharArray());
facetcount++;
}
if (filetypes != null) {
writer.write(facetcount > 0 ? ",\n" : "\n");
writer.write("{\"facetname\":\"filetypes\",\"displayname\":\"Filetypes\",\"type\":\"String\",\"min\":\"0\",\"max\":\"0\",\"mean\":\"0\",\"elements\":[\n".toCharArray());
List<Map.Entry<String, Integer>> l = new ArrayList<Map.Entry<String,Integer>>();
for (Map.Entry<String, Integer> e: filetypes) {
@ -262,36 +269,43 @@ public class YJsonResponseWriter implements QueryResponseWriter {
if (i < l.size() - 1) writer.write(',');
writer.write("\n");
}
writer.write("]},\n".toCharArray());
writer.write("]}".toCharArray());
facetcount++;
}
if (protocols != null) {
writer.write(facetcount > 0 ? ",\n" : "\n");
writer.write("{\"facetname\":\"protocols\",\"displayname\":\"Protocol\",\"type\":\"String\",\"min\":\"0\",\"max\":\"0\",\"mean\":\"0\",\"elements\":[\n".toCharArray());
for (int i = 0; i < protocols.size(); i++) {
facetEntry(writer, "protocol", protocols.getName(i), Integer.toString(protocols.getVal(i)));
if (i < protocols.size() - 1) writer.write(',');
writer.write("\n");
}
writer.write("]},\n".toCharArray());
writer.write("]}".toCharArray());
facetcount++;
}
if (authors != null) {
writer.write(facetcount > 0 ? ",\n" : "\n");
writer.write("{\"facetname\":\"authors\",\"displayname\":\"Authors\",\"type\":\"String\",\"min\":\"0\",\"max\":\"0\",\"mean\":\"0\",\"elements\":[\n".toCharArray());
for (int i = 0; i < authors.size(); i++) {
facetEntry(writer, "author", authors.getName(i), Integer.toString(authors.getVal(i)));
if (i < authors.size() - 1) writer.write(',');
writer.write("\n");
}
writer.write("]},\n".toCharArray());
writer.write("]}".toCharArray());
facetcount++;
}
if (collections != null) {
writer.write(facetcount > 0 ? ",\n" : "\n");
writer.write("{\"facetname\":\"collections\",\"displayname\":\"Collections\",\"type\":\"String\",\"min\":\"0\",\"max\":\"0\",\"mean\":\"0\",\"elements\":[\n".toCharArray());
for (int i = 0; i < collections.size(); i++) {
facetEntry(writer, "collection", collections.getName(i), Integer.toString(collections.getVal(i)));
if (i < collections.size() - 1) writer.write(',');
writer.write("\n");
}
writer.write("]},\n".toCharArray());
writer.write("]}".toCharArray());
facetcount++;
}
writer.write("]}]}\n".toCharArray());
writer.write("\n]}]}\n".toCharArray());
if (jsonp != null) {
writer.write("])".toCharArray());
@ -300,14 +314,16 @@ public class YJsonResponseWriter implements QueryResponseWriter {
public static void solitaireTag(final Writer writer, final String tagname, String value) throws IOException {
if (value == null) return;
writer.write('"'); writer.write(tagname); writer.write("\":\""); writer.write(serverObjects.toJSON(value)); writer.write("\","); writer.write('\n');
writer.write('"'); writer.write(tagname); writer.write("\":"); writer.write(JSONObject.quote(value)); writer.write(','); writer.write('\n');
}
private static void facetEntry(final Writer writer, final String modifier, final String propname, String value) throws IOException {
writer.write("{\"name\": \""); writer.write(propname);
writer.write("\", \"count\": \""); writer.write(value);
writer.write("\", \"modifier\": \""); writer.write(modifier); writer.write("%3A"); writer.write(propname);
writer.write("\"}");
private static void facetEntry(final Writer writer, String modifier, String propname, final String value) throws IOException {
modifier = modifier.replaceAll("\"", "'").trim();
propname = propname.replaceAll("\"", "'").trim();
writer.write("{\"name\":"); writer.write(JSONObject.quote(propname));
writer.write(",\"count\":"); writer.write(JSONObject.quote(value.replaceAll("\"", "'").trim()));
writer.write(",\"modifier\":"); writer.write(JSONObject.quote(modifier+"%3A"+propname));
writer.write("}");
}
}
/**

@ -37,7 +37,6 @@ import java.util.concurrent.BlockingQueue;
import java.util.concurrent.ConcurrentHashMap;
import java.util.regex.Pattern;
import net.yacy.cora.document.encoding.UTF8;
import net.yacy.cora.geo.GeoLocation;
import net.yacy.cora.geo.Locations;
import net.yacy.cora.storage.Files;

@ -133,7 +133,10 @@ public class ConcurrentScoreMap<E> extends AbstractScoreMap<E> implements ScoreM
if (obj == null) return;
// use atomic operations
this.map.putIfAbsent(obj, new AtomicLong(0));
final AtomicLong old = this.map.putIfAbsent(obj, new AtomicLong(0));
// adjust overall counter if value replaced
if (old != null) this.gcount -= old.longValue(); // must use old befor setting a new value (it's a object reference)
this.map.get(obj).set(newScore);
// increase overall counter

@ -47,7 +47,6 @@ import net.yacy.kelondro.util.FileUtils;
import net.yacy.kelondro.util.OS;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.rendering.ImageType;
import org.apache.pdfbox.rendering.PDFRenderer;

@ -29,7 +29,6 @@ package net.yacy.crawler.data;
import java.io.File;
import java.io.IOException;
import java.net.MalformedURLException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
@ -51,7 +50,6 @@ import net.yacy.cora.document.feed.Hit;
import net.yacy.cora.document.feed.RSSFeed;
import net.yacy.cora.document.id.DigestURL;
import net.yacy.cora.federate.solr.FailCategory;
import net.yacy.cora.federate.solr.connector.SolrConnector;
import net.yacy.cora.federate.yacy.CacheStrategy;
import net.yacy.cora.order.Base64Order;
import net.yacy.cora.protocol.ConnectionInfo;

@ -32,7 +32,6 @@ import java.util.Date;
import net.yacy.cora.document.analysis.Classification;
import net.yacy.cora.document.encoding.ASCII;
import net.yacy.cora.document.encoding.UTF8;
import net.yacy.cora.document.id.AnchorURL;
import net.yacy.cora.document.id.DigestURL;
import net.yacy.cora.document.id.MultiProtocolURL;
import net.yacy.cora.protocol.HeaderFramework;

@ -52,7 +52,6 @@ import net.yacy.document.parser.ooxmlParser;
import net.yacy.document.parser.pdfParser;
import net.yacy.document.parser.pptParser;
import net.yacy.document.parser.psParser;
import net.yacy.document.parser.rdfParser;
import net.yacy.document.parser.rssParser;
import net.yacy.document.parser.rtfParser;
import net.yacy.document.parser.sevenzipParser;

@ -78,7 +78,7 @@ public class Tokenizer {
int wordHandleCount = 0;
//final int sentenceHandleCount = 0;
int allwordcounter = 0;
final int allsentencecounter = 0;
int allsentencecounter = 0;
int wordInSentenceCounter = 1;
boolean comb_indexof = false, last_last = false, last_index = false;
//final Map<StringBuilder, Phrase> sentences = new HashMap<StringBuilder, Phrase>(100);
@ -89,6 +89,14 @@ public class Tokenizer {
try {
while (wordenum.hasMoreElements()) {
String word = wordenum.nextElement().toString().toLowerCase(Locale.ENGLISH);
// handle punctuation (start new sentence)
if (word.length() == 1 && SentenceReader.punctuation(word.charAt(0))) {
// store sentence
currsentwords.clear();
wordInSentenceCounter = 1;
allsentencecounter++;
continue;
}
if (word.length() < wordminsize) continue;
// get tags from autotagging
@ -144,40 +152,32 @@ public class Tokenizer {
System.arraycopy(wordcache, 1, wordcache, 0, wordcache.length - 1);
wordcache[wordcache.length - 1] = word;
// distinguish punctuation and words
wordlen = word.length();
if (wordlen == 1 && SentenceReader.punctuation(word.charAt(0))) { // TODO: wordlen == 1 never true (see earlier if < wordminsize )
// store sentence
currsentwords.clear();
wordInSentenceCounter = 1;
// check index.of detection
if (last_last && comb_indexof && word.equals("modified")) {
this.RESULT_FLAGS.set(flag_cat_indexof, true);
wordenum.pre(true); // parse lines as they come with CRLF
}
if (last_index && (wordminsize > 2 || word.equals("of"))) comb_indexof = true;
last_last = word.equals("last");
last_index = word.equals("index");
// store word
allwordcounter++;
currsentwords.add(word);
Word wsp = this.words.get(word);
if (wsp != null) {
// word already exists
wordHandle = wsp.posInText;
wsp.inc();
} else {
// check index.of detection
if (last_last && comb_indexof && word.equals("modified")) {
this.RESULT_FLAGS.set(flag_cat_indexof, true);
wordenum.pre(true); // parse lines as they come with CRLF
}
if (last_index && (wordminsize > 2 || word.equals("of"))) comb_indexof = true;
last_last = word.equals("last");
last_index = word.equals("index");
// store word
allwordcounter++;
currsentwords.add(word);
Word wsp = this.words.get(word);
if (wsp != null) {
// word already exists
wordHandle = wsp.posInText;
wsp.inc();
} else {
// word does not yet exist, create new word entry
wordHandle = wordHandleCount++;
wsp = new Word(wordHandle, wordInSentenceCounter, /* sentences.size() + */ 100);
wsp.flags = this.RESULT_FLAGS.clone();
this.words.put(word.toLowerCase(), wsp);
}
// we now have the unique handle of the word, put it into the sentence:
wordInSentenceCounter++;
// word does not yet exist, create new word entry
wordHandle = ++wordHandleCount; // let start pos with 1
wsp = new Word(wordHandle, wordInSentenceCounter, allsentencecounter + 100); // nomal sentence start at 100 !
wsp.flags = this.RESULT_FLAGS.clone();
this.words.put(word.toLowerCase(), wsp);
}
// we now have the unique handle of the word, put it into the sentence:
wordInSentenceCounter++;
}
} finally {
wordenum.close();

@ -56,7 +56,7 @@ public class WordTokenizer implements Enumeration<StringBuilder> {
private StringBuilder nextElement0() {
StringBuilder s;
while (this.e.hasMoreElements()) {
s = this.e.nextElement(); // next word (punctuation and invisible chars filtered)
s = this.e.nextElement(); // next word (invisible chars filtered)
return s;
}
return null;
@ -118,7 +118,13 @@ public class WordTokenizer implements Enumeration<StringBuilder> {
for (int i = 0; i < r.length(); i++) { // tokenize one sentence
c = r.charAt(i);
if (SentenceReader.punctuation(c)) { // punctuation check is simple/quick, do it before invisible
if (sb.length() > 0) {this.s.add(sb); sb = new StringBuilder(20);}
if (sb.length() > 0) {
this.s.add(sb);
sb = new StringBuilder(1);
}
sb.append(c);
this.s.add(sb);
sb = new StringBuilder(20);
} else if (SentenceReader.invisible(c)) { // ! currently punctuation again checked by invisible()
if (sb.length() > 0) {this.s.add(sb); sb = new StringBuilder(20);}
} else {

@ -31,7 +31,6 @@ import java.net.MalformedURLException;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Date;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;

@ -31,7 +31,6 @@ import java.util.Date;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
import net.yacy.cora.document.id.AnchorURL;
import net.yacy.cora.document.id.DigestURL;
import net.yacy.cora.document.id.MultiProtocolURL;
import net.yacy.document.AbstractParser;

@ -124,7 +124,7 @@ public class SolrSelectServlet extends HttpServlet {
Switchboard sb = Switchboard.getSwitchboard();
// TODO: isUserInRole needs a login to jetty container (not done automatically on admin from localhost)
boolean authenticated = hrequest.isUserInRole(UserDB.AccessRight.ADMIN_RIGHT.toString());;
boolean authenticated = hrequest.isUserInRole(UserDB.AccessRight.ADMIN_RIGHT.toString());
// count remote searches if this was part of a p2p search
if (mmsp.getMap().containsKey("partitions")) {
@ -190,11 +190,21 @@ public class SolrSelectServlet extends HttpServlet {
if ((responseWriter instanceof YJsonResponseWriter || responseWriter instanceof OpensearchResponseWriter) && "true".equals(mmsp.get("hl", "true"))) {
// add options for snippet generation
if (!mmsp.getMap().containsKey("hl.q")) mmsp.getMap().put("hl.q", new String[]{q});
if (!mmsp.getMap().containsKey("hl.fl")) mmsp.getMap().put("hl.fl", new String[]{CollectionSchema.description_txt + "," + CollectionSchema.h4_txt.getSolrFieldName() + "," + CollectionSchema.h3_txt.getSolrFieldName() + "," + CollectionSchema.h2_txt.getSolrFieldName() + "," + CollectionSchema.h1_txt.getSolrFieldName() + "," + CollectionSchema.text_t.getSolrFieldName()});
if (!mmsp.getMap().containsKey("hl.fl")) mmsp.getMap().put("hl.fl", new String[]{CollectionSchema.description_txt.getSolrFieldName() + "," + CollectionSchema.h4_txt.getSolrFieldName() + "," + CollectionSchema.h3_txt.getSolrFieldName() + "," + CollectionSchema.h2_txt.getSolrFieldName() + "," + CollectionSchema.h1_txt.getSolrFieldName() + "," + CollectionSchema.text_t.getSolrFieldName()});
if (!mmsp.getMap().containsKey("hl.alternateField")) mmsp.getMap().put("hl.alternateField", new String[]{CollectionSchema.description_txt.getSolrFieldName()});
if (!mmsp.getMap().containsKey("hl.simple.pre")) mmsp.getMap().put("hl.simple.pre", new String[]{"<b>"});
if (!mmsp.getMap().containsKey("hl.simple.post")) mmsp.getMap().put("hl.simple.post", new String[]{"</b>"});
if (!mmsp.getMap().containsKey("hl.fragsize")) mmsp.getMap().put("hl.fragsize", new String[]{Integer.toString(SearchEvent.SNIPPET_MAX_LENGTH)});
if (!mmsp.getMap().containsKey("fl")) mmsp.getMap().put("fl", new String[]{
CollectionSchema.sku.getSolrFieldName() + "," +
CollectionSchema.title + "," +
CollectionSchema.description_txt.getSolrFieldName() + "," +
CollectionSchema.id.getSolrFieldName() + "," +
CollectionSchema.url_paths_sxt.getSolrFieldName() + "," +
CollectionSchema.last_modified.getSolrFieldName() + "," +
CollectionSchema.size_i.getSolrFieldName() + "," +
CollectionSchema.url_protocol_s.getSolrFieldName() + "," +
CollectionSchema.url_file_ext_s.getSolrFieldName()});
}
// get the embedded connector

@ -252,9 +252,16 @@ public final class WordReferenceRow extends AbstractReference implements WordRef
return (0xff & this.entry.getColByte(col_hitcount));
}
/**
* First position of word in text
* @return Collection with one element
*/
@Override
public Collection<Integer> positions() {
return new ArrayList<Integer>(0);
int pos = (int) this.entry.getColLong(col_posintext);
ArrayList arr = new ArrayList<Integer>(1);
arr.add(pos);
return arr;
}
@Override

@ -60,10 +60,11 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc
public final byte[] urlHash;
private String hostHash = null;
private final char type;
private int hitcount, llocal, lother, phrasesintext,
posinphrase, posofphrase,
urlcomps, urllength,
wordsintext, wordsintitle;
private int hitcount, // how often appears this word in the text
llocal, lother, phrasesintext,
posinphrase, posofphrase,
urlcomps, urllength,
wordsintext, wordsintitle;
private int virtualAge;
private final Queue<Integer> positions;
private double termFrequency;
@ -210,6 +211,10 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc
return this.type;
}
/**
* Number of times this word appears in the text
* @return the hit count of this word
*/
@Override
public int hitcount() {
return this.hitcount;
@ -259,7 +264,9 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc
this.hitcount, // how often appears this word in the text
this.wordsintext, // total number of words
this.phrasesintext, // total number of phrases
this.positions.isEmpty() ? 1 : this.positions.iterator().next(), // position of word in all words
// TODO: position 1 on empty positions may give high ranking scores for an unknown position (needs to be checked whether 0 would be appropriate)
this.positions.isEmpty() ? -1 : this.positions.iterator().next(), // position of word in all words
this.posinphrase, // position of word in its phrase
this.posofphrase, // number of the phrase where word appears
this.lastModified, // last-modified time of the document where word appears

@ -63,9 +63,17 @@ public abstract class AbstractReference implements Reference {
private static int max(Collection<Integer> a) {
if (a == null || a.isEmpty()) return Integer.MIN_VALUE;
Iterator<Integer> i = a.iterator();
/*
experienced a concurrency issue with this shortcut (2016-09-06)
when calling i.next() without testing hasNext() first:
java.util.NoSuchElementException at java.util.concurrent.LinkedBlockingQueue$Itr.next(LinkedBlockingQueue.java:828)
if (a.size() == 1) return i.next();
if (a.size() == 2) return Math.max(i.next(), i.next());
int r = i.next();
*/
int r = Integer.MIN_VALUE;
int s;
while (i.hasNext()) {
s = i.next();
@ -77,9 +85,12 @@ public abstract class AbstractReference implements Reference {
private static int min(Collection<Integer> a) {
if (a == null || a.isEmpty()) return Integer.MAX_VALUE;
Iterator<Integer> i = a.iterator();
/* concurrency issue (see max())
if (a.size() == 1) return i.next();
if (a.size() == 2) return Math.min(i.next(), i.next());
int r = i.next();
*/
int r = Integer.MAX_VALUE;
int s;
while (i.hasNext()) {
s = i.next();
@ -103,10 +114,11 @@ public abstract class AbstractReference implements Reference {
if (positions().size() < 2) return 0;
int d = 0;
Iterator<Integer> i = positions().iterator();
int s0 = i.next(), s1;
// int s0 = i.next(), s1; // concurrency issue see max()
int s0 = -1, s1;
while (i.hasNext()) {
s1 = i.next();
d += Math.abs(s0 - s1);
if (s0 > 0) d += Math.abs(s0 - s1);
s0 = s1;
}
return d / (positions().size() - 1);

@ -48,8 +48,6 @@ import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.Semaphore;
import net.yacy.cora.date.GenericFormatter;
import net.yacy.cora.document.encoding.ASCII;
import net.yacy.cora.document.feed.RSSFeed;

@ -325,6 +325,8 @@ public class NewsPool {
if (this.newsDB.get(record.id()) == null) {
this.incomingNews.push(record); // we want to see our own news..
this.outgoingNews.push(record); // .. and put it on the publishing list
} else {
ConcurrentLog.info("NewsPool", "publishing of news aborted, news with same id (time + originator) exists id=" + record.id());
}
} catch (final Exception e) {
ConcurrentLog.logException(e);

@ -30,7 +30,6 @@ import net.yacy.cora.document.encoding.ASCII;
import net.yacy.cora.document.feed.RSSMessage;
import net.yacy.cora.storage.ConcurrentARC;
import net.yacy.kelondro.util.MapTools;
import net.yacy.peers.operation.yacyVersion;
public class PeerActions {
@ -261,7 +260,7 @@ public class PeerActions {
final String cre1 = MapTools.string2map(decodedString, ",").get("cre");
final String cre2 = MapTools.string2map(record.toString(), ",").get("cre");
if ((cre1 == null) || (cre2 == null) || (!(cre1.equals(cre2)))) {
System.out.println("### ERROR - cre are not equal: cre1=" + cre1 + ", cre2=" + cre2);
Network.log.warn("processPeerArrival: ### ERROR - message creation date verification not equal: cre1=" + cre1 + ", cre2=" + cre2);
return;
}
try {

@ -33,7 +33,6 @@ import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.PrintWriter;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;

@ -1329,10 +1329,32 @@ public final class SearchEvent {
public long getSnippetComputationTime() {
return this.snippetComputationAllTime;
}
public ScoreMap<String> getTopicNavigator(final int count ) {
/**
* Get topics in a ScoreMap if config allows topic navigator
* (the topics are filtered by badwords, stopwords and words included in the query)
*
* @param count max number of topics returned
* @return ScoreMap with max number of topics, or null if the topic navigator is disabled, count is negative or fewer than 2 topics are available
*/
public ScoreMap<String> getTopicNavigator(final int count) {
if (this.topicNavigatorCount > 0 && count >= 0) { //topicNavigatorCount set during init, 0=no nav
return this.getTopics(count != 0 ? count : this.topicNavigatorCount, 500);
if (!this.ref.sizeSmaller(2)) {
ScoreMap<String> result;
int ic = count != 0 ? count : this.topicNavigatorCount;
if (this.ref.size() <= ic) { // size matches return map directly
result = this.getTopics(/*ic, 500*/);
} else { // collect top most count topics
result = new ConcurrentScoreMap<String>();
Iterator<String> it = this.getTopics(/*ic, 500*/).keys(false);
while (ic-- > 0 && it.hasNext()) {
String word = it.next();
result.set(word, this.ref.get(word));
}
}
return result;
}
}
return null;
}
@ -1428,7 +1450,11 @@ public final class SearchEvent {
*/
public void addResult(URIMetadataNode resultEntry, final float score) {
if (resultEntry == null) return;
final long ranking = ((long) (score * 128.f)) + postRanking(resultEntry, new ConcurrentScoreMap<String>() /*this.snippetProcess.rankingProcess.getTopicNavigator(10)*/);
final long ranking = ((long) (score * 128.f)) + postRanking(resultEntry, this.ref /*this.getTopicNavigator(MAX_TOPWORDS)*/);
// TODO: the line above originally used getTopicNavigator (see below), but getTopicNavigator returns this.ref and possibly alters this.ref on first call (this.ref.size < 2 -> this.ref.clear)
// TODO: verify and straighten the use of addTopic, getTopic and getTopicNavigator and related score calculation
// final long ranking = ((long) (score * 128.f)) + postRanking(resultEntry, this.getTopicNavigator(MAX_TOPWORDS));
resultEntry.setScore(ranking); // update the score of resultEntry for access by search interface / api
this.resultList.put(new ReverseElement<URIMetadataNode>(resultEntry, ranking)); // remove smallest in case of overflow
if (pollImmediately) this.resultList.poll(); // prevent re-ranking in case there is only a single index source which has already ranked entries.
@ -1439,10 +1465,19 @@ public final class SearchEvent {
long r = 0;
// for media search: prefer pages with many links
r += rentry.limage() << this.query.ranking.coeff_cathasimage;
r += rentry.laudio() << this.query.ranking.coeff_cathasaudio;
r += rentry.lvideo() << this.query.ranking.coeff_cathasvideo;
r += rentry.lapp() << this.query.ranking.coeff_cathasapp;
switch (this.query.contentdom) {
case IMAGE:
r += rentry.limage() << this.query.ranking.coeff_cathasimage;
break;
case AUDIO:
r += rentry.laudio() << this.query.ranking.coeff_cathasaudio;
break;
case VIDEO:
r += rentry.lvideo() << this.query.ranking.coeff_cathasvideo;
break;
case APP:
r += rentry.lapp() << this.query.ranking.coeff_cathasapp;
}
// apply citation count
//System.out.println("POSTRANKING CITATION: references = " + rentry.referencesCount() + ", inbound = " + rentry.llocal() + ", outbound = " + rentry.lother());
@ -1458,24 +1493,27 @@ public final class SearchEvent {
final String urlstring = rentry.url().toNormalform(true);
final String[] urlcomps = MultiProtocolURL.urlComps(urlstring);
final String[] descrcomps = MultiProtocolURL.splitpattern.split(rentry.title().toLowerCase());
for (final String urlcomp : urlcomps) {
// apply query-in-result matching
final QueryGoal.NormalizedWords urlcompmap = new QueryGoal.NormalizedWords(urlcomps);
final QueryGoal.NormalizedWords descrcompmap = new QueryGoal.NormalizedWords(descrcomps);
// the token map is used (instead of urlcomps/descrcomps) to determine appearance in url/title and eliminate duplicate occurrences
// (example Title="News News News News News News - today is party -- News News News News News News" to add one score instead of 12 * score !)
for (final String urlcomp : urlcompmap) {
int tc = topwords.get(urlcomp);
if (tc > 0) r += Math.max(1, tc) << this.query.ranking.coeff_urlcompintoplist;
}
for (final String descrcomp : descrcomps) {
for (final String descrcomp : descrcompmap) {
int tc = topwords.get(descrcomp);
if (tc > 0) r += Math.max(1, tc) << this.query.ranking.coeff_descrcompintoplist;
}
// apply query-in-result matching
final QueryGoal.NormalizedWords urlcomph = new QueryGoal.NormalizedWords(urlcomps);
final QueryGoal.NormalizedWords descrcomph = new QueryGoal.NormalizedWords(descrcomps);
final Iterator<String> shi = this.query.getQueryGoal().getIncludeWords();
String queryword;
while (shi.hasNext()) {
queryword = shi.next();
if (urlcomph.contains(queryword)) r += 256 << this.query.ranking.coeff_appurl;
if (descrcomph.contains(queryword)) r += 256 << this.query.ranking.coeff_app_dc_title;
if (urlcompmap.contains(queryword)) r += 256 << this.query.ranking.coeff_appurl;
if (descrcompmap.contains(queryword)) r += 256 << this.query.ranking.coeff_app_dc_title;
}
return r;
}
@ -1818,14 +1856,23 @@ public final class SearchEvent {
// this is only available if execQuery() was called before
return this.localSearchInclusion;
}
public ScoreMap<String> getTopics(final int maxcount, final long maxtime) {
// create a list of words that had been computed by statistics over all
// words that appeared in the url or the description of all urls
/**
* Return the list of words that had been computed by statistics over all
* words that appeared in the url or the description of all urls
*
* @return ScoreMap
*/
public ScoreMap<String> getTopics(/* final int maxcount, final long maxtime */) {
/* ---------------------------------- start of rem (2016-09-03)
// TODO: result map is not used currently, verify if it should and use or delete this code block
// TODO: as it is not used now - in favour of performance this code block is rem'ed (2016-09-03)
final ScoreMap<String> result = new ConcurrentScoreMap<String>();
if ( this.ref.sizeSmaller(2) ) {
this.ref.clear(); // navigators with one entry are not useful
}
final Map<String, Float> counts = new HashMap<String, Float>();
final Iterator<String> i = this.ref.keys(false);
String word;
@ -1851,11 +1898,17 @@ public final class SearchEvent {
result.set(ce.getKey(), (int) (((double) maxcount) * (ce.getValue() - min) / (max - min)));
}
}
/* ------------------------------------ end of rem (2016-09-03) */
return this.ref;
}
private final static Pattern lettermatch = Pattern.compile("[a-z]+");
/**
* Collects topics in a ScoreMap for words not included in the query words.
* Words are also filtered by badword blacklist and stopword list.
* @param words
*/
public void addTopic(final String[] words) {
String word;
for ( final String w : words ) {
@ -1872,6 +1925,10 @@ public final class SearchEvent {
}
}
/**
* Add title words to this searchEvent's topic score map
* @param resultEntry
*/
protected void addTopics(final URIMetadataNode resultEntry) {
// take out relevant information for reference computation
if ((resultEntry.url() == null) || (resultEntry.title() == null)) return;

@ -228,13 +228,13 @@ public class ReferenceOrder {
assert this.ranking != null;
final long tf = ((this.max.termFrequency() == this.min.termFrequency()) ? 0 : (((int)(((t.termFrequency()-this.min.termFrequency())*256.0)/(this.max.termFrequency() - this.min.termFrequency())))) << this.ranking.coeff_termfrequency);
//System.out.println("tf(" + t.urlHash + ") = " + Math.floor(1000 * t.termFrequency()) + ", min = " + Math.floor(1000 * min.termFrequency()) + ", max = " + Math.floor(1000 * max.termFrequency()) + ", tf-normed = " + tf);
final int maxmaxpos = this.max.maxposition();
final int maxmaxpos = this.max.maxposition(); // returns Integer.MIN_VALUE if positions empty
final int minminpos = this.min.minposition();
final long r =
((256 - DigestURL.domLengthNormalized(t.urlhash())) << this.ranking.coeff_domlength)
+ ((this.max.urlcomps() == this.min.urlcomps() ) ? 0 : (256 - (((t.urlcomps() - this.min.urlcomps() ) << 8) / (this.max.urlcomps() - this.min.urlcomps()) )) << this.ranking.coeff_urlcomps)
+ ((this.max.urllength() == this.min.urllength() ) ? 0 : (256 - (((t.urllength() - this.min.urllength() ) << 8) / (this.max.urllength() - this.min.urllength()) )) << this.ranking.coeff_urllength)
+ ((maxmaxpos == minminpos) ? 0 : (256 - (((t.minposition() - minminpos) << 8) / (maxmaxpos - minminpos))) << this.ranking.coeff_posintext)
+ ((maxmaxpos == minminpos || maxmaxpos < 0) ? 0 : (256 - (((t.minposition() - minminpos) << 8) / (maxmaxpos - minminpos))) << this.ranking.coeff_posintext)
+ ((this.max.posofphrase() == this.min.posofphrase()) ? 0 : (256 - (((t.posofphrase() - this.min.posofphrase() ) << 8) / (this.max.posofphrase() - this.min.posofphrase()) )) << this.ranking.coeff_posofphrase)
+ ((this.max.posinphrase() == this.min.posinphrase()) ? 0 : (256 - (((t.posinphrase() - this.min.posinphrase() ) << 8) / (this.max.posinphrase() - this.min.posinphrase()) )) << this.ranking.coeff_posinphrase)
+ ((this.max.distance() == this.min.distance() ) ? 0 : (256 - (((t.distance() - this.min.distance() ) << 8) / (this.max.distance() - this.min.distance()) )) << this.ranking.coeff_worddistance)

@ -64,7 +64,7 @@ public final class serverClassLoader extends ClassLoader {
@Override
protected Class<?> findClass(String classname) throws ClassNotFoundException {
// construct path to htroot for a servletname
File cpath = new File (Switchboard.getSwitchboard().getDataPath(SwitchboardConstants.HTROOT_PATH, SwitchboardConstants.HTROOT_PATH_DEFAULT),classname+".class");
File cpath = new File (Switchboard.getSwitchboard().getAppPath(SwitchboardConstants.HTROOT_PATH, SwitchboardConstants.HTROOT_PATH_DEFAULT),classname+".class");
return loadClass(cpath);
}

@ -63,6 +63,7 @@ import net.yacy.cora.document.encoding.UTF8;
import net.yacy.cora.document.id.MultiProtocolURL;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.cora.protocol.RequestHeader.FileType;
import net.yacy.cora.util.JSONObject;
import net.yacy.document.parser.html.CharacterCoding;
import net.yacy.kelondro.util.Formatter;
import net.yacy.search.Switchboard;
@ -81,12 +82,6 @@ public class serverObjects implements Serializable, Cloneable {
public final static String ADMIN_AUTHENTICATE_MSG = "admin log-in. If you don't know the password, set it with {yacyhome}/bin/passwd.sh {newpassword}";
private final static Pattern patternNewline = Pattern.compile("\n");
private final static Pattern patternDoublequote = Pattern.compile("\"");
private final static Pattern patternSlash = Pattern.compile("/");
private final static Pattern patternB = Pattern.compile("\b");
private final static Pattern patternF = Pattern.compile("\f");
private final static Pattern patternR = Pattern.compile("\r");
private final static Pattern patternT = Pattern.compile("\t");
private boolean localized = true;
@ -284,20 +279,10 @@ public class serverObjects implements Serializable, Cloneable {
* @param key key name as String.
* @param value a String that will be reencoded for JSON output.
*/
public void putJSON(final String key, final String value) {
put(key, toJSON(value));
}
public static String toJSON(String value) {
// value = value.replaceAll("\\", "\\\\");
value = patternDoublequote.matcher(value).replaceAll("'");
value = patternSlash.matcher(value).replaceAll("\\/");
value = patternB.matcher(value).replaceAll("\\b");
value = patternF.matcher(value).replaceAll("\\f");
value = patternNewline.matcher(value).replaceAll("\\r");
value = patternR.matcher(value).replaceAll("\\r");
value = patternT.matcher(value).replaceAll("\\t");
return value;
public void putJSON(final String key, String value) {
value = JSONObject.quote(value);
value = value.substring(1, value.length() - 1);
put(key, value);
}
/**
@ -558,9 +543,4 @@ public class serverObjects implements Serializable, Cloneable {
return this.map;
}
public static void main(final String[] args) {
final String v = "ein \"zitat\"";
System.out.println(toJSON(v));
}
}

@ -713,13 +713,17 @@ public final class yacy {
//System.out.print("args=["); for (int i = 0; i < args.length; i++) System.out.print(args[i] + ", "); System.out.println("]");
if ((args.length >= 1) && (args[0].toLowerCase().equals("-startup") || args[0].equals("-start"))) {
// normal start-up of yacy
if (args.length > 1) dataRoot = new File(System.getProperty("user.home").replace('\\', '/'), args[1]);
preReadSavedConfigandInit(dataRoot);
if (args.length > 1) {
dataRoot = new File(System.getProperty("user.home").replace('\\', '/'), args[1]);
}
preReadSavedConfigandInit(dataRoot);
startup(dataRoot, applicationRoot, startupMemFree, startupMemTotal, false);
} else if (args.length >= 1 && args[0].toLowerCase().equals("-gui")) {
// start-up of yacy with gui
if (args.length > 1) dataRoot = new File(System.getProperty("user.home").replace('\\', '/'), args[1]);
preReadSavedConfigandInit(dataRoot);
if (args.length > 1) {
dataRoot = new File(System.getProperty("user.home").replace('\\', '/'), args[1]);
}
preReadSavedConfigandInit(dataRoot);
startup(dataRoot, applicationRoot, startupMemFree, startupMemTotal, true);
} else if ((args.length >= 1) && ((args[0].toLowerCase().equals("-shutdown")) || (args[0].equals("-stop")))) {
// normal shutdown of yacy
@ -732,7 +736,7 @@ public final class yacy {
} else if ((args.length >= 1) && (args[0].toLowerCase().equals("-version"))) {
// show yacy version
System.out.println(copyright);
} else if ((args.length > 1) && (args[0].toLowerCase().equals("-config"))) {
} else if ((args.length > 1) && (args[0].toLowerCase().equals("-config"))) {
// set config parameter. Special handling of adminAccount=user:pwd (generates md5 encoded password)
// on Windows parameter should be enclosed in doublequotes to accept = sign (e.g. -config "port=8090" "port.ssl=8043")
File f = new File (dataRoot,"DATA/SETTINGS/");
@ -778,9 +782,11 @@ public final class yacy {
}
System.out.println();
}
} else {
if (args.length == 1) applicationRoot= new File(args[0]);
preReadSavedConfigandInit(dataRoot);
} else {
if (args.length == 1) {
applicationRoot= new File(args[0]);
}
preReadSavedConfigandInit(dataRoot);
startup(dataRoot, applicationRoot, startupMemFree, startupMemTotal, false);
}
} finally {

@ -40,6 +40,7 @@ Options
-l, --logging save the output of YaCy to yacy.log
-d, --debug show the output of YaCy on the console
-p, --print-out only print the command, which would be executed to start YaCy
-s, --startup [data-path] start YaCy using the specified data folder path, relative to the current user home
-g, --gui start a gui for YaCy
USAGE
}
@ -47,16 +48,16 @@ USAGE
#startup YaCy
cd "`dirname $0`"
if [ $OS = "OpenBSD" ]
if [ $OS = "OpenBSD" ] || [ $OS = "Darwin" ]
then
if [ $(echo $@ | grep -o "\-\-" | wc -l) -ne 0 ]
then
echo "WARNING: Unfortunately this script does not support long options in $OS."
fi
options="`getopt hdlptg: $*`"
options="`getopt hdlptsg: $*`"
else
options="`getopt -n YaCy -o h,d,l,p,t,g -l help,debug,logging,print-out,tail-log,gui -- $@`"
options="`getopt -n YaCy -o h,d,l,p,t,s,g -l help,debug,logging,print-out,tail-log,startup,gui -- $@`"
fi
if [ $? -ne 0 ];then
@ -71,6 +72,7 @@ LOGGING=0
DEBUG=0
PRINTONLY=0
TAILLOG=0
STARTUP=0
GUI=0
for option in $options;do
if [ $isparameter -ne 1 ];then #option
@ -101,17 +103,25 @@ for option in $options;do
-t|--tail-log)
TAILLOG=1
;;
-s|--startup)
# Long form must be spelled "--startup": that is what the usage text documents
# and what getopt emits for the "startup" long option, like the sibling arms
# (--tail-log, --gui). The following token(s) are collected as the data path.
STARTUP=1
isparameter=1
;;
-g|--gui)
GUI=1
isparameter=1
;;
esac #case option
else #parameter
if [ x$option = "--" ];then #option / parameter separator
if [ $option = "--" ];then #option / parameter separator
isparameter=1;
continue
else
parameter="$parameter $option"
if [ $parameter ];then
parameter="$parameter $option"
else
parameter="$option"
fi
fi
fi #parameter or option?
done
@ -189,8 +199,11 @@ for N in lib/*.jar; do CLASSPATH="$CLASSPATH$N:"; done
CLASSPATH=".:$CLASSPATH"
cmdline="$JAVA $JAVA_ARGS -classpath $CLASSPATH net.yacy.yacy";
if [ $GUI -eq 1 ] #gui
if [ $STARTUP -eq 1 ] #startup
then
cmdline="$cmdline -startup $parameter"
elif [ $GUI -eq 1 ];then #gui
cmdline="$cmdline -gui $parameter"
fi
if [ $DEBUG -eq 1 ] #debug

@ -0,0 +1,34 @@
package net.yacy.cora.sorting;
import java.util.Iterator;
import static org.junit.Assert.assertEquals;
import org.junit.Test;
public class ConcurrentScoreMapTest {

    /**
     * Test of totalCount method, of class ConcurrentScoreMap:
     * the reported total must equal the sum of the scores of all keys,
     * also after the score of an existing key has been set a second time.
     */
    @Test
    public void testTotalCount() {
        final ConcurrentScoreMap<String> scores = new ConcurrentScoreMap<String>();
        scores.set("first", 10);
        scores.set("second", 5);
        scores.set("third", 13);
        // set an already existing key again with a different score
        scores.set("first", 100);

        // sum up the individual scores by walking over all keys
        long expectedTotal = 0;
        for (final Iterator<String> keys = scores.keys(true); keys.hasNext();) {
            expectedTotal += scores.get(keys.next());
        }

        assertEquals(expectedTotal, scores.totalCount());
    }
}

@ -1,6 +1,5 @@
package net.yacy.data.wiki;
import java.io.BufferedReader;
import org.junit.Test;
import static org.junit.Assert.*;

@ -0,0 +1,39 @@
package net.yacy.document;
import java.net.MalformedURLException;
import java.util.Map;
import net.yacy.cora.document.WordCache;
import net.yacy.kelondro.data.word.Word;
import org.junit.Test;
import static org.junit.Assert.*;
public class TokenizerTest {

    /**
     * Test of words method, of class Tokenizer:
     * checks the position and the occurrence count stored for words
     * extracted from a one-sentence text.
     */
    @Test
    public void testWords() throws MalformedURLException {
        // expected positions (1-letter words don't count):
        // One=1 word=2 is=3 not=4 sentence=5 because=6 words=7 are=8 just=9 words=10
        final String text = "One word is not a sentence because words are just words.";

        final WordCache meaningLib = new WordCache(null);
        final VocabularyScraper scraper = null; // no vocabulary scraper, autotagging is off
        final Tokenizer tokenizer = new Tokenizer(null, text, meaningLib, false, scraper);
        final Map<String, Word> extracted = tokenizer.words;

        // a word contained once in the text
        final Word single = extracted.get("word");
        assertEquals("position of 'word' ", 2, single.posInText);
        assertEquals("occurence of 'word' ", 1, single.occurrences());

        // a word contained twice in the text
        final Word repeated = extracted.get("words");
        assertEquals("position of 'words' ", 7, repeated.posInText);
        assertEquals("occurence of 'words' ", 2, repeated.occurrences());
    }
}

@ -22,8 +22,12 @@ public class WordTokenizerTest {
int cnt = 0;
while (wt.hasMoreElements()) {
StringBuilder sb = wt.nextElement();
assertEquals("word", sb.toString());
cnt++;
if (sb.length() > 1) { // skip punktuation
assertEquals("word", sb.toString());
cnt++;
} else {
assertTrue("punktuation", SentenceReader.punctuation(sb.charAt(0)));
}
}
wt.close();
assertEquals(10, cnt);

Loading…
Cancel
Save