Merge remote-tracking branch 'origin/master' into docker

pull/55/head
luccioman 9 years ago
commit adc657004d

@ -61,8 +61,8 @@ public class CrawlMonitorRemoteStart {
prop.put("otherCrawlStartInProgress_" + showedCrawl + "_dark", dark ? "1" : "0");
prop.put("otherCrawlStartInProgress_" + showedCrawl + "_cre", record.created().toString());
prop.put("otherCrawlStartInProgress_" + showedCrawl + "_peername", peername);
prop.put("otherCrawlStartInProgress_" + showedCrawl + "_startURL", record.attributes().get("startURL").toString());
prop.put("otherCrawlStartInProgress_" + showedCrawl + "_intention", record.attributes().get("intention").toString());
prop.put("otherCrawlStartInProgress_" + showedCrawl + "_startURL", record.attributes().get("startURL"));
prop.put("otherCrawlStartInProgress_" + showedCrawl + "_intention", record.attributes().get("intention"));
prop.put("otherCrawlStartInProgress_" + showedCrawl + "_generalDepth", record.attributes().get("generalDepth"));
prop.put("otherCrawlStartInProgress_" + showedCrawl + "_crawlingQ", ("true".equals(record.attributes().get("crawlingQ"))) ? "1" : "0");
showedCrawl++;
@ -88,8 +88,8 @@ public class CrawlMonitorRemoteStart {
prop.put("otherCrawlStartFinished_" + showedCrawl + "_dark", dark ? "1" : "0");
prop.put("otherCrawlStartFinished_" + showedCrawl + "_cre", record.created().toString());
prop.putHTML("otherCrawlStartFinished_" + showedCrawl + "_peername", peername);
prop.putHTML("otherCrawlStartFinished_" + showedCrawl + "_startURL", record.attributes().get("startURL").toString());
prop.put("otherCrawlStartFinished_" + showedCrawl + "_intention", record.attributes().get("intention").toString());
prop.putHTML("otherCrawlStartFinished_" + showedCrawl + "_startURL", record.attributes().get("startURL"));
prop.put("otherCrawlStartFinished_" + showedCrawl + "_intention", record.attributes().get("intention"));
prop.put("otherCrawlStartFinished_" + showedCrawl + "_generalDepth", record.attributes().get("generalDepth"));
prop.put("otherCrawlStartFinished_" + showedCrawl + "_crawlingQ", ("true".equals(record.attributes().get("crawlingQ"))) ? "1" : "0");
showedCrawl++;

@ -594,6 +594,9 @@ public class Crawler_p {
m.remove("generalFilter");
m.remove("specificFilter");
m.put("intention", post.get("intention", "").replace(',', '/'));
if (successurls.size() > 0) { // just include at least one of the startURL's in case of multiple for the news service
m.put("startURL", successurls.iterator().next().toNormalform(true));
}
sb.peers.newsPool.publishMyNews(sb.peers.mySeed(), NewsPool.CATEGORY_CRAWL_START, m);
}
} else {

@ -60,9 +60,7 @@ import net.yacy.data.BookmarksDB.Bookmark;
import net.yacy.data.DidYouMean;
import net.yacy.data.UserDB;
import net.yacy.data.ymark.YMarkTables;
import net.yacy.document.Document;
import net.yacy.document.LibraryProvider;
import net.yacy.document.Parser;
import net.yacy.document.Tokenizer;
import net.yacy.kelondro.data.meta.URIMetadataNode;
import net.yacy.kelondro.util.Bitfield;
@ -73,7 +71,6 @@ import net.yacy.kelondro.util.SetTools;
import net.yacy.peers.EventChannel;
import net.yacy.peers.NewsPool;
import net.yacy.peers.graphics.ProfilingGraph;
import net.yacy.repository.Blacklist.BlacklistType;
import net.yacy.search.EventTracker;
import net.yacy.search.Switchboard;
import net.yacy.search.SwitchboardConstants;
@ -195,6 +192,7 @@ public class yacysearch {
prop.put("geoinfo", "0");
prop.put("rss_queryenc", "");
prop.put("meanCount", 5);
prop.put("eventID",""); // mandatory parameter for yacysearchtrailer/yacysearchitem includes
return prop;
}

@ -87,7 +87,7 @@ public class GitRevMavenTask extends AbstractMojo {
break;
}
}
if (lastTag != null || distance++ > 999) {
if (lastTag != null || distance++ > 90999) {
break;
}
}

@ -79,7 +79,7 @@ public class GitRevTask extends org.apache.tools.ant.Task {
break;
}
}
if (lastTag != null || distance++ > 999) break;
if (lastTag != null || distance++ > 90999) break;
}
walk.dispose();
if (lastTag == null) {

@ -1069,7 +1069,7 @@ Network Scanner==网络扫描器
YaCy can scan a network segment for available http, ftp and smb server.==YaCy可扫描http, ftp 和smb服务器.
You must first select a IP range and then, after this range is scanned,==须先指定IP范围, 再进行扫描,
it is possible to select servers that had been found for a full-site crawl.==才有可能选择主机并将其作为全站crawl的服务器.
No servers had been detected in the given IP range #[iprange]#.
#No servers had been detected in the given IP range==
Please enter a different IP range for another scan.==未检测到可用服务器, 请重新指定IP范围.
Please wait...==请稍候...
>Scan the network<==>扫描网络<
@ -2952,7 +2952,6 @@ New Password is empty.==新密码为空.
#File: ViewFile.html
#---------------------------
YaCy '#[clientname]#': View URL Content==YaCy '#[clientname]#': 查看文件内容
View URL Content==查看链接内容
>Get URL Viewer<==>获取链接浏览器<
>URL Metadata<==>链接元数据<

@ -1334,7 +1334,7 @@ Network Scanner==Netzwerk Scanner
YaCy can scan a network segment for available http, ftp and smb server.==YaCy kann ein Netzwerksegment auf verfügbare HTTP, FTP und SMB Server hin absuchen.
You must first select a IP range and then, after this range is scanned,==Sie müssen zuerst einen IP Bereich festlegen und dann, nachdem dieser Bereich gescannt wurde,
it is possible to select servers that had been found for a full-site crawl.==ist es möglich einen gefunden Server für eine volle Seiten Suche crawlen zu lassen.
No servers had been detected in the given IP range #[iprange]#.
No servers had been detected in the given IP range==Es wurde kein Server im angegebenen IP Bereich gefunden
Please enter a different IP range for another scan.==Bitte geben Sie einen anderen IP Bereich ein für einen weiteren Scan.
Please wait...==Bitte warten...
>Scan the network<==>Das Netzwerk Scannen<
@ -3147,7 +3147,7 @@ For community support, please visit our==Für Unterstützung aus der Community,
#File: Status_p.inc
#---------------------------
#System Status==System Status
System Status==Systemstatus
Unknown==unbekannt
YaCy version:==YaCy Version:
Uptime:==Online seit:
@ -3493,7 +3493,6 @@ New Password is empty.==Das neue Passwort ist leer.
#File: ViewFile.html
#---------------------------
YaCy '#[clientname]#': View URL Content==YaCy '#[clientname]#': Zeige URL Inhalte
View URL Content==Zeige URL Inhalte
>Get URL Viewer<==>URL Betrachter<
"Show Metadata"=="Metadaten anzeigen"

@ -101,7 +101,7 @@ You do not need to provide any personal data here, but if you want to distribute
#Yahoo!==Yahoo!
#MSN=MSN
Comment==Σχόλιο
"Save"==Αποθήκευση
"Save"=="Αποθήκευση"
#-----------------------------
#File: Connections_p.html

@ -1077,7 +1077,7 @@ Network Scanner==नेटवर्क स्कैनर
YaCy can scan a network segment for available http, ftp and smb server.==YaCy उपलब्ध HTTP, FTP और किसी सर्वर के लिए एक नेटवर्क खंड स्कैन कर सकते हैं.
You must first select a IP range and then, after this range is scanned,==इस श्रृंखला स्कैन के बाद आप पहली बार, तो एक आईपी श्रेणी का चयन करना चाहिए
it is possible to select servers that had been found for a full-site crawl.==यह एक पूरी साइट क्रॉल के लिए पाया गया था कि सर्वर का चयन करने के लिए संभव है.
No servers had been detected in the given IP range #[iprange]#.
#No servers had been detected in the given IP range==
Please enter a different IP range for another scan.==एक और स्कैन के लिए एक अलग आईपी रेंज दर्ज करें.
Please wait...==कृपया प्रतीक्षा करें ...
>Scan the network<==>नेटवर्क स्कैन<

@ -97,7 +97,7 @@ You do not need to provide any personal data here, but if you want to distribute
#Yahoo!==Yahoo!
#MSN=MSN
Comment==Commento
"Save"==Salva
"Save"=="Salva"
#-----------------------------
#File: Connections_p.html

@ -1587,9 +1587,6 @@
<trans-unit id="bfcc5088" xml:space="preserve" approved="no" translate="yes">
<source>"Submit"</source>
</trans-unit>
<trans-unit id="fda7badd" xml:space="preserve" approved="no" translate="yes">
<source>http://www.iana.org/assignments/media-types/&lt;/a&gt;</source>
</trans-unit>
</body>
</file>
@ -2117,6 +2114,9 @@
<trans-unit id="5d032a67" xml:space="preserve" approved="no" translate="yes">
<source>Release will be installed. Please wait.</source>
</trans-unit>
<trans-unit id="3c7a4df9" xml:space="preserve" approved="no" translate="yes">
<source>You installed YaCy with a package manager.</source>
</trans-unit>
<trans-unit id="20240fb3" xml:space="preserve" approved="no" translate="yes">
<source>To update YaCy, use the package manager:</source>
</trans-unit>
@ -2192,9 +2192,6 @@
<trans-unit id="8713e5b1" xml:space="preserve" approved="no" translate="yes">
<source>Last Deploy</source>
</trans-unit>
<trans-unit id="3c7a4df9" xml:space="preserve" approved="no" translate="yes">
<source>You installed YaCy with a package manager.</source>
</trans-unit>
</body>
</file>
@ -3297,8 +3294,8 @@
<trans-unit id="e82c07f9" xml:space="preserve" approved="no" translate="yes">
<source>it is possible to select servers that had been found for a full-site crawl.</source>
</trans-unit>
<trans-unit id="ede81ac7" xml:space="preserve" approved="no" translate="yes">
<source>No servers had been detected in the given IP range #[iprange]#.</source>
<trans-unit id="9ad86623" xml:space="preserve" approved="no" translate="yes">
<source>No servers had been detected in the given IP range</source>
</trans-unit>
<trans-unit id="c8005605" xml:space="preserve" approved="no" translate="yes">
<source>Please enter a different IP range for another scan.</source>
@ -5518,12 +5515,9 @@
<file original="Network.html" source-language="en" datatype="html">
<body>
<trans-unit id="3e587bc4" xml:space="preserve" approved="no" translate="yes">
<source>YaCy '#[clientname]#': YaCy Search Network</source>
</trans-unit>
<trans-unit id="3024b1ff" xml:space="preserve" approved="no" translate="yes">
<source>YaCy Search Network '#[networkName]#'</source>
</trans-unit>
<trans-unit id="ff8b8258" xml:space="preserve" approved="no" translate="yes">
<source>YaCy Search Network</source>
</trans-unit>
<trans-unit id="1f205c10" xml:space="preserve" approved="no" translate="yes">
<source>YaCy Network&lt;</source>
</trans-unit>
@ -5815,9 +5809,6 @@
<trans-unit id="b5434abc" xml:space="preserve" approved="no" translate="yes">
<source>&gt;DHT-in&lt;</source>
</trans-unit>
<trans-unit id="ff8b8258" xml:space="preserve" approved="no" translate="yes">
<source>YaCy Search Network</source>
</trans-unit>
<trans-unit id="31955db0" xml:space="preserve" approved="no" translate="yes">
<source>Count of Connected Senior Peers</source>
</trans-unit>
@ -6752,10 +6743,19 @@
</trans-unit>
<trans-unit id="c6803af7" xml:space="preserve" approved="no" translate="yes">
<source>"Set Boost Query"</source>
</trans-unit>
</trans-unit>
<trans-unit id="c50afe39" xml:space="preserve" approved="no" translate="yes">
<source>field not in local index (boost has no effect)</source>
</trans-unit>
<trans-unit id="ebe8bd4e" xml:space="preserve" approved="no" translate="yes">
<source>You can boost with vocabularies, use the field</source>
</trans-unit>
<trans-unit id="369809bc" xml:space="preserve" approved="no" translate="yes">
<source>with values</source>
</trans-unit>
<trans-unit id="abdacdba" xml:space="preserve" approved="no" translate="yes">
<source>You can also boost on logarithmic occurrence counters of the fields</source>
</trans-unit>
<trans-unit id="af04244c" xml:space="preserve" approved="no" translate="yes">
<source>"Set Field Boosts"</source>
</trans-unit>
@ -8658,9 +8658,6 @@
<trans-unit id="a444325e" xml:space="preserve" approved="no" translate="yes">
<source>See the page info about the url.</source>
</trans-unit>
<trans-unit id="700fabc1" xml:space="preserve" approved="no" translate="yes">
<source>YaCy '#[clientname]#': View URL Content</source>
</trans-unit>
<trans-unit id="df784dad" xml:space="preserve" approved="no" translate="yes">
<source>View URL Content</source>
</trans-unit>

@ -1131,7 +1131,6 @@ List of possible crawl start URLs==Список ссылок для провер
#File: CrawlProfileEditor_p.html
#---------------------------
Crawl Profile Editor==Изменение профиля индексирования
>Crawl Profile Editor<==>Изменение профиля индексирования<
>Crawler Steering<==>Управление индексатором<
>Crawl Scheduler<==>Планировщик индексирования<
>Scheduled Crawls can be modified in this table<==>Запланированное индексирование можно изменить в этой таблице<
@ -1461,7 +1460,7 @@ Network Scanner==Сканер сети
YaCy can scan a network segment for available http, ftp and smb server.==YaCy может сканировать такие сегменты сети как http-, ftp- и smb-серверы .
You must first select a IP range and then, after this range is scanned,==Сначала вы должны выбрать диапазон IP-адресов, а затем диапазон сканирования.
it is possible to select servers that had been found for a full-site crawl.==После этого можно выбрать серверы для полного индексирования сайта.
No servers had been detected in the given IP range #[iprange]#.==Серверы не обнаружены в заданном диапазоне IP-адресов.
No servers had been detected in the given IP range==Серверы не обнаружены в заданном диапазоне IP-адресов
Please enter a different IP range for another scan.==Пожалуйста, введите другой диапазон IP-адресов, для повторного сканирования.
Please wait...==Пожалуйста, подождите...
>Scan the network<==>Сканирование сети<
@ -2383,8 +2382,7 @@ The target peer is alive but did not receive your message. Sorry.==Узел по
#File: Network.html
#---------------------------
YaCy '#[clientname]#': YaCy Search Network==YaCy '#[clientname]#': Мониторинг сети
YaCy Search Network '#[networkName]#'==Мониторинг сети YaCy
YaCy Search Network==Мониторинг сети YaCy
YaCy Network<==Сеть YaCy<
The information that is presented on this page can also be retrieved as XML.==Информация, указанная на этой странице, также может быть получена как XML.
Click the API icon to see the XML.==Нажмите на иконку API, чтобы увидеть XML.
@ -2876,7 +2874,6 @@ field not in local index (boost has no effect)==поля нет в локаль
#File: RegexTest.html
#---------------------------
RegexTest==Тест регулярного выражения
Regex Test==Тест регулярного выражения
Test String==Тест строки
Regular Expression==Регулярное выражение
@ -3767,7 +3764,6 @@ Parsed Sentences==Разобранные предложения
Parsed Tokens/Words==Разобранные маркеры/слова
Link List==Список ссылок
Citation Report==Отчет цитирования
>CitationReport<==>Отчет цитирования<
"Show"=="Показать"
Unable to find URL Entry in DB==Невозможно найти запись ссылки в базе данных.
Invalid URL==Неправильный URL-адрес
@ -3938,14 +3934,13 @@ Title==Заголовок
#File: WatchWebStructure_p.html
#---------------------------
Web Structure<==Вэб-структура<
Web Structure==Вэб-структура
The data that is visualized here can also be retrieved in a XML file, which lists the reference relation between the domains.==Эти данные, также могут быть получены в виде XML-файла с перекрёстными ссылками между доменами.
With a GET-property 'about' you get only reference relations about the host that you give in the argument field for 'about'.==Указав параметр "GET" 'about' вы получите только перекрёстные ссылки о хосте, которые указан в поле 'about'.
With a GET-property 'latest' you get a list of references that had been computed during the current run-time of YaCy, and with each next call only an update to the next list of references.==Указав параметр GET" 'latest' вы получите список ссылок вычисленных во время текущей работы YaCy, обновляющийся при каждом следующем вызове.
Click the API icon to see the XML file.==Нажмите на иконку API для просмотра XML-файла.
To see a list of all APIs, please visit the==Для просмотра списка всех API, пожалуйста, посетите
API wiki page==страницу API Wiki
Web Structure==Вэб-структура
>Host List<==>Список хостов<
>#[count]# outlinks==>#[count]# внешних ссылок
host<==Хост<

@ -427,7 +427,7 @@ You can also use your peer without opening it, but this is not recomended.==Ви
#File: ConfigHeuristics_p.html
#---------------------------
Heuristics Configuration==Настройки евристики
A <a href="http://en.wikipedia.org/wiki/Heuristic">heuristic</a> is an 'experience-based technique that help in problem solving, learning and discovery' (wikipedia).==<a href="https://de.wikipedia.org/wiki/Heuristik">Heuristik</a> 'bezeichnet die Kunst, mit begrenztem Wissen und wenig Zeit zu guten Lösungen zu kommen.' (Wikipedia).
#A <a href="http://en.wikipedia.org/wiki/Heuristic">heuristic</a> is an 'experience-based technique that help in problem solving, learning and discovery' (wikipedia).==<a href="https://de.wikipedia.org/wiki/Heuristik">Heuristik</a> 'bezeichnet die Kunst, mit begrenztem Wissen und wenig Zeit zu guten Lösungen zu kommen.' (Wikipedia).
The search heuristics that can be switched on here are techniques that help the discovery of possible search results based on link guessing, in-search crawling and requests to other search engines.==Пошукова евристика може бути використовувати методи, які допомагають виявити можливі результати пошуку з використанням запитів по посиланнях, вбудованого сканування та запитів до інших пошукових систем.
When a search heuristic is used, the resulting links are not used directly as search result but the loaded pages are indexed and stored like other content.==При використанні пошукової евристики знайдені посилання не відображаються як пошукові результати, а індексуються та зберігаються разом з іншим вмістом.
This ensures that blacklists can be used and that the searched word actually appears on the page that was discovered by the heuristic.==Це гарантує, що чорні списки можуть бути використані, і що пошукові терміни з’являються дійсно на сторінках, які були знайдені за допомогою евристики.
@ -1993,8 +1993,7 @@ You cannot call this page directly. Instead, use a link on the <a href="Network.
#File: Network.html
#---------------------------
YaCy '#[clientname]#': YaCy Search Network==YaCy "#[clientname]#": Пошукова мережа YaCy
YaCy Search Network '#[networkName]#'==Пошукова мережа YaCy "#[networkName]#"
YaCy Search Network==Пошукова мережа YaCy
YaCy Network<==Мережа YaCy<
The information that is presented on this page can also be retrieved as XML.==Інформацію на цій сторінці також можна отримати в форматі XML.
Click the API icon to see the XML.==Натисніть значок API для відображення XML.
@ -2448,18 +2447,18 @@ You have to <a href="Settings_p.html?page=ProxyAccess">setup the proxy</a> befor
#File: QuickCrawlLink_p.html
#---------------------------
Quick Crawl Link==Schnell Crawl Link
Quickly adding Bookmarks:==Schnell Crawl Lesezeichen:
Simply drag and drop the link shown below to your Browsers Toolbar/Link-Bar.==Ziehen Sie einfach den unten stehenden Link auf Ihre Browser Toolbar/Linkbar.
If you click on it while browsing, the currently viewed website will be inserted into the YaCy crawling queue for indexing.==Wenn Sie, während Sie surfen, auf dieses Lesezeichen klicken, wird die gerade betrachtete Seite zum YaCy Crawler-Puffer hinzugefügt, um indexiert zu werden.
Crawl with YaCy==Mit YaCy crawlen
Title:==Titel:
Link:==link:
Status:==Status:
URL successfully added to Crawler Queue==Die Url wurde erfolgreich zum Crawler-Puffer hinzugefügt.
Malformed URL==Fehler in der URL
Unable to create new crawling profile for URL:==Es ist nicht möglich für diese URL ein Crawling Profil zu erstellen:
Unable to add URL to crawler queue:==Es ist nicht möglich die URL zum Crawler-Puffer hinzuzufügen:
Quick Crawl Link==Швидке сканування посилання
#Quickly adding Bookmarks:==Schnell Crawl Lesezeichen:
#Simply drag and drop the link shown below to your Browsers Toolbar/Link-Bar.==Ziehen Sie einfach den unten stehenden Link auf Ihre Browser Toolbar/Linkbar.
#If you click on it while browsing, the currently viewed website will be inserted into the YaCy crawling queue for indexing.==Wenn Sie, während Sie surfen, auf dieses Lesezeichen klicken, wird die gerade betrachtete Seite zum YaCy Crawler-Puffer hinzugefügt, um indexiert zu werden.
#Crawl with YaCy==Mit YaCy crawlen
#Title:==Titel:
#Link:==link:
#Status:==Status:
#URL successfully added to Crawler Queue==Die Url wurde erfolgreich zum Crawler-Puffer hinzugefügt.
#Malformed URL==Fehler in der URL
#Unable to create new crawling profile for URL:==Es ist nicht möglich für diese URL ein Crawling Profil zu erstellen:
#Unable to add URL to crawler queue:==Es ist nicht möglich die URL zum Crawler-Puffer hinzuzufügen:
#-----------------------------
#File: Ranking_p.html
@ -2941,7 +2940,7 @@ Go back to the <a href="Settings_p.html">Settings</a> page==Назад до ст
Your system is not protected by a password==Ваша система не захищена паролем
Please go to the <a href="ConfigAccounts_p.html">User Administration</a> page and set an administration password.==Будь-ласка, перейдіть на сторінку <a href="ConfigAccounts_p.html">керування користувачами</a> і виставте основний пароль.
You don't have the correct access right to perform this task.==У вас немає дозволу на запуск цього додатка.
Please log in.==Bitte melden Sie sich an.
#Please log in.==Bitte melden Sie sich an.
You can now go back to the <a href="Settings_p.html">Settings</a> page if you want to make more changes.==Якщо хочете зробити інші зміни, можна перейти назад на сторінку <a href="Settings_p.html">налаштувань</a>.
See you soon!==До зустрічі!
Just a moment, please!==Зачекайте трохи, будь ласка!

@ -151,7 +151,7 @@
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-javadoc-plugin</artifactId>
<version>2.10.3</version>
<version>2.10.4</version>
<configuration>
<reportOutputDirectory>javadoc</reportOutputDirectory>
<author>true</author>
@ -248,7 +248,7 @@
<plugins>
<plugin>
<artifactId>maven-assembly-plugin</artifactId>
<version>2.5.3</version>
<version>2.6</version>
<configuration>
<descriptors>
<descriptor>assembly.xml</descriptor>
@ -266,6 +266,16 @@
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-source-plugin</artifactId>
<version>3.0.0</version>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-deploy-plugin</artifactId>
<version>2.8.2</version>
</plugin>
</plugins>
</build>
</profile>

@ -59,8 +59,8 @@ import net.yacy.search.SwitchboardConstants;
public final class CrawlSwitchboard {
public static final String CRAWL_PROFILE_AUTOCRAWL_DEEP = "autocrawlDeep";
public static final String CRAWL_PROFILE_AUTOCRAWL_SHALLOW = "autocrawlShallow";
public static final String CRAWL_PROFILE_AUTOCRAWL_DEEP = "autocrawlDeep";
public static final String CRAWL_PROFILE_AUTOCRAWL_SHALLOW = "autocrawlShallow";
public static final String CRAWL_PROFILE_PROXY = "proxy";
public static final String CRAWL_PROFILE_REMOTE = "remote";
public static final String CRAWL_PROFILE_SNIPPET_LOCAL_TEXT = "snippetLocalText";
@ -107,18 +107,12 @@ public final class CrawlSwitchboard {
private final File queuesRoot;
private Switchboard switchboard;
public CrawlSwitchboard(final String networkName, Switchboard switchboard) {
public CrawlSwitchboard(Switchboard switchboard) {
this.switchboard = switchboard;
this.log = this.switchboard.log;
this.queuesRoot = this.switchboard.queuesRoot;
this.defaultPushProfiles = new ConcurrentHashMap<>();
this.log.info("Initializing Word Index for the network '" + networkName + "'.");
if ( networkName == null || networkName.isEmpty() ) {
log.severe("no network name given - shutting down");
System.exit(0);
}
this.profilesActiveCrawlsCache = Collections.synchronizedMap(new TreeMap<byte[], CrawlProfile>(Base64Order.enhancedCoder));
this.profilesActiveCrawlsCounter = new ConcurrentHashMap<String, RowHandleSet>();

@ -937,7 +937,17 @@ public class WikiCode extends AbstractWikiParser implements WikiParser {
//extra treatment for headlines
if (Arrays.binarySearch(HEADLINE_TAGS, tags.openWiki) >= 0) {
processHeadline(stringBuilder, firstPosition, tags, secondPosition, direlem);
// require that the line starts with headline markup (e.g. " == Title == ", but not "Seven = six plus one")
int i = 0;
boolean beginsWith = true;
while (i < firstPosition) {
if (stringBuilder.charAt(i) > ' ') {
beginsWith = false;
break;
}
i++;
}
if (beginsWith) processHeadline(stringBuilder, firstPosition, tags, secondPosition, direlem);
} else {
final int oldLength = stringBuilder.length();
stringBuilder.replace(firstPosition, firstPosition + tags.openWikiLength, tags.openHTML);

@ -517,6 +517,7 @@ public final class FileUtils {
/**
* Read lines of a file into an ArrayList.
* Empty lines in the file are ignored.
*
* @param listFile the file
* @return the resulting array as an ArrayList
@ -529,7 +530,7 @@ public final class FileUtils {
br = new BufferedReader(new InputStreamReader(new FileInputStream(listFile), StandardCharsets.UTF_8));
while ( (line = br.readLine()) != null ) {
list.add(line);
if (!line.isEmpty()) list.add(line);
}
br.close();
} catch (final IOException e ) {
@ -576,6 +577,7 @@ public final class FileUtils {
/**
* Read lines of a text file into a String, optionally ignoring comments.
* Empty lines are always ignored.
*
* @param listFile the File to read from.
* @param withcomments If <code>false</code> ignore lines starting with '#'.

@ -1147,25 +1147,25 @@ public final class Protocol {
return 0;
}
List<URIMetadataNode> container = new ArrayList<URIMetadataNode>();
List<URIMetadataNode> resultContainer = new ArrayList<URIMetadataNode>();
Network.log.info("SEARCH (solr), returned " + docList[0].size() + " out of " + docList[0].getNumFound() + " documents and " + facets.size() + " facets " + facets.keySet().toString() + " from " + (target == null ? "shard" : ("peer " + target.hash + ":" + target.getName())));
int term = count;
Collection<SolrInputDocument> docs;
if (event.addResultsToLocalIndex) { // only needed to store remote results
docs = new ArrayList<SolrInputDocument>(docList[0].size());
} else docs = null;
for (final SolrDocument doc: docList[0]) {
for (final SolrDocument tmpdoc: docList[0]) {
//System.out.println("***DEBUG*** " + ((String) doc.getFieldValue("sku")));
if ( term-- <= 0 ) {
break; // do not process more that requested (in case that evil peers fill us up with rubbish)
}
// get one single search result
if ( doc == null ) {
if ( tmpdoc == null ) {
continue;
}
URIMetadataNode urlEntry;
try {
urlEntry = new URIMetadataNode(doc);
urlEntry = new URIMetadataNode(tmpdoc);
} catch (MalformedURLException ex) {
continue;
}
@ -1198,73 +1198,61 @@ public final class Protocol {
// put the remote documents to the local index. We must convert the solr document to a solr input document:
if (event.addResultsToLocalIndex) {
/* Check document size, only if a limit is set on remote documents size allowed to be stored to local index */
if(checkDocumentSize(doc, event.getRemoteDocStoredMaxSize() * 1024)) {
final SolrInputDocument sid = event.query.getSegment().fulltext().getDefaultConfiguration().toSolrInputDocument(doc);
// the input document stays untouched because it contains top-level cloned objects
docs.add(sid);
// will be stored to index, and is a full solr document, can be added to firstseen
event.query.getSegment().setFirstSeenTime(urlEntry.hash(), Math.min(urlEntry.moddate().getTime(), System.currentTimeMillis()));
} else {
Network.log.info("Document size greater than " + event.getRemoteDocStoredMaxSize() + " kbytes, excludes it from being stored to local index. Url : " + urlEntry.urlstring());
}
/* Check document size, only if a limit is set on remote documents size allowed to be stored to local index */
if (checkDocumentSize(tmpdoc, event.getRemoteDocStoredMaxSize() * 1024)) {
final SolrInputDocument sid = event.query.getSegment().fulltext().getDefaultConfiguration().toSolrInputDocument(tmpdoc);
// the input document stays untouched because it contains top-level cloned objects
docs.add(sid);
// will be stored to index, and is a full solr document, can be added to firstseen
event.query.getSegment().setFirstSeenTime(urlEntry.hash(), Math.min(urlEntry.moddate().getTime(), System.currentTimeMillis()));
} else {
Network.log.info("Document size greater than " + event.getRemoteDocStoredMaxSize() + " kbytes, excludes it from being stored to local index. Url : " + urlEntry.urlstring());
}
}
// after this conversion we can remove the largest and not used field text_t and synonyms_sxt from the document
// because that goes into a search cache and would take a lot of memory in the search cache
//doc.removeFields(CollectionSchema.text_t.getSolrFieldName());
doc.removeFields(CollectionSchema.synonyms_sxt.getSolrFieldName());
tmpdoc.removeFields(CollectionSchema.synonyms_sxt.getSolrFieldName());
ResultURLs.stack(
ASCII.String(urlEntry.url().hash()),
urlEntry.url().getHost(),
event.peers.mySeed().hash.getBytes(),
UTF8.getBytes(target.hash),
EventOrigin.QUERIES);
ASCII.String(urlEntry.url().hash()),
urlEntry.url().getHost(),
event.peers.mySeed().hash.getBytes(),
UTF8.getBytes(target.hash),
EventOrigin.QUERIES);
}
// add the url entry to the word indexes
container.add(urlEntry);
// add the url entry to the checked results
resultContainer.add(urlEntry);
}
final int dls = docList[0].size();
final int numFound = (int) docList[0].getNumFound();
docList[0].clear();
docList[0] = null;
if (localsearch) {
event.addNodes(container, facets, snippets, true, "localpeer", numFound);
event.addNodes(resultContainer, facets, snippets, true, "localpeer", numFound);
event.addFinalize();
event.addExpectedRemoteReferences(-count);
Network.log.info("local search (solr): localpeer sent " + container.size() + "/" + numFound + " references");
Network.log.info("local search (solr): localpeer sent " + resultContainer.size() + "/" + numFound + " references");
} else {
if (event.addResultsToLocalIndex) {
/*
* Current thread might be interrupted by SearchEvent.cleanup()
*/
if (Thread.interrupted()) {
throw new InterruptedException("solrQuery interrupted");
}
WriteToLocalIndexThread writeToLocalIndexThread = new WriteToLocalIndexThread(event.query.getSegment(),
docs);
writeToLocalIndexThread.start();
try {
writeToLocalIndexThread.join();
} catch (InterruptedException e) {
/*
* Current thread interruption might happen while waiting
* for writeToLocalIndexThread.
*/
writeToLocalIndexThread.stopWriting();
throw new InterruptedException("solrQuery interrupted");
}
docs.clear();
/*
* Current thread might be interrupted by SearchEvent.cleanup()
*/
if (Thread.interrupted()) {
throw new InterruptedException("solrQuery interrupted");
}
WriteToLocalIndexThread writeToLocalIndexThread = new WriteToLocalIndexThread(event.query.getSegment(),
docs); // will clear docs on return
writeToLocalIndexThread.start();
}
event.addNodes(container, facets, snippets, false, target.getName() + "/" + target.hash, numFound);
event.addNodes(resultContainer, facets, snippets, false, target.getName() + "/" + target.hash, numFound);
event.addFinalize();
event.addExpectedRemoteReferences(-count);
Network.log.info("remote search (solr): peer " + target.getName() + " sent " + (container.size() == 0 ? 0 : container.size()) + "/" + numFound + " references");
Network.log.info("remote search (solr): peer " + target.getName() + " sent " + (resultContainer.size()) + "/" + numFound + " references");
}
return dls;
return resultContainer.size();
}
/**
@ -1285,6 +1273,7 @@ public final class Protocol {
/**
* Parameters must not be null.
* After writing, the collection is cleared.
* @param segment solr segment to write
* @param docs solr documents collection to put to segment
*/
@ -1300,17 +1289,19 @@ public final class Protocol {
this.stop.set(true);
}
@Override
public void run() {
for (SolrInputDocument doc: docs) {
if(stop.get()) {
Network.log.info("Writing documents collection to Solr segment was stopped.");
return;
}
segment.putDocument(doc);
@Override
public void run() {
for (SolrInputDocument doc : docs) {
if (stop.get()) {
docs.clear();
Network.log.info("Writing documents collection to Solr segment was stopped.");
return;
}
segment.putDocument(doc);
}
}
}
docs.clear();
}
}
/**
* Only when maxSize is greater than zero, check that doc size is lower. To

@ -607,7 +607,7 @@ public final class Switchboard extends serverSwitch {
}
// create a crawler
this.crawler = new CrawlSwitchboard(networkName, this);
this.crawler = new CrawlSwitchboard(this);
// start yacy core
this.log.config("Starting YaCy Protocol Core");
@ -1398,7 +1398,7 @@ public final class Switchboard extends serverSwitch {
// create a crawler
this.crawlQueues.relocate(this.queuesRoot); // cannot be closed because the busy threads are working with that object
this.crawler = new CrawlSwitchboard(networkName, this);
this.crawler = new CrawlSwitchboard(this);
// init a DHT transmission dispatcher
this.dhtDispatcher =

@ -1,5 +1,6 @@
package net.yacy.data.wiki;
import java.io.BufferedReader;
import org.junit.Test;
import static org.junit.Assert.*;
@ -32,4 +33,26 @@ public class WikiCodeTest {
}
}
}
/**
 * Verifies headline handling of {@code WikiCode.transform}: lines that begin
 * with headline markup must yield an {@code <h2>} tag, whereas lines that only
 * contain '=' inside running text must not yield an {@code <h1>} tag.
 */
@Test
public void testProcessLineOfWikiCode() {
    final WikiCode wikiCode = new WikiCode();

    // valid headlines: the markup opens the line (with or without inner spaces)
    final String[] headlineLines = { "== Header ==", "==Header==" };
    for (int i = 0; i < headlineLines.length; i++) {
        final String html = wikiCode.transform("8090", headlineLines[i]);
        assertTrue("<h2> tag expected:" + html, html.contains("<h2>"));
    }

    // no headlines: the '=' characters appear in the middle of ordinary text
    final String[] plainLines = { "Text of = Header, false = wrong", "One=Two" };
    for (int i = 0; i < plainLines.length; i++) {
        final String html = wikiCode.transform("8090", plainLines[i]);
        assertFalse("no header tag expected:" + html, html.contains("<h1>"));
    }
}
}

Loading…
Cancel
Save