changed link to new forum location

pull/461/head
Michael Peter Christen 3 years ago
parent 52fe2ed8ba
commit 0579a9546a

@ -45,7 +45,7 @@ The (GPLv2+) source code used to build YaCy is distributed with the package (in
## Where is the documentation?
- [Homepage](https://yacy.net)
- [International Forum](https://searchlab.eu)
- [International Forum](https://community.searchlab.eu)
- [German wiki](https://wiki.yacy.net/index.php/De:Start)
- [Esperanto wiki](https://wiki.yacy.net/index.php/Eo:Start)
- [French wiki](https://wiki.yacy.net/index.php/Fr:Start)
@ -168,7 +168,7 @@ To start developing YaCy in **IntelliJ IDEA**:
- Run via IDE -> Add Configuration -> + -> Gradle -> in Run Tasks add: build run
- (in above line "build" is needed to make sure all needed Gradle tasks are executed before run)
To join our development community, got to https://searchlab.eu
To join our development community, go to https://community.searchlab.eu
Send pull requests to https://github.com/yacy/yacy_search_server
@ -204,7 +204,7 @@ scripts you can easily create more shell API access methods.
## Contact
[Visit the international YaCy forum](https://searchlab.eu)
[Visit the international YaCy forum](https://community.searchlab.eu)
where you can start a discussion in your own language.
Questions and requests for paid customization and integration into enterprise solutions.

@ -178,7 +178,7 @@
#(hintSupport)#::
<dt class="hintIcon"><img src="env/grafics/idea.png" width="32" height="32" alt="idea"/></dt>
<dd class="hint">If you need professional support, please write to <tt>support@yacy.net</tt><br>For community support, please visit our <a href="https://searchlab.eu" target="_blank">forum</a>.
<dd class="hint">If you need professional support, please write to <tt>support@yacy.net</tt><br>For community support, please visit our <a href="https://community.searchlab.eu" target="_blank">forum</a>.
</dd>
#(/hintSupport)#

@ -192,7 +192,7 @@ XDtoU7vQ/wIAAP//AwBb7ktEXQ4nqQAAAABJRU5ErkJggg==" width="128" height="64" alt="K
<p>Please send us feed-back about your experience with an<br/>
<a href="http://sayat.me/YaCy" target="_blank">anonymous message</a><br/>
or a<br/>
posting to our <a href="https://searchlab.eu" target="_blank">web forums</a><br/>
posting to our <a href="https://community.searchlab.eu" target="_blank">web forums</a><br/>
or a<br/>
<a href="https://github.com/yacy/yacy_search_server/issues" target="_blank">bug report</a>!</p>
<h2>Professional Support</h2>

@ -105,7 +105,7 @@
</li>
<li>&nbsp;</li>
<li id="header_community">
<form action="https://searchlab.eu" target="_blank" method="get">
<form action="https://community.searchlab.eu" target="_blank" method="get">
<button accesskey="f" type="submit" class="btn btn-inverse navbar-btn label-info" title="Community">
<span class="glyphicon glyphicon-user"></span>
<span class="hidden-sm"> Forum</span>
@ -128,7 +128,7 @@
<li class="divider" role="separator"></li>
<li id="header_tutorial"><a href="https://www.youtube.com/user/YaCyTutorials/videos" target="_blank"><i>external</i>&nbsp;&nbsp;&nbsp;YaCy Tutorials</a></li>
<li id="header_download"><a href="https://yacy.net" target="_blank"><i>external</i>&nbsp;&nbsp;&nbsp;Download YaCy</a></li>
<li id="header_community"><a href="https://searchlab.eu" target="_blank"><i>external</i>&nbsp;&nbsp;&nbsp;Community (Web Forums)</a></li>
<li id="header_community"><a href="https://community.searchlab.eu" target="_blank"><i>external</i>&nbsp;&nbsp;&nbsp;Community (Web Forums)</a></li>
<li id="header_git"><a href="https://github.com/yacy/yacy_search_server" target="_blank"><i>external</i>&nbsp;&nbsp;&nbsp;Git Repository</a></li>
</ul>
</li>

@ -60,7 +60,7 @@
<li id="header_jslicense"><a href="jslicense.html" data-jslicense="1">JavaScript information</a></li>
<li class="divider" role="separator"></li>
<li id="header_download"><a href="https://yacy.net" target="_blank"><i>external</i>&nbsp;&nbsp;&nbsp;Download YaCy</a></li>
<li id="header_community"><a href="https://searchlab.eu" target="_blank"><i>external</i>&nbsp;&nbsp;&nbsp;Community (Web Forums)</a></li>
<li id="header_community"><a href="https://community.searchlab.eu" target="_blank"><i>external</i>&nbsp;&nbsp;&nbsp;Community (Web Forums)</a></li>
<li id="header_git"><a href="https://github.com/yacy/yacy_search_server/commits/master" target="_blank"><i>external</i>&nbsp;&nbsp;&nbsp;Git Repository</a></li>
<li id="header_bugs"><a href="https://github.com/yacy/yacy_search_server/issues" target="_blank"><i>external</i>&nbsp;&nbsp;&nbsp;Bugtracker</a></li>
</ul>

@ -40,7 +40,7 @@
<li id="header_jslicense"><a href="jslicense.html" data-jslicense="1">JavaScript information</a></li>
<li class="divider" role="separator"></li>
<li id="header_download"><a href="https://yacy.net" target="_blank"><i>external</i>&nbsp;&nbsp;&nbsp;Download YaCy</a></li>
<li id="header_community"><a href="https://searchlab.eu" target="_blank"><i>external</i>&nbsp;&nbsp;&nbsp;Community (Web Forums)</a></li>
<li id="header_community"><a href="https://community.searchlab.eu" target="_blank"><i>external</i>&nbsp;&nbsp;&nbsp;Community (Web Forums)</a></li>
<li id="header_git"><a href="https://github.com/yacy/yacy_search_server/commits/master" target="_blank"><i>external</i>&nbsp;&nbsp;&nbsp;Git Repository</a></li>
<li id="header_bugs"><a href="https://github.com/yacy/yacy_search_server/issues" target="_blank"><i>external</i>&nbsp;&nbsp;&nbsp;Bugtracker</a></li>
</ul>

@ -135,7 +135,7 @@ $(function() {
</a>
</td>
<td>
<a href="https://searchlab.eu" target="_blanc">
<a href="https://community.searchlab.eu" target="_blank">
<img alt="YaCy Forum" title="YaCy Forum" src="img-1/Discussion.png" border="0"/>
<br />
Forum

@ -38,7 +38,7 @@ apfelmaennchen
</ul>
<h3>Bug Tracker</h3>
<ul>
<li>Please report bugs in the official <a href="https://searchlab.eu" target="_blank">YaCy-Forum</a> - thanks!</li>
<li>Please report bugs in the official <a href="https://community.searchlab.eu" target="_blank">YaCy-Forum</a> - thanks!</li>
<li>29-09-2008: There seems to be timing problems with the RSS search result. In some cases no result is shown at all, even after a page reload!</li>
<li>19-04-2008: currently only contentdom="text" is working, all others will fail...</li>
</ul>

@ -1811,7 +1811,7 @@ deselect the partial import flag==Den teilweisen Import Flag abschalten
When an export is started, surrogate files are generated into DATA/SURROGATE/in which are automatically fetched by an indexer thread.==Wenn ein Export gestartet wird werden Hilfsdateien in DATA/SURROGATE/in erzeugt, die automatisch von einem Indexer Thread geholt und verarbeitet werden.
All indexed surrogate files are then moved to DATA/SURROGATE/out and can be re-cycled when an index is deleted.==Alle indexierten Hilfsdateien werden dann nach DATA/SURROGATE/out verschoben und können recycelt werden wenn ein Index gelöscht wird.
The URL stub==Der Teil der URL
like https://searchlab.eu==wie z.B. https://searchlab.eu
like https://community.searchlab.eu==wie z.B. https://community.searchlab.eu
this must be the path right in front of '/viewtopic.php?'==dies muss den kompletten Pfad vor '/viewtopic.php?' enthalten
Type==Typ
> of database<==> der Datenbank<

@ -2157,7 +2157,7 @@ where it can be retrieved using the URL==d'o&ugrave; elle peut &ecirc;tre r&eacu
Your Web Page Indexer is idle. You can start your own web crawl <a href="CrawlStartSite.html">here</a>==Votre indexeur de pages web est inactif. Vous pouvez d&eacute;marrer votre propre balayage du web <a href="CrawlStartSite.html">ici</a>
Your Web Page Indexer is busy. You can <a href="Crawler_p.html">monitor your web crawl</a> here.==Votre indexeur de pages web est actif. Vous pouvez surveiller votre balayage du web <a href="Crawler_p.html">ici</a>.
If you need professional support, please write to==Si vous avez besoin d'une assistance professionnelle, vous pouvez &eacute;crire &agrave;
For community support, please visit our <a href="https://searchlab.eu" target="_blank">forum</a>==Si vous cherchez l'aide de la communaut&eacute;, vous pouvez visiter notre <a href="https://searchlab.eu" target="_blank">forum</a>
For community support, please visit our <a href="https://community.searchlab.eu" target="_blank">forum</a>==Si vous cherchez l'aide de la communaut&eacute;, vous pouvez visiter notre <a href="https://community.searchlab.eu" target="_blank">forum</a>
"Follow YaCy on Twitter"=="Suivez YaCy sur Twitter"
@ -2302,7 +2302,7 @@ Therefore we like to ask you: do you like YaCy? Will you use it again... if not,
Please send us feed-back about your experience with an==Vous pouvez nous faire parvenir vos commentaires avec un
anonymous message==message anonyme
or a<br/>==ou un<br/>
posting to our <a href="https://searchlab.eu" target="_blank">web forums</a><br/>==message sur nos <a href="https://searchlab.eu" target="_blank">forums</a><br/>
posting to our <a href="https://community.searchlab.eu" target="_blank">web forums</a><br/>==message sur nos <a href="https://community.searchlab.eu" target="_blank">forums</a><br/>
bug report==rapport de bogue
<h2>Professional Support</h2>==<h2>Support professionnel</h2>
If you are a professional user and you would like to use YaCy in your company in combination with consulting services by YaCy specialists, please see==Si vous souhaitez utiliser YaCy dans votre entreprise en association avec un service de consultations d'expertise par des sp&eacute;cialistes de YaCy, veuillez consulter

@ -2527,7 +2527,7 @@
<source>The URL stub</source>
</trans-unit>
<trans-unit id="8bafb5ec" xml:space="preserve" approved="no" translate="yes">
<source>like https://searchlab.eu</source>
<source>like https://community.searchlab.eu</source>
</trans-unit>
<trans-unit id="6d4ca1b5" xml:space="preserve" approved="no" translate="yes">
<source>this must be the path right in front of '/viewtopic.php?'</source>

@ -1938,7 +1938,7 @@ deselect the partial import flag==снимите флаг частичного
When an export is started, surrogate files are generated into DATA/SURROGATE/in which are automatically fetched by an indexer thread.==После начала экспорта, замещающие файлы создаются в DATA/SURROGATE/in и автоматически добавляются в индексатор.
All indexed surrogate files are then moved to DATA/SURROGATE/out and can be re-cycled when an index is deleted.==Все проиндексированные замещающие файлы перемещаются в DATA/SURROGATE/out и могут быть использованы повторно в случае удаления индекса.
The URL stub==Часть ссылки
like https://searchlab.eu==например, https://searchlab.eu
like https://community.searchlab.eu==например, https://community.searchlab.eu
this must be the path right in front of '/viewtopic.php?'==(путь до '/viewtopic.php?')
Type==Тип
> of database<==> базы данных<

@ -1558,7 +1558,7 @@ in phpmyadmin/config.inc.php and place your dump file in /tmp (Otherwise it is n
deselect the partial import flag==Зніміть прапорець часткового імпорту
When an export is started, surrogate files are generated into DATA/SURROGATE/in which are automatically fetched by an indexer thread.==При запуску експорту в DATA/SURROGATE/in створюються допоміжні файли, які автоматично вилучаються та опрацьовуються потоком індексувача.
All indexed surrogate files are then moved to DATA/SURROGATE/out and can be re-cycled when an index is deleted.==Всі проіндексовані допоміжні файли потім переміщуються в DATA/SURROGATE/out, і можуть бути знову оброблені, якщо індекс буде видалено.
<b>The URL stub</b>,<br />like https://searchlab.eu==<b>Частина URL, як наприклад,</b><br />https://searchlab.eu
<b>The URL stub</b>,<br />like https://community.searchlab.eu==<b>Частина URL, як наприклад,</b><br />https://community.searchlab.eu
this must be the path right in front of '/viewtopic.php?'==повний шлях перед "/viewtopic.php?"
Type==Тип
Host</b> of the database<==Ім’я хосту</b> БД<

@ -1042,7 +1042,7 @@ deselect the partial import flag==取消部分导入
When an export is started, surrogate files are generated into DATA/SURROGATE/in which are automatically fetched by an indexer thread.==导出过程开始时, 在 DATA/SURROGATE/in 目录下自动生成备份文件, 并且会被索引器自动爬取.
All indexed surrogate files are then moved to DATA/SURROGATE/out and can be re-cycled when an index is deleted.==所有被索引的备份文件都在 DATA/SURROGATE/out 目录下, 并被索引器循环利用.
The URL stub==URL根域名
like https://searchlab.eu==比如链接 https://searchlab.eu
like https://community.searchlab.eu==比如链接 https://community.searchlab.eu
this must be the path right in front of '/viewtopic.php?'==必须在'/viewtopic.php?'前面
Type==数据库
> of database<==> 类型<

@ -250,7 +250,7 @@ else
echo "**** USE AT YOUR OWN RISK! Project home and releases: http://yacy.net/ ****"
echo "** LOG of YaCy: DATA/LOG/yacy00.log (and yacy<xx>.log) **"
echo "** STOP YaCy: execute stopYACY.sh and wait some seconds **"
echo "** GET HELP for YaCy: join our community at https://searchlab.eu **"
echo "** GET HELP for YaCy: join our community at https://community.searchlab.eu **"
echo "*******************************************************************************"
if [ $DEBUG -eq 1 ] #debug
then

@ -29,7 +29,7 @@ Echo **** (C) by Michael Peter Christen, usage granted under the GPL Version 2
Echo **** USE AT YOUR OWN RISK! Project home and releases: http://yacy.net/ ****
Echo ** LOG of YaCy: DATA/LOG/yacy00.log (and yacy^<xx^>.log) **
Echo ** STOP YaCy: execute stopYACY.bat and wait some seconds **
Echo ** GET HELP for YaCy: join our community at https://searchlab.eu **
Echo ** GET HELP for YaCy: join our community at https://community.searchlab.eu **
Echo *******************************************************************************
Echo ^>^> YaCy started as daemon process. Administration at http://localhost:%port% ^<^<

@ -58,7 +58,7 @@ public class GenericXMLParserTest {
@Before
public void setUp() {
parser = new GenericXMLParser();
this.parser = new GenericXMLParser();
}
/**
@ -73,11 +73,11 @@ public class GenericXMLParserTest {
final String[] fileNames = { "umlaute_dc_xml_iso.xml", "umlaute_dc_xml_utf8.xml" };
final File folder = new File("test" + File.separator + "parsertest" + File.separator);
for (String fileName : fileNames) {
FileInputStream inStream = new FileInputStream(new File(folder, fileName));
DigestURL location = new DigestURL("http://localhost/" + fileName);
for (final String fileName : fileNames) {
final FileInputStream inStream = new FileInputStream(new File(folder, fileName));
final DigestURL location = new DigestURL("http://localhost/" + fileName);
try {
Document[] documents = parser.parse(location, "text/xml", null, new VocabularyScraper(), 0,
final Document[] documents = this.parser.parse(location, "text/xml", null, new VocabularyScraper(), 0,
inStream);
assertNotNull("Parser result must not be null for file " + fileName, documents);
assertNotNull("Parsed text must not be empty for file " + fileName, documents[0].getTextString());
@ -107,11 +107,11 @@ public class GenericXMLParserTest {
private void testCharsetDetection(final GenericXMLParser parser, final byte[] encodedXML,
final String contentTypeHeader, final String expectedCharset, final String expectedConntainedText)
throws Exception {
InputStream inStream = new ByteArrayInputStream(encodedXML);
String charsetFromHttpHeader = HeaderFramework.getCharacterEncoding(contentTypeHeader);
DigestURL location = new DigestURL("http://localhost/testfile.xml");
final InputStream inStream = new ByteArrayInputStream(encodedXML);
final String charsetFromHttpHeader = HeaderFramework.getCharacterEncoding(contentTypeHeader);
final DigestURL location = new DigestURL("http://localhost/testfile.xml");
try {
Document[] documents = parser.parse(location, contentTypeHeader, charsetFromHttpHeader,
final Document[] documents = parser.parse(location, contentTypeHeader, charsetFromHttpHeader,
new VocabularyScraper(), 0, inStream);
assertEquals(expectedCharset, documents[0].getCharset());
assertNotNull(documents[0].getTextString());
@ -138,7 +138,7 @@ public class GenericXMLParserTest {
*/
byte[] encodedXML = ("<?xml version=\"1.0\" encoding=\"utf-8\"?>" + UMLAUT_TEXT_TAG)
.getBytes(StandardCharsets.UTF_8);
testCharsetDetection(parser, encodedXML, "application/xml; charset=utf-8", StandardCharsets.UTF_8.name(),
testCharsetDetection(this.parser, encodedXML, "application/xml; charset=utf-8", StandardCharsets.UTF_8.name(),
"Maßkrügen");
/*
@ -146,7 +146,7 @@ public class GenericXMLParserTest {
* declaration
*/
encodedXML = ("<?xml version=\"1.0\"?>" + UMLAUT_TEXT_TAG).getBytes(StandardCharsets.UTF_8);
testCharsetDetection(parser, encodedXML, "application/xml; charset=utf-8", StandardCharsets.UTF_8.name(),
testCharsetDetection(this.parser, encodedXML, "application/xml; charset=utf-8", StandardCharsets.UTF_8.name(),
"Maßkrügen");
}
@ -169,7 +169,7 @@ public class GenericXMLParserTest {
*/
byte[] encodedXML = ("<?xml version=\"1.0\" encoding=\"utf-16\"?>" + UMLAUT_TEXT_TAG)
.getBytes(StandardCharsets.UTF_16);
testCharsetDetection(parser, encodedXML, "application/xml; charset=utf-16", StandardCharsets.UTF_16.name(),
testCharsetDetection(this.parser, encodedXML, "application/xml; charset=utf-16", StandardCharsets.UTF_16.name(),
"Maßkrügen");
/*
@ -177,7 +177,7 @@ public class GenericXMLParserTest {
* XML declaration having only BOM (Byte Order Mark)
*/
encodedXML = ("<?xml version=\"1.0\"?>" + UMLAUT_TEXT_TAG).getBytes(StandardCharsets.UTF_16);
testCharsetDetection(parser, encodedXML, "application/xml; charset=utf-16",
testCharsetDetection(this.parser, encodedXML, "application/xml; charset=utf-16",
StandardCharsets.UTF_16BE.name(), "Maßkrügen");
/*
@ -186,14 +186,14 @@ public class GenericXMLParserTest {
*/
encodedXML = ("<?xml version=\"1.0\" encoding=\"utf-16\"?>" + UMLAUT_TEXT_TAG)
.getBytes(StandardCharsets.UTF_16);
testCharsetDetection(parser, encodedXML, "application/xml", StandardCharsets.UTF_16.name(), "Maßkrügen");
testCharsetDetection(this.parser, encodedXML, "application/xml", StandardCharsets.UTF_16.name(), "Maßkrügen");
/*
* Charset is omitted in both Content-Type HTTP header and XML
* declaration with BOM (Byte Order Mark)
*/
encodedXML = ("<?xml version=\"1.0\"?>" + UMLAUT_TEXT_TAG).getBytes(StandardCharsets.UTF_16);
testCharsetDetection(parser, encodedXML, "application/xml", StandardCharsets.UTF_16BE.name(), "Maßkrügen");
testCharsetDetection(this.parser, encodedXML, "application/xml", StandardCharsets.UTF_16BE.name(), "Maßkrügen");
}
/**
@ -211,9 +211,9 @@ public class GenericXMLParserTest {
* ISO-8859-1 charset provided only in XML declaration without BOM (Byte
* Order Mark)
*/
byte[] encodedXML = ("<?xml version=\"1.0\" encoding=\"iso-8859-1\"?>" + UMLAUT_TEXT_TAG)
final byte[] encodedXML = ("<?xml version=\"1.0\" encoding=\"iso-8859-1\"?>" + UMLAUT_TEXT_TAG)
.getBytes(StandardCharsets.ISO_8859_1);
testCharsetDetection(parser, encodedXML, "application/xml", StandardCharsets.ISO_8859_1.name(),
testCharsetDetection(this.parser, encodedXML, "application/xml", StandardCharsets.ISO_8859_1.name(),
"Maßkrügen");
}
@ -234,7 +234,7 @@ public class GenericXMLParserTest {
* XML encoded as UTF-8 without BOM (Byte Order Mark)
*/
byte[] encodedXML = ("<?xml version=\"1.0\"?>" + UMLAUT_TEXT_TAG).getBytes(StandardCharsets.UTF_8);
testCharsetDetection(parser, encodedXML, "application/xml", StandardCharsets.UTF_8.name(), "Maßkrügen");
testCharsetDetection(this.parser, encodedXML, "application/xml", StandardCharsets.UTF_8.name(), "Maßkrügen");
/*
* XML encoded as ASCII, with non ascii chars encoded as entities
@ -242,7 +242,7 @@ public class GenericXMLParserTest {
encodedXML = ("<?xml version=\"1.0\"?>"
+ "<text>In M&#x000FC;nchen steht ein Hofbr&#x000E4;uhaus, dort gibt es Bier in Ma&#x000DF;kr&#x000FC;gen</text>")
.getBytes(StandardCharsets.US_ASCII);
testCharsetDetection(parser, encodedXML, "application/xml", StandardCharsets.UTF_8.name(), "Maßkrügen");
testCharsetDetection(this.parser, encodedXML, "application/xml", StandardCharsets.UTF_8.name(), "Maßkrügen");
}
/**
@ -260,9 +260,9 @@ public class GenericXMLParserTest {
* UTF-16BE charset provided both in Content-Type HTTP header and in XML
* declaration, without BOM (Byte Order Mark)
*/
byte[] encodedXML = ("<?xml version='1.0' encoding='utf-16be'?>" + UMLAUT_TEXT_TAG)
final byte[] encodedXML = ("<?xml version='1.0' encoding='utf-16be'?>" + UMLAUT_TEXT_TAG)
.getBytes(StandardCharsets.UTF_16BE);
testCharsetDetection(parser, encodedXML, "application/xml; charset=utf-16be",
testCharsetDetection(this.parser, encodedXML, "application/xml; charset=utf-16be",
StandardCharsets.UTF_16BE.name(), "Maßkrügen");
}
@ -279,24 +279,24 @@ public class GenericXMLParserTest {
+ "<html xmlns=\"http://www.w3.org/1999/xhtml\">" + "<head>"
+ "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" />"
+ "<title>XHTML attributes URLs test</title>" + "</head>" + "<body>"
+ "Here are YaCy<a href=\"http://yacy.net\">home page</a> and <a href=\"https://searchlab.eu\">International Forum</a>."
+ "Here are YaCy<a href=\"http://yacy.net\">home page</a> and <a href=\"https://community.searchlab.eu\">International Forum</a>."
+ "And this is a relative link to a <a href=\"/document.html\">sub document</a>." + "</body>"
+ "</html>";
InputStream inStream = new ByteArrayInputStream(xhtml.getBytes(StandardCharsets.UTF_8.name()));
final InputStream inStream = new ByteArrayInputStream(xhtml.getBytes(StandardCharsets.UTF_8.name()));
final String contentTypeHeader = "text/xhtml";
String charsetFromHttpHeader = HeaderFramework.getCharacterEncoding(contentTypeHeader);
DigestURL location = new DigestURL("http://localhost/testfile.xml");
final String charsetFromHttpHeader = HeaderFramework.getCharacterEncoding(contentTypeHeader);
final DigestURL location = new DigestURL("http://localhost/testfile.xml");
try {
Document[] documents = parser.parse(location, contentTypeHeader, charsetFromHttpHeader,
final Document[] documents = this.parser.parse(location, contentTypeHeader, charsetFromHttpHeader,
new VocabularyScraper(), 0, inStream);
assertEquals(1, documents.length);
Collection<AnchorURL> detectedAnchors = documents[0].getAnchors();
final Collection<AnchorURL> detectedAnchors = documents[0].getAnchors();
assertNotNull(detectedAnchors);
assertEquals(3, detectedAnchors.size());
assertTrue(detectedAnchors.contains(new AnchorURL("http://www.w3.org/1999/xhtml")));
assertTrue(detectedAnchors.contains(new AnchorURL("http://yacy.net")));
assertTrue(detectedAnchors.contains(new AnchorURL("https://searchlab.eu")));
assertTrue(detectedAnchors.contains(new AnchorURL("https://community.searchlab.eu")));
} finally {
inStream.close();
}
@ -316,23 +316,23 @@ public class GenericXMLParserTest {
+ "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" />"
+ "<title>XHTML content URLs test</title>" + "</head>" + "<body>" + "Here are some YaCy links:" + "<dl>"
+ "<dt>Home page</dt>" + "<dd>http://yacy.net</dd>" + "<dt>International Forum</dt>"
+ "<dd>https://searchlab.eu</dd>" + "</dl>"
+ "<dd>https://community.searchlab.eu</dd>" + "</dl>"
+ "And this is a mention to a relative link : /document.html " + "</body>" + "</html>";
InputStream inStream = new ByteArrayInputStream(xhtml.getBytes(StandardCharsets.UTF_8.name()));
final InputStream inStream = new ByteArrayInputStream(xhtml.getBytes(StandardCharsets.UTF_8.name()));
final String contentTypeHeader = "text/xhtml";
String charsetFromHttpHeader = HeaderFramework.getCharacterEncoding(contentTypeHeader);
DigestURL location = new DigestURL("http://localhost/testfile.xml");
final String charsetFromHttpHeader = HeaderFramework.getCharacterEncoding(contentTypeHeader);
final DigestURL location = new DigestURL("http://localhost/testfile.xml");
try {
Document[] documents = parser.parse(location, contentTypeHeader, charsetFromHttpHeader,
final Document[] documents = this.parser.parse(location, contentTypeHeader, charsetFromHttpHeader,
new VocabularyScraper(), 0, inStream);
assertEquals(1, documents.length);
Collection<AnchorURL> detectedAnchors = documents[0].getAnchors();
final Collection<AnchorURL> detectedAnchors = documents[0].getAnchors();
assertNotNull(detectedAnchors);
assertEquals(3, detectedAnchors.size());
assertTrue(detectedAnchors.contains(new AnchorURL("http://www.w3.org/1999/xhtml")));
assertTrue(detectedAnchors.contains(new AnchorURL("http://yacy.net")));
assertTrue(detectedAnchors.contains(new AnchorURL("https://searchlab.eu")));
assertTrue(detectedAnchors.contains(new AnchorURL("https://community.searchlab.eu")));
} finally {
inStream.close();
}
@ -346,12 +346,12 @@ public class GenericXMLParserTest {
public void testParseXMLFragment() throws Exception {
final String xhtml = "<root><node><subNode1>Node content1</subNode1><subNode2>Node content2</subNode2></node></root>";
InputStream inStream = new ByteArrayInputStream(xhtml.getBytes(StandardCharsets.UTF_8.name()));
final InputStream inStream = new ByteArrayInputStream(xhtml.getBytes(StandardCharsets.UTF_8.name()));
final String contentTypeHeader = "text/xml";
String charsetFromHttpHeader = HeaderFramework.getCharacterEncoding(contentTypeHeader);
DigestURL location = new DigestURL("http://localhost/testfile.xml");
final String charsetFromHttpHeader = HeaderFramework.getCharacterEncoding(contentTypeHeader);
final DigestURL location = new DigestURL("http://localhost/testfile.xml");
try {
Document[] documents = parser.parse(location, contentTypeHeader, charsetFromHttpHeader,
final Document[] documents = this.parser.parse(location, contentTypeHeader, charsetFromHttpHeader,
new VocabularyScraper(), 0, inStream);
assertEquals(1, documents.length);
assertEquals("Node content1 Node content2", documents[0].getTextString());
@ -368,13 +368,13 @@ public class GenericXMLParserTest {
*/
@Test
public void testParseWithLimits() throws Exception {
String xhtml = "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>"
final String xhtml = "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>"
+ "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">"
+ "<html xmlns=\"http://www.w3.org/1999/xhtml\">" + "<head>"
+ "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" />"
+ "<title>XHTML content URLs test</title>" + "</head>" + "<body>" + "<p>Here are some YaCy URLs: "
+ "Home page : http://yacy.net - International Forum : "
+ "https://searchlab.eu "
+ "https://community.searchlab.eu "
+ "and this is a mention to a relative URL : /document.html</p>"
+ "<p>Here are YaCy<a href=\"http://mantis.tokeek.de\">bug tracker</a> and <a href=\"https://wiki.yacy.net/index.php/\">Wiki</a>."
+ "And this is a relative link to another <a href=\"/document2.html\">sub document</a></p>"
@ -383,21 +383,21 @@ public class GenericXMLParserTest {
/* Content within limits */
InputStream inStream = new ByteArrayInputStream(xhtml.getBytes(StandardCharsets.UTF_8.name()));
final String contentTypeHeader = "text/xhtml";
String charsetFromHttpHeader = HeaderFramework.getCharacterEncoding(contentTypeHeader);
DigestURL location = new DigestURL("http://localhost/testfile.xml");
final String charsetFromHttpHeader = HeaderFramework.getCharacterEncoding(contentTypeHeader);
final DigestURL location = new DigestURL("http://localhost/testfile.xml");
try {
Document[] documents = parser.parseWithLimits(location, contentTypeHeader, charsetFromHttpHeader, new VocabularyScraper(), 0, inStream, Integer.MAX_VALUE, Long.MAX_VALUE);
final Document[] documents = this.parser.parseWithLimits(location, contentTypeHeader, charsetFromHttpHeader, new VocabularyScraper(), 0, inStream, Integer.MAX_VALUE, Long.MAX_VALUE);
assertEquals(1, documents.length);
assertFalse(documents[0].isPartiallyParsed());
assertTrue(documents[0].getTextString().contains("And this is a relative link"));
Collection<AnchorURL> detectedAnchors = documents[0].getAnchors();
final Collection<AnchorURL> detectedAnchors = documents[0].getAnchors();
assertNotNull(detectedAnchors);
assertEquals(5, detectedAnchors.size());
assertTrue(detectedAnchors.contains(new AnchorURL("http://www.w3.org/1999/xhtml")));
assertTrue(detectedAnchors.contains(new AnchorURL("http://yacy.net")));
assertTrue(detectedAnchors.contains(new AnchorURL("https://searchlab.eu")));
assertTrue(detectedAnchors.contains(new AnchorURL("https://community.searchlab.eu")));
assertTrue(detectedAnchors.contains(new AnchorURL("http://mantis.tokeek.de")));
assertTrue(detectedAnchors.contains(new AnchorURL("https://wiki.yacy.net/index.php/")));
} finally {
@ -407,7 +407,7 @@ public class GenericXMLParserTest {
/* Links limit exceeded */
inStream = new ByteArrayInputStream(xhtml.getBytes(StandardCharsets.UTF_8.name()));
try {
Document[] documents = parser.parseWithLimits(location, contentTypeHeader, charsetFromHttpHeader,
final Document[] documents = this.parser.parseWithLimits(location, contentTypeHeader, charsetFromHttpHeader,
new VocabularyScraper(), 0, inStream, 2, Long.MAX_VALUE);
assertEquals(1, documents.length);
assertTrue(documents[0].isPartiallyParsed());
@ -415,7 +415,7 @@ public class GenericXMLParserTest {
assertTrue(documents[0].getTextString().contains("Home page"));
assertFalse(documents[0].getTextString().contains("And this is a relative link"));
Collection<AnchorURL> detectedAnchors = documents[0].getAnchors();
final Collection<AnchorURL> detectedAnchors = documents[0].getAnchors();
assertNotNull(detectedAnchors);
assertEquals(2, detectedAnchors.size());
assertTrue(detectedAnchors.contains(new AnchorURL("http://www.w3.org/1999/xhtml")));
@ -425,7 +425,7 @@ public class GenericXMLParserTest {
}
/* Bytes limit exceeded */
StringBuilder xhtmlBuilder = new StringBuilder("<?xml version=\"1.0\" encoding=\"UTF-8\" ?>")
final StringBuilder xhtmlBuilder = new StringBuilder("<?xml version=\"1.0\" encoding=\"UTF-8\" ?>")
.append("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">")
.append("<html xmlns=\"http://www.w3.org/1999/xhtml\">")
.append("<head>")
@ -434,7 +434,7 @@ public class GenericXMLParserTest {
.append("</head>")
.append("<body><p>Here are some YaCy URLs: ")
.append("Home page : http://yacy.net - International Forum : ")
.append("https://searchlab.eu ")
.append("https://community.searchlab.eu ")
.append("and this is a mention to a relative URL : /document.html</p>");
/* Add some filler text to reach a total size beyond SAX parser internal input stream buffers */
@ -442,25 +442,25 @@ public class GenericXMLParserTest {
xhtmlBuilder.append("<p>Some text to parse</p>");
}
int firstBytes = xhtmlBuilder.toString().getBytes(StandardCharsets.UTF_8.name()).length;
final int firstBytes = xhtmlBuilder.toString().getBytes(StandardCharsets.UTF_8.name()).length;
xhtmlBuilder.append("<p>Here are YaCy<a href=\"http://mantis.tokeek.de\">bug tracker</a> and <a href=\"https://wiki.yacy.net/index.php/\">Wiki</a>.")
.append("And this is a relative link to another <a href=\"/document2.html\">sub document</a></p>")
.append("</body></html>");
inStream = new ByteArrayInputStream(xhtmlBuilder.toString().getBytes(StandardCharsets.UTF_8.name()));
try {
Document[] documents = parser.parseWithLimits(location, contentTypeHeader, charsetFromHttpHeader, new VocabularyScraper(), 0, inStream, Integer.MAX_VALUE, firstBytes);
final Document[] documents = this.parser.parseWithLimits(location, contentTypeHeader, charsetFromHttpHeader, new VocabularyScraper(), 0, inStream, Integer.MAX_VALUE, firstBytes);
assertEquals(1, documents.length);
assertTrue(documents[0].isPartiallyParsed());
assertTrue(documents[0].getTextString().contains("and this is a mention to a relative URL"));
assertFalse(documents[0].getTextString().contains("And this is a relative link to another"));
Collection<AnchorURL> detectedAnchors = documents[0].getAnchors();
final Collection<AnchorURL> detectedAnchors = documents[0].getAnchors();
assertNotNull(detectedAnchors);
assertEquals(3, detectedAnchors.size());
assertTrue(detectedAnchors.contains(new AnchorURL("http://www.w3.org/1999/xhtml")));
assertTrue(detectedAnchors.contains(new AnchorURL("http://yacy.net")));
assertTrue(detectedAnchors.contains(new AnchorURL("https://searchlab.eu")));
assertTrue(detectedAnchors.contains(new AnchorURL("https://community.searchlab.eu")));
} finally {
inStream.close();
}

@ -143,13 +143,13 @@ public class ContentScraperTest {
@Test
public void testGetStartDates() throws MalformedURLException, IOException {
List<Date> dateResultList;
DigestURL root = new DigestURL("http://test.org/test.html");
final DigestURL root = new DigestURL("http://test.org/test.html");
String page = "<html><body>"
final String page = "<html><body>"
+ "<time datetime='2016-12-23'>23. Dezember 2016</time>" // html5 time tag
+ "</body></html>";
ContentScraper scraper = new ContentScraper(root, 10, new HashSet<String>(), new VocabularyScraper(), 0);
final ContentScraper scraper = new ContentScraper(root, 10, new HashSet<String>(), new VocabularyScraper(), 0);
final Writer writer = new TransformerWriter(null, null, scraper, false);
FileUtils.copy(new StringReader(page), writer);
@ -157,11 +157,11 @@ public class ContentScraperTest {
dateResultList = scraper.getStartDates();
Calendar cal = Calendar.getInstance();
final Calendar cal = Calendar.getInstance();
cal.setTimeInMillis(0); // to zero hours
cal.set(2016, Calendar.DECEMBER, 23);
for (Date d : dateResultList) {
for (final Date d : dateResultList) {
Assert.assertEquals(cal.getTime(), d);
}
scraper.close();
@ -173,35 +173,35 @@ public class ContentScraperTest {
*/
@Test
public void testFindAbsoluteURLs() throws MalformedURLException {
final String[] urlStrings = { "http://yacy.net", "https://searchlab.eu", "https://en.wikipedia.org" };
final String[] urlStrings = { "http://yacy.net", "https://community.searchlab.eu", "https://en.wikipedia.org" };
final List<AnchorURL> urls = new ArrayList<>();
for (String urlString : urlStrings) {
for (final String urlString : urlStrings) {
urls.add(new AnchorURL(urlString));
}
/* Test with various white space separators */
String[] separators = { " ", "\n", "\t", "\r" };
for (String separator : separators) {
StringBuilder text = new StringBuilder();
for (String urlString : urlStrings) {
final String[] separators = { " ", "\n", "\t", "\r" };
for (final String separator : separators) {
final StringBuilder text = new StringBuilder();
for (final String urlString : urlStrings) {
if (text.length() > 0) {
text.append(separator);
}
text.append(urlString);
}
Collection<AnchorURL> detectedURLs = new ArrayList<>();
final Collection<AnchorURL> detectedURLs = new ArrayList<>();
ContentScraper.findAbsoluteURLs(text.toString(), detectedURLs, null);
Assert.assertEquals(urls.size(), detectedURLs.size());
Assert.assertTrue(urls.containsAll(detectedURLs));
}
/* URLs surrounded with parentheses */
String[] texts = { "(http://yacy.net)", "YaCy home page (http://yacy.net)",
final String[] texts = { "(http://yacy.net)", "YaCy home page (http://yacy.net)",
"Nested parentheses (YaCy home page (http://yacy.net))",
"Text in parenthesis (example : http://yacy.net)", "A markdown link [YaCy home page](http://yacy.net)",
"A markdown [example](http://yacy.net \"YaCy home page\") inline link" };
for (String text : texts) {
Collection<AnchorURL> detectedURLs = new ArrayList<>();
for (final String text : texts) {
final Collection<AnchorURL> detectedURLs = new ArrayList<>();
ContentScraper.findAbsoluteURLs(text, detectedURLs, null);
Assert.assertEquals(1, detectedURLs.size());
Assert.assertEquals(new AnchorURL("http://yacy.net"), detectedURLs.iterator().next());
@ -209,11 +209,11 @@ public class ContentScraperTest {
/* URLs surrounded with square brackets */
//http://[abcd:ef01:2345:6789:abcd:ef01:2345:6789]/
String[] squareBracketsTexts = { "[http://yacy.net]", "YaCy home page [http://yacy.net]",
final String[] squareBracketsTexts = { "[http://yacy.net]", "YaCy home page [http://yacy.net]",
"Nested brackets [YaCy home page [http://yacy.net]]",
"A mediawiki external link with different label [http://yacy.net YaCy home page]" };
for(String text : squareBracketsTexts) {
Collection<AnchorURL> detectedURLs = new ArrayList<>();
for(final String text : squareBracketsTexts) {
final Collection<AnchorURL> detectedURLs = new ArrayList<>();
ContentScraper.findAbsoluteURLs(text, detectedURLs, null);
Assert.assertEquals(1, detectedURLs.size());
Assert.assertEquals(new AnchorURL("http://yacy.net"), detectedURLs.iterator().next());
@ -221,11 +221,11 @@ public class ContentScraperTest {
/* URLs surrounded with curly brackets */
//http://[abcd:ef01:2345:6789:abcd:ef01:2345:6789]/
String[] curlyBracketsTexts = { "{http://yacy.net}", "YaCy home page {http://yacy.net}",
final String[] curlyBracketsTexts = { "{http://yacy.net}", "YaCy home page {http://yacy.net}",
"Nested brackets {YaCy home page {http://yacy.net}}",
"Text in brackets {example : http://yacy.net}" };
for(String text : curlyBracketsTexts) {
Collection<AnchorURL> detectedURLs = new ArrayList<>();
for(final String text : curlyBracketsTexts) {
final Collection<AnchorURL> detectedURLs = new ArrayList<>();
ContentScraper.findAbsoluteURLs(text, detectedURLs, null);
Assert.assertEquals(1, detectedURLs.size());
Assert.assertEquals(new AnchorURL("http://yacy.net"), detectedURLs.iterator().next());
@ -277,7 +277,7 @@ public class ContentScraperTest {
*/
@Test
public void testFindAbsoluteURLsMaxURLs() throws MalformedURLException {
final String text = "Some test URLS : http://yacy.net - https://searchlab.eu - https://en.wikipedia.org";
final String text = "Some test URLS : http://yacy.net - https://community.searchlab.eu - https://en.wikipedia.org";
/* No limit */
ArrayList<AnchorURL> detectedURLs = new ArrayList<>();
@ -425,7 +425,7 @@ public class ContentScraperTest {
html2Results.put(html, expectedUrls);
for (final Entry<String, String[]> html2Result : html2Results.entrySet()) {
ContentScraper scraper = new ContentScraper(docUrl, 10, new HashSet<String>(), new VocabularyScraper(), 0);
final ContentScraper scraper = new ContentScraper(docUrl, 10, new HashSet<String>(), new VocabularyScraper(), 0);
try (final Writer writer = new TransformerWriter(null, null, scraper, false)) {
FileUtils.copy(new StringReader(html2Result.getKey()), writer);
@ -500,7 +500,7 @@ public class ContentScraperTest {
for (final Entry<String, String[]> html2Result : html2Results.entrySet()) {
ContentScraper scraper = new ContentScraper(docUrl, 10, new HashSet<String>(), new VocabularyScraper(), 0);
final ContentScraper scraper = new ContentScraper(docUrl, 10, new HashSet<String>(), new VocabularyScraper(), 0);
try (final Writer writer = new TransformerWriter(null, null, scraper, false)) {
FileUtils.copy(new StringReader(html2Result.getKey()), writer);

@ -93,7 +93,7 @@ public class ooxmlParserTest {
final String mimetype = testFile[1];
final AnchorURL url = new AnchorURL("http://localhost/" + filename);
AbstractParser p = new ooxmlParser();
final AbstractParser p = new ooxmlParser();
inStream = new FileInputStream(file);
final Document[] docs = p.parse(url, mimetype, null, new VocabularyScraper(), 0, inStream);
for (final Document doc : docs) {
@ -116,7 +116,7 @@ public class ooxmlParserTest {
if (content != null) {
try {
content.close();
} catch (IOException ioe) {
} catch (final IOException ioe) {
System.out.println("Could not close text input stream");
}
}
@ -126,7 +126,7 @@ public class ooxmlParserTest {
if (inStream != null) {
try {
inStream.close();
} catch (IOException ioe) {
} catch (final IOException ioe) {
System.out.println("Could not close input stream on file " + filename);
}
}
@ -145,7 +145,7 @@ public class ooxmlParserTest {
final String mimetype = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet";
final AnchorURL url = new AnchorURL("http://localhost/" + fileName);
AbstractParser p = new ooxmlParser();
final AbstractParser p = new ooxmlParser();
try(InputStream inStream = new FileInputStream(file);) {
final Document[] docs = p.parse(url, mimetype, null, new VocabularyScraper(), 0, inStream);
assertNotNull("Documents result must not be null", docs);
@ -153,7 +153,7 @@ public class ooxmlParserTest {
assertNotNull("Detected URLs must not be null", anchors);
assertEquals("2 URLs should be detected", 2, anchors.size());
assertTrue("YaCy home page URL should have been parsed: " + anchors.toString(), anchors.contains(new AnchorURL("http://yacy.net/")));
assertTrue("YaCy forum URL should have been parsed: " + anchors.toString(), anchors.contains(new AnchorURL("https://searchlab.eu/")));
assertTrue("YaCy forum URL should have been parsed: " + anchors.toString(), anchors.contains(new AnchorURL("https://community.searchlab.eu/")));
}
}

Loading…
Cancel
Save