fix Umlaut handling in blekko heuristic search term

http://mantis.tokeek.de/view.php?id=169
observation: blekko seems to block xxxbot agents (=0 results)
pull/1/head
reger 10 years ago
parent ab98f69592
commit fe6f5a395d

@ -12,7 +12,7 @@
## - all lines beginning with '#' and where the second character is not '#' are commented-out keyword lines ## - all lines beginning with '#' and where the second character is not '#' are commented-out keyword lines
## ##
#Blekko = http://blekko.com/ws/{searchTerms}+/rss # get 20 results from blekko #Blekko = http://blekko.com/ws/{searchTerms} /rss # get 20 results from blekko
#Faroo-News = http://www.faroo.com/api?q={searchTerms}&start={startIndex}&length=20&l=en&src=news&f=rss # get results from Faroo news-search #Faroo-News = http://www.faroo.com/api?q={searchTerms}&start={startIndex}&length=20&l=en&src=news&f=rss # get results from Faroo news-search
#WordPress.com = http://en.search.wordpress.com/?q={searchTerms}&f=feed&page={startPage?} #Search WordPress.com Blogs #WordPress.com = http://en.search.wordpress.com/?q={searchTerms}&f=feed&page={startPage?} #Search WordPress.com Blogs
#Sueddeutsche.de = http://suche.sueddeutsche.de/query/{searchTerms}?output=rss # Sueddeutsche Zeitung Artikel Archiv #Sueddeutsche.de = http://suche.sueddeutsche.de/query/{searchTerms}?output=rss # Sueddeutsche Zeitung Artikel Archiv

@ -23,7 +23,9 @@ import java.io.IOException;
import java.net.MalformedURLException; import java.net.MalformedURLException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.List; import java.util.List;
import net.yacy.cora.document.encoding.ASCII;
import net.yacy.cora.document.encoding.UTF8; import net.yacy.cora.document.encoding.UTF8;
import net.yacy.cora.document.feed.RSSFeed; import net.yacy.cora.document.feed.RSSFeed;
import net.yacy.cora.document.feed.RSSMessage; import net.yacy.cora.document.feed.RSSMessage;
@ -39,6 +41,7 @@ import net.yacy.document.TextParser;
import net.yacy.kelondro.data.meta.URIMetadataNode; import net.yacy.kelondro.data.meta.URIMetadataNode;
import net.yacy.search.query.QueryParams; import net.yacy.search.query.QueryParams;
import net.yacy.search.schema.CollectionSchema; import net.yacy.search.schema.CollectionSchema;
import org.apache.http.entity.mime.content.ContentBody;
/** /**
* Handling of queries to remote OpenSearch systems. Iterates to a list of * Handling of queries to remote OpenSearch systems. Iterates to a list of
@ -83,10 +86,10 @@ public class OpenSearchConnector extends AbstractFederateSearchConnector impleme
// see http://www.loc.gov/standards/sru/ // see http://www.loc.gov/standards/sru/
String searchurl = this.parseSearchTemplate(baseurl, query.getQueryGoal().getQueryString(false), 0, query.itemsPerPage); String searchurl = this.parseSearchTemplate(baseurl, query.getQueryGoal().getQueryString(false), 0, query.itemsPerPage);
try { try {
MultiProtocolURL aurl = new MultiProtocolURL(MultiProtocolURL.unescape(searchurl)); MultiProtocolURL aurl = new MultiProtocolURL(searchurl);
try { try {
this.lastaccesstime = System.currentTimeMillis(); this.lastaccesstime = System.currentTimeMillis();
final HTTPClient httpClient = new HTTPClient(ClientIdentification.yacyIntranetCrawlerAgent); final HTTPClient httpClient = new HTTPClient(ClientIdentification.yacyInternetCrawlerAgent);
byte[] result = httpClient.GETbytes(aurl, null, null, false); byte[] result = httpClient.GETbytes(aurl, null, null, false);
RSSReader rssReader = RSSReader.parse(RSSFeed.DEFAULT_MAXSIZE, result); RSSReader rssReader = RSSReader.parse(RSSFeed.DEFAULT_MAXSIZE, result);
if (rssReader != null) { if (rssReader != null) {

Loading…
Cancel
Save