refactoring towards a unified loading mechanism for MultiProtocolURIs

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7065 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 14 years ago
parent caece04f26
commit 5e7081cd19

@ -37,13 +37,10 @@ import java.io.PrintWriter;
import java.util.Iterator;
import java.util.List;
import net.yacy.cora.protocol.HttpConnector;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.util.FileUtils;
import de.anomic.crawler.retrieval.HTTPLoader;
//import de.anomic.http.client.Client;
import de.anomic.http.server.HeaderFramework;
import de.anomic.http.server.RequestHeader;
import de.anomic.search.Switchboard;
import de.anomic.server.serverObjects;
@ -101,10 +98,7 @@ public class ConfigAppearance_p {
Iterator<String> it;
try {
final DigestURI u = new DigestURI(url, null);
final RequestHeader reqHeader = new RequestHeader();
reqHeader.put(HeaderFramework.USER_AGENT, HTTPLoader.yacyUserAgent);
// it = FileUtils.strings(Client.wget(u.toString(), reqHeader, 10000));
it = FileUtils.strings(HttpConnector.wget(u.toString(), reqHeader.entrySet(), 10000));
it = FileUtils.strings(u.get(HTTPLoader.yacyUserAgent, 10000));
} catch (final IOException e) {
prop.put("status", "1");// unable to get URL
prop.put("status_url", url);

@ -37,15 +37,12 @@ import java.io.PrintWriter;
import java.util.Iterator;
import java.util.List;
import net.yacy.cora.protocol.HttpConnector;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.util.FileUtils;
import de.anomic.crawler.retrieval.HTTPLoader;
import de.anomic.data.WorkTables;
import de.anomic.data.translator;
//import de.anomic.http.client.Client;
import de.anomic.http.server.HeaderFramework;
import de.anomic.http.server.RequestHeader;
import de.anomic.search.Switchboard;
import de.anomic.server.serverObjects;
@ -106,10 +103,7 @@ public class ConfigLanguage_p {
Iterator<String> it;
try{
final DigestURI u = new DigestURI(url, null);
final RequestHeader reqHeader = new RequestHeader();
reqHeader.put(HeaderFramework.USER_AGENT, HTTPLoader.yacyUserAgent);
// it = FileUtils.strings(Client.wget(u.toString(), reqHeader, 10000));
it = FileUtils.strings(HttpConnector.wget(u.toString(), reqHeader.entrySet(), 10000));
it = FileUtils.strings(u.get(HTTPLoader.yacyUserAgent, 10000));
}catch(final IOException e){
prop.put("status", "1");//unable to get url
prop.put("status_url", url);

@ -35,7 +35,7 @@ import java.util.StringTokenizer;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.kelondro.order.Base64Order;
import net.yacy.kelondro.order.Digest;
import net.yacy.kelondro.util.DateFormatter;
@ -62,7 +62,7 @@ public class SettingsAck_p {
final Switchboard sb = (Switchboard) env;
// get referer for backlink
final DigestURI referer = header.referer();
final MultiProtocolURI referer = header.referer();
prop.put("referer", (referer == null) ? "Settings_p.html" : referer.toNormalform(true, true));
//if (post == null) System.out.println("POST: NULL"); else System.out.println("POST: " + post.toString());

@ -43,15 +43,12 @@ import de.anomic.crawler.retrieval.HTTPLoader;
import de.anomic.data.listManager;
import de.anomic.data.list.ListAccumulator;
import de.anomic.data.list.XMLBlacklistImporter;
//import de.anomic.http.client.Client;
import de.anomic.http.server.HeaderFramework;
import de.anomic.http.server.RequestHeader;
import de.anomic.search.SearchEventCache;
import de.anomic.search.Switchboard;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
import de.anomic.yacy.yacySeed;
import net.yacy.cora.protocol.HttpConnector;
import net.yacy.document.parser.html.CharacterCoding;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.util.FileUtils;
@ -143,16 +140,10 @@ public class sharedBlacklist_p {
if (downloadURLOld != null) {
// download the blacklist
try {
final RequestHeader reqHeader = new RequestHeader();
reqHeader.put(HeaderFramework.PRAGMA,"no-cache");
reqHeader.put(HeaderFramework.CACHE_CONTROL,"no-cache");
reqHeader.put(HeaderFramework.USER_AGENT, HTTPLoader.yacyUserAgent);
// get List
DigestURI u = new DigestURI(downloadURLOld, null);
// otherBlacklist = FileUtils.strings(Client.wget(u.toString(), reqHeader, 1000));
otherBlacklist = FileUtils.strings(HttpConnector.wget(u.toString(), reqHeader.entrySet(), 1000));
otherBlacklist = FileUtils.strings(u.get(HTTPLoader.yacyUserAgent, 10000));
} catch (final Exception e) {
prop.put("status", STATUS_PEER_UNKNOWN);
prop.putHTML("status_name", Hash);
@ -169,10 +160,7 @@ public class sharedBlacklist_p {
try {
final DigestURI u = new DigestURI(downloadURL, null);
final RequestHeader reqHeader = new RequestHeader();
reqHeader.put(HeaderFramework.USER_AGENT, HTTPLoader.yacyUserAgent);
// otherBlacklist = FileUtils.strings(Client.wget(u.toString(), reqHeader, 10000)); //get List
otherBlacklist = FileUtils.strings(HttpConnector.wget(u.toString(), reqHeader.entrySet(), 10000));
otherBlacklist = FileUtils.strings(u.get(HTTPLoader.yacyUserAgent, 10000));
} catch (final Exception e) {
prop.put("status", STATUS_URL_PROBLEM);
prop.putHTML("status_address",downloadURL);

@ -127,7 +127,7 @@ public class FileLoader {
}
// load the resource
InputStream is = url.getInputStream();
InputStream is = url.getInputStream(null, -1);
byte[] b = FileUtils.read(is);
is.close();

@ -146,7 +146,7 @@ public class SMBLoader {
}
// load the resource
InputStream is = url.getInputStream();
InputStream is = url.getInputStream(null, -1);
byte[] b = FileUtils.read(is);
is.close();

@ -385,7 +385,7 @@ public final class HTTPDProxyHandler {
final Request request = new Request(
null,
url,
requestHeader.referer() == null ? null : requestHeader.referer().hash(),
requestHeader.referer() == null ? null : new DigestURI(requestHeader.referer()).hash(),
"",
cachedResponseHeader.lastModified(),
sb.crawler.defaultProxyProfile.handle(),
@ -512,7 +512,7 @@ public final class HTTPDProxyHandler {
final Request request = new Request(
null,
url,
requestHeader.referer() == null ? null : requestHeader.referer().hash(),
requestHeader.referer() == null ? null : new DigestURI(requestHeader.referer()).hash(),
"",
responseHeader.lastModified(),
sb.crawler.defaultProxyProfile.handle(),
@ -937,7 +937,7 @@ public final class HTTPDProxyHandler {
// the CONTENT_LENGTH will be added by entity and cause a ClientProtocolException if set
final int contentLength = requestHeader.getContentLength();
requestHeader.remove(RequestHeader.CONTENT_LENGTH);
requestHeader.remove(HeaderFramework.CONTENT_LENGTH);
final HTTPClient client = setupHttpClient(requestHeader, connectHost);

@ -30,7 +30,7 @@ import java.util.Map;
import java.util.Properties;
import java.util.regex.Pattern;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.kelondro.util.DateFormatter;
import de.anomic.server.serverCore;
@ -84,18 +84,18 @@ public class RequestHeader extends HeaderFramework {
super(reverseMappingCache, othermap);
}
/**
 * Reads the REFERER header value and parses it into a URI object.
 * NOTE(review): this span is a diff rendering whose +/- markers were stripped; each
 * DigestURI line appears to be the removed version of the MultiProtocolURI line that
 * follows it -- confirm against the actual commit.
 *
 * @return the parsed referer, or null when get(REFERER, null) yields null or the
 *         value cannot be parsed (MalformedURLException)
 */
public DigestURI referer() {
public MultiProtocolURI referer() {
String referer = get(REFERER, null);
if (referer == null) return null;
try {
return new DigestURI(referer, null);
return new MultiProtocolURI(referer);
} catch (MalformedURLException e) {
// a malformed referer value is reported the same way as a missing one
return null;
}
}
/**
 * Returns the host of the referer obtained from referer().
 * NOTE(review): this span is a diff rendering whose +/- markers were stripped; the
 * DigestURI declaration appears to be the removed version of the MultiProtocolURI
 * declaration that follows it -- confirm against the actual commit.
 *
 * @return the result of getHost() on the referer, or null when referer() returns null
 */
public String refererHost() {
final DigestURI url = referer();
final MultiProtocolURI url = referer();
if (url == null) return null;
return url.getHost();
}

@ -122,7 +122,7 @@ public class DocumentIndex extends Segment {
if (!url.canRead()) throw new IOException("cannot read file");
Document[] documents;
try {
documents = TextParser.parseSource(url, null, null, url.length(), url.getInputStream());
documents = TextParser.parseSource(url, null, null, url.length(), url.getInputStream(null, -1));
} catch (Exception e) {
throw new IOException("cannot parse " + url.toString() + ": " + e.getMessage());
}

@ -281,12 +281,7 @@ public class serverSwitch {
File ret;
final String path = getConfig(key, dflt).replace('\\', '/');
final File f = new File(path);
if (f == null) {
ret = null;
} else {
ret = (f.isAbsolute() ? new File(f.getAbsolutePath()) : new File(this.rootPath, path));
}
ret = (f.isAbsolute() ? new File(f.getAbsolutePath()) : new File(this.rootPath, path));
return ret;
}

@ -23,14 +23,10 @@ package de.anomic.tools;
import java.util.Hashtable;
import net.yacy.cora.protocol.HttpConnector;
import net.yacy.cora.protocol.http.ProxySettings;
import net.yacy.kelondro.data.meta.DigestURI;
import de.anomic.crawler.retrieval.HTTPLoader;
//import de.anomic.http.client.Client;
import de.anomic.http.server.HeaderFramework;
import de.anomic.http.server.RequestHeader;
public class loaderThreads {
@ -120,10 +116,7 @@ public class loaderThreads {
public void run() {
try {
final RequestHeader reqHeader = new RequestHeader();
reqHeader.put(HeaderFramework.USER_AGENT, HTTPLoader.crawlerUserAgent);
// page = Client.wget(url.toString(), reqHeader, timeout);
page = HttpConnector.wget(url.toString(), reqHeader.entrySet(), timeout);
page = url.get(HTTPLoader.crawlerUserAgent, timeout);
loaded = true;
process.feed(page);
if (process.status() == loaderCore.STATUS_FAILED) {

@ -47,7 +47,6 @@ import java.io.File;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URL;
//import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
@ -57,11 +56,12 @@ import java.util.Map;
import java.util.TreeMap;
import java.util.regex.Pattern;
import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.cora.document.RSSFeed;
import net.yacy.cora.document.RSSMessage;
import net.yacy.cora.document.RSSReader;
import net.yacy.cora.protocol.ByteArrayBody;
import net.yacy.cora.protocol.HttpConnector;
import net.yacy.cora.protocol.http.HTTPConnector;
import net.yacy.cora.services.Search;
import net.yacy.kelondro.data.meta.URIMetadataRow;
import net.yacy.kelondro.data.word.Word;
@ -78,18 +78,12 @@ import net.yacy.kelondro.util.ByteBuffer;
import net.yacy.kelondro.util.FileUtils;
import net.yacy.repository.Blacklist;
//import org.apache.commons.httpclient.methods.multipart.ByteArrayPartSource;
//import org.apache.commons.httpclient.methods.multipart.Part;
import org.apache.http.entity.mime.content.ContentBody;
import org.apache.http.entity.mime.content.StringBody;
import de.anomic.crawler.ResultURLs;
import de.anomic.crawler.retrieval.EventOrigin;
import de.anomic.crawler.retrieval.HTTPLoader;
//import de.anomic.http.client.DefaultCharsetStringPart;
//import de.anomic.http.client.Client;
import de.anomic.http.server.HeaderFramework;
import de.anomic.http.server.RequestHeader;
import de.anomic.search.RankingProfile;
import de.anomic.search.RankingProcess;
import de.anomic.search.SearchEvent;
@ -103,25 +97,11 @@ import de.anomic.tools.crypt;
public final class yacyClient {
/**
* @see wput
* @param target
* @param filename
* @param post
* @return
* @throws IOException
*/
// private static byte[] postToFile(final yacySeed target, final String filename, final List<Part> post, final int timeout) throws IOException {
// return HttpConnector.wput("http://" + target.getClusterAddress() + "/yacy/" + filename, target.getHexHash() + ".yacyh", post, timeout, false);
// }
// private static byte[] postToFile(final yacySeedDB seedDB, final String targetHash, final String filename, final List<Part> post, final int timeout) throws IOException {
// return HttpConnector.wput("http://" + targetAddress(seedDB, targetHash) + "/yacy/" + filename, yacySeed.b64Hash2hexHash(targetHash)+ ".yacyh", post, timeout, false);
// }
/**
 * Posts the given parts to "http://" + target.getClusterAddress() + "/yacy/" + filename.
 * NOTE(review): this span is a diff rendering whose +/- markers were stripped; the
 * HttpConnector.wput line appears to be the removed version of the
 * HTTPConnector.getConnector(...).post line that follows it -- confirm against the commit.
 *
 * @param target   peer to contact; its cluster address forms the URL, and its hex hash
 *                 plus ".yacyh" is passed to the connector (presumably as a virtual
 *                 host name -- TODO confirm)
 * @param filename name appended after "/yacy/" in the request URL
 * @param parts    named content bodies handed to the connector
 * @param timeout  timeout value passed through to the connector (unit not visible here)
 * @return the byte array returned by the connector call
 * @throws IOException declared by this method; presumably raised by the connector call
 */
private static byte[] postToFile(final yacySeed target, final String filename, final LinkedHashMap<String,ContentBody> parts, final int timeout) throws IOException {
return HttpConnector.wput("http://" + target.getClusterAddress() + "/yacy/" + filename, target.getHexHash() + ".yacyh", parts, timeout);
return HTTPConnector.getConnector(HTTPLoader.crawlerUserAgent).post(new MultiProtocolURI("http://" + target.getClusterAddress() + "/yacy/" + filename), timeout, target.getHexHash() + ".yacyh", parts);
}
/**
 * Posts the given parts to "http://" + targetAddress(seedDB, targetHash) + "/yacy/" + filename.
 * NOTE(review): this span is a diff rendering whose +/- markers were stripped; the
 * HttpConnector.wput line appears to be the removed version of the
 * HTTPConnector.getConnector(...).post line that follows it -- confirm against the commit.
 *
 * @param seedDB     seed database passed to targetAddress(...) to resolve the peer address
 * @param targetHash hash of the peer; converted with yacySeed.b64Hash2hexHash and
 *                   suffixed with ".yacyh" before being passed to the connector
 *                   (presumably as a virtual host name -- TODO confirm)
 * @param filename   name appended after "/yacy/" in the request URL
 * @param parts      named content bodies handed to the connector
 * @param timeout    timeout value passed through to the connector (unit not visible here)
 * @return the byte array returned by the connector call
 * @throws IOException declared by this method; presumably raised by the connector call
 */
private static byte[] postToFile(final yacySeedDB seedDB, final String targetHash, final String filename, final LinkedHashMap<String,ContentBody> parts, final int timeout) throws IOException {
return HttpConnector.wput("http://" + targetAddress(seedDB, targetHash) + "/yacy/" + filename, yacySeed.b64Hash2hexHash(targetHash)+ ".yacyh", parts, timeout);
return HTTPConnector.getConnector(HTTPLoader.crawlerUserAgent).post(new MultiProtocolURI("http://" + targetAddress(seedDB, targetHash) + "/yacy/" + filename), timeout, yacySeed.b64Hash2hexHash(targetHash)+ ".yacyh", parts);
}
/**
@ -150,18 +130,14 @@ public final class yacyClient {
Map<String, String> result = null;
final String salt = crypt.randomSalt();
// final List<Part> post = yacyNetwork.basicRequestPost(Switchboard.getSwitchboard(), null, salt);
try {
// generate request
// post.add(new DefaultCharsetStringPart("count", "20"));
// post.add(new DefaultCharsetStringPart("seed", mySeed.genSeedStr(salt)));
final LinkedHashMap<String,ContentBody> parts = yacyNetwork.basicRequestParts(Switchboard.getSwitchboard(), null, salt);
parts.put("count", new StringBody("20"));
parts.put("seed", new StringBody(mySeed.genSeedStr(salt)));
// send request
final long start = System.currentTimeMillis();
// final byte[] content = HttpConnector.wput("http://" + address + "/yacy/hello.html", yacySeed.b64Hash2hexHash(otherHash) + ".yacyh", post, 10000, false);
final byte[] content = HttpConnector.wput("http://" + address + "/yacy/hello.html", yacySeed.b64Hash2hexHash(otherHash) + ".yacyh", parts, 30000);
final byte[] content = HTTPConnector.getConnector(HTTPLoader.crawlerUserAgent).post(new MultiProtocolURI("http://" + address + "/yacy/hello.html"), 30000, yacySeed.b64Hash2hexHash(otherHash) + ".yacyh", parts);
yacyCore.log.logInfo("yacyClient.publishMySeed thread '" + Thread.currentThread().getName() + "' contacted peer at " + address + ", received " + ((content == null) ? "null" : content.length) + " bytes, time = " + (System.currentTimeMillis() - start) + " milliseconds");
result = FileUtils.table(content);
} catch (final Exception e) {
@ -266,13 +242,9 @@ public final class yacyClient {
public static yacySeed querySeed(final yacySeed target, final String seedHash) {
// prepare request
final String salt = crypt.randomSalt();
// final List<Part> post = yacyNetwork.basicRequestPost(Switchboard.getSwitchboard(), target.hash, salt);
// post.add(new DefaultCharsetStringPart("object", "seed"));
// post.add(new DefaultCharsetStringPart("env", seedHash));
// send request
try {
// final byte[] content = postToFile(target, "query.html", post, 10000);
final LinkedHashMap<String,ContentBody> parts = yacyNetwork.basicRequestParts(Switchboard.getSwitchboard(), target.hash, salt);
parts.put("object", new StringBody("seed"));
parts.put("env", new StringBody(seedHash));
@ -291,14 +263,9 @@ public final class yacyClient {
public static int queryRWICount(final yacySeed target, final String wordHash) {
// prepare request
final String salt = crypt.randomSalt();
// final List<Part> post = yacyNetwork.basicRequestPost(Switchboard.getSwitchboard(), target.hash, salt);
// post.add(new DefaultCharsetStringPart("object", "rwicount"));
// post.add(new DefaultCharsetStringPart("ttl", "0"));
// post.add(new DefaultCharsetStringPart("env", wordHash));
// send request
try {
// final byte[] content = postToFile(target, "query.html", post, 5000);
final LinkedHashMap<String,ContentBody> parts = yacyNetwork.basicRequestParts(Switchboard.getSwitchboard(), target.hash, salt);
parts.put("object", new StringBody("rwicount"));
parts.put("ttl", new StringBody("0"));
@ -319,14 +286,9 @@ public final class yacyClient {
// prepare request
final String salt = crypt.randomSalt();
// final List<Part> post = yacyNetwork.basicRequestPost(Switchboard.getSwitchboard(), target.hash, salt);
// post.add(new DefaultCharsetStringPart("object", "lurlcount"));
// post.add(new DefaultCharsetStringPart("ttl", "0"));
// post.add(new DefaultCharsetStringPart("env", ""));
// send request
try {
// final byte[] content = postToFile(target, "query.html", post, 5000);
final LinkedHashMap<String,ContentBody> parts = yacyNetwork.basicRequestParts(Switchboard.getSwitchboard(), target.hash, salt);
parts.put("object", new StringBody("lurlcount"));
parts.put("ttl", new StringBody("0"));
@ -356,20 +318,15 @@ public final class yacyClient {
// prepare request
final String salt = crypt.randomSalt();
// final List<Part> post = yacyNetwork.basicRequestPost(Switchboard.getSwitchboard(), target.hash, salt);
// post.add(new DefaultCharsetStringPart("call", "remotecrawl"));
// post.add(new DefaultCharsetStringPart("count", Integer.toString(maxCount)));
// post.add(new DefaultCharsetStringPart("time", Long.toString(maxTime)));
// send request
try {
/* a long time-out is needed */
// final byte[] result = HttpConnector.wput("http://" + target.getClusterAddress() + "/yacy/urls.xml", target.getHexHash() + ".yacyh", post, (int) maxTime);
final LinkedHashMap<String,ContentBody> parts = yacyNetwork.basicRequestParts(Switchboard.getSwitchboard(), target.hash, salt);
parts.put("call", new StringBody("remotecrawl"));
parts.put("count", new StringBody(Integer.toString(maxCount)));
parts.put("time", new StringBody(Long.toString(maxTime)));
final byte[] result = HttpConnector.wput("http://" + target.getClusterAddress() + "/yacy/urls.xml", target.getHexHash() + ".yacyh", parts, (int) maxTime);
final byte[] result = HTTPConnector.getConnector(HTTPLoader.crawlerUserAgent).post(new MultiProtocolURI("http://" + target.getClusterAddress() + "/yacy/urls.xml"), (int) maxTime, target.getHexHash() + ".yacyh", parts);
final RSSReader reader = RSSReader.parse(result);
if (reader == null) {
yacyCore.log.logWarning("yacyClient.queryRemoteCrawlURLs failed asking peer '" + target.getName() + "': probably bad response from remote peer (1), reader == null");
@ -442,25 +399,6 @@ public final class yacyClient {
// prepare request
final String salt = crypt.randomSalt();
// final List<Part> post = yacyNetwork.basicRequestPost(Switchboard.getSwitchboard(), target.hash, salt);
// post.add(new DefaultCharsetStringPart("myseed", mySeed.genSeedStr(salt)));
// post.add(new DefaultCharsetStringPart("count", Integer.toString(Math.max(10, count))));
// post.add(new DefaultCharsetStringPart("resource", ((global) ? "global" : "local")));
// post.add(new DefaultCharsetStringPart("partitions", Integer.toString(partitions)));
// post.add(new DefaultCharsetStringPart("query", wordhashes));
// post.add(new DefaultCharsetStringPart("exclude", excludehashes));
// post.add(new DefaultCharsetStringPart("duetime", "1000"));
// post.add(new DefaultCharsetStringPart("urls", urlhashes));
// post.add(new DefaultCharsetStringPart("prefer", prefer.toString()));
// post.add(new DefaultCharsetStringPart("filter", filter.toString()));
// post.add(new DefaultCharsetStringPart("language", language));
// post.add(new DefaultCharsetStringPart("sitehash", sitehash));
// post.add(new DefaultCharsetStringPart("authorhash", authorhash));
// post.add(new DefaultCharsetStringPart("ttl", "0"));
// post.add(new DefaultCharsetStringPart("maxdist", Integer.toString(maxDistance)));
// post.add(new DefaultCharsetStringPart("profile", crypt.simpleEncode(rankingProfile.toExternalString())));
// post.add(new DefaultCharsetStringPart("constraint", (constraint == null) ? "" : constraint.exportB64()));
// if (secondarySearchSuperviser != null) post.add(new DefaultCharsetStringPart("abstracts", "auto"));
final long timestamp = System.currentTimeMillis();
boolean thisIsASecondarySearch = urlhashes.length() > 0;
assert !thisIsASecondarySearch || secondarySearchSuperviser == null;
@ -489,7 +427,7 @@ public final class yacyClient {
parts.put("constraint", new StringBody((constraint == null) ? "" : constraint.exportB64()));
if (secondarySearchSuperviser != null)
parts.put("abstracts", new StringBody("auto"));
result = FileUtils.table(HttpConnector.wput("http://" + target.getClusterAddress() + "/yacy/search.html", target.getHexHash() + ".yacyh", parts, 60000));
result = FileUtils.table(HTTPConnector.getConnector(HTTPLoader.crawlerUserAgent).post(new MultiProtocolURI("http://" + target.getClusterAddress() + "/yacy/search.html"), 60000, target.getHexHash() + ".yacyh", parts));
} catch (final IOException e) {
yacyCore.log.logInfo("SEARCH failed, Peer: " + target.hash + ":" + target.getName() + " (" + e.getMessage() + "), score=" + target.selectscore);
//yacyCore.peerActions.peerDeparture(target, "search request to peer created io exception: " + e.getMessage());
@ -693,12 +631,9 @@ public final class yacyClient {
// prepare request
final String salt = crypt.randomSalt();
// final List<Part> post = yacyNetwork.basicRequestPost(Switchboard.getSwitchboard(), targetHash, salt);
// post.add(new DefaultCharsetStringPart("process", "permission"));
// send request
try {
// final byte[] content = postToFile(seedDB, targetHash, "message.html", post, 5000);
final LinkedHashMap<String,ContentBody> parts = yacyNetwork.basicRequestParts(Switchboard.getSwitchboard(), targetHash, salt);
parts.put("process", new StringBody("permission"));
final byte[] content = postToFile(seedDB, targetHash, "message.html", parts, 5000);
@ -716,19 +651,9 @@ public final class yacyClient {
// prepare request
final String salt = crypt.randomSalt();
// final List<Part> post = yacyNetwork.basicRequestPost(Switchboard.getSwitchboard(), targetHash, salt);
// post.add(new DefaultCharsetStringPart("process", "post"));
// post.add(new DefaultCharsetStringPart("myseed", seedDB.mySeed().genSeedStr(salt)));
// post.add(new DefaultCharsetStringPart("subject", subject));
// try {
// post.add(new DefaultCharsetStringPart("message", new String(message, "UTF-8")));
// } catch (final UnsupportedEncodingException e) {
// post.add(new DefaultCharsetStringPart("message", new String(message)));
// }
// send request
try {
// final byte[] content = postToFile(seedDB, targetHash, "message.html", post, 20000);
final LinkedHashMap<String,ContentBody> parts = yacyNetwork.basicRequestParts(Switchboard.getSwitchboard(), targetHash, salt);
parts.put("process", new StringBody("post"));
parts.put("myseed", new StringBody(seedDB.mySeed().genSeedStr(salt)));
@ -765,23 +690,16 @@ public final class yacyClient {
// prepare request
final String salt = crypt.randomSalt();
// final List<Part> post = yacyNetwork.basicRequestPost(Switchboard.getSwitchboard(), null, salt);
// post.add(new DefaultCharsetStringPart("process", "permission"));
// post.add(new DefaultCharsetStringPart("purpose", "crcon"));
// post.add(new DefaultCharsetStringPart("filename", filename));
// post.add(new DefaultCharsetStringPart("filesize", Long.toString(filesize)));
// post.add(new DefaultCharsetStringPart("can-send-protocol", "http"));
// send request
try {
// final byte[] content = HttpConnector.wput("http://" + targetAddress + "/yacy/transfer.html", targetAddress, post, 10000);
final LinkedHashMap<String,ContentBody> parts = yacyNetwork.basicRequestParts(Switchboard.getSwitchboard(), null, salt);
parts.put("process", new StringBody("permission"));
parts.put("purpose", new StringBody("crcon"));
parts.put("filename", new StringBody(filename));
parts.put("filesize", new StringBody(Long.toString(filesize)));
parts.put("can-send-protocol", new StringBody("http"));
final byte[] content = HttpConnector.wput("http://" + targetAddress + "/yacy/transfer.html", targetAddress, parts, 10000);
final byte[] content = HTTPConnector.getConnector(HTTPLoader.crawlerUserAgent).post(new MultiProtocolURI("http://" + targetAddress + "/yacy/transfer.html"), 10000, targetAddress, parts);
final Map<String, String> result = FileUtils.table(content);
return result;
} catch (final Exception e) {
@ -795,17 +713,9 @@ public final class yacyClient {
// prepare request
final String salt = crypt.randomSalt();
// final List<Part> post = yacyNetwork.basicRequestPost(Switchboard.getSwitchboard(), null, salt);
// post.add(new DefaultCharsetStringPart("process", "store"));
// post.add(new DefaultCharsetStringPart("purpose", "crcon"));
// post.add(new DefaultCharsetStringPart("filesize", Long.toString(file.length)));
// post.add(new DefaultCharsetStringPart("md5", Digest.encodeMD5Hex(file)));
// post.add(new DefaultCharsetStringPart("access", access));
// post.add(new DefaultCharsetFilePart("filename", new ByteArrayPartSource(filename, file)));
// send request
try {
// final byte[] content = HttpConnector.wput("http://" + targetAddress + "/yacy/transfer.html", targetAddress, post, 20000);
final LinkedHashMap<String,ContentBody> parts = yacyNetwork.basicRequestParts(Switchboard.getSwitchboard(), null, salt);
parts.put("process", new StringBody("store"));
parts.put("purpose", new StringBody("crcon"));
@ -813,7 +723,7 @@ public final class yacyClient {
parts.put("md5", new StringBody(Digest.encodeMD5Hex(file)));
parts.put("access", new StringBody(access));
parts.put("filename", new ByteArrayBody(file, filename));
final byte[] content = HttpConnector.wput("http://" + targetAddress + "/yacy/transfer.html", targetAddress, parts, 20000);
final byte[] content = HTTPConnector.getConnector(HTTPLoader.crawlerUserAgent).post(new MultiProtocolURI("http://" + targetAddress + "/yacy/transfer.html"), 20000, targetAddress, parts);
final Map<String, String> result = FileUtils.table(content);
return result;
} catch (final Exception e) {
@ -874,13 +784,6 @@ public final class yacyClient {
// prepare request
final String salt = crypt.randomSalt();
// final List<Part> post = yacyNetwork.basicRequestPost(Switchboard.getSwitchboard(), target.hash, salt);
// post.add(new DefaultCharsetStringPart("process", process));
// post.add(new DefaultCharsetStringPart("urlhash", ((entry == null) ? "" : new String(entry.hash()))));
// post.add(new DefaultCharsetStringPart("result", result));
// post.add(new DefaultCharsetStringPart("reason", reason));
// post.add(new DefaultCharsetStringPart("wordh", wordhashes));
// post.add(new DefaultCharsetStringPart("lurlEntry", ((entry == null) ? "" : crypt.simpleEncode(entry.toString(), salt))));
// determining target address
final String address = target.getClusterAddress();
@ -888,7 +791,6 @@ public final class yacyClient {
// send request
try {
// final byte[] content = HttpConnector.wput("http://" + address + "/yacy/crawlReceipt.html", target.getHexHash() + ".yacyh", post, 10000);
// prepare request
final LinkedHashMap<String,ContentBody> parts = yacyNetwork.basicRequestParts(Switchboard.getSwitchboard(), target.hash, salt);
parts.put("process", new StringBody(process));
@ -898,7 +800,7 @@ public final class yacyClient {
parts.put("wordh", new StringBody(wordhashes));
parts.put("lurlEntry", new StringBody(((entry == null) ? "" : crypt.simpleEncode(entry.toString(), salt))));
// send request
final byte[] content = HttpConnector.wput("http://" + address + "/yacy/crawlReceipt.html", target.getHexHash() + ".yacyh", parts, 10000);
final byte[] content = HTTPConnector.getConnector(HTTPLoader.crawlerUserAgent).post(new MultiProtocolURI("http://" + address + "/yacy/crawlReceipt.html"), 10000, target.getHexHash() + ".yacyh", parts);
return FileUtils.table(content);
} catch (final Exception e) {
// most probably a network time-out exception
@ -1020,13 +922,11 @@ public final class yacyClient {
// prepare post values
final String salt = crypt.randomSalt();
// final List<Part> post = yacyNetwork.basicRequestPost(Switchboard.getSwitchboard(), targetSeed.hash, salt);
// enabling gzip compression for post request body
if (gzipBody && (targetSeed.getVersion() < yacyVersion.YACY_SUPPORTS_GZIP_POST_REQUESTS_CHUNKED)) {
gzipBody = false;
}
// post.add(new DefaultCharsetStringPart("wordc", Integer.toString(indexes.size())));
int indexcount = 0;
final StringBuilder entrypost = new StringBuilder(indexes.size() * 73);
@ -1050,16 +950,12 @@ public final class yacyClient {
result.put("unknownURL", "");
return result;
}
// post.add(new DefaultCharsetStringPart("entryc", Integer.toString(indexcount)));
// post.add(new DefaultCharsetStringPart("indexes", entrypost.toString()));
try {
// final byte[] content = HttpConnector.wput("http://" + address + "/yacy/transferRWI.html", targetSeed.getHexHash() + ".yacyh", post, timeout, gzipBody);
final LinkedHashMap<String,ContentBody> parts = yacyNetwork.basicRequestParts(Switchboard.getSwitchboard(), targetSeed.hash, salt);
parts.put("wordc", new StringBody(Integer.toString(indexes.size())));
parts.put("entryc", new StringBody(Integer.toString(indexcount)));
parts.put("indexes", new StringBody(entrypost.toString()));
final byte[] content = HttpConnector.wput("http://" + address + "/yacy/transferRWI.html", targetSeed.getHexHash() + ".yacyh", parts, timeout);
final byte[] content = HTTPConnector.getConnector(HTTPLoader.crawlerUserAgent).post(new MultiProtocolURI("http://" + address + "/yacy/transferRWI.html"), timeout, targetSeed.getHexHash() + ".yacyh", parts);
final Iterator<String> v = FileUtils.strings(content);
// this should return a list of urlhashes that are unknown
@ -1080,7 +976,6 @@ public final class yacyClient {
// prepare post values
final String salt = crypt.randomSalt();
// final List<Part> post = yacyNetwork.basicRequestPost(Switchboard.getSwitchboard(), targetSeed.hash, salt);
final LinkedHashMap<String,ContentBody> parts = yacyNetwork.basicRequestParts(Switchboard.getSwitchboard(), targetSeed.hash, salt);
// enabling gzip compression for post request body
@ -1096,18 +991,15 @@ public final class yacyClient {
resource = urls[i].toString();
//System.out.println("*** DEBUG resource = " + resource);
if (resource != null && resource.indexOf(0) == -1) {
// post.add(new DefaultCharsetStringPart("url" + urlc, resource));
parts.put("url" + urlc, new StringBody(resource));
urlPayloadSize += resource.length();
urlc++;
}
}
}
// post.add(new DefaultCharsetStringPart("urlc", Integer.toString(urlc)));
try {
// final byte[] content = HttpConnector.wput("http://" + address + "/yacy/transferURL.html", targetSeed.getHexHash() + ".yacyh", post, timeout, gzipBody);
parts.put("urlc", new StringBody(Integer.toString(urlc)));
final byte[] content = HttpConnector.wput("http://" + address + "/yacy/transferURL.html", targetSeed.getHexHash() + ".yacyh", parts, timeout);
final byte[] content = HTTPConnector.getConnector(HTTPLoader.crawlerUserAgent).post(new MultiProtocolURI("http://" + address + "/yacy/transferURL.html"), timeout, targetSeed.getHexHash() + ".yacyh", parts);
final Iterator<String> v = FileUtils.strings(content);
final Map<String, String> result = FileUtils.table(v);
@ -1124,14 +1016,12 @@ public final class yacyClient {
// this post a message to the remote message board
final String salt = crypt.randomSalt();
// final List<Part> post = yacyNetwork.basicRequestPost(Switchboard.getSwitchboard(), targetSeed.hash, salt);
String address = targetSeed.getClusterAddress();
if (address == null) { address = "localhost:8080"; }
try {
// final byte[] content = HttpConnector.wput("http://" + address + "/yacy/profile.html", targetSeed.getHexHash() + ".yacyh", post, 5000);
final LinkedHashMap<String,ContentBody> parts = yacyNetwork.basicRequestParts(Switchboard.getSwitchboard(), targetSeed.hash, salt);
final byte[] content = HttpConnector.wput("http://" + address + "/yacy/profile.html", targetSeed.getHexHash() + ".yacyh", parts, 5000);
final byte[] content = HTTPConnector.getConnector(HTTPLoader.crawlerUserAgent).post(new MultiProtocolURI("http://" + address + "/yacy/profile.html"), 5000, targetSeed.getHexHash() + ".yacyh", parts);
return FileUtils.table(content);
} catch (final Exception e) {
yacyCore.log.logSevere("yacyClient.getProfile error:" + e.getMessage());
@ -1151,27 +1041,14 @@ public final class yacyClient {
final byte[] wordhashe = Word.word2hash("test");
//System.out.println("permission=" + permissionMessage(args[1]));
final RequestHeader reqHeader = new RequestHeader();
reqHeader.put(HeaderFramework.USER_AGENT, HTTPLoader.crawlerUserAgent);
// final byte[] content = Client.wget(
// "http://" + target.getPublicAddress() + "/yacy/search.html" +
// "?myseed=" + sb.peers.mySeed().genSeedStr(null) +
// "&youare=" + target.hash + "&key=" +
// "&myseed=" + sb.peers.mySeed() .genSeedStr(null) +
// "&count=10" +
// "&resource=global" +
// "&query=" + new String(wordhashe) +
// "&network.unit.name=" + Switchboard.getSwitchboard().getConfig(SwitchboardConstants.NETWORK_NAME, yacySeed.DFLT_NETWORK_UNIT),
// reqHeader, 10000, target.getHexHash() + ".yacyh");
final byte[] content = HttpConnector.wget("http://" + target.getPublicAddress() + "/yacy/search.html" +
final byte[] content = new MultiProtocolURI("http://" + target.getPublicAddress() + "/yacy/search.html" +
"?myseed=" + sb.peers.mySeed().genSeedStr(null) +
"&youare=" + target.hash + "&key=" +
"&myseed=" + sb.peers.mySeed() .genSeedStr(null) +
"&count=10" +
"&resource=global" +
"&query=" + new String(wordhashe) +
"&network.unit.name=" + Switchboard.getSwitchboard().getConfig(SwitchboardConstants.NETWORK_NAME, yacySeed.DFLT_NETWORK_UNIT),
reqHeader.entrySet(), 10000, target.getHexHash() + ".yacyh");
"&network.unit.name=" + Switchboard.getSwitchboard().getConfig(SwitchboardConstants.NETWORK_NAME, yacySeed.DFLT_NETWORK_UNIT)).get(HTTPLoader.crawlerUserAgent, 10000);
final Map<String, String> result = FileUtils.table(content);
System.out.println("Result=" + result.toString());
} catch (final Exception e) {
@ -1181,9 +1058,9 @@ public final class yacyClient {
} else if(args.length == 1) {
System.out.println("wput Test");
// connection params
URL url = null;
MultiProtocolURI url = null;
try {
url = new URL(args[0]);
url = new MultiProtocolURI(args[0]);
} catch (final MalformedURLException e) {
Log.logException(e);
}
@ -1193,19 +1070,6 @@ public final class yacyClient {
}
final String vhost = url.getHost();
final int timeout = 10000;
// final boolean gzipBody = false;
// // data
// final List<Part> post = new ArrayList<Part>();
// post.add(new DefaultCharsetStringPart("process", "permission"));
// post.add(new DefaultCharsetStringPart("purpose", "crcon"));
// //post.add(new FilePart("filename", new ByteArrayPartSource(filename, file)));
// // do it!
// try {
// final byte[] response = HttpConnector.wput(url.toString(), vhost, post, timeout, gzipBody);
// System.out.println(new String(response));
// } catch (final IOException e) {
// Log.logException(e);
// }
// new data
final LinkedHashMap<String,ContentBody> newpost = new LinkedHashMap<String,ContentBody>();
try {
@ -1216,7 +1080,7 @@ public final class yacyClient {
}
byte[] res;
try {
res = HttpConnector.wput(url.toString(), vhost, newpost, timeout);
res = HTTPConnector.getConnector(HTTPLoader.crawlerUserAgent).post(url, timeout, vhost, newpost);
System.out.println(new String(res));
} catch (IOException e1) {
Log.logException(e1);

@ -21,6 +21,8 @@
package net.yacy.cora.document;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
@ -39,6 +41,7 @@ import jcifs.smb.SmbFile;
import jcifs.smb.SmbFileInputStream;
import net.yacy.cora.document.Punycode.PunycodeException;
import net.yacy.cora.protocol.http.HTTPClient;
import net.yacy.kelondro.util.Domains;
/**
@ -934,12 +937,45 @@ public class MultiProtocolURI implements Serializable {
return null;
}
public InputStream getInputStream() throws IOException {
/**
 * Opens the resource behind this URI for reading, picking the access path by protocol.
 * <ul>
 * <li>file: a {@link FileInputStream} on the local file</li>
 * <li>smb: a {@link SmbFileInputStream} on the SMB file</li>
 * <li>http/https: the body is fetched completely with an {@link HTTPClient} GET
 *     and handed back as an in-memory {@link ByteArrayInputStream}</li>
 * </ul>
 *
 * @param userAgent user agent for the request; only used on the http/https path
 * @param timeout   timeout in milliseconds; only used on the http/https path
 * @return a stream over the content, or {@code null} if the protocol is none of the above
 * @throws IOException if the resource cannot be opened or fetched
 */
public InputStream getInputStream(final String userAgent, final int timeout) throws IOException {
    if (isFile()) {
        return new FileInputStream(getFSFile());
    }
    if (isSMB()) {
        return new SmbFileInputStream(getSmbFile());
    }
    if (!isHTTP() && !isHTTPS()) {
        // protocol not handled by this method
        return null;
    }

    final HTTPClient http = new HTTPClient();
    http.setTimout(timeout);
    http.setUserAgent(userAgent);
    http.setHost(this.getHost());
    final byte[] body = http.GETbytes(this.toNormalform(false, false));
    return new ByteArrayInputStream(body);
}
/**
 * Loads the complete content behind this URI into a byte array.
 * <p>
 * file and smb resources are read through {@link #read(InputStream)}; the stream
 * opened here is always closed again, because {@code read} leaves its source open.
 * http and https resources are fetched with an {@link HTTPClient} GET.
 *
 * @param userAgent user agent for the request; only used on the http/https path
 * @param timeout   timeout in milliseconds; only used on the http/https path
 * @return the content bytes, or {@code null} if the protocol is none of
 *         file, smb, http or https
 * @throws IOException if the resource cannot be opened, read or fetched
 */
public byte[] get(final String userAgent, final int timeout) throws IOException {
    InputStream local = null;
    if (isFile()) {
        local = new FileInputStream(getFSFile());
    } else if (isSMB()) {
        local = new SmbFileInputStream(getSmbFile());
    }
    if (local != null) {
        try {
            return read(local);
        } finally {
            // release the file/SMB handle; a failing close must not hide the result
            // or the original read error
            try {
                local.close();
            } catch (final IOException ignored) {
                // nothing useful can be done about a failed close of an input stream
            }
        }
    }
    if (isHTTP() || isHTTPS()) {
        final HTTPClient client = new HTTPClient();
        client.setTimout(timeout);
        client.setUserAgent(userAgent);
        client.setHost(this.getHost());
        return client.GETbytes(this.toNormalform(false, false));
    }
    return null;
}
/**
 * Drains a stream into a byte array.
 * Reading stops as soon as a read call delivers no bytes (end of stream).
 * The source stream is not closed by this method.
 *
 * @param source the stream to read from
 * @return all bytes that were read, possibly an empty array
 * @throws IOException if reading from the source fails
 */
public static byte[] read(final InputStream source) throws IOException {
    final byte[] chunk = new byte[2048];
    final ByteArrayOutputStream collected = new ByteArrayOutputStream();
    for (int n = source.read(chunk, 0, 2048); n > 0; n = source.read(chunk, 0, 2048)) {
        collected.write(chunk, 0, n);
    }
    // flush/close are no-ops on a ByteArrayOutputStream, so the buffer is returned directly
    return collected.toByteArray();
}
//---------------------
private static final String splitrex = " |/|\\(|\\)|-|\\:|_|\\.|,|\\?|!|'|" + '"';

@ -1,193 +0,0 @@
/**
* HttpConnector
* Copyright 2010 by Michael Peter Christen
* First released 25.05.2010 at http://yacy.net
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file lgpl21.txt
* If not, see <http://www.gnu.org/licenses/>.
*/
package net.yacy.cora.protocol;
import java.io.IOException;
import java.util.LinkedHashMap;
import java.util.Set;
import java.util.Map.Entry;
//import java.util.List;
import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.cora.protocol.http.HTTPClient;
//import org.apache.commons.httpclient.methods.multipart.Part;
import org.apache.http.entity.mime.content.ContentBody;
import de.anomic.crawler.retrieval.HTTPLoader;
// import de.anomic.http.client.Client;
//import de.anomic.http.server.HeaderFramework;
//import de.anomic.http.server.RequestHeader;
//import de.anomic.http.server.ResponseContainer;
/**
 * Static one-shot helpers for HTTP GET and multipart POST requests.
 * Every call creates its own {@link HTTPClient}, configures it and returns the response body.
 */
public class HttpConnector {

    /**
     * Posts the given parts to a server, identifying as {@code HTTPLoader.yacyUserAgent}.
     *
     * @param url     address of the server
     * @param vhost   name of the server at that address which should respond
     * @param post    data to send (name-value pairs)
     * @param timeout timeout in milliseconds
     * @return response body
     * @throws IOException
     */
    public static byte[] wput(final String url, final String vhost, LinkedHashMap<String,ContentBody> post, final int timeout) throws IOException {
        final HTTPClient poster = new HTTPClient();
        poster.setTimout(timeout);
        poster.setUserAgent(HTTPLoader.yacyUserAgent);
        poster.setHost(vhost);
        final byte[] response = poster.POSTbytes(url, post);
        return response;
    }

    /**
     * Fetches the document at the given URI; the host of the URI is used as virtual host.
     *
     * @param url     address of the document
     * @param timeout timeout in milliseconds
     * @return response body
     * @throws IOException
     */
    public static byte[] wget(final MultiProtocolURI url, final int timeout) throws IOException {
        final String target = url.toNormalform(false, false);
        final String vhost = url.getHost();
        return wget(target, vhost, timeout);
    }

    /**
     * Fetches a document from the server named by vhost, identifying as
     * {@code HTTPLoader.yacyUserAgent}.
     *
     * @param url     address of the server
     * @param vhost   name of the server at that address which should respond
     * @param timeout timeout in milliseconds
     * @return response body
     * @throws IOException
     */
    public static byte[] wget(final String url, final String vhost, final int timeout) throws IOException {
        final HTTPClient fetcher = new HTTPClient();
        fetcher.setTimout(timeout);
        fetcher.setUserAgent(HTTPLoader.yacyUserAgent);
        fetcher.setHost(vhost);
        final byte[] response = fetcher.GETbytes(url);
        return response;
    }

    /**
     * Fetches a document using caller-supplied request headers and no explicit virtual host.
     *
     * @param url     address of the server
     * @param entrys  entries of the request header
     * @param timeout timeout in milliseconds
     * @return response body
     * @throws IOException
     */
    public static byte[] wget(final String url, final Set<Entry<String, String>> entrys, final int timeout) throws IOException {
        final String noVhost = null;
        return wget(url, entrys, timeout, noVhost);
    }

    /**
     * Fetches a document using caller-supplied request headers.
     * No user agent is set here; it is expected to come with the header entries.
     *
     * @param url     address of the server
     * @param entrys  entries of the request header
     * @param timeout timeout in milliseconds
     * @param vhost   name of the server at that address which should respond
     * @return response body
     * @throws IOException
     */
    public static byte[] wget(final String url, final Set<Entry<String, String>> entrys, final int timeout, final String vhost) throws IOException {
        final HTTPClient fetcher = new HTTPClient();
        fetcher.setHeader(entrys);
        fetcher.setTimout(timeout);
        fetcher.setHost(vhost);
        final byte[] response = fetcher.GETbytes(url);
        return response;
    }
}

@ -2048,14 +2048,14 @@ public class FTPClient {
// save ip address in high byte order
// byte[] Bytes = LocalIp.getAddress();
final byte[] Bytes = Domains.myPublicLocalIP().getHostAddress().getBytes();
final byte[] b = Domains.myPublicLocalIP().getHostAddress().getBytes();
// bytes greater than 127 should not be printed as negative
final short[] Shorts = new short[4];
final short[] s = new short[4];
for (int i = 0; i < 4; i++) {
Shorts[i] = Bytes[i];
if (Shorts[i] < 0) {
Shorts[i] += 256;
s[i] = b[i];
if (s[i] < 0) {
s[i] += 256;
}
}
@ -2064,7 +2064,7 @@ public class FTPClient {
send("PORT "
+
// "127,0,0,1," +
Shorts[0] + "," + Shorts[1] + "," + Shorts[2] + "," + Shorts[3] + "," + ((DataPort & 0xff00) >> 8)
s[0] + "," + s[1] + "," + s[2] + "," + s[3] + "," + ((DataPort & 0xff00) >> 8)
+ "," + (DataPort & 0x00ff));
// read status of the command from the control port

@ -329,7 +329,7 @@ public class HTTPClient {
* @return content bytes
* @throws IOException
*/
public byte[] POSTbytes(final String uri, final LinkedHashMap<String,ContentBody> parts) throws IOException {
public byte[] POSTbytes(final String uri, final LinkedHashMap<String, ContentBody> parts) throws IOException {
final HttpPost httpPost = new HttpPost(uri);
final MultipartEntity multipartEntity = new MultipartEntity();

@ -0,0 +1,71 @@
/**
* HTTPConnector
* Copyright 2010 by Michael Peter Christen
* First released 25.05.2010 at http://yacy.net
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file lgpl21.txt
* If not, see <http://www.gnu.org/licenses/>.
*/
package net.yacy.cora.protocol.http;
import java.io.IOException;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import net.yacy.cora.document.MultiProtocolURI;
import org.apache.http.entity.mime.content.ContentBody;
/**
 * This Connector is a convenience class to access the protocol-specific http client class.
 * Connectors are cached per user agent string; use {@link #getConnector(String)} to obtain one.
 */
public class HTTPConnector {

    /** cache of connectors, keyed by user agent string */
    private static final ConcurrentHashMap<String, HTTPConnector> cons = new ConcurrentHashMap<String, HTTPConnector>();

    /** user agent sent with every request made through this connector */
    private final String userAgent;

    private HTTPConnector(String userAgent) {
        this.userAgent = userAgent;
    }

    /**
     * Returns the connector for the given user agent, creating and caching it on first use.
     *
     * @param userAgent the user agent string the connector sends with its requests
     * @return the cached connector for this user agent
     */
    public static final HTTPConnector getConnector(String userAgent) {
        HTTPConnector c = cons.get(userAgent);
        if (c != null) return c;
        c = new HTTPConnector(userAgent);
        // store the new instance so that later calls actually hit the cache;
        // if another thread stored one in the meantime, hand out that one instead
        final HTTPConnector raced = cons.putIfAbsent(userAgent, c);
        return (raced == null) ? c : raced;
    }

    /**
     * send data to the server named by vhost
     *
     * @param url address of the server
     * @param timeout in milliseconds
     * @param vhost name of the server at address which should respond
     * @param post data to send (name-value-pairs)
     * @return response body
     * @throws IOException
     */
    public byte[] post(final MultiProtocolURI url, final int timeout, final String vhost, LinkedHashMap<String, ContentBody> post) throws IOException {
        final HTTPClient client = new HTTPClient();
        client.setTimout(timeout);
        client.setUserAgent(this.userAgent);
        client.setHost(vhost);
        return client.POSTbytes(url.toNormalform(false, false), post);
    }
}

@ -0,0 +1,71 @@
/**
* HTTPConnector
* Copyright 2010 by Michael Peter Christen
* First released 25.05.2010 at http://yacy.net
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file lgpl21.txt
* If not, see <http://www.gnu.org/licenses/>.
*/
package net.yacy.cora.protocol.http;
import java.io.IOException;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import net.yacy.cora.document.MultiProtocolURI;
import org.apache.http.entity.mime.content.ContentBody;
/**
 * This Connector is a convenience class to access the protocol-specific http client class.
 * Connectors are cached per user agent string; use {@link #getConnector(String)} to obtain one.
 */
public class HTTPConnector {

    /** cache of connectors, keyed by user agent string */
    private static final ConcurrentHashMap<String, HTTPConnector> cons = new ConcurrentHashMap<String, HTTPConnector>();

    /** user agent sent with every request made through this connector */
    private final String userAgent;

    private HTTPConnector(String userAgent) {
        this.userAgent = userAgent;
    }

    /**
     * Returns the connector for the given user agent, creating and caching it on first use.
     *
     * @param userAgent the user agent string the connector sends with its requests
     * @return the cached connector for this user agent
     */
    public static final HTTPConnector getConnector(String userAgent) {
        HTTPConnector c = cons.get(userAgent);
        if (c != null) return c;
        c = new HTTPConnector(userAgent);
        // store the new instance so that later calls actually hit the cache;
        // if another thread stored one in the meantime, hand out that one instead
        final HTTPConnector raced = cons.putIfAbsent(userAgent, c);
        return (raced == null) ? c : raced;
    }

    /**
     * send data to the server named by vhost
     *
     * @param url address of the server
     * @param timeout in milliseconds
     * @param vhost name of the server at address which should respond
     * @param post data to send (name-value-pairs)
     * @return response body
     * @throws IOException
     */
    public byte[] post(final MultiProtocolURI url, final int timeout, final String vhost, LinkedHashMap<String, ContentBody> post) throws IOException {
        final HTTPClient client = new HTTPClient();
        client.setTimout(timeout);
        client.setUserAgent(this.userAgent);
        client.setHost(vhost);
        return client.POSTbytes(url.toNormalform(false, false), post);
    }
}

@ -34,13 +34,15 @@ import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.cora.document.RSSFeed;
import net.yacy.cora.document.RSSMessage;
import net.yacy.cora.document.RSSReader;
import net.yacy.cora.protocol.HttpConnector;
import net.yacy.cora.protocol.http.HTTPConnector;
//import org.apache.commons.httpclient.methods.multipart.Part;
//import org.apache.commons.httpclient.methods.multipart.StringPart;
import org.apache.http.entity.mime.content.ContentBody;
import org.apache.http.entity.mime.content.StringBody;
import de.anomic.crawler.retrieval.HTTPLoader;
public class Search {
public static BlockingQueue<RSSMessage> search(String rssSearchServiceURL, String query, boolean verify, boolean global, long timeout, int maximumRecords) {
@ -118,24 +120,15 @@ public class Search {
throw new IOException("cora.Search failed asking peer '" + rssSearchServiceURL + "': bad url, " + e.getMessage());
}
// prepare request
// final List<Part> post = new ArrayList<Part>();
// post.add(new StringPart("query", query, Charset.defaultCharset().name()));
// post.add(new StringPart("startRecord", Integer.toString(startRecord), Charset.defaultCharset().name()));
// post.add(new StringPart("maximumRecords", Long.toString(maximumRecords), Charset.defaultCharset().name()));
// post.add(new StringPart("verify", verify ? "true" : "false", Charset.defaultCharset().name()));
// post.add(new StringPart("resource", global ? "global" : "local", Charset.defaultCharset().name()));
// send request
try {
// final byte[] result = HttpConnector.wput(rssSearchServiceURL, uri.getHost(), post, (int) timeout);
final LinkedHashMap<String,ContentBody> parts = new LinkedHashMap<String,ContentBody>();
parts.put("query", new StringBody(query));
parts.put("startRecord", new StringBody(Integer.toString(startRecord)));
parts.put("maximumRecords", new StringBody(Long.toString(maximumRecords)));
parts.put("verify", new StringBody(verify ? "true" : "false"));
parts.put("resource", new StringBody(global ? "global" : "local"));
final byte[] result = HttpConnector.wput(rssSearchServiceURL, uri.getHost(), parts, (int) timeout);
final byte[] result = HTTPConnector.getConnector(HTTPLoader.yacyUserAgent).post(new MultiProtocolURI(rssSearchServiceURL), (int) timeout, uri.getHost(), parts);
//String debug = new String(result); System.out.println("*** DEBUG: " + debug);
final RSSReader reader = RSSReader.parse(result);
if (reader == null) {

@ -28,8 +28,9 @@ import java.nio.charset.Charset;
import java.nio.charset.IllegalCharsetNameException;
import java.nio.charset.UnsupportedCharsetException;
import de.anomic.crawler.retrieval.HTTPLoader;
import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.cora.protocol.HttpConnector;
import net.yacy.document.AbstractParser;
import net.yacy.document.Document;
import net.yacy.document.Parser;
@ -233,7 +234,7 @@ public class htmlParser extends AbstractParser implements Parser {
MultiProtocolURI url;
try {
url = new MultiProtocolURI(args[0]);
byte[] content = HttpConnector.wget(url, 3000);
byte[] content = url.get(HTTPLoader.crawlerUserAgent, 3000);
Document[] document = new htmlParser().parse(url, "text/html", null, new ByteArrayInputStream(content));
String title = document[0].dc_title();
System.out.println(title);

@ -555,7 +555,6 @@ public class Domains {
}
public static InetAddress myPublicLocalIP() {
new localHostAddressLookup().start();
// list all addresses
// for (int i = 0; i < localHostAddresses.length; i++) System.out.println("IP: " + localHostAddresses[i].getHostAddress()); // DEBUG
if (localHostAddresses.length == 0) {

Loading…
Cancel
Save