Detailed some Javadoc related to /api/webstructure.xml usage.

pull/105/head
luccioman 8 years ago
parent 007e2afa6e
commit 9cea7cbb10

@ -45,8 +45,51 @@ import net.yacy.search.index.Segment.ReferenceReportCache;
import net.yacy.server.serverObjects;
import net.yacy.server.serverSwitch;
/**
* Retrieval of a web links structure.
*/
public class webstructure {
/**
* Retrieve the locally known web links structure of a specified resource ("about" parameter supplied) or
* the whole computed links structure since install (no parameter supplied)
* or since last start or last call ("latest" parameter supplied).
* Returned object contains the following information :
* <ul>
* <li>in all cases :
* <ul>
* <li>accumulated list of outgoing links to other domains (per host accumulated anchors)</li>
* </ul>
* </li>
* <li>when "about" parameter is filled :
* <ul>
* <li>accumulated list of incoming links from other domains (per host accumulated references)</li>
* <li>detailed list of outgoing links (anchors) from documents to references</li>
* <li>detailed list of incoming links (citations) from other documents (their references) - reverse link structure</li>
* </ul>
* </li>
* </ul>
* Information detail is limited by {@link WebStructureGraph#maxhosts} and {@link WebStructureGraph#maxref} constants.
*
* @param header
* servlet request header
* @param post
* request parameters. Supported keys :
* <ul>
* <li>about : get only links structure about the resource
* specified as value. Supported values : host hash, URL hash,
* host name or URL</li>
* <li>latest (ignored when the about parameter has a value): get the structure that has been computed during
* the current run-time of YaCy, and with each subsequent call only an
* update to the next list of references.</li>
* <li>agentName : name of the user agent string used to load the
* "about" resource</li>
* </ul>
* @param env
* server environment
* @return the servlet answer object
* @see WebStructureGraph
*/
public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) {
final serverObjects prop = new serverObjects();
final Switchboard sb = (Switchboard) env;

@ -1,4 +1,4 @@
// plasmaWebStructure.java
// WebStructureGraph.java
// -----------------------------
// (C) 2007 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
// first published 15.05.2007 on http://yacy.net
@ -65,19 +65,34 @@ import net.yacy.kelondro.rwi.ReferenceFactory;
import net.yacy.kelondro.util.FileUtils;
import net.yacy.search.Switchboard;
/**
* Holds lists of links per host names to allow reconstructing a web graph structure of links.
*/
public class WebStructureGraph {
public static int maxref = 200; // maximum number of references, to avoid overflow when a large link farm occurs (i.e. wikipedia)
public static int maxhosts = 10000; // maximum number of hosts in web structure map
/** Maximum number of references per host, to avoid overflow when a large link farm occurs (e.g. Wikipedia) */
public static int maxref = 200;
/** Maximum number of hosts in web structure map */
public static int maxhosts = 10000;
private final static ConcurrentLog log = new ConcurrentLog("WebStructureGraph");
/** Backup file */
private final File structureFile;
/** Older structure entries (notably loaded from the backup file) */
private final TreeMap<String, byte[]> structure_old; // <b64hash(6)>','<host> to <date-yyyymmdd(8)>{<target-b64hash(6)><target-count-hex(4)>}*
/** Recently computed structure entries */
private final TreeMap<String, byte[]> structure_new;
/** Queue used to receive new entries to store */
private final BlockingQueue<LearnObject> publicRefDNSResolvingQueue;
/** Worker thread consuming the publicRefDNSResolvingQueue */
private final PublicRefDNSResolvingProcess publicRefDNSResolvingWorker;
/** Entry used to terminate the worker thread */
private final static LearnObject leanrefObjectPOISON = new LearnObject(null, null);
private static class LearnObject {
@ -90,6 +105,13 @@ public class WebStructureGraph {
}
}
/**
* Constructs an instance, loads entries from the supplied backup
* structureFile when it exists, and starts the worker thread.
*
* @param structureFile
* backup file
*/
public WebStructureGraph(final File structureFile) {
this.structure_old = new TreeMap<String, byte[]>();
this.structure_new = new TreeMap<String, byte[]>();

Loading…
Cancel
Save