redesign of index caching - removed indexCache.db

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@86 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 20 years ago
parent 3f85978519
commit 1d7fed87dc

@ -36,7 +36,8 @@ You can <a href="http://java.sun.com/j2se/1.4.2/download.html">download the Java
<p><b>Latest Release:</b>
The latest YaCy-release is 0.37.<br>
Download <a href="/yacy/release/yacy_v0.37_20050502.tar.gz">YaCy 0.37</a> here.
Download <a href="/yacy/release/yacy_v0.37_20050502.tar.gz">generic (all platforms with J2SE 1.4: Linux, Mac OS X, Windows, Solaris) YaCy 0.37</a> here.<br>
If you want to install YaCy on Windows, you can use the convenient <a href="/yacy/release/yacy_v0.37_20050502.exe">Windows-Installer-Version of YaCy 0.37</a>.
</p>
<p><table bgcolor="#EEEEEE" width="100%"><tr><td>

@ -0,0 +1,41 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
<html>
<head>
<title>YaCy: Press Material, Publications, Presentations</title>
<meta http-equiv="content-type" content="text/html;charset=iso-8859-1">
<!-- <meta name="Content-Language" content="German, Deutsch, de, at, ch"> -->
<meta name="Content-Language" content="English, Englisch">
<meta name="keywords" content="YaCy HTTP Proxy search engine spider indexer java network open free download Mac Windows Software development">
<meta name="description" content="YaCy Software HTTP Proxy Freeware Home Page">
<meta name="copyright" content="Michael Christen">
<script src="navigation.js" type="text/javascript"></script>
<link rel="stylesheet" media="all" href="style.css">
<!-- Realisation: Michael Christen; Contact: mc<at>anomic.de-->
</head>
<body bgcolor="#fefefe" marginheight="0" marginwidth="0" leftmargin="0" topmargin="0">
<SCRIPT LANGUAGE="JavaScript1.1"><!--
globalheader();
//--></SCRIPT>
<!-- ----- HERE STARTS CONTENT PART ----- -->
<h2>Press Material, Publications, Presentations</h2>
<p>Here you can find links to documents that have been published about YaCy by YaCy authors.</p><br>
<p>Deutsche Dokumentation / German-only documents
<ul>
<li><a href="http://www.yacy.net/yacy/material/YaCy-Datenschleuder086.pdf"><b>"YaCy -- Peer-to-Peer Web-Suchmaschine"</b></a> - Ver&ouml;ffentlichung in der Datenschleuder #086; technische Details zur Funktionsweise</li>
<li><a href="http://www.yacy.net/yacy/material/YaCy-nichtMonopolisierbar.pdf"><b>Vortrag zur SuMa-eV Veranstaltung: "Portale/Suchmaschinen - und ihre Grenzen"</b> - pdf/pr&auml;sentierfertige Folien</a></li>
<li><a href="http://www.yacy.net/yacy/material/YaCy-nichtMonopolisierbar/index.html"><b>Vortrag zur SuMa-eV Veranstaltung: "Portale/Suchmaschinen - und ihre Grenzen"</b> - Web-Pr&auml;sentation</a></li>
<li><a href="http://www.yacy.net/yacy/material/YaCy-FlyerD.pdf"><b>Flyer "Das Wichtigste zu YaCy im &Uuml;berblick"</b></a></li>
</ul></p><br>
<!-- ----- HERE ENDS CONTENT PART ----- -->
<SCRIPT LANGUAGE="JavaScript1.1"><!--
globalfooter();
//--></SCRIPT>
</body>
</html>

@ -1,7 +1,7 @@
var appname = "YACY: a Java Freeware P2P-Based Search Engine with Caching HTTP Proxy";
var thismenu = new Array(
"index","FAQ","Details","Technology","Platforms","News","Demo","License","Download",
"Installation","Volunteers","Deutsches Forum@http://www.yacy-forum.de","English Forum@http://sourceforge.net/forum/?group_id=116142","Links","Contact","","Impressum");
"Installation","Volunteers","Deutsches Forum@http://www.yacy-forum.de","English Forum@http://sourceforge.net/forum/?group_id=116142","Material","Links","Contact","","Impressum");
var mainmenu = new Array(
"YACY Home@http://www.yacy.net/index.html",
"Products@http://www.yacy.net/Products/index.html",

@ -76,7 +76,7 @@ public class IndexControl_p {
prop.put("urlstring", "");
prop.put("urlhash", "");
prop.put("result", "");
prop.put("wcount", "" + switchboard.wordIndex.sizeMin());
prop.put("wcount", "" + switchboard.wordIndex.size());
prop.put("ucount", "" + switchboard.loadedURL.size());
prop.put("otherHosts", "");
prop.put("indexDistributeChecked", (switchboard.getConfig("allowDistributeIndex", "true").equals("true")) ? "checked" : "");
@ -116,25 +116,23 @@ public class IndexControl_p {
}
if (post.containsKey("keyhashdeleteall")) {
try {
if ((delurl) || (delurlref)) {
// generate an urlx array
try {
HashSet keyhashes = new HashSet();
keyhashes.add(keyhash);
plasmaWordIndexEntity index = switchboard.searchManager.searchHashes(keyhashes, 10000);
Enumeration en = index.elements(true);
int i = 0;
urlx = new String[index.size()];
while (en.hasMoreElements()) urlx[i++] = ((plasmaWordIndexEntry) en.nextElement()).getUrlHash();
} catch (IOException e) {
urlx = new String[0];
}
if ((delurl) || (delurlref)) {
// generate an urlx array
try {
HashSet keyhashes = new HashSet();
keyhashes.add(keyhash);
plasmaWordIndexEntity index = switchboard.searchManager.searchHashes(keyhashes, 10000);
Enumeration en = index.elements(true);
int i = 0;
urlx = new String[index.size()];
while (en.hasMoreElements()) urlx[i++] = ((plasmaWordIndexEntry) en.nextElement()).getUrlHash();
} catch (IOException e) {
urlx = new String[0];
}
if (delurlref) for (int i = 0; i < urlx.length; i++) switchboard.removeAllUrlReferences(urlx[i], true);
if ((delurl) || (delurlref)) for (int i = 0; i < urlx.length; i++) switchboard.loadedURL.remove(urlx[i]);
switchboard.wordIndex.deleteComplete(keyhash);
} catch (IOException e) {}
}
if (delurlref) for (int i = 0; i < urlx.length; i++) switchboard.removeAllUrlReferences(urlx[i], true);
if ((delurl) || (delurlref)) for (int i = 0; i < urlx.length; i++) switchboard.loadedURL.remove(urlx[i]);
switchboard.wordIndex.deleteIndex(keyhash);
post.remove("keyhashdeleteall");
if ((keystring.length() > 0) && (plasmaWordIndexEntry.word2hash(keystring).equals(keyhash)))
post.put("keystringsearch", "generated");
@ -143,11 +141,9 @@ public class IndexControl_p {
}
if (post.containsKey("keyhashdelete")) {
try {
if (delurlref) for (int i = 0; i < urlx.length; i++) switchboard.removeAllUrlReferences(urlx[i], true);
if ((delurl) || (delurlref)) for (int i = 0; i < urlx.length; i++) switchboard.loadedURL.remove(urlx[i]);
switchboard.wordIndex.removeEntries(keyhash, urlx, true);
} catch (IOException e) {}
if (delurlref) for (int i = 0; i < urlx.length; i++) switchboard.removeAllUrlReferences(urlx[i], true);
if ((delurl) || (delurlref)) for (int i = 0; i < urlx.length; i++) switchboard.loadedURL.remove(urlx[i]);
switchboard.wordIndex.removeEntries(keyhash, urlx, true);
// this shall lead to a presentation of the list; so handle that the remaining program
// thinks that it was called for a list presentation
post.remove("keyhashdelete");
@ -200,16 +196,13 @@ public class IndexControl_p {
plasmaWordIndexEntity[] indexes = new plasmaWordIndexEntity[1];
String result;
long starttime = System.currentTimeMillis();
try {indexes[0] = switchboard.wordIndex.getEntity(keyhash, true);
result = yacyClient.transferIndex(yacyCore.seedDB.getConnected(post.get("hostHash", "")), indexes, switchboard.loadedURL);
} catch (IOException e) {
result = "IOException: " + e.getMessage();
}
indexes[0] = switchboard.wordIndex.getEntity(keyhash, true);
result = yacyClient.transferIndex(yacyCore.seedDB.getConnected(post.get("hostHash", "")), indexes, switchboard.loadedURL);
prop.put("result", (result == null) ? ("Successfully transferred " + indexes[0].size() + " words in " + ((System.currentTimeMillis() - starttime) / 1000) + " seconds") : result);
}
if (post.containsKey("keyhashsimilar")) {
Iterator hashIt = switchboard.wordIndex.hashIterator(keyhash, true, true, true);
Iterator hashIt = switchboard.wordIndex.wordHashes(keyhash, true, true);
String result = "Sequential List of Word-Hashes:<br>";
String hash;
int i = 0;
@ -294,7 +287,7 @@ public class IndexControl_p {
}
// insert constants
prop.put("wcount", "" + switchboard.wordIndex.sizeMin());
prop.put("wcount", "" + switchboard.wordIndex.size());
prop.put("ucount", "" + switchboard.loadedURL.size());
prop.put("indexDistributeChecked", (switchboard.getConfig("allowDistributeIndex", "true").equals("true")) ? "checked" : "");
prop.put("indexReceiveChecked", (switchboard.getConfig("allowReceiveIndex", "true").equals("true")) ? "checked" : "");

@ -65,7 +65,7 @@ public class IndexShare_p {
prop.put("wordfreq", switchboard.getConfig("defaultWordReceiveFrequency","10"));
prop.put("dtable", "");
prop.put("rtable", "");
prop.put("wcount", "" + switchboard.wordIndex.sizeMin());
prop.put("wcount", "" + switchboard.wordIndex.size());
prop.put("ucount", "" + switchboard.loadedURL.size());
return prop; // be save
}
@ -78,7 +78,7 @@ public class IndexShare_p {
}
// insert constants
prop.put("wcount", "" + switchboard.wordIndex.sizeMin());
prop.put("wcount", "" + switchboard.wordIndex.size());
prop.put("ucount", "" + switchboard.loadedURL.size());
// return rewrite properties
return prop;

@ -51,6 +51,7 @@ import java.util.Vector;
import de.anomic.http.httpHeader;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.plasma.plasmaWordIndexEntry;
import de.anomic.plasma.plasmaWordIndexEntryContainer;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
import de.anomic.yacy.yacyCore;
@ -106,17 +107,13 @@ public class transferRWI {
wordHash = estring.substring(0, p);
wordhashes[i] = wordHash;
entry = new plasmaWordIndexEntry(estring.substring(p));
try {
switchboard.wordIndex.addEntry(wordHash, entry);
urlHash = entry.getUrlHash();
if ((!(unknownURL.contains(urlHash))) &&
(!(switchboard.loadedURL.exists(urlHash)))) {
unknownURL.add(urlHash);
}
received++;
} catch (IOException ee) {
ee.printStackTrace();
switchboard.wordIndex.addEntries(plasmaWordIndexEntryContainer.instantContainer(wordHash, entry));
urlHash = entry.getUrlHash();
if ((!(unknownURL.contains(urlHash))) &&
(!(switchboard.loadedURL.exists(urlHash)))) {
unknownURL.add(urlHash);
}
received++;
}
}
yacyCore.seedDB.mySeed.incRI(received);

@ -45,10 +45,10 @@
# Contributions and changes to the program code must be marked as such.
# define variables
version='0.37'
version='0.371'
datestr=`date +%Y%m%d`
release='yacy_v'$version'_'$datestr
#release='yacy_dev_v'$version'_'$datestr
#release='yacy_v'$version'_'$datestr
release='yacy_dev_v'$version'_'$datestr
extralibs='yacy_libx'
target='RELEASE'
classes='classes'
@ -58,7 +58,7 @@ source='source'
doc='doc'
data='DATA'
mainclass='yacy.java'
classpath='$classes:lib/commons-collections.jar:lib/commons-pool-1.2.jar:libx/PDFBox-0.7.1.jar:libx/log4j-1.2.9.jar:libx/tm-extractors-0.4.jar'
classpath='$classes:lib/commons-collections.jar:lib/commons-pool-1.2.jar:libx/PDFBox-0.7.1.jar:libx/log4j-1.2.9.jar:libx/tm-extractors-0.4.jar:libx/informa-0.6.0.jar:libx/jdom.jar'
mkdir $release
mkdir $extralibs

@ -301,16 +301,34 @@ public class kelondroMScoreCluster {
public static void main(String[] args) {
System.out.println("Test for Score: start");
long time = System.currentTimeMillis();
kelondroMScoreCluster s = new kelondroMScoreCluster();
for (int i = 0; i < 10000; i++) s.addScore("score#" + i + "xxx" + i + "xxx" + i + "xxx" + i + "xxx", i/10);
int c = 0;
// create cluster
long time = System.currentTimeMillis();
for (int i = 0; i < 10000; i++) {
s.addScore("score#" + i + "xxx" + i + "xxx" + i + "xxx" + i + "xxx", i/10);
c += i/10;
}
/*
System.out.println("result:");
Object[] result;
result = s.getScores(s.size(), true);
for (int i = 0; i < s.size(); i++) System.out.println("up: " + result[i]);
result = s.getScores(s.size(), false);
for (int i = 0; i < s.size(); i++) System.out.println("down: " + result[i]);
System.out.println("Test for Score: finish. time = " + (System.currentTimeMillis() - time));
System.out.println("total=" + s.totalCount() + ", elements=" + s.size());
*/
System.out.println("finished create. time = " + (System.currentTimeMillis() - time));
System.out.println("total=" + s.totalCount() + ", elements=" + s.size() + ", redundant count=" + c);
// delete cluster
time = System.currentTimeMillis();
for (int i = 0; i < 10000; i++) {
s.deleteScore("score#" + i + "xxx" + i + "xxx" + i + "xxx" + i + "xxx");
c -= i/10;
}
System.out.println("finished delete. time = " + (System.currentTimeMillis() - time));
System.out.println("total=" + s.totalCount() + ", elements=" + s.size() + ", redundant count=" + c);
}
}

@ -147,7 +147,7 @@ public class plasmaParserDocument {
private synchronized void resortLinks() {
Iterator i;
String url;
int extpos;
int extpos, qpos;
String ext = null;
i = anchors.entrySet().iterator();
hyperlinks = new HashMap();
@ -163,10 +163,10 @@ public class plasmaParserDocument {
extpos = url.lastIndexOf(".");
String normal;
if (extpos > 0) {
if (url.indexOf("?") != -1) {
ext = url.substring(extpos,url.indexOf("?")).toLowerCase();
if (((qpos = url.indexOf("?")) >= 0) && (qpos > extpos)) {
ext = url.substring(extpos, qpos).toLowerCase();
} else {
ext = url.substring(extpos).toLowerCase();
ext = url.substring(extpos).toLowerCase();
}
normal = plasmaParser.urlNormalform(url);
if (normal != null) {

@ -81,19 +81,11 @@ public class plasmaSearch {
// we take mod 64**3 = 262144, this is the mask of the storage
return (int) ((modified.getTime() / 86400000) % 262144);
}
public void addWordIndex(URL url, String urlHash, Date urlModified, int quality, String wordHash, int wordCount, int posintext, int posinphrase, int posofphraseint, String language, char doctype, boolean local) {
// this is called by the remote search procedure when a new index arrives from remote
plasmaWordIndexEntry entry = new plasmaWordIndexEntry(urlHash, wordCount,
posintext, posinphrase, posofphraseint,
calcVirtualAge(urlModified), quality,
language, doctype, local);
try {
wordIndex.addEntry(wordHash, entry);
} catch (IOException e) {}
// System.out.println("* received one index entry for URL: " + url); // debug
}
public void addWords(plasmaWordIndexEntryContainer container) {
wordIndex.addEntries(container);
}
public int addPageIndex(URL url, String urlHash, Date urlModified, plasmaCondenser condenser,
String language, char doctype) {
// this is called by the switchboard to put in a new page into the index
@ -112,8 +104,7 @@ public class plasmaSearch {
int count;
plasmaWordIndexEntry entry;
String wordHash;
int c = 0;
int p = 0;
int p = 0;
while (i.hasNext()) {
word = (String) i.next();
count = condenser.wordCount(word);
@ -121,9 +112,7 @@ public class plasmaSearch {
wordHash = plasmaWordIndexEntry.word2hash(word);
entry = new plasmaWordIndexEntry(urlHash, count, p++, 0, 0,
age, quality, language, doctype, true);
try {
c += wordIndex.addEntry(wordHash, entry);
} catch (IOException e) {}
wordIndex.addEntries(plasmaWordIndexEntryContainer.instantContainer(wordHash, entry));
}
//System.out.println("DEBUG: plasmaSearch.addPageIndex: added " + condenser.getWords().size() + " words, flushed " + c + " entries");
return condenser.getWords().size();

@ -250,8 +250,9 @@ public class plasmaSwitchboard extends serverAbstractSwitch implements serverSwi
loadedURL = new plasmaCrawlLURL(new File(plasmaPath, "urlHash.db"), ramLURL);
noticeURL = new plasmaCrawlNURL(plasmaPath, ramNURL);
errorURL = new plasmaCrawlEURL(new File(plasmaPath, "urlErr0.db"), ramEURL);
wordIndex = new plasmaWordIndex(plasmaPath, ramRWI);
wordIndex.setMaxWords(10000);
wordIndex = new plasmaWordIndex(plasmaPath, ramRWI, log);
int wordCacheMax = Integer.parseInt((String) getConfig("wordCacheMax", "10000"));
wordIndex.setMaxWords(wordCacheMax);
searchManager = new plasmaSearch(loadedURL, wordIndex);
// start a cache manager
@ -430,7 +431,7 @@ public class plasmaSwitchboard extends serverAbstractSwitch implements serverSwi
}
public int cacheSizeMin() {
return wordIndex.sizeMin();
return wordIndex.size();
}
public void enQueue(Object job) {
@ -1195,9 +1196,7 @@ public class plasmaSwitchboard extends serverAbstractSwitch implements serverSwi
while (it.hasNext()) {
word = (String) it.next();
// delete the URL reference in this word index
try {
count += wordIndex.removeEntries(plasmaWordIndexEntry.word2hash(word), urlEntries, true);
} catch (IOException e) {}
count += wordIndex.removeEntries(plasmaWordIndexEntry.word2hash(word), urlEntries, true);
}
return count;
}
@ -1266,7 +1265,7 @@ public class plasmaSwitchboard extends serverAbstractSwitch implements serverSwi
(yacyCore.seedDB.mySeed == null) ||
(yacyCore.seedDB.mySeed.isVirgin()) ||
(loadedURL.size() < 10) ||
(wordIndex.sizeMin() < 100) ||
(wordIndex.size() < 100) ||
(!(yacyCore.seedDB.mySeed.isJunior()))) return false;
int transferred;
@ -1369,7 +1368,7 @@ public class plasmaSwitchboard extends serverAbstractSwitch implements serverSwi
Vector tmpEntities = new Vector();
String nexthash = "";
try {
Iterator wordHashIterator = wordIndex.hashIterator(hash, true, true, true);
Iterator wordHashIterator = wordIndex.wordHashes(hash, true, true);
plasmaWordIndexEntity indexEntity, tmpEntity;
Enumeration urlEnum;
plasmaWordIndexEntry indexEntry;

@ -55,16 +55,17 @@ import java.util.TreeSet;
import de.anomic.kelondro.kelondroMSetTools;
import de.anomic.yacy.yacySeedDB;
import de.anomic.server.serverLog;
public class plasmaWordIndex {
File databaseRoot;
plasmaWordIndexRAMCache ramCache;
plasmaWordIndexCache ramCache;
public plasmaWordIndex(File databaseRoot, int bufferkb) throws IOException {
public plasmaWordIndex(File databaseRoot, int bufferkb, serverLog log) throws IOException {
this.databaseRoot = databaseRoot;
this.ramCache = new plasmaWordIndexRAMCache(databaseRoot, bufferkb);
ramCache.start();
plasmaWordIndexClassicDB fileDB = new plasmaWordIndexClassicDB(databaseRoot, log);
this.ramCache = new plasmaWordIndexCache(databaseRoot, fileDB, log);
}
public int maxURLinWordCache() {
@ -79,107 +80,35 @@ public class plasmaWordIndex {
ramCache.setMaxWords(maxWords);
}
public int addEntry(String wordHash, plasmaWordIndexEntry entry) throws IOException {
return ramCache.addEntryToIndexMem(wordHash, entry);
}
public int addEntries(plasmaWordIndexEntryContainer entries) {
return ramCache.addEntries(entries, System.currentTimeMillis());
}
public plasmaWordIndexEntity getEntity(String wordHash, boolean deleteIfEmpty) throws IOException {
return ramCache.getIndexMem(wordHash, deleteIfEmpty);
public plasmaWordIndexEntity getEntity(String wordHash, boolean deleteIfEmpty) {
return ramCache.getIndex(wordHash, deleteIfEmpty);
}
public int sizeMin() {
return ramCache.sizeMin();
public int size() {
return ramCache.size();
}
public int removeEntries(String wordHash, String[] urlHashes, boolean deleteComplete) throws IOException {
return ramCache.removeEntriesMem(wordHash, urlHashes, deleteComplete);
public int removeEntries(String wordHash, String[] urlHashes, boolean deleteComplete) {
return ramCache.removeEntries(wordHash, urlHashes, deleteComplete);
}
public void close(int waitingBoundSeconds) {
ramCache.close(waitingBoundSeconds);
}
public synchronized void deleteComplete(String wordHash) throws IOException {
ramCache.deleteComplete(wordHash);
}
public synchronized Iterator hashIterator(String startHash, boolean up, boolean rot, boolean deleteEmpty) {
Iterator i = new iterateCombined(startHash, up, deleteEmpty);
if ((rot) && (!(i.hasNext())) && (startHash != null)) {
return new iterateCombined(null, up, deleteEmpty);
} else {
return i;
}
public void deleteIndex(String wordHash) {
ramCache.deleteIndex(wordHash);
}
public class iterateCombined implements Iterator {
Comparator comp;
Iterator filei;
Iterator cachei;
String nextf, nextc;
public iterateCombined(String startHash, boolean up, boolean deleteEmpty) {
this.comp = kelondroMSetTools.fastStringComparator(up);
filei = fileIterator(startHash, up, deleteEmpty);
try {
cachei = ramCache.wordHashesMem(startHash, 100);
} catch (IOException e) {
cachei = new HashSet().iterator();
}
nextFile();
nextCache();
}
private void nextFile() {
if (filei.hasNext()) nextf = (String) filei.next(); else nextf = null;
}
private void nextCache() {
if (cachei.hasNext()) nextc = new String(((byte[][]) cachei.next())[0]); else nextc = null;
}
public boolean hasNext() {
return (nextf != null) || (nextc != null);
}
public Object next() {
String s;
if (nextc == null) {
s = nextf;
//System.out.println("Iterate Hash: take " + s + " from file, cache is empty");
nextFile();
return s;}
if (nextf == null) {
s = nextc;
//System.out.println("Iterate Hash: take " + s + " from cache, file is empty");
nextCache();
return s;}
// compare the strings
int c = comp.compare(nextf, nextc);
if (c == 0) {
s = nextf;
//System.out.println("Iterate Hash: take " + s + " from file&cache");
nextFile();
nextCache();
return s;
} else if (c < 0) {
s = nextf;
//System.out.println("Iterate Hash: take " + s + " from file");
nextFile();
return s;
} else {
s = nextc;
//System.out.println("Iterate Hash: take " + s + " from cache");
nextCache();
return s;
}
}
public void remove() {
}
public Iterator wordHashes(String startHash, boolean up, boolean rot) {
return ramCache.wordHashes(startHash, up);
}
public Iterator fileIterator(String startHash, boolean up, boolean deleteEmpty) {
return new iterateFiles(startHash, up, deleteEmpty);
}
@ -295,8 +224,8 @@ public class plasmaWordIndex {
public static void main(String[] args) {
//System.out.println(kelondroMSetTools.fastStringComparator(true).compare("RwGeoUdyDQ0Y", "rwGeoUdyDQ0Y"));
try {
plasmaWordIndex index = new plasmaWordIndex(new File("D:\\dev\\proxy\\DATA\\PLASMADB"), 555);
Iterator i = index.hashIterator("5A8yhZMh_Kmv", true, true, true);
plasmaWordIndex index = new plasmaWordIndex(new File("D:\\dev\\proxy\\DATA\\PLASMADB"), 555, new serverLog("TESTAPP"));
Iterator i = index.wordHashes("5A8yhZMh_Kmv", true, true);
while (i.hasNext()) {
System.out.println("File: " + (String) i.next());
}

@ -0,0 +1,380 @@
// plasmaWordIndexCache.java
// -------------------------
// part of YACY
// (C) by Michael Peter Christen; mc@anomic.de
// first published on http://www.anomic.de
// Frankfurt, Germany, 2005
// last major change: 6.5.2005
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
// Using this software in any meaning (reading, learning, copying, compiling,
// running) means that you agree that the Author(s) is (are) not responsible
// for cost, loss of data or any harm that may be caused directly or indirectly
// by usage of this softare or this documentation. The usage of this software
// is on your own risk. The installation and usage (starting/running) of this
// software may allow other people or application to access your computer and
// any attached devices and is highly dependent on the configuration of the
// software which must be done by the user of the software; the author(s) is
// (are) also not responsible for proper configuration and usage of the
// software, even if provoked by documentation provided together with
// the software.
//
// Any changes to this file according to the GPL as documented in the file
// gpl.txt aside this file in the shipment you received can be done to the
// lines that follows this copyright notice here, but changes must not be
// done inside the copyright notive above. A re-distribution must contain
// the intact and unchanged copyright notice.
// Contributions and changes to the program code must be marked as such.
package de.anomic.plasma;
import java.io.*;
import java.util.*;
import java.lang.RuntimeException;
import de.anomic.kelondro.*;
import de.anomic.server.serverLog;
import de.anomic.yacy.yacySeedDB;
public class plasmaWordIndexCache implements plasmaWordIndexInterface {
private static final String indexDumpFileName = "indexDump.stack";
static String minKey, maxKey;
// class variables
private File databaseRoot;
private plasmaWordIndexInterface backend;
private TreeMap cache;
private kelondroMScoreCluster hashScore;
private HashMap hashDate;
private int maxWords;
private serverLog log;
static {
maxKey = "";
for (int i = 0; i < yacySeedDB.commonHashLength; i++) maxKey += 'z';
minKey = "";
for (int i = 0; i < yacySeedDB.commonHashLength; i++) maxKey += '-';
}
// Creates an empty word cache in front of the given backend and then tries to
// re-load a cache dump from databaseRoot (see restore()).
//   databaseRoot - directory that holds the dump file
//   backend      - index that receives flushed entries and serves reads
//   log          - logger used for status and error messages
// A failing restore is logged and otherwise ignored; the cache then starts empty
// or partially filled.
public plasmaWordIndexCache(File databaseRoot, plasmaWordIndexInterface backend, serverLog log) {
    // collaborators handed in by the caller
    this.log = log;
    this.backend = backend;
    this.databaseRoot = databaseRoot;

    // fresh in-memory structures; the word limit defaults to 10000
    this.maxWords = 10000;
    this.hashDate = new HashMap();
    this.hashScore = new kelondroMScoreCluster();
    this.cache = new TreeMap();

    // pick up what a previous close() has dumped
    try {
        restore();
    } catch (IOException restoreFailure) {
        log.logError("unable to restore cache dump: " + restoreFailure.getMessage());
        restoreFailure.printStackTrace();
    }
}
// Writes the complete in-memory cache to the dump file "indexDump.stack" inside
// databaseRoot. An existing dump file is deleted first.
// Every word/url relation of every cached container becomes one stack row with
// four columns: the word hash, the number of entries in the container (4 bytes),
// the creation time of the word (8 bytes) and the encoded word entry.
// While dumping, a progress line is logged roughly every five seconds.
//   waitingSeconds - currently not evaluated by this method
// Throws IOException if the stack file operations fail.
// NOTE(review): dumpStack is never closed here -- confirm whether kelondroStack
// needs an explicit close to flush its data.
private void dump(int waitingSeconds) throws IOException {
    log.logSystem("creating dump for index cache, " + cache.size() + " words (and much more urls)");
    File indexDumpFile = new File(databaseRoot, indexDumpFileName);
    if (indexDumpFile.exists()) indexDumpFile.delete();
    kelondroStack dumpStack = new kelondroStack(indexDumpFile, 0, new int[]{plasmaWordIndexEntry.wordHashLength, 4, 8, plasmaWordIndexEntry.attrSpaceLong});
    long startTime = System.currentTimeMillis();
    long messageTime = System.currentTimeMillis() + 5000;
    long wordsPerSecond = 0, wordcount = 0, urlcount = 0;
    synchronized (cache) {
        Iterator i = cache.entrySet().iterator();
        Map.Entry entry;
        String wordHash;
        plasmaWordIndexEntryContainer container;
        long creationTime;
        plasmaWordIndexEntry wordEntry;
        byte[][] row = new byte[4][];
        while (i.hasNext()) {
            // get entries
            entry = (Map.Entry) i.next();
            wordHash = (String) entry.getKey();
            creationTime = getCreationTime(wordHash);
            container = (plasmaWordIndexEntryContainer) entry.getValue();

            // put entries on stack
            if (container != null) {
                Iterator ci = container.entries();
                while (ci.hasNext()) {
                    wordEntry = (plasmaWordIndexEntry) ci.next();
                    row[0] = wordHash.getBytes();
                    row[1] = kelondroRecords.long2bytes(container.size(), 4);
                    row[2] = kelondroRecords.long2bytes(creationTime, 8);
                    row[3] = wordEntry.toEncodedForm(true).getBytes();
                    dumpStack.push(row);
                    urlcount++;
                }
            }
            wordcount++;

            // write a log
            if (System.currentTimeMillis() > messageTime) {
                // the integer division yields 0 when fewer than one word per second was
                // written; clamp to 1 so that the estimate below cannot divide by zero
                wordsPerSecond = Math.max(1, wordcount * 1000 / (1 + System.currentTimeMillis() - startTime));
                log.logInfo("dumping status: " + wordcount + " words done, " + ((cache.size() - wordcount) / wordsPerSecond) + " seconds remaining");
                messageTime = System.currentTimeMillis() + 5000;
            }
        }
    }
    log.logSystem("dumped " + urlcount + " word/url relations in " + ((System.currentTimeMillis() - startTime) / 1000) + " seconds");
}
// Re-loads the cache from the dump file "indexDump.stack" inside databaseRoot.
// Every stack row is turned back into a word entry (columns 0, 2 and 3 of the row:
// word hash, creation time, encoded entry) and added to the cache with addEntry;
// column 1 (the container size written by dump) is not read.
// While restoring, a progress line is logged roughly every five seconds.
// Returns the number of rows that were read, or 0 if no dump file exists.
// Throws IOException if the stack file cannot be read.
private long restore() throws IOException {
    File indexDumpFile = new File(databaseRoot, indexDumpFileName);
    if (!(indexDumpFile.exists())) return 0;
    kelondroStack dumpStack = new kelondroStack(indexDumpFile, 0);
    log.logSystem("restore dump of index cache, " + dumpStack.size() + " word/url relations");
    long startTime = System.currentTimeMillis();
    long messageTime = System.currentTimeMillis() + 5000;
    long urlCount = 0, urlsPerSecond = 0;
    synchronized (cache) {
        Iterator i = dumpStack.iterator();
        kelondroRecords.Node node;
        String wordHash;
        long creationTime;
        plasmaWordIndexEntry wordEntry;
        byte[][] row;
        while (i.hasNext()) {
            // get out one entry
            node = (kelondroRecords.Node) i.next();
            row = node.getValues();
            wordHash = new String(row[0]);
            creationTime = kelondroRecords.bytes2long(row[2]);
            wordEntry = new plasmaWordIndexEntry(wordHash, new String(row[3]));

            // store to cache
            addEntry(wordHash, wordEntry, creationTime);
            urlCount++;

            // write a log
            if (System.currentTimeMillis() > messageTime) {
                // the integer division yields 0 when fewer than one row per second was
                // read; clamp to 1 so that the estimate below cannot divide by zero
                urlsPerSecond = Math.max(1, urlCount * 1000 / (1 + System.currentTimeMillis() - startTime));
                log.logInfo("restoring status: " + urlCount + " urls done, " + ((dumpStack.size() - urlCount) / urlsPerSecond) + " seconds remaining");
                messageTime = System.currentTimeMillis() + 5000;
            }
        }
    }
    log.logSystem("restored " + cache.size() + " words in " + ((System.currentTimeMillis() - startTime) / 1000) + " seconds");
    return urlCount;
}
// Returns the score that hashScore holds for its top-scored object. Scores are
// raised by the number of entries added per word hash (see addEntries/addEntry).
public int maxURLinWordCache() {
    Object topWord = hashScore.getMaxObject();
    return hashScore.getScore(topWord);
}
// Returns the number of word hashes that are currently held in the in-memory cache.
public int wordCacheRAMSize() {
    final int cachedWords = cache.size();
    return cachedWords;
}
// Sets the word limit of the cache. flushToLimit() flushes words to the backend
// while hashScore holds at least this many words.
public void setMaxWords(int maxWords) {
this.maxWords = maxWords;
}
// Returns the larger of the backend size and the number of cached word hashes.
// backend.size() is evaluated only once: the former version called it twice,
// which doubled a possibly expensive call and could compare one value but
// return another if the backend changed in between.
public int size() {
    return Math.max(backend.size(), cache.size());
}
// Returns an iterator over the word hashes of cache and backend combined,
// starting at startWordHash; a null startWordHash takes all cached hashes.
//   startWordHash - first hash of interest
//   up            - must be true; counting down is not supported
// Throws RuntimeException if up is false.
// Two fixes compared to the former version:
// - the cache side now starts at startWordHash too (TreeMap.tailMap, inclusive)
//   instead of always starting at the first cached hash;
// - the cached hashes are copied while holding the cache lock, so flushing or
//   deleting words during the iteration (getIndex, removeEntries, deleteIndex)
//   cannot break the iterator with a ConcurrentModificationException.
// NOTE(review): whether the backend treats startWordHash as inclusive is not
// visible here -- confirm that both sides agree.
public Iterator wordHashes(String startWordHash, boolean up) {
    if (!(up)) throw new RuntimeException("plasmaWordIndexCache.wordHashes can only count up");
    List cachedHashes;
    synchronized (cache) {
        Set keys = (startWordHash == null) ? cache.keySet() : cache.tailMap(startWordHash).keySet();
        // ArrayList keeps the sorted iteration order of the key set
        cachedHashes = new ArrayList(keys);
    }
    return new iterateCombined(cachedHashes.iterator(), backend.wordHashes(startWordHash, true), true);
}
// Iterator that merges two iterators of strings into one sequence. The heads of
// both inputs are pre-fetched; next() hands out one of them and re-fills it.
// If both heads compare as equal, the value is returned once and both inputs advance.
// NOTE(review): this presumably expects both inputs to be sorted according to
// kelondroMSetTools.fastStringComparator(up) -- confirm with the callers.
public class iterateCombined implements Iterator {

    Comparator comp;
    Iterator a, b;
    String na, nb;
    boolean up;

    public iterateCombined(Iterator a, Iterator b, boolean up) {
        this.a = a;
        this.b = b;
        this.up = up;
        this.comp = kelondroMSetTools.fastStringComparator(up);
        // pre-fetch the first element of each input
        nexta();
        nextb();
    }

    // loads the next element of a into na; na becomes null when a is exhausted
    private void nexta() {
        na = a.hasNext() ? (String) a.next() : null;
    }

    // loads the next element of b into nb; nb becomes null when b is exhausted
    private void nextb() {
        nb = b.hasNext() ? (String) b.next() : null;
    }

    public boolean hasNext() {
        return !((na == null) && (nb == null));
    }

    // Returns the next element of the merged sequence (null if both inputs are exhausted).
    public Object next() {
        boolean takeA;
        boolean advanceB;
        if (na == null) {
            // a is exhausted: continue with b only
            takeA = false;
            advanceB = true;
        } else if (nb == null) {
            // b is exhausted: continue with a only
            takeA = true;
            advanceB = false;
        } else {
            // compare the strings
            int c = comp.compare(na, nb);
            takeA = (c == 0) || ((up) && (c < 0));
            // on equality both inputs move on, so the value is handed out only once
            advanceB = (c == 0) || !takeA;
        }
        String s = takeA ? na : nb;
        if (takeA) nexta();
        if (advanceB) nextb();
        return s;
    }

    // removal is not supported; the call is silently ignored
    public void remove() {
    }
}
// Moves the cached container of the given word hash to the backend.
// The container is taken out of cache, hashScore and hashDate while holding the
// cache lock; the write to the backend happens after the lock is released.
// Returns what backend.addEntries reports, or 0 if the hash is not cached.
private int flushKey(String key) {
    plasmaWordIndexEntryContainer flushed;
    long created;
    synchronized (cache) {
        flushed = (plasmaWordIndexEntryContainer) cache.get(key);
        if (flushed == null) {
            // flushing of nonexisting key
            return 0;
        }
        // read the creation time before the bookkeeping for this key is dropped
        created = getCreationTime(key);
        cache.remove(key);
        hashScore.deleteScore(key);
        hashDate.remove(key);
    }
    return backend.addEntries(flushed, created);
}
// Flushes top-scored words to the backend while hashScore holds at least
// maxWords words. The loop ends early when
// - the top-scored word was created less than 9000 milliseconds ago, or
// - the sum of the flushKey results of this call exceeds 100.
// Nothing is flushed if hashScore or cache is empty; an empty hashScore with a
// non-empty cache (or vice versa) is logged as an error.
// Returns the sum of the flushKey results.
private int flushToLimit() {
    final int scoredWords = hashScore.size();
    final int cachedWords = cache.size();
    if ((scoredWords == 0) || (cachedWords == 0)) {
        if ((scoredWords == 0) && (cachedWords == 0)) {
            serverLog.logDebug("PLASMA INDEXING", "flushToLimit: called but cache is empty");
        } else if (scoredWords == 0) {
            serverLog.logError("PLASMA INDEXING", "flushToLimit: hashScore.size=0 but cache.size=" + cachedWords);
        } else {
            serverLog.logError("PLASMA INDEXING", "flushToLimit: hashScore.size=" + scoredWords + " but cache.size=0");
        }
        return 0;
    }

    int flushed = 0;
    synchronized (hashScore) {
        while (hashScore.size() >= maxWords) {
            String candidate = (String) hashScore.getMaxObject();
            Long created = (Long) hashDate.get(candidate);
            int entryCount = hashScore.getScore(candidate);

            // do not flush a word that has been touched just a moment ago
            boolean tooFresh = (created != null) && ((System.currentTimeMillis() - created.longValue()) < 9000);
            if (tooFresh) {
                log.logDebug("key " + candidate + " is too fresh, abandon flush (count=" + entryCount + ", cachesize=" + cache.size() + ")");
                break;
            }

            if (entryCount < 5) {
                log.logWarning("flushing of key " + candidate + " not appropriate (too less entries, count=" + entryCount + "): increase cache size");
            }
            log.logDebug("flushing key " + candidate + ", count=" + entryCount + ", cachesize=" + cache.size());

            flushed += flushKey(candidate);
            if (flushed > 100) break;
        }
    }
    return flushed;
}
// Returns the index entity of the given word hash from the backend.
// Cached entries of that word are flushed to the backend first (flushKey),
// so they are written to the backend before it is read.
public plasmaWordIndexEntity getIndex(String wordHash, boolean deleteIfEmpty) {
    flushKey(wordHash);
    plasmaWordIndexEntity entity = backend.getIndex(wordHash, deleteIfEmpty);
    return entity;
}
// Returns the time stamp stored in hashDate for the given word hash,
// or 0 if no time stamp is stored.
public long getCreationTime(String wordHash) {
    Long stored = (Long) hashDate.get(wordHash);
    return (stored == null) ? 0 : stored.longValue();
}
/**
 * Removes a word completely: first from the in-memory structures
 * (container, score and creation date), then from the backend.
 */
public void deleteIndex(String wordHash) {
    // all three in-memory structures are changed under the cache lock
    synchronized (cache) {
        cache.remove(wordHash);
        hashScore.deleteScore(wordHash);
        hashDate.remove(wordHash);
    }

    // the backend is updated outside of the lock
    backend.deleteIndex(wordHash);
}
/**
 * Removes url references from a word index. The cached container of the word
 * is flushed to the backend first, then the removal is delegated to the backend.
 *
 * @return the value reported by the backend's removeEntries
 */
public int removeEntries(String wordHash, String[] urlHashes, boolean deleteComplete) {
    flushKey(wordHash);
    final int removed = backend.removeEntries(wordHash, urlHashes, deleteComplete);
    return removed;
}
/**
 * Merges the given container into the in-memory cache.
 *
 * flushToLimit() is called first. The entries are then merged into the
 * container cached for the same word hash (a new one is created when none
 * exists). Score and creation date are only updated when at least one entry
 * was new.
 *
 * @return the number of entries that were not yet cached for this word
 */
public int addEntries(plasmaWordIndexEntryContainer container, long creationTime) {
    flushToLimit();

    // put new words into cache
    synchronized (cache) {
        final String hash = container.wordHash();
        plasmaWordIndexEntryContainer cached = (plasmaWordIndexEntryContainer) cache.get(hash);
        if (cached == null) {
            cached = new plasmaWordIndexEntryContainer(hash);
        }
        final int fresh = cached.add(container);
        if (fresh <= 0) {
            // nothing new: leave the bookkeeping untouched
            return fresh;
        }
        cache.put(hash, cached);
        hashScore.addScore(hash, fresh);
        hashDate.put(hash, new Long(creationTime));
        return fresh;
    }
}
/**
 * Adds a single entry to the cached container of a word, creating the
 * container if necessary. Score and creation date are only updated when
 * the entry was not already present in the container.
 */
private void addEntry(String wordHash, plasmaWordIndexEntry newEntry, long creationTime) {
    plasmaWordIndexEntryContainer target = (plasmaWordIndexEntryContainer) cache.get(wordHash);
    if (target == null) {
        target = new plasmaWordIndexEntryContainer(wordHash);
    }
    if (!target.add(newEntry)) {
        return; // already known, nothing to record
    }
    cache.put(wordHash, target);
    hashScore.incScore(wordHash);
    hashDate.put(wordHash, new Long(creationTime));
}
/**
 * Closes the cache by calling dump(waitingSeconds). An IOException raised
 * by the dump is logged and its stack trace printed; it is not propagated.
 */
public void close(int waitingSeconds) {
    try {
        dump(waitingSeconds);
    } catch (IOException dumpFailure) {
        log.logError("unable to dump cache: " + dumpFailure.getMessage());
        dumpFailure.printStackTrace();
    }
}
}

@ -0,0 +1,255 @@
// plasmaWordIndexClassicDB.java
// -----------------------------
// part of YACY
// (C) by Michael Peter Christen; mc@anomic.de
// first published on http://www.anomic.de
// Frankfurt, Germany, 2005
// last major change: 6.5.2005
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
// Using this software in any meaning (reading, learning, copying, compiling,
// running) means that you agree that the Author(s) is (are) not responsible
// for cost, loss of data or any harm that may be caused directly or indirectly
// by usage of this softare or this documentation. The usage of this software
// is on your own risk. The installation and usage (starting/running) of this
// software may allow other people or application to access your computer and
// any attached devices and is highly dependent on the configuration of the
// software which must be done by the user of the software; the author(s) is
// (are) also not responsible for proper configuration and usage of the
// software, even if provoked by documentation provided together with
// the software.
//
// Any changes to this file according to the GPL as documented in the file
// gpl.txt aside this file in the shipment you received can be done to the
// lines that follows this copyright notice here, but changes must not be
// done inside the copyright notive above. A re-distribution must contain
// the intact and unchanged copyright notice.
// Contributions and changes to the program code must be marked as such.
package de.anomic.plasma;
import java.io.File;
import java.io.IOException;
import java.util.*;
import de.anomic.kelondro.*;
import de.anomic.server.serverLog;
import de.anomic.yacy.yacySeedDB;
/**
 * Implementation of plasmaWordIndexInterface that works directly on the
 * per-word index files (plasmaWordIndexEntity) below a database root folder.
 * No caching is done in this class.
 */
public class plasmaWordIndexClassicDB implements plasmaWordIndexInterface {

    // class variables
    private File databaseRoot;
    private serverLog log;
    // NOTE(review): set to 0 in the constructor and never changed in this
    // class, so size() always reports 0 - confirm whether that is intended
    private int size;

    /**
     * @param databaseRoot folder that contains the "WORDS" hierarchy
     * @param log          logger used for error reports
     * @throws IOException declared for callers; not thrown by the current body
     */
    public plasmaWordIndexClassicDB(File databaseRoot, serverLog log) throws IOException {
        this.databaseRoot = databaseRoot;
        this.log = log;
        this.size = 0;
    }

    /** Returns the value of the size field (see the note on that field). */
    public int size() {
        return size;
    }

    /**
     * Returns an iterator over word hashes (Strings) found in the file hierarchy.
     *
     * @param startHash hash to start at; ignored unless it has the common hash length
     * @param up        passed to the string comparator that orders the traversal
     */
    public Iterator wordHashes(String startHash, boolean up) {
        return new iterateFiles(startHash, up);
    }

    /**
     * Depth-first traversal of the folder hierarchy below "WORDS". One entry
     * is always pre-fetched into <code>buffer</code>; a null buffer marks the end.
     */
    public class iterateFiles implements Iterator {

        private ArrayList hierarchy; // contains TreeSet elements, each TreeSet contains path strings of one folder level
        private Comparator comp;     // for string-compare
        private String buffer;       // the prefetch-buffer

        public iterateFiles(String startHash, boolean up) {
            this.hierarchy = new ArrayList();
            this.comp = kelondroMSetTools.fastStringComparator(up);
            // initially fill the hierarchy with the content of the root folder
            String path = "WORDS";
            TreeSet list = list(new File(databaseRoot, path));

            // if we have a start hash then we find the appropriate subdirectory to start
            if ((startHash != null) && (startHash.length() == yacySeedDB.commonHashLength)) {
                delete(startHash.substring(0, 1), list);
                if (list.size() > 0) {
                    hierarchy.add(list);
                    String[] paths = new String[]{startHash.substring(0, 1), startHash.substring(1, 2), startHash.substring(2, 4), startHash.substring(4, 6)};
                    int pathc = 0;
                    // NOTE(review): the set elements are full path strings (see list()),
                    // while paths[pathc] is a bare folder name; delete() compares via
                    // File.getName() but this condition does not - confirm whether the
                    // descent below is ever taken
                    while ((pathc < paths.length) &&
                           (comp.compare((String) list.first(), paths[pathc]) == 0)) {
                        path = path + "/" + paths[pathc];
                        list = list(new File(databaseRoot, path));
                        delete(paths[pathc], list);
                        if (list.size() == 0) break;
                        hierarchy.add(list);
                        pathc++;
                    }
                }
                // skip everything that is ordered before the start hash
                while (((buffer = next0()) != null) && (comp.compare(buffer, startHash) < 0)) {};
            } else {
                hierarchy.add(list);
                buffer = next0();
            }
        }

        /** Removes leading elements of names whose file name is ordered before pattern. */
        private synchronized void delete(String pattern, TreeSet names) {
            String name;
            while ((names.size() > 0) && (comp.compare((new File(name = (String) names.first())).getName(), pattern) < 0)) names.remove(name);
        }

        /** Lists a folder as a sorted set of path strings; the set is empty if the folder cannot be listed. */
        private TreeSet list(File path) {
            TreeSet t = new TreeSet(comp);
            String[] l = path.list();
            if (l != null) for (int i = 0; i < l.length; i++) t.add(path + "/" + l[i]);
            return t;
        }

        /**
         * Fetches the next word hash from the hierarchy, or null at the end.
         * Empty folders met on the way are deleted from disk.
         */
        private synchronized String next0() {
            File f;
            String n;
            TreeSet t;
            do {
                t = null;
                while ((t == null) && (hierarchy.size() > 0)) {
                    t = (TreeSet) hierarchy.get(hierarchy.size() - 1);
                    if (t.size() == 0) {
                        hierarchy.remove(hierarchy.size() - 1); // we step up one hierarchy
                        t = null;
                    }
                }
                if ((hierarchy.size() == 0) || (t.size() == 0)) return null; // this is the end
                // fetch value
                f = new File(n = (String) t.first());
                t.remove(n);
                // if the value represents another folder, we step into the next hierarchy
                if (f.isDirectory()) {
                    t = list(f);
                    if (t.size() == 0) {
                        // the folder is empty, delete it
                        f.delete();
                    } else {
                        hierarchy.add(t);
                    }
                    f = null;
                }
            } while (f == null);
            // the hash is the leading part of the file name
            if ((f == null) || ((n = f.getName()) == null) || (n.length() < yacySeedDB.commonHashLength)) {
                return null;
            } else {
                return n.substring(0, yacySeedDB.commonHashLength);
            }
        }

        public boolean hasNext() {
            return buffer != null;
        }

        /** Returns the pre-fetched hash and pre-fetches the next one that is not ordered before it. */
        public Object next() {
            String r = buffer;
            while (((buffer = next0()) != null) && (comp.compare(buffer, r) < 0)) {};
            return r;
        }

        /** Removal is not implemented; the call is silently ignored. */
        public void remove() {
        }
    }

    /**
     * Opens the index of a word.
     *
     * @return the opened index, or null if opening failed (the error is logged)
     */
    public plasmaWordIndexEntity getIndex(String wordHash, boolean deleteIfEmpty) {
        try {
            return new plasmaWordIndexEntity(databaseRoot, wordHash, deleteIfEmpty);
        } catch (IOException e) {
            log.logError("plasmaWordIndexClassic.getIndex: " + e.getMessage());
            return null;
        }
    }

    /**
     * @return the last-modified time of the word's index file, or -1 if the file does not exist
     */
    public long getCreationTime(String wordHash) {
        File f = plasmaWordIndexEntity.wordHash2path(databaseRoot, wordHash);
        if (f.exists()) return f.lastModified(); else return -1;
    }

    /** Removes the index of a word; an IOException is logged and not propagated. */
    public void deleteIndex(String wordHash) {
        try {
            plasmaWordIndexEntity.removePlasmaIndex(databaseRoot, wordHash);
        } catch (IOException e) {
            log.logError("plasmaWordIndexClassic.deleteIndex: " + e.getMessage());
        }
    }

    /**
     * Removes all given url hashes from a single word index.
     *
     * @return the number of deletions; 0 if the index could not be opened
     */
    public int removeEntries(String wordHash, String[] urlHashes, boolean deleteComplete) {
        plasmaWordIndexEntity pi = getIndex(wordHash, true);
        // getIndex reports failure with null; the error has already been logged there
        if (pi == null) return 0;
        int count = 0;
        try {
            int remaining;
            try {
                for (int i = 0; i < urlHashes.length; i++)
                    if (pi.removeEntry(urlHashes[i], deleteComplete)) count++;
                remaining = pi.size();
            } finally {
                // close the index even if a removal failed
                pi.close();
            }
            // check if we can remove the index completely
            if ((deleteComplete) && (remaining == 0)) deleteIndex(wordHash);
        } catch (IOException e) {
            log.logError("plasmaWordIndexClassic.removeEntries: " + e.getMessage());
        }
        return count;
    }

    /**
     * Writes all entries of a container into the index file of its word.
     * The creationTime argument is not used by this implementation.
     *
     * @return the number of entries for which addEntry reported true;
     *         0 for a null or empty container or if an IOException occurred
     */
    public int addEntries(plasmaWordIndexEntryContainer container, long creationTime) {
        if ((container == null) || (container.size() == 0)) return 0;
        try {
            // open file
            plasmaWordIndexEntity pi = new plasmaWordIndexEntity(databaseRoot, container.wordHash(), false);
            int count = 0;
            try {
                Iterator i = container.entries();
                while (i.hasNext()) {
                    if (pi.addEntry((plasmaWordIndexEntry) i.next())) count++;
                }
            } finally {
                // close the index even if a write failed
                pi.close();
            }
            return count;
        } catch (IOException e) {
            log.logError("plasmaWordIndexClassic.addEntries: " + e.getMessage());
            return 0;
        }
    }

    /** Nothing to do: this class keeps no open resources between calls. */
    public void close(int waitingSeconds) {
    }
}

@ -1,5 +1,5 @@
// plasmaIndex.java
// -----------------------
// plasmaWordIndexEntity.java
// --------------------------
// part of YACY
// (C) by Michael Peter Christen; mc@anomic.de
// first published on http://www.anomic.de
@ -154,12 +154,11 @@ public class plasmaWordIndexEntity {
if (theTmpMap == null) return (theIndex.get(entry.getUrlHash().getBytes()) != null); else return (theTmpMap.containsKey(entry.getUrlHash()));
}
public void addEntry(plasmaWordIndexEntry entry) throws IOException {
public boolean addEntry(plasmaWordIndexEntry entry) throws IOException {
if (theTmpMap == null) {
theIndex.put(entry.getUrlHash().getBytes(), entry.toEncodedForm(false).getBytes());
//System.out.println(theIndex.toString()); // debug
return (theIndex.put(entry.getUrlHash().getBytes(), entry.toEncodedForm(false).getBytes()) == null);
} else {
theTmpMap.put(entry.getUrlHash(), entry);
return (theTmpMap.put(entry.getUrlHash(), entry) == null);
}
}

@ -0,0 +1,97 @@
// plasmaIndexEntryContainer.java
// ------------------------------
// part of YaCy
// (C) by Michael Peter Christen; mc@anomic.de
// first published on http://www.anomic.de
// Frankfurt, Germany, 2005
// last major change: 07.05.2005
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
// Using this software in any meaning (reading, learning, copying, compiling,
// running) means that you agree that the Author(s) is (are) not responsible
// for cost, loss of data or any harm that may be caused directly or indirectly
// by usage of this softare or this documentation. The usage of this software
// is on your own risk. The installation and usage (starting/running) of this
// software may allow other people or application to access your computer and
// any attached devices and is highly dependent on the configuration of the
// software which must be done by the user of the software; the author(s) is
// (are) also not responsible for proper configuration and usage of the
// software, even if provoked by documentation provided together with
// the software.
//
// Any changes to this file according to the GPL as documented in the file
// gpl.txt aside this file in the shipment you received can be done to the
// lines that follows this copyright notice here, but changes must not be
// done inside the copyright notive above. A re-distribution must contain
// the intact and unchanged copyright notice.
// Contributions and changes to the program code must be marked as such.
package de.anomic.plasma;
import java.util.*;
/**
 * In-memory collection of plasmaWordIndexEntry objects that belong to one
 * word hash. Entries are keyed by their url hash, so every url appears at
 * most once.
 */
public class plasmaWordIndexEntryContainer {

    private String wordHash;
    private HashMap container; // a urlhash/plasmaWordIndexEntry - relation

    /** Creates an empty container for the given word hash. */
    public plasmaWordIndexEntryContainer(String wordHash) {
        this.container = new HashMap();
        this.wordHash = wordHash;
    }

    /** Number of entries currently held. */
    public int size() {
        return this.container.size();
    }

    /** The word hash this container was created for. */
    public String wordHash() {
        return this.wordHash;
    }

    /**
     * Adds an entry unless one with the same url hash is already present;
     * an existing entry is never replaced.
     *
     * @return true if the entry was added, false if it already existed
     */
    public boolean add(plasmaWordIndexEntry entry) {
        final String urlHash = entry.getUrlHash();
        final boolean isNew = !container.containsKey(urlHash);
        if (isNew) {
            container.put(urlHash, entry);
        }
        return isNew;
    }

    /**
     * Adds every entry of another container.
     *
     * @return the number of entries that were new to this container
     */
    public int add(plasmaWordIndexEntryContainer c) {
        int added = 0;
        for (Iterator it = c.entries(); it.hasNext();) {
            if (add((plasmaWordIndexEntry) it.next())) {
                added++;
            }
        }
        return added;
    }

    /** Returns an iterator of plasmaWordIndexEntry objects. */
    public Iterator entries() {
        return container.values().iterator();
    }

    /** Convenience factory: a new container that holds exactly the given entry. */
    public static plasmaWordIndexEntryContainer instantContainer(String wordHash, plasmaWordIndexEntry entry) {
        final plasmaWordIndexEntryContainer single = new plasmaWordIndexEntryContainer(wordHash);
        single.add(entry);
        return single;
    }

    public String toString() {
        return "C[" + wordHash + "] has " + container.size() + " entries";
    }
}

@ -1,275 +0,0 @@
// plasmaWordIndexFileCache.java
// -----------------------------
// part of YACY
// (C) by Michael Peter Christen; mc@anomic.de
// first published on http://www.anomic.de
// Frankfurt, Germany, 2004
// last major change: 22.01.2004
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
// Using this software in any meaning (reading, learning, copying, compiling,
// running) means that you agree that the Author(s) is (are) not responsible
// for cost, loss of data or any harm that may be caused directly or indirectly
// by usage of this softare or this documentation. The usage of this software
// is on your own risk. The installation and usage (starting/running) of this
// software may allow other people or application to access your computer and
// any attached devices and is highly dependent on the configuration of the
// software which must be done by the user of the software; the author(s) is
// (are) also not responsible for proper configuration and usage of the
// software, even if provoked by documentation provided together with
// the software.
//
// Any changes to this file according to the GPL as documented in the file
// gpl.txt aside this file in the shipment you received can be done to the
// lines that follows this copyright notice here, but changes must not be
// done inside the copyright notive above. A re-distribution must contain
// the intact and unchanged copyright notice.
// Contributions and changes to the program code must be marked as such.
/*
The plasmaIndexCache manages a database table with a list of
indexEntries in it. This is done in a completely different fashion
as organized by the plasmaIndex tables. The entries are not
sorted and just stored in a buffer.
Whenever an index is retrieved during a search, its buffer first
is flushed into the corresponding index table, so that it can be
sorted into the remaining index entry elements.
The cache database consist of
- the word hash as primary key
- one column with a one-byte counter
- a number of more columns with indexEntry elements
*/
// compile with
// javac -classpath classes -sourcepath source -d classes -g source/de/anomic/plasma/*.java
package de.anomic.plasma;
import java.io.File;
import java.io.IOException;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Vector;
import de.anomic.kelondro.kelondroException;
import de.anomic.kelondro.kelondroTree;
import de.anomic.server.serverLog;
// File-based write buffer for word indexes. Each row of the kelondroTree file
// "indexCache.db" holds a word hash, a one-byte entry counter and up to
// 'buffers' encoded index entries. Buffered entries are written into the
// word's plasmaWordIndexEntity when the row is full or the index is requested.
public class plasmaWordIndexFileCache {
private static final String indexCacheFileName = "indexCache.db";
private static final int buffers = 50; // number of buffered entries per word
// class variables
private File databaseRoot;
private kelondroTree indexCache;
private int bufferkb;
// Opens the cache file below databaseRoot if it exists, otherwise creates it.
// bufferkb is multiplied by 0x400 before it is handed to kelondroTree.
public plasmaWordIndexFileCache(File databaseRoot, int bufferkb) throws IOException {
this.databaseRoot = databaseRoot;
this.bufferkb = bufferkb;
File indexCacheFile = new File(databaseRoot, indexCacheFileName);
if (indexCacheFile.exists()) {
// simply open the file
indexCache = new kelondroTree(indexCacheFile, bufferkb * 0x400);
} else {
createCacheFile(indexCacheFile);
}
}
// Closes, deletes and re-creates the cache file. Everything that was buffered
// in the file is discarded. If re-creation fails, indexCache is left as null.
private void resetCacheFile() {
// this has to be used in emergencies only
// it can happen that there is a serious db inconsistency; in that case we re-create the indexCache
try { indexCache.close(); } catch (IOException e) {}
File indexCacheFile = new File(databaseRoot, indexCacheFileName);
if (indexCacheFile.exists()) indexCacheFile.delete();
try {
createCacheFile(indexCacheFile);
} catch (IOException e) {
de.anomic.server.serverLog.logError("PLASMA", "plasmaWordIndexFileCache.resetCacheFile: serious failure creating the cache file: " + e.getMessage());
indexCache = null;
}
}
// Creates a new cache file with the column layout:
// column 0 = word hash, column 1 = one-byte counter, columns 2.. = one entry each
// (url hash followed by the short attribute space).
private void createCacheFile(File indexCacheFile) throws IOException {
// create a new file
int[] columns = new int[buffers + 2];
columns[0] = plasmaWordIndexEntry.wordHashLength;
columns[1] = 1;
for (int i = 0; i < buffers; i++) columns[i + 2] = plasmaCrawlLURL.urlHashLength + plasmaWordIndexEntry.attrSpaceShort;
indexCache = new kelondroTree(indexCacheFile, bufferkb * 0x400, columns);
}
// Closes the cache file and drops the reference to it.
protected void close() throws IOException {
indexCache.close();
indexCache = null;
}
// Returns the cache row of a word. If the row does not exist, or reading it
// throws any exception, a fresh row with counter 0 is constructed in memory
// (it is not written to the file here).
private byte[][] getCache(String wordHash) throws IOException {
// read one line from the cache; if none exists: construct one
byte[][] row;
try {
row = indexCache.get(wordHash.getBytes());
} catch (Exception e) {
// we had some negativeSeekOffsetExceptions here, and also loops may cause this
// in that case the indexCache is corrupt
System.out.println("Error in plasmaWordINdexFileCache.getCache: index for hash " + wordHash + " is corrupt:" + e.toString());
//e.printStackTrace();
row = null;
}
if (row == null) {
row = new byte[indexCache.columns()][];
row[0] = wordHash.getBytes();
row[1] = new byte[1];
row[1][0] = (byte) 0;
}
return row;
}
// Returns an iterator over the rows of the cache file; a null wordHash is
// passed on as a null start key. On a kelondroException the index of wordHash
// is deleted and an empty iterator is returned.
// NOTE(review): in that error path wordHash may be null, and deleteComplete()
// calls wordHash.getBytes() - confirm that this cannot happen.
protected Iterator wordHashes(String wordHash, boolean up) throws IOException {
try {
return indexCache.rows(up, false, (wordHash == null) ? null : wordHash.getBytes());
} catch (kelondroException e) {
de.anomic.server.serverLog.logError("PLASMA", "kelondro error in plasmaWordIndexFileCache: " + e.getMessage() + "; deleting index for " + wordHash);
deleteComplete(wordHash);
return new HashSet().iterator();
}
}
// Returns the index of a word after its buffered entries (if any) have been
// written into it and the cache row has been removed.
protected plasmaWordIndexEntity getIndex(String wordHash, boolean deleteIfEmpty) throws IOException {
// first flush the index cache, if there is any for that word hash
byte[][] row = indexCache.get(wordHash.getBytes());
if (row != null) {
int entries = (int) row[1][0];
if (entries != 0) flushCache(row, null); // if the cache has entries, flush it
indexCache.remove(wordHash.getBytes()); // delete the cache index row; suppose to be empty now
}
// then return the index from the uncached file (with new entries)
return new plasmaWordIndexEntity(databaseRoot, wordHash, deleteIfEmpty);
}
// Buffers new entries for a word. If the row cannot take them, the buffered
// and the new entries are flushed together into the word's index; otherwise
// they are appended to the row and the row is written back. A failing write
// is logged and answered by resetting the whole cache file.
// NOTE(review): after the flush branch the counter is only reset in the local
// row array; the row is not written back in this method - confirm.
protected void addEntriesToIndex(String wordHash, Vector /* of plasmaIndexEntry */ newEntries) throws IOException {
//System.out.println("* adding " + newEntries.size() + " cached word index entries for word " + wordHash); // debug
// fetch the index cache
if (newEntries.size() == 0) return;
byte[][] row = getCache(wordHash);
int entries = (int) row[1][0];
// check if the index cache is full
if (entries + 2 + newEntries.size() >= indexCache.columns()) {
flushCache(row, newEntries); // and put in new values
entries = 0;
row[1][0] = (byte) 0; // set number of entries to zero
} else {
// put in the new values
String newEntry;
for (int i = 0; i < newEntries.size(); i++) {
newEntry = ((plasmaWordIndexEntry) newEntries.elementAt(i)).getUrlHash() + ((plasmaWordIndexEntry) newEntries.elementAt(i)).toEncodedForm(false);
row[entries + 2] = newEntry.getBytes();
entries++;
}
row[1][0] = (byte) entries;
try {
indexCache.put(row);
} catch (kelondroException e) {
// this is a very bad case; a database inconsistency occurred
serverLog.logError("PLASMA", "fatal error in plasmaWordIndexFileCache.addEntriesToIndex: write of " + wordHash + " to index cache failed - " + e.getMessage() + " - indexCache.db deleted");
resetCacheFile();
} catch (IOException e) {
// this is a very bad case; a database inconsistency occurred
serverLog.logError("PLASMA", "fatal error in plasmaWordIndexFileCache.addEntriesToIndex: write of " + wordHash + " to index cache failed - " + e.getMessage() + " - indexCache.db deleted");
resetCacheFile();
}
}
// finished!
}
// Deletes both the index file of a word and its cache row.
protected void deleteComplete(String wordHash) throws IOException {
plasmaWordIndexEntity.removePlasmaIndex(databaseRoot, wordHash);
indexCache.remove(wordHash.getBytes());
}
// Removes url hashes from a word index (buffered entries are flushed first by
// getIndex). With deleteComplete set, an index that became empty is removed
// together with its cache row. Returns the number of deletions.
protected int removeEntries(String wordHash, String[] urlHashes, boolean deleteComplete) throws IOException {
// removes all given url hashes from a single word index. Returns number of deletions.
plasmaWordIndexEntity pi = getIndex(wordHash, true);
int count = 0;
for (int i = 0; i < urlHashes.length; i++) if (pi.removeEntry(urlHashes[i], deleteComplete)) count++;
int size = pi.size();
pi.close(); pi = null;
// check if we can remove the index completely
if ((deleteComplete) && (size == 0)) {
// remove index
if (!(plasmaWordIndexEntity.removePlasmaIndex(databaseRoot, wordHash)))
System.out.println("DEBUG: cannot remove index file for word hash " + wordHash);
// remove cache
indexCache.remove(wordHash.getBytes());
}
return count;
}
// Writes the entries stored in a cache row, followed by the optional additional
// entries, into the index file of the row's word. The row itself is not
// modified or removed here; that is left to the caller.
private synchronized void flushCache(byte[][] row, Vector indexEntries) throws IOException {
String wordHash = new String(row[0]);
int entries = (int) row[1][0];
if ((entries == 0) && ((indexEntries == null) || (indexEntries.size() == 0))) return;
// open file
plasmaWordIndexEntity pi = new plasmaWordIndexEntity(databaseRoot, wordHash, false);
// write from array
plasmaWordIndexEntry entry;
for (int i = 0; i < entries; i++) {
// each column holds the url hash followed by the encoded attributes
entry = new plasmaWordIndexEntry(new String(row[i + 2], 0, plasmaCrawlLURL.urlHashLength),
new String(row[i + 2], plasmaCrawlLURL.urlHashLength, plasmaWordIndexEntry.attrSpaceShort));
pi.addEntry(entry);
}
// write from vector
if (indexEntries != null) {
for (int i = 0; i < indexEntries.size(); i++)
pi.addEntry((plasmaWordIndexEntry) indexEntries.elementAt(i));
}
// close and return
pi.close();
pi = null;
}
// Number of entries buffered for one word (the counter byte of its row); 0 if there is no row.
private int size(String wordHash) throws IOException {
// return number of entries in specific cache
byte[][] row = indexCache.get(wordHash.getBytes());
if (row == null) return 0;
return (int) row[1][0];
}
// Number reported by the cache file's size(); 0 if the cache file is not available.
protected int size() {
if (indexCache == null) return 0; else return indexCache.size();
}
/*
private plasmaIndex getIndexF(String wordHash) throws IOException {
return new plasmaIndex(databaseRoot, wordHash);
}
private void addEntryToIndexF(String wordHash, plasmaIndexEntry entry) throws IOException {
plasmaIndex pi = new plasmaIndex(databaseRoot, wordHash);
pi.addEntry(entry);
pi.close();
}
*/
}

@ -0,0 +1,62 @@
// plasmaWordIndexInterface.java
// -----------------------------
// part of YACY
// (C) by Michael Peter Christen; mc@anomic.de
// first published on http://www.anomic.de
// Frankfurt, Germany, 2005
// last major change: 6.5.2005
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
// Using this software in any meaning (reading, learning, copying, compiling,
// running) means that you agree that the Author(s) is (are) not responsible
// for cost, loss of data or any harm that may be caused directly or indirectly
// by usage of this softare or this documentation. The usage of this software
// is on your own risk. The installation and usage (starting/running) of this
// software may allow other people or application to access your computer and
// any attached devices and is highly dependent on the configuration of the
// software which must be done by the user of the software; the author(s) is
// (are) also not responsible for proper configuration and usage of the
// software, even if provoked by documentation provided together with
// the software.
//
// Any changes to this file according to the GPL as documented in the file
// gpl.txt aside this file in the shipment you received can be done to the
// lines that follows this copyright notice here, but changes must not be
// done inside the copyright notive above. A re-distribution must contain
// the intact and unchanged copyright notice.
// Contributions and changes to the program code must be marked as such.
package de.anomic.plasma;
import java.util.*;
/**
 * Common operations of a word index store. The interface members are
 * implicitly public.
 */
public interface plasmaWordIndexInterface {

    /** Size of the index as reported by the implementation. */
    int size();

    /** Iterator over word hashes, starting at startWordHash; 'up' selects the direction. */
    Iterator wordHashes(String startWordHash, boolean up);

    /** Returns the index entity of a word. */
    plasmaWordIndexEntity getIndex(String wordHash, boolean deleteIfEmpty);

    /** Creation time that the implementation records for a word. */
    long getCreationTime(String wordHash);

    /** Removes the complete index of a word. */
    void deleteIndex(String wordHash);

    /** Removes the given url hashes from the index of a word and returns a count. */
    int removeEntries(String wordHash, String[] urlHashes, boolean deleteComplete);

    /** Adds all entries of a container and returns a count. */
    int addEntries(plasmaWordIndexEntryContainer newEntries, long creationTime);

    /** Shuts the index down; waitingSeconds is a time bound given by the caller. */
    void close(int waitingSeconds);
}

@ -1,253 +0,0 @@
// plasmaIndexRAMCache.java
// -----------------------
// part of YACY
// (C) by Michael Peter Christen; mc@anomic.de
// first published on http://www.anomic.de
// Frankfurt, Germany, 2004
// last major change: 22.12.2004
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
// Using this software in any meaning (reading, learning, copying, compiling,
// running) means that you agree that the Author(s) is (are) not responsible
// for cost, loss of data or any harm that may be caused directly or indirectly
// by usage of this softare or this documentation. The usage of this software
// is on your own risk. The installation and usage (starting/running) of this
// software may allow other people or application to access your computer and
// any attached devices and is highly dependent on the configuration of the
// software which must be done by the user of the software; the author(s) is
// (are) also not responsible for proper configuration and usage of the
// software, even if provoked by documentation provided together with
// the software.
//
// Any changes to this file according to the GPL as documented in the file
// gpl.txt aside this file in the shipment you received can be done to the
// lines that follows this copyright notice here, but changes must not be
// done inside the copyright notive above. A re-distribution must contain
// the intact and unchanged copyright notice.
// Contributions and changes to the program code must be marked as such.
// compile with
// javac -classpath classes -sourcepath source -d classes -g source/de/anomic/plasma/*.java
package de.anomic.plasma;
import java.io.File;
import java.io.IOException;
import java.util.Iterator;
import java.util.TreeMap;
import java.util.Vector;
import de.anomic.kelondro.kelondroMScoreCluster;
import de.anomic.server.serverLog;
import de.anomic.yacy.yacySeedDB;
public class plasmaWordIndexRAMCache extends Thread {
// key strings built once by the static initializer
static String minKey, maxKey;
// class variables
// word hash -> Vector of entries waiting to be flushed (see flushKey)
TreeMap cache;
// score kept per cached word hash; used to select the key to flush
kelondroMScoreCluster hashScore;
// file cache that receives the flushed entries
plasmaWordIndexFileCache pic;
// set by close() to make run() leave its main loop
boolean terminate;
// time in milliseconds until which run() may continue its final flush
long terminateUntil;
// word limit, set to 1000 by the constructor and changed via setMaxWords
int maxWords;
// Builds the two boundary keys: maxKey consists of commonHashLength 'z'
// characters, minKey of commonHashLength '-' characters.
// (Presumably these are the largest/smallest characters of the hash alphabet - TODO confirm.)
static {
    maxKey = "";
    for (int i = 0; i < yacySeedDB.commonHashLength; i++) maxKey += 'z';
    minKey = "";
    // fixed: this loop used to append to maxKey, which left minKey empty
    for (int i = 0; i < yacySeedDB.commonHashLength; i++) minKey += '-';
}
/**
 * Creates an empty RAM cache on top of a file cache located below databaseRoot.
 * The word limit starts at 1000.
 *
 * @throws IOException if the file cache cannot be opened or created
 */
public plasmaWordIndexRAMCache(File databaseRoot, int bufferkb) throws IOException {
    // the file cache is opened first; it is the only step that can fail
    this.pic = new plasmaWordIndexFileCache(databaseRoot, bufferkb);

    // in-memory bookkeeping
    this.cache = new TreeMap();
    this.hashScore = new kelondroMScoreCluster();

    // control state
    this.maxWords = 1000;
    this.terminate = false;
}
/** Returns the score of the object that hashScore reports as its maximum. */
public int maxURLinWordCache() {
    final Object top = hashScore.getMaxObject();
    return hashScore.getScore(top);
}
/** Returns the number of words currently held in the RAM cache. */
public int wordCacheRAMSize() {
    final int words = cache.size();
    return words;
}
/**
 * Replaces the word limit of the cache.
 *
 * @param maxWords the new limit
 */
public void setMaxWords(int maxWords) {
    this.maxWords = maxWords;
}
// Background thread body: flushes cached words one at a time until terminate
// is set, then performs a time-bounded final flush, closes the file cache and
// drops the in-memory structures.
public void run() {
serverLog.logSystem("PLASMA INDEXING", "started word cache management");
int check;
// permanently flush cache elements
while (!(terminate)) {
// with fewer than 100 scored words, wait 10 seconds before flushing
if (hashScore.size() < 100) try {Thread.currentThread().sleep(10000);} catch (InterruptedException e) {}
while ((!(terminate)) && (cache != null) && (hashScore.size() > 0)) try {
check = hashScore.size();
// flush the key with the minimum score
flushSpecific(false);
//serverLog.logDebug("PLASMA INDEXING", "single flush. bevore=" + check + "; after=" + hashScore.size());
// the pause gets shorter the more words are waiting
try {Thread.currentThread().sleep(10 + ((maxWords / 10) / (1 + hashScore.size())));} catch (InterruptedException e) {}
} catch (IOException e) {
// an I/O failure ends the thread; the cache reference is dropped
serverLog.logError("PLASMA INDEXING", "PANIK! exception in main cache loop: " + e.getMessage());
e.printStackTrace();
terminate = true;
cache = null;
}
}
serverLog.logSystem("PLASMA INDEXING", "CATCHED TERMINATION SIGNAL: start final flush");
// close all;
try {
// first flush everything
// terminateUntil is set by close(); the loop stops when that time is reached
// NOTE(review): flushSpecific reads cache.size(); after the failure path above
// cache is null - confirm that this loop cannot run in that state
while ((hashScore.size() > 0) && (System.currentTimeMillis() < terminateUntil)) {
flushSpecific(false);
}
// then close file cache:
pic.close();
} catch (IOException e) {
serverLog.logDebug("PLASMA INDEXING", "interrupted final flush: " + e.toString());
}
// report
if (hashScore.size() == 0)
serverLog.logSystem("PLASMA INDEXING", "finished final flush; flushed all words");
else
serverLog.logError("PLASMA INDEXING", "terminated final flush; " + hashScore.size() + " words lost");
// delete data
// close() polls for cache == null to detect that this thread has finished
cache = null;
hashScore = null;
}
/**
 * Requests termination of the cache management thread and waits for its
 * final flush.
 *
 * The waiting time is 10 seconds plus one second per five cached words,
 * limited by waitingBoundSeconds. The method returns when run() has released
 * the cache (cache == null) or when the waiting time is used up; it polls in
 * steps of five seconds.
 *
 * @param waitingBoundSeconds upper bound of the waiting time in seconds
 */
public void close(int waitingBoundSeconds) {
    // read the field once: the flush thread nulls it when it has finished
    java.util.Map pending = cache;
    int waitingtime = 10 + (((pending == null) ? 0 : pending.size()) / 5); // seconds
    if (waitingtime > waitingBoundSeconds) waitingtime = waitingBoundSeconds; // upper bound
    // the deadline must be set BEFORE the terminate flag is raised: run() reads
    // terminateUntil as soon as it sees the flag, and a deadline that is still
    // unset would make it skip the final flush completely.
    // 1000L avoids an int overflow for large bounds.
    this.terminateUntil = System.currentTimeMillis() + (waitingtime * 1000L);
    terminate = true;
    // wait until termination is done
    while (waitingtime > 0) {
        pending = cache;
        if (pending == null) break; // flush thread has finished
        serverLog.logDebug("PLASMA INDEXING", "final word flush; cache.size=" + pending.size() + "; time-out in " + waitingtime + " seconds");
        try {
            Thread.sleep(5000);
        } catch (InterruptedException e) {
            // ignored on purpose: the loop is bounded by waitingtime anyway
        }
        waitingtime -= 5;
    }
}
/**
 * Flushes one word from the RAM cache to the file cache, chosen by score.
 *
 * Nothing is flushed (and 0 is returned) if the score cluster and the cache
 * are both empty, or if only one of them is empty; the latter is logged as
 * an error because the two structures are expected to hold the same words.
 *
 * @param greatest if true the maximum-score word is flushed, otherwise the
 *                 minimum-score word
 * @return the number of entries written for the flushed word, 0 if none
 * @throws IOException if writing to the file cache fails
 */
private int flushSpecific(boolean greatest) throws IOException {
    final boolean noScores = (hashScore.size() == 0);
    final boolean noWords = (cache.size() == 0);
    if (noScores && noWords) {
        serverLog.logDebug("PLASMA INDEXING", "flushSpecific: called but cache is empty");
        return 0;
    }
    if (noScores) {
        // scores are gone but words remain
        serverLog.logError("PLASMA INDEXING", "flushSpecific: hashScore.size=0 but cache.size=" + cache.size());
        return 0;
    }
    if (noWords) {
        // words are gone but scores remain
        serverLog.logError("PLASMA INDEXING", "flushSpecific: hashScore.size=" + hashScore.size() + " but cache.size=0");
        return 0;
    }
    Object selected;
    if (greatest) {
        selected = hashScore.getMaxObject();
    } else {
        selected = hashScore.getMinObject();
    }
    return flushKey((String) selected, "flushSpecific");
}
/**
 * Moves all cached entries of one word from the RAM cache to the file cache.
 *
 * The lookup and the removal are done in one step under the cache lock, so
 * that two threads flushing the same key cannot both obtain the entry list
 * and write it to the file cache twice.
 *
 * @param key    the word hash to flush
 * @param caller name of the calling method; not used by this method
 * @return the number of entries handed to the file cache, 0 if the key is
 *         not in the RAM cache
 * @throws IOException if writing to the file cache fails
 */
private int flushKey(String key, String caller) throws IOException {
    Vector v;
    synchronized (cache) {
        // TreeMap.remove returns the previous value, or null for an unknown key
        v = (Vector) cache.remove(key);
        if (v == null) return 0; // flushing of nonexisting key
        hashScore.deleteScore(key);
    }
    // file access happens outside the lock
    pic.addEntriesToIndex(key, v);
    return v.size();
}
/**
 * Returns an iterator over word hashes, starting at a given hash.
 *
 * The iterator is served by the file cache, so up to count cached words at
 * or after the start hash are flushed to the file cache first. This may
 * flush more than strictly needed, which is accepted.
 *
 * @param wordHash the start hash, or null to start at minKey
 * @param count    the maximum number of cached words to flush beforehand
 * @return the word hash iterator of the file cache for the start hash
 * @throws IOException if flushing or reading the file cache fails
 */
public synchronized Iterator wordHashesMem(String wordHash, int count) throws IOException {
    String fromKey;
    if (wordHash == null) {
        fromKey = minKey;
    } else {
        fromKey = wordHash;
    }
    // private copy of the key range: flushKey changes the cache while we walk the copy
    TreeMap range = new TreeMap(cache.subMap(fromKey, maxKey));
    int remaining = range.size();
    if (remaining > count) remaining = count;
    Iterator keys = range.keySet().iterator();
    while (remaining > 0) {
        flushKey((String) keys.next(), "getSequentialWordHashesMem");
        remaining--;
    }
    // finally return the result from the underlying hash list:
    return pic.wordHashes(wordHash, true);
}
/**
 * Returns the index entity of a word from the file cache.
 * Entries of that word which are still in the RAM cache are flushed first.
 *
 * @param wordHash      the word hash to look up
 * @param deleteIfEmpty passed through to the file cache
 * @return the entity as returned by the file cache
 * @throws IOException if flushing or reading the file cache fails
 */
public plasmaWordIndexEntity getIndexMem(String wordHash, boolean deleteIfEmpty) throws IOException {
    // write pending RAM entries of this word to the file cache first
    flushKey(wordHash, "getIndexMem");
    final plasmaWordIndexEntity entity = pic.getIndex(wordHash, deleteIfEmpty);
    return entity;
}
/**
 * Adds one index entry for a word to the RAM cache.
 *
 * Before the entry is added, maximum-score words are flushed to the file
 * cache while the score cluster holds more than maxWords words. The flushing
 * stops as soon as a flush makes no progress; otherwise an inconsistent
 * cache state, or a maxWords value below the current size of an empty cache,
 * would keep this loop running forever while holding the hashScore lock.
 *
 * @param wordHash the word hash the entry belongs to
 * @param entry    the index entry to cache
 * @return the number of entries that were flushed to make space
 * @throws IOException if flushing to the file cache fails
 */
public int addEntryToIndexMem(String wordHash, plasmaWordIndexEntry entry) throws IOException {
    // make space for new words
    int flushc = 0;
    synchronized (hashScore) {
        while (hashScore.size() > maxWords) {
            int flushed = flushSpecific(true);
            if (flushed == 0) break; // no progress possible, do not spin
            flushc += flushed;
        }
    }
    // put new words into cache
    synchronized (cache) {
        Vector v = (Vector) cache.get(wordHash);
        if (v == null) {
            // first entry for this word: register a new list
            v = new Vector();
            cache.put(wordHash, v);
        }
        v.add(entry);
        hashScore.incScore(wordHash);
    }
    return flushc;
}
/**
 * Removes a word completely: from the RAM cache, from the score cluster and
 * from the file cache.
 *
 * The RAM structures are changed under the cache lock, the same lock that
 * flushKey and addEntryToIndexMem hold when they change them; the method
 * lock alone does not exclude those methods.
 *
 * @param wordHash the word hash to delete
 * @throws IOException if the deletion in the file cache fails
 */
public synchronized void deleteComplete(String wordHash) throws IOException {
    synchronized (cache) {
        cache.remove(wordHash);
        hashScore.deleteScore(wordHash);
    }
    pic.deleteComplete(wordHash);
}
/**
 * Removes URL references from the index of a word.
 * Cached entries of the word are flushed to the file cache first, then the
 * removal is done by the file cache.
 *
 * @param wordHash       the word hash whose index is changed
 * @param urlHashes      the URL hashes to remove
 * @param deleteComplete passed through to the file cache
 * @return the result of the file cache's removeEntries call
 * @throws IOException if flushing or the removal fails
 */
public int removeEntriesMem(String wordHash, String[] urlHashes, boolean deleteComplete) throws IOException {
    // the file cache must see all entries of this word before it removes some
    flushKey(wordHash, "removeEntriesMem");
    final int removed = pic.removeEntries(wordHash, urlHashes, deleteComplete);
    return removed;
}
/**
 * Returns a lower bound for the number of words in the index.
 *
 * The exact number would require flushing the complete RAM cache. Instead
 * the larger of the two sizes (score cluster, file cache) is returned; the
 * real number cannot be smaller than that.
 *
 * @return the larger of hashScore.size() and pic.size(), or 0 if either
 *         structure is null
 */
public int sizeMin() {
    if (hashScore == null) return 0;
    if (pic == null) return 0;
    final int ramWords = hashScore.size();
    final int fileWords = pic.size();
    if (ramWords < fileWords) {
        return fileWords;
    }
    return ramWords;
}
}

@ -1,4 +1,4 @@
// yacyClient.java
// yacyClient.java
// -------------------------------------
// (C) by Michael Peter Christen; mc@anomic.de
// first published on http://www.anomic.de
@ -55,115 +55,116 @@ import de.anomic.plasma.plasmaSearch;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.plasma.plasmaWordIndexEntity;
import de.anomic.plasma.plasmaWordIndexEntry;
import de.anomic.plasma.plasmaWordIndexEntryContainer;
import de.anomic.server.serverCore;
import de.anomic.server.serverObjects;
import de.anomic.tools.crypt;
import de.anomic.tools.nxTools;
public class yacyClient {
public static int publishMySeed(String address, String otherHash) {
// this is called to enrich the seed information by
// - own address (if peer is behind a nat/router)
// - check peer type (virgin/junior/senior/principal)
// to do this, we send a 'Hello' to another peer
// this carries the following information:
// 'iam' - own hash
// 'youare' - remote hash, to verify that we are correct
// 'key' - a session key that the remote peer may use to answer
// and the own seed string
// we expect the following information to be send back:
// - 'yourip' the ip of the connection peer (we)
// - 'yourtype' the type of this peer that the other peer checked by asking for a specific word
// and the remote seed string
// the number of new seeds are returned
// one exceptional failure case is when we know the other peer's hash, the other peer responds correctly
// but it appears to be another peer by comparison of the other peer's hash
// this works of course only if we know the other peer's hash.
String key = crypt.randomSalt();
HashMap result = null;
try {
/*
URL url = new URL("http://" + address + "/yacy/hello.html?iam=" +
yacyCore.seedCache.mySeed.hash +
"&pattern=&count=20" +
"&key=" + key + "&seed=" + yacyCore.seedCache.mySeed.genSeedStr(key));
yacyCore.log.logDebug("HELLO to URL " + url.toString());
result = nxTools.table(httpc.wget(url,
10000, null, null, yacyCore.seedCache.sb.remoteProxyHost, yacyCore.seedCache.sb.remoteProxyPort));
*/
URL url = new URL("http://" + address + "/yacy/hello.html");
serverObjects obj = new serverObjects();
obj.put("iam", yacyCore.seedDB.mySeed.hash);
obj.put("pattern", "");
obj.put("count", "20");
obj.put("key", key);
// this is called to enrich the seed information by
// - own address (if peer is behind a nat/router)
// - check peer type (virgin/junior/senior/principal)
// to do this, we send a 'Hello' to another peer
// this carries the following information:
// 'iam' - own hash
// 'youare' - remote hash, to verify that we are correct
// 'key' - a session key that the remote peer may use to answer
// and the own seed string
// we expect the following information to be send back:
// - 'yourip' the ip of the connection peer (we)
// - 'yourtype' the type of this peer that the other peer checked by asking for a specific word
// and the remote seed string
// the number of new seeds are returned
// one exceptional failure case is when we know the other peer's hash, the other peer responds correctly
// but it appears to be another peer by comparison of the other peer's hash
// this works of course only if we know the other peer's hash.
String key = crypt.randomSalt();
HashMap result = null;
try {
/*
URL url = new URL("http://" + address + "/yacy/hello.html?iam=" +
yacyCore.seedCache.mySeed.hash +
"&pattern=&count=20" +
"&key=" + key + "&seed=" + yacyCore.seedCache.mySeed.genSeedStr(key));
yacyCore.log.logDebug("HELLO to URL " + url.toString());
result = nxTools.table(httpc.wget(url,
10000, null, null, yacyCore.seedCache.sb.remoteProxyHost, yacyCore.seedCache.sb.remoteProxyPort));
*/
URL url = new URL("http://" + address + "/yacy/hello.html");
serverObjects obj = new serverObjects();
obj.put("iam", yacyCore.seedDB.mySeed.hash);
obj.put("pattern", "");
obj.put("count", "20");
obj.put("key", key);
obj.put("mytime", yacyCore.universalDateShortString());
obj.put("seed", yacyCore.seedDB.mySeed.genSeedStr(key));
obj.put("seed", yacyCore.seedDB.mySeed.genSeedStr(key));
result = nxTools.table(httpc.wput(url,
105000, null, null,
yacyCore.seedDB.sb.remoteProxyHost,
yacyCore.seedDB.sb.remoteProxyPort,
obj));
} catch (Exception e) {
yacyCore.log.logDebug("yacyClient.publishMySeed exception:" + e.getMessage());
return -1;
}
if ((result == null) || (result.size() < 3)) {
yacyCore.log.logDebug("yacyClient.publishMySeed result error: " +
((result == null) ? "result null" : ("result=" + result.toString())));
return -1;
}
105000, null, null,
yacyCore.seedDB.sb.remoteProxyHost,
yacyCore.seedDB.sb.remoteProxyPort,
obj));
} catch (Exception e) {
yacyCore.log.logDebug("yacyClient.publishMySeed exception:" + e.getMessage());
return -1;
}
if ((result == null) || (result.size() < 3)) {
yacyCore.log.logDebug("yacyClient.publishMySeed result error: " +
((result == null) ? "result null" : ("result=" + result.toString())));
return -1;
}
Date remoteTime = yacyCore.parseUniversalDate((String) result.get("mytime")); // read remote time
// check consistency with expectation
if ((otherHash != null ) && (otherHash.length() > 0)) {
yacySeed otherPeer = yacySeed.genRemoteSeed((String) result.get("seed0"), key, remoteTime);
if ((otherPeer == null) || (!(otherPeer.hash.equals(otherHash)))) {
yacyCore.log.logDebug("yacyClient.publishMySeed consistency error: other peer wrong");
return -1; // no success
}
}
// set my own seed according to new information
yacySeed mySeedBkp = (yacySeed) yacyCore.seedDB.mySeed.clone();
yacyCore.seedDB.mySeed.put("IP", (String) result.get("yourip"));
String mytype = (String) result.get("yourtype");
if (mytype == null) mytype = "junior";
if ((yacyCore.seedDB.mySeed.get("PeerType", "junior").equals("principal")) && (mytype.equals("senior"))) mytype = "principal";
yacyCore.seedDB.mySeed.put("PeerType", mytype);
if (!(yacyCore.seedDB.mySeed.isProper())) {
yacyCore.seedDB.mySeed = mySeedBkp;
yacyCore.log.logDebug("yacyClient.publishMySeed mySeed error: not proper");
return -1;
}
// read the seeds that the peer returned and integrate them into own database
// check consistency with expectation
if ((otherHash != null ) && (otherHash.length() > 0)) {
yacySeed otherPeer = yacySeed.genRemoteSeed((String) result.get("seed0"), key, remoteTime);
if ((otherPeer == null) || (!(otherPeer.hash.equals(otherHash)))) {
yacyCore.log.logDebug("yacyClient.publishMySeed consistency error: other peer wrong");
return -1; // no success
}
}
// set my own seed according to new information
yacySeed mySeedBkp = (yacySeed) yacyCore.seedDB.mySeed.clone();
yacyCore.seedDB.mySeed.put("IP", (String) result.get("yourip"));
String mytype = (String) result.get("yourtype");
if (mytype == null) mytype = "junior";
if ((yacyCore.seedDB.mySeed.get("PeerType", "junior").equals("principal")) && (mytype.equals("senior"))) mytype = "principal";
yacyCore.seedDB.mySeed.put("PeerType", mytype);
if (!(yacyCore.seedDB.mySeed.isProper())) {
yacyCore.seedDB.mySeed = mySeedBkp;
yacyCore.log.logDebug("yacyClient.publishMySeed mySeed error: not proper");
return -1;
}
// read the seeds that the peer returned and integrate them into own database
int i = 0;
String seedStr;
int count = 0;
while ((seedStr = (String) result.get("seed" + i++)) != null) {
// integrate new seed into own database
// the first seed, "seed0" is the seed of the responding peer
if (yacyCore.peerActions.peerArrival(yacySeed.genRemoteSeed(seedStr, key, remoteTime), (i == 1))) count++;
}
return count;
String seedStr;
int count = 0;
while ((seedStr = (String) result.get("seed" + i++)) != null) {
// integrate new seed into own database
// the first seed, "seed0" is the seed of the responding peer
if (yacyCore.peerActions.peerArrival(yacySeed.genRemoteSeed(seedStr, key, remoteTime), (i == 1))) count++;
}
return count;
}
public static yacySeed querySeed(yacySeed target, String seedHash) {
String key = crypt.randomSalt();
String key = crypt.randomSalt();
try {
HashMap result = nxTools.table(httpc.wget(
new URL("http://" + target.getAddress() +
"/yacy/query.html?iam=" + yacyCore.seedDB.mySeed.hash +
"&youare=" + target.hash + "&key=" + key +
"&object=seed&env=" + seedHash),
10000, null, null, yacyCore.seedDB.sb.remoteProxyHost, yacyCore.seedDB.sb.remoteProxyPort));
new URL("http://" + target.getAddress() +
"/yacy/query.html?iam=" + yacyCore.seedDB.mySeed.hash +
"&youare=" + target.hash + "&key=" + key +
"&object=seed&env=" + seedHash),
10000, null, null, yacyCore.seedDB.sb.remoteProxyHost, yacyCore.seedDB.sb.remoteProxyPort));
if ((result == null) || (result.size() == 0)) return null;
Date remoteTime = yacyCore.parseUniversalDate((String) result.get("mytime")); // read remote time
return yacySeed.genRemoteSeed((String) result.get("response"), key, remoteTime);
@ -172,126 +173,138 @@ public class yacyClient {
return null;
}
}
public static int queryRWICount(yacySeed target, String wordHash) {
try {
HashMap result = nxTools.table(httpc.wget(
new URL("http://" + target.getAddress() +
"/yacy/query.html?iam=" + yacyCore.seedDB.mySeed.hash +
"&youare=" + target.hash + "&key=" +
"&object=rwicount&env=" + wordHash +
"&ttl=0"),
10000, null, null, yacyCore.seedDB.sb.remoteProxyHost, yacyCore.seedDB.sb.remoteProxyPort));
new URL("http://" + target.getAddress() +
"/yacy/query.html?iam=" + yacyCore.seedDB.mySeed.hash +
"&youare=" + target.hash + "&key=" +
"&object=rwicount&env=" + wordHash +
"&ttl=0"),
10000, null, null, yacyCore.seedDB.sb.remoteProxyHost, yacyCore.seedDB.sb.remoteProxyPort));
if ((result == null) || (result.size() == 0)) return -1;
return Integer.parseInt((String) result.get("response"));
} catch (Exception e) {
return Integer.parseInt((String) result.get("response"));
} catch (Exception e) {
yacyCore.log.logError("yacyClient.queryRWICount error:" + e.getMessage());
return -1;
}
return -1;
}
}
public static int queryUrlCount(yacySeed target) {
if (target == null) return -1;
if (yacyCore.seedDB.mySeed == null) return -1;
String querystr =
"http://" + target.getAddress() +
"/yacy/query.html?iam=" + yacyCore.seedDB.mySeed.hash +
"&youare=" + target.hash +
"&key=" +
"&object=lurlcount&env=&ttl=0";
try {
HashMap result = nxTools.table(httpc.wget(
new URL(querystr), 5000, null, null,
yacyCore.seedDB.sb.remoteProxyHost, yacyCore.seedDB.sb.remoteProxyPort));
//yacyCore.log("DEBUG QUERY: query=" + querystr + "; result = " + result.toString());
if ((result == null) || (result.size() == 0)) return -1;
if (target == null) return -1;
if (yacyCore.seedDB.mySeed == null) return -1;
String querystr =
"http://" + target.getAddress() +
"/yacy/query.html?iam=" + yacyCore.seedDB.mySeed.hash +
"&youare=" + target.hash +
"&key=" +
"&object=lurlcount&env=&ttl=0";
try {
HashMap result = nxTools.table(httpc.wget(
new URL(querystr), 6000, null, null,
yacyCore.seedDB.sb.remoteProxyHost, yacyCore.seedDB.sb.remoteProxyPort));
//yacyCore.log("DEBUG QUERY: query=" + querystr + "; result = " + result.toString());
if ((result == null) || (result.size() == 0)) return -1;
String resp = (String) result.get("response");
if (resp == null) return -1; else return Integer.parseInt(resp);
} catch (Exception e) {
} catch (Exception e) {
yacyCore.log.logError("yacyClient.queryUrlCount error asking peer '" + target.getName() + "':" + e.toString());
return -1;
}
return -1;
}
}
public static int search(String wordhashes, int count, boolean global,
yacySeed targetPeer, plasmaCrawlLURL urlManager, plasmaSearch searchManager,
long duetime) {
// send a search request to peer with remote Hash
// this mainly converts the words into word hashes
// INPUT:
// iam : complete seed of the requesting peer
// youare : seed hash of the target peer, used for testing network stability
// key : transmission key for response
// search : a list of search words
// hsearch : a string of word hashes
// fwdep : forward depth. if "0" then peer may NOT ask another peer for more results
// fwden : forward deny, a list of seed hashes. They may NOT be target of forward hopping
// count : maximum number of wanted results
// global : if "true", then result may consist of answers from other peers
yacySeed targetPeer, plasmaCrawlLURL urlManager, plasmaSearch searchManager,
long duetime) {
// send a search request to peer with remote Hash
// this mainly converts the words into word hashes
// INPUT:
// iam : complete seed of the requesting peer
// youare : seed hash of the target peer, used for testing network stability
// key : transmission key for response
// search : a list of search words
// hsearch : a string of word hashes
// fwdep : forward depth. if "0" then peer may NOT ask another peer for more results
// fwden : forward deny, a list of seed hashes. They may NOT be target of forward hopping
// count : maximum number of wanted results
// global : if "true", then result may consist of answers from other peers
// duetime : maximum time that a peer should spent to create a result
// request result
String key = crypt.randomSalt();
try {
String url = "http://" + targetPeer.getAddress() + "/yacy/search.html";
/*
String url = "http://" + targetPeer.getAddress() +
"/yacy/search.html?myseed=" + yacyCore.seedCache.mySeed.genSeedStr(key) +
"&youare=" + targetPeer.hash + "&key=" + key +
"&myseed=" + yacyCore.seedCache.mySeed.genSeedStr(key) +
"&count=" + count + "&resource=" + ((global) ? "global" : "local") +
"&query=" + wordhashes;
*/
serverObjects obj = new serverObjects();
obj.put("myseed", yacyCore.seedDB.mySeed.genSeedStr(key));
obj.put("youare", targetPeer.hash);
obj.put("key", key);
obj.put("count", count);
obj.put("resource", ((global) ? "global" : "local"));
obj.put("query", wordhashes);
// request result
String key = crypt.randomSalt();
try {
String url = "http://" + targetPeer.getAddress() + "/yacy/search.html";
/*
String url = "http://" + targetPeer.getAddress() +
"/yacy/search.html?myseed=" + yacyCore.seedCache.mySeed.genSeedStr(key) +
"&youare=" + targetPeer.hash + "&key=" + key +
"&myseed=" + yacyCore.seedCache.mySeed.genSeedStr(key) +
"&count=" + count + "&resource=" + ((global) ? "global" : "local") +
"&query=" + wordhashes;
*/
serverObjects obj = new serverObjects();
obj.put("myseed", yacyCore.seedDB.mySeed.genSeedStr(key));
obj.put("youare", targetPeer.hash);
obj.put("key", key);
obj.put("count", count);
obj.put("resource", ((global) ? "global" : "local"));
obj.put("query", wordhashes);
obj.put("ttl", "0");
obj.put("duetime", "" + duetime);
obj.put("mytime", yacyCore.universalDateShortString());
//yacyCore.log.logDebug("yacyClient.search url=" + url);
obj.put("mytime", yacyCore.universalDateShortString());
//yacyCore.log.logDebug("yacyClient.search url=" + url);
long timestamp = System.currentTimeMillis();
HashMap result = nxTools.table(httpc.wput(new URL(url),
300000, null, null,
yacyCore.seedDB.sb.remoteProxyHost,
yacyCore.seedDB.sb.remoteProxyPort,
obj));
HashMap result = nxTools.table(httpc.wput(new URL(url),
300000, null, null,
yacyCore.seedDB.sb.remoteProxyHost,
yacyCore.seedDB.sb.remoteProxyPort,
obj));
long totalrequesttime = System.currentTimeMillis() - timestamp;
/*
HashMap result = nxTools.table(httpc.wget(new URL(url),
/*
HashMap result = nxTools.table(httpc.wget(new URL(url),
300000, null, null, yacyCore.seedCache.remoteProxyHost, yacyCore.seedCache.remoteProxyPort));
*/
// OUTPUT:
// version : application version of responder
// uptime : uptime in seconds of responder
// total : number of total available LURL's for this search
// count : number of returned LURL's for this search
// resource<n> : LURL of search
// fwhop : hops (depth) of forwards that had been performed to construct this result
// fwsrc : peers that helped to construct this result
// fwrec : peers that would have helped to construct this result (recommendations)
// searchtime : time that the peer actually spent to create the result
// references : references (search hints) that was calculated during search
*/
// OUTPUT:
// version : application version of responder
// uptime : uptime in seconds of responder
// total : number of total available LURL's for this search
// count : number of returned LURL's for this search
// resource<n> : LURL of search
// fwhop : hops (depth) of forwards that had been performed to construct this result
// fwsrc : peers that helped to construct this result
// fwrec : peers that would have helped to construct this result (recommendations)
// searchtime : time that the peer actually spent to create the result
// references : references (search hints) that was calculated during search
// now create a plasmaIndex out of this result
//System.out.println("yacyClient: search result = " + result.toString()); // debug
int results = Integer.parseInt((String) result.get("count"));
// now create a plasmaIndex out of this result
//System.out.println("yacyClient: search result = " + result.toString()); // debug
int results = Integer.parseInt((String) result.get("count"));
//System.out.println("***result count " + results);
plasmaCrawlLURL.entry link;
String wordhash;
for (int n = 0; n < results; n++) {
link = urlManager.newEntry((String) result.get("resource" + n), true, yacyCore.seedDB.mySeed.hash, targetPeer.hash, 2);
for (int m = 0; m < wordhashes.length() / plasmaCrawlLURL.urlHashLength; m++) {
wordhash = wordhashes.substring(m * plasmaCrawlLURL.urlHashLength, (m + 1) * plasmaCrawlLURL.urlHashLength);
searchManager.addWordIndex(link.url(), link.hash(), link.moddate(), link.quality(),
wordhash, link.wordCount(), 0, 0, 0, link.language(), link.doctype(), false);
}
}
plasmaCrawlLURL.entry link;
// create containers
int words = wordhashes.length() / plasmaWordIndexEntry.wordHashLength;
plasmaWordIndexEntryContainer container[] = new plasmaWordIndexEntryContainer[words];
for (int i = 0; i < words; i++) {
container[i] = new plasmaWordIndexEntryContainer(wordhashes.substring(i * plasmaWordIndexEntry.wordHashLength, (i + 1) * plasmaWordIndexEntry.wordHashLength));
}
// insert results to containers
for (int n = 0; n < results; n++) {
link = urlManager.newEntry((String) result.get("resource" + n), true, yacyCore.seedDB.mySeed.hash, targetPeer.hash, 2);
plasmaWordIndexEntry entry = new plasmaWordIndexEntry(link.hash(), link.wordCount(), 0, 0, 0,
plasmaSearch.calcVirtualAge(link.moddate()), link.quality(),
link.language(), link.doctype(), false);
for (int m = 0; m < words; m++) container[m].add(entry);
}
// finally insert the containers to the index
for (int m = 0; m < words; m++) searchManager.addWords(container[m]);
// generate statistics
long searchtime;
try {
searchtime = Integer.parseInt("" + (String) result.get("searchtime"));
@ -299,68 +312,68 @@ public class yacyClient {
searchtime = totalrequesttime;
}
yacyCore.log.logDebug("yacyClient.search: processed " + results + " links from peer " + targetPeer.hash + "; duetime=" + duetime + ", searchtime=" + searchtime + ", netdelay=" + (totalrequesttime - searchtime) + ", references=" + result.get("references"));
return results;
} catch (Exception e) {
yacyCore.log.logError("yacyClient.search error: '" + targetPeer.get("Name", "anonymous") + "' failed - " + e);
//e.printStackTrace();
return 0;
}
return results;
} catch (Exception e) {
yacyCore.log.logError("yacyClient.search error: '" + targetPeer.get("Name", "anonymous") + "' failed - " + e);
//e.printStackTrace();
return 0;
}
}
public static HashMap permissionMessage(String targetHash) {
// ask for allowed message size and attachement size
// if this replies null, the peer does not answer
// ask for allowed message size and attachement size
// if this replies null, the peer does not answer
if ((yacyCore.seedDB == null) || (yacyCore.seedDB.mySeed == null)) return null;
serverObjects post = new serverObjects();
String key = crypt.randomSalt();
post.put("key", key);
post.put("process", "permission");
post.put("iam", yacyCore.seedDB.mySeed.hash);
post.put("youare", targetHash);
post.put("mytime", yacyCore.universalDateShortString());
String address;
if (targetHash.equals(yacyCore.seedDB.mySeed.hash)) {
address = yacyCore.seedDB.mySeed.getAddress();
//System.out.println("local address: " + address);
} else {
serverObjects post = new serverObjects();
String key = crypt.randomSalt();
post.put("key", key);
post.put("process", "permission");
post.put("iam", yacyCore.seedDB.mySeed.hash);
post.put("youare", targetHash);
post.put("mytime", yacyCore.universalDateShortString());
String address;
if (targetHash.equals(yacyCore.seedDB.mySeed.hash)) {
address = yacyCore.seedDB.mySeed.getAddress();
//System.out.println("local address: " + address);
} else {
yacySeed targetSeed = yacyCore.seedDB.getConnected(targetHash);
if (targetSeed == null) return null;
address = targetSeed.getAddress();
//System.out.println("remote address: " + address);
}
if (address == null) address = "localhost:8080";
try {
address = targetSeed.getAddress();
//System.out.println("remote address: " + address);
}
if (address == null) address = "localhost:8080";
try {
return nxTools.table(httpc.wput(
new URL("http://" + address + "/yacy/message.html"),
8000, null, null, yacyCore.seedDB.sb.remoteProxyHost, yacyCore.seedDB.sb.remoteProxyPort, post));
} catch (Exception e) {
// most probably a network time-out exception
new URL("http://" + address + "/yacy/message.html"),
8000, null, null, yacyCore.seedDB.sb.remoteProxyHost, yacyCore.seedDB.sb.remoteProxyPort, post));
} catch (Exception e) {
// most probably a network time-out exception
yacyCore.log.logError("yacyClient.permissionMessage error:" + e.getMessage());
return null;
}
return null;
}
}
public static HashMap postMessage(String targetHash, String subject, byte[] message) {
// this post a message to the remote message board
serverObjects post = new serverObjects();
String key = crypt.randomSalt();
post.put("key", key);
post.put("process", "post");
post.put("myseed", yacyCore.seedDB.mySeed.genSeedStr(key));
post.put("youare", targetHash);
post.put("subject", subject);
post.put("mytime", yacyCore.universalDateShortString());
post.put("message", new String(message));
String address;
if (targetHash.equals(yacyCore.seedDB.mySeed.hash))
address = yacyCore.seedDB.mySeed.getAddress();
else
address = yacyCore.seedDB.getConnected(targetHash).getAddress();
if (address == null) address = "localhost:8080";
//System.out.println("DEBUG POST " + address + "/yacy/message.html" + post.toString());
// this post a message to the remote message board
serverObjects post = new serverObjects();
String key = crypt.randomSalt();
post.put("key", key);
post.put("process", "post");
post.put("myseed", yacyCore.seedDB.mySeed.genSeedStr(key));
post.put("youare", targetHash);
post.put("subject", subject);
post.put("mytime", yacyCore.universalDateShortString());
post.put("message", new String(message));
String address;
if (targetHash.equals(yacyCore.seedDB.mySeed.hash))
address = yacyCore.seedDB.mySeed.getAddress();
else
address = yacyCore.seedDB.getConnected(targetHash).getAddress();
if (address == null) address = "localhost:8080";
//System.out.println("DEBUG POST " + address + "/yacy/message.html" + post.toString());
try {
Vector v = httpc.wput(new URL("http://" + address + "/yacy/message.html"), 20000, null, null,
yacyCore.seedDB.sb.remoteProxyHost, yacyCore.seedDB.sb.remoteProxyPort, post);
yacyCore.seedDB.sb.remoteProxyHost, yacyCore.seedDB.sb.remoteProxyPort, post);
//System.out.println("V=" + v.toString());
return nxTools.table(v);
} catch (Exception e) {
@ -368,35 +381,35 @@ public class yacyClient {
return null;
}
}
public static HashMap crawlOrder(yacySeed targetSeed, String url, String referrer, int depth) {
// this post a message to the remote message board
if (targetSeed == null) return null;
if (yacyCore.seedDB.mySeed == null) return null;
// this post a message to the remote message board
if (targetSeed == null) return null;
if (yacyCore.seedDB.mySeed == null) return null;
if (yacyCore.seedDB.mySeed == targetSeed) return null;
// construct request
String key = crypt.randomSalt();
String address = targetSeed.getAddress();
if (address == null) return null;
String key = crypt.randomSalt();
String address = targetSeed.getAddress();
if (address == null) return null;
try {
return nxTools.table(httpc.wget(
new URL("http://" + address + "/yacy/crawlOrder.html?"+
"key=" + key +
"&process=crawl" +
"&youare=" + targetSeed.hash +
"&iam=" + yacyCore.seedDB.mySeed.hash +
"&url=" + crypt.simpleEncode(url) +
"&referrer=" + crypt.simpleEncode(referrer) +
"&depth=" + depth +
"&ttl=0"
),
10000, null, null, yacyCore.seedDB.sb.remoteProxyHost, yacyCore.seedDB.sb.remoteProxyPort));
} catch (Exception e) {
// most probably a network time-out exception
new URL("http://" + address + "/yacy/crawlOrder.html?"+
"key=" + key +
"&process=crawl" +
"&youare=" + targetSeed.hash +
"&iam=" + yacyCore.seedDB.mySeed.hash +
"&url=" + crypt.simpleEncode(url) +
"&referrer=" + crypt.simpleEncode(referrer) +
"&depth=" + depth +
"&ttl=0"
),
10000, null, null, yacyCore.seedDB.sb.remoteProxyHost, yacyCore.seedDB.sb.remoteProxyPort));
} catch (Exception e) {
// most probably a network time-out exception
yacyCore.log.logError("yacyClient.crawlOrder error: peer=" + targetSeed.getName() + ", error=" + e.getMessage());
return null;
}
return null;
}
}
/*
@ -404,11 +417,11 @@ public class yacyClient {
http://217.234.95.114:5777/yacy/crawlOrder.html?key=abc&iam=S-cjM67KhtcJ&youare=EK31N7RgRqTn&process=crawl&referrer=&depth=0&url=p|http://www.heise.de/newsticker/meldung/53245
version=0.297 uptime=225 accepted=true reason=ok delay=30 depth=0
-it crawls, but the result appears under the wrong initiator
*/
*/
public static HashMap crawlReceipt(yacySeed targetSeed, String process, String result, String reason, plasmaCrawlLURL.entry entry, String wordhashes) {
if (targetSeed == null) return null;
if (yacyCore.seedDB.mySeed == null) return null;
if (yacyCore.seedDB.mySeed == null) return null;
if (yacyCore.seedDB.mySeed == targetSeed) return null;
/*
@ -416,7 +429,7 @@ public class yacyClient {
negative cases, no retry
unavailable - the resource is not available (a broken link); not found or interrupted
robot - a robot-file has denied to crawl that resource
negative cases, retry possible
rejected - the peer has rejected to load the resource
dequeue - peer too busy - rejected to crawl
@ -424,37 +437,37 @@ public class yacyClient {
positive cases with crawling
fill - the resource was loaded and processed
update - the resource was already in database but re-loaded and processed
positive cases without crawling
positive cases without crawling
known - the resource is already in database, believed to be fresh and not reloaded
stale - the resource was reloaded but not processed because source had no changes
*/
*/
// construct request
String key = crypt.randomSalt();
String key = crypt.randomSalt();
String address = targetSeed.getAddress();
if (address == null) return null;
if (address == null) return null;
try {
return nxTools.table(httpc.wget(
new URL("http://" + address + "/yacy/crawlReceipt.html?" +
"iam=" + yacyCore.seedDB.mySeed.hash +
"&youare=" + targetSeed.hash +
"&process=" + process +
"&key=" + key +
"&urlhash=" + ((entry == null) ? "" : entry.hash()) +
"&result=" + result +
"&reason=" + reason +
"&wordh=" + wordhashes +
"&lurlEntry=" + ((entry == null) ? "" : crypt.simpleEncode(entry.toString(), key))
),
60000, null, null, yacyCore.seedDB.sb.remoteProxyHost, yacyCore.seedDB.sb.remoteProxyPort));
} catch (Exception e) {
// most probably a network time-out exception
new URL("http://" + address + "/yacy/crawlReceipt.html?" +
"iam=" + yacyCore.seedDB.mySeed.hash +
"&youare=" + targetSeed.hash +
"&process=" + process +
"&key=" + key +
"&urlhash=" + ((entry == null) ? "" : entry.hash()) +
"&result=" + result +
"&reason=" + reason +
"&wordh=" + wordhashes +
"&lurlEntry=" + ((entry == null) ? "" : crypt.simpleEncode(entry.toString(), key))
),
60000, null, null, yacyCore.seedDB.sb.remoteProxyHost, yacyCore.seedDB.sb.remoteProxyPort));
} catch (Exception e) {
// most probably a network time-out exception
yacyCore.log.logError("yacyClient.crawlReceipt error:" + e.getMessage());
return null;
}
return null;
}
}
/*
public static byte[] singleGET(String host, int port, String path, int timeout,
@ -493,15 +506,15 @@ public class yacyClient {
}
private static HashMap transferRWI(yacySeed targetSeed, plasmaWordIndexEntity[] indexes, plasmaCrawlLURL urlDB) {
String address = targetSeed.getAddress();
if (address == null) return null;
String address = targetSeed.getAddress();
if (address == null) return null;
// prepare post values
serverObjects post = new serverObjects();
String key = crypt.randomSalt();
post.put("key", key);
serverObjects post = new serverObjects();
String key = crypt.randomSalt();
post.put("key", key);
post.put("iam", yacyCore.seedDB.mySeed.hash);
post.put("youare", targetSeed.hash);
post.put("wordc", "" + indexes.length);
post.put("youare", targetSeed.hash);
post.put("wordc", "" + indexes.length);
int indexcount = 0;
String entrypost = "";
Enumeration eenum;
@ -523,7 +536,7 @@ public class yacyClient {
} else {
// try to get the entry from the urlDB
if ((urlDB.exists(entry.getUrlHash())) &&
((urlentry = urlDB.getEntry(entry.getUrlHash())) != null)) {
((urlentry = urlDB.getEntry(entry.getUrlHash())) != null)) {
// good case: store the urlentry to the cache
urlCache.put(entry.getUrlHash(), urlentry);
// add index to list
@ -552,9 +565,9 @@ public class yacyClient {
post.put("entryc", "" + indexcount);
post.put("indexes", entrypost);
try {
try {
Vector v = httpc.wput(new URL("http://" + address + "/yacy/transferRWI.html"), 60000, null, null,
yacyCore.seedDB.sb.remoteProxyHost, yacyCore.seedDB.sb.remoteProxyPort, post);
yacyCore.seedDB.sb.remoteProxyHost, yacyCore.seedDB.sb.remoteProxyPort, post);
// this should return a list of urlhashes that are unknown
if (v != null) {
yacyCore.seedDB.mySeed.incSI(indexcount);
@ -571,16 +584,16 @@ public class yacyClient {
}
private static HashMap transferURL(yacySeed targetSeed, plasmaCrawlLURL.entry[] urls) {
// this post a message to the remote message board
String address = targetSeed.getAddress();
if (address == null) return null;
// this post a message to the remote message board
String address = targetSeed.getAddress();
if (address == null) return null;
// prepare post values
serverObjects post = new serverObjects();
String key = crypt.randomSalt();
post.put("key", key);
serverObjects post = new serverObjects();
String key = crypt.randomSalt();
post.put("key", key);
post.put("iam", yacyCore.seedDB.mySeed.hash);
post.put("youare", targetSeed.hash);
String resource = "";
post.put("youare", targetSeed.hash);
String resource = "";
int urlc = 0;
for (int i = 0; i < urls.length; i++) {
if (urls[i] != null) {
@ -592,9 +605,9 @@ public class yacyClient {
}
}
post.put("urlc", "" + urlc);
try {
try {
Vector v = httpc.wput(new URL("http://" + address + "/yacy/transferURL.html"), 60000, null, null,
yacyCore.seedDB.sb.remoteProxyHost, yacyCore.seedDB.sb.remoteProxyPort, post);
yacyCore.seedDB.sb.remoteProxyHost, yacyCore.seedDB.sb.remoteProxyPort, post);
if (v != null) {
yacyCore.seedDB.mySeed.incSU(urlc);
}
@ -606,22 +619,22 @@ public class yacyClient {
}
public static HashMap getProfile(yacySeed targetSeed) {
// this post a message to the remote message board
serverObjects post = new serverObjects();
post.put("iam", yacyCore.seedDB.mySeed.hash);
post.put("youare", targetSeed.hash);
String address = targetSeed.getAddress();
if (address == null) address = "localhost:8080";
try {
// this post a message to the remote message board
serverObjects post = new serverObjects();
post.put("iam", yacyCore.seedDB.mySeed.hash);
post.put("youare", targetSeed.hash);
String address = targetSeed.getAddress();
if (address == null) address = "localhost:8080";
try {
Vector v = httpc.wput(new URL("http://" + address + "/yacy/profile.html"), 20000, null, null,
yacyCore.seedDB.sb.remoteProxyHost, yacyCore.seedDB.sb.remoteProxyPort, post);
yacyCore.seedDB.sb.remoteProxyHost, yacyCore.seedDB.sb.remoteProxyPort, post);
return nxTools.table(v);
} catch (Exception e) {
yacyCore.log.logError("yacyClient.getProfile error:" + e.getMessage());
return null;
}
}
public static void main(String[] args) {
System.out.println("yacyClient Test");
try {
@ -630,20 +643,20 @@ public class yacyClient {
core.peerActions.loadSeedLists();
yacySeed target = core.seedDB.getConnected(args[1]);
String wordhashe = plasmaWordIndexEntry.word2hash("test");
//System.out.println("permission=" + permissionMessage(args[1]));
//System.out.println("permission=" + permissionMessage(args[1]));
HashMap result = nxTools.table(httpc.wget(
new URL("http://" + target.getAddress() +
"/yacy/search.html?myseed=" + core.seedDB.mySeed.genSeedStr(null) +
"&youare=" + target.hash + "&key=" +
"&myseed=" + core.seedDB.mySeed.genSeedStr(null) +
"&count=10&resource=global" +
"&query=" + wordhashe),
5000, null, null, core.seedDB.sb.remoteProxyHost, core.seedDB.sb.remoteProxyPort));
new URL("http://" + target.getAddress() +
"/yacy/search.html?myseed=" + core.seedDB.mySeed.genSeedStr(null) +
"&youare=" + target.hash + "&key=" +
"&myseed=" + core.seedDB.mySeed.genSeedStr(null) +
"&count=10&resource=global" +
"&query=" + wordhashe),
5000, null, null, core.seedDB.sb.remoteProxyHost, core.seedDB.sb.remoteProxyPort));
System.out.println("Result=" + result.toString());
} catch (Exception e) {
e.printStackTrace();
}
e.printStackTrace();
}
System.exit(0);
}
}

@ -1,8 +1,2 @@
#plasmaParser configuration file
#Mon May 02 10:12:02 CEST 2005
application/atom+xml=de.anomic.plasma.parser.rss.rssParser
text/rss=de.anomic.plasma.parser.rss.rssParser
application/rss+xml=de.anomic.plasma.parser.rss.rssParser
application/rdf+xml=de.anomic.plasma.parser.rss.rssParser
application/msword=de.anomic.plasma.parser.doc.docParser
application/pdf=de.anomic.plasma.parser.pdf.pdfParser
#plasmaParser configuration file
#Sat May 07 22:32:33 CEST 2005

Loading…
Cancel
Save