From 55da87121136bb5edc22c063207cc663d1582824 Mon Sep 17 00:00:00 2001
From: orbiter
Date: Thu, 15 Nov 2007 03:03:18 +0000
Subject: [PATCH] preparations for better ranking: better debugging of index
properties to do this, the index administration interface was extended. It is
now possible to select parts of an index. See properties shown in interface
after a word search for details.
git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4218 6c8d7289-2bf4-0310-a012-ef5d649a1542
---
build.properties | 2 +-
htroot/IndexControlRWIs_p.html | 208 +++++++
htroot/IndexControlRWIs_p.java | 456 ++++++++++++++
htroot/IndexControlURLs_p.html | 64 ++
htroot/IndexControlURLs_p.java | 200 ++++++
htroot/IndexControl_p.html | 178 ------
htroot/IndexControl_p.java | 589 ------------------
htroot/env/templates/header.template | 2 +-
.../templates/submenuIndexControl.template | 3 +-
source/de/anomic/plasma/plasmaCondenser.java | 18 +-
source/de/anomic/plasma/plasmaWordIndex.java | 116 ++++
11 files changed, 1056 insertions(+), 780 deletions(-)
create mode 100644 htroot/IndexControlRWIs_p.html
create mode 100644 htroot/IndexControlRWIs_p.java
create mode 100644 htroot/IndexControlURLs_p.html
create mode 100644 htroot/IndexControlURLs_p.java
delete mode 100644 htroot/IndexControl_p.html
delete mode 100644 htroot/IndexControl_p.java
diff --git a/build.properties b/build.properties
index 7984715a0..f1d02eeae 100644
--- a/build.properties
+++ b/build.properties
@@ -3,7 +3,7 @@ javacSource=1.4
javacTarget=1.4
# Release Configuration
-releaseVersion=0.552
+releaseVersion=0.553
releaseFile=yacy_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
proReleaseFile=yacy_pro_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
releaseFileParentDir=yacy
diff --git a/htroot/IndexControlRWIs_p.html b/htroot/IndexControlRWIs_p.html
new file mode 100644
index 000000000..1600bcd31
--- /dev/null
+++ b/htroot/IndexControlRWIs_p.html
@@ -0,0 +1,208 @@
+
+
+
+ YaCy '#[clientname]#': Index Control
+ #%env/templates/metas.template%#
+
+
+ #%env/templates/header.template%#
+ #%env/templates/submenuIndexControl.template%#
+ Index Administration
+ The local index currently contains #[wcount]# reverse word indexes
+
+
+ #(searchresult)#::
+ No entry for word '#[word]#'
::
+ No entry for word hash #[wordhash]#
::
+ Search result:
+
+ Display URL List
+
+ Number of lines:
+ all lines
+ 100
+ 1000
+
+ Ordering of list:
+ by URL
+ by URL Hash
+
+
+
+
+
+
+
+ Transfer RWI to other Peer
+
+ Transfer by Word-Hash:
+
+
+
+ to Peer:
+ select
+ #{hosts}#
+ #[hostname]#
+ #{/hosts}#
+ or enter a hash:
+
+
+
+
+ #(/searchresult)#
+
+ #(keyhashsimilar)#::Sequential List of Word-Hashes:
+ #{rows}#
+ #{cols}#
+ #[wordHash]# #{/cols}#
+ #{/rows}#
+ #(/keyhashsimilar)#
+
+ #(genUrlList)#
+ ::
+ No URL entries related to this word hash #[keyHash]# .
+ ::
+ #[count]# URL entries related to this word hash #[keyHash]#
+
+ #(/genUrlList)#
+ #[result]#
+
+ #%env/templates/footer.template%#
+
+
\ No newline at end of file
diff --git a/htroot/IndexControlRWIs_p.java b/htroot/IndexControlRWIs_p.java
new file mode 100644
index 000000000..47f823e6d
--- /dev/null
+++ b/htroot/IndexControlRWIs_p.java
@@ -0,0 +1,456 @@
+// IndexControlRWIs_p.java
+// -----------------------
+// (C) 2004-2007 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
+// first published 2004 on http://yacy.net
+//
+// This is a part of YaCy, a peer-to-peer based web search engine
+//
+// $LastChangedDate: 2007-11-14 01:15:28 +0000 (Mi, 14 Nov 2007) $
+// $LastChangedRevision: 4216 $
+// $LastChangedBy: orbiter $
+//
+// LICENSE
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+import java.io.File;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.net.MalformedURLException;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.Set;
+
+import de.anomic.data.listManager;
+import de.anomic.http.httpHeader;
+import de.anomic.index.indexContainer;
+import de.anomic.index.indexRWIEntry;
+import de.anomic.index.indexURLEntry;
+import de.anomic.kelondro.kelondroBitfield;
+import de.anomic.plasma.plasmaCondenser;
+import de.anomic.plasma.plasmaSwitchboard;
+import de.anomic.plasma.plasmaWordIndex;
+import de.anomic.plasma.urlPattern.abstractURLPattern;
+import de.anomic.plasma.urlPattern.plasmaURLPattern;
+import de.anomic.server.serverObjects;
+import de.anomic.server.serverSwitch;
+import de.anomic.yacy.yacyClient;
+import de.anomic.yacy.yacyCore;
+import de.anomic.yacy.yacySeed;
+import de.anomic.yacy.yacyURL;
+
+public class IndexControlRWIs_p {
+
+ public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) {
+ // return variable that accumulates replacements
+ plasmaSwitchboard sb = (plasmaSwitchboard) env;
+ serverObjects prop = new serverObjects();
+
+ prop.putHTML("keystring", "");
+ prop.put("keyhash", "");
+ prop.put("result", "");
+
+ // switch off all optional forms/lists
+ prop.put("searchresult", 0);
+ prop.put("keyhashsimilar", 0);
+ prop.put("genUrlList", 0);
+
+ if (post != null) {
+ // default values
+ String keystring = post.get("keystring", "").trim();
+ String keyhash = post.get("keyhash", "").trim();
+ int sortorder = post.getInt("ordering", 0);
+ prop.putHTML("keystring", keystring);
+ prop.put("keyhash", keyhash);
+
+ // read values from checkboxes
+ String[] urlx = post.getAll("urlhx.*");
+ boolean delurl = post.containsKey("delurl");
+ boolean delurlref = post.containsKey("delurlref");
+
+ if (post.containsKey("keystringsearch")) {
+ keyhash = plasmaCondenser.word2hash(keystring);
+ prop.put("keyhash", keyhash);
+ final plasmaWordIndex.Finding finding = genSearchresult(prop, sb, keyhash, null, false, sortorder);
+ if (finding.size() == 0) {
+ prop.put("searchresult", 1);
+ prop.put("searchresult_word", keystring);
+ }
+ }
+
+ if (post.containsKey("keyhashsearch")) {
+ if (keystring.length() == 0 || !plasmaCondenser.word2hash(keystring).equals(keyhash)) {
+ prop.put("keystring", "<not possible to compute word from hash>");
+ }
+ final plasmaWordIndex.Finding finding = genSearchresult(prop, sb, keyhash, null, false, sortorder);
+ if (finding.size() == 0) {
+ prop.put("searchresult", 2);
+ prop.put("searchresult_wordhash", keyhash);
+ }
+ }
+
+ // delete word
+ if (post.containsKey("keyhashdeleteall")) {
+ if (delurl || delurlref) {
+ // generate an urlx array
+ indexContainer index = null;
+ index = sb.wordIndex.getContainer(keyhash, null);
+ Iterator en = index.entries();
+ int i = 0;
+ urlx = new String[index.size()];
+ while (en.hasNext()) {
+ urlx[i++] = ((indexRWIEntry) en.next()).urlHash();
+ }
+ index = null;
+ }
+ if (delurlref) {
+ for (int i = 0; i < urlx.length; i++) sb.removeAllUrlReferences(urlx[i], true);
+ }
+ if (delurl || delurlref) {
+ for (int i = 0; i < urlx.length; i++) {
+ sb.urlRemove(urlx[i]);
+ }
+ }
+ sb.wordIndex.deleteContainer(keyhash);
+ post.remove("keyhashdeleteall");
+ post.put("urllist", "generated");
+ }
+
+ // delete selected URLs
+ if (post.containsKey("keyhashdelete")) {
+ if (delurlref) {
+ for (int i = 0; i < urlx.length; i++) sb.removeAllUrlReferences(urlx[i], true);
+ }
+ if (delurl || delurlref) {
+ for (int i = 0; i < urlx.length; i++) {
+ sb.urlRemove(urlx[i]);
+ }
+ }
+ Set urlHashes = new HashSet();
+ for (int i = 0; i < urlx.length; i++) urlHashes.add(urlx[i]);
+ sb.wordIndex.removeEntries(keyhash, urlHashes);
+ // this shall lead to a presentation of the list; so handle that the remaining program
+ // thinks that it was called for a list presentation
+ post.remove("keyhashdelete");
+ post.put("urllist", "generated");
+ }
+
+ if (post.containsKey("urllist")) {
+ if (keystring.length() == 0 || !plasmaCondenser.word2hash(keystring).equals(keyhash)) {
+ prop.put("keystring", "<not possible to compute word from hash>");
+ }
+ kelondroBitfield flags = compileFlags(post);
+ int count = (post.get("lines", "all").equals("all")) ? -1 : post.getInt("lines", -1);
+ final plasmaWordIndex.Finding finding = genSearchresult(prop, sb, keyhash, flags, true, sortorder);
+ genURLList(prop, keyhash, keystring, finding, flags, count, sortorder);
+ }
+
+ // transfer to other peer
+ if (post.containsKey("keyhashtransfer")) {
+ if (keystring.length() == 0 || !plasmaCondenser.word2hash(keystring).equals(keyhash)) {
+ prop.put("keystring", "<not possible to compute word from hash>");
+ }
+
+ // find host & peer
+ String host = post.get("host", ""); // get host from input field
+ yacySeed seed = null;
+ if (host.length() != 0) {
+ if (host.length() == 12) {
+ // the host string is a peer hash
+ seed = yacyCore.seedDB.getConnected(host);
+ } else {
+ // the host string can be a host name
+ seed = yacyCore.seedDB.lookupByName(host);
+ }
+ } else {
+ host = post.get("hostHash", ""); // if input field is empty, get from select box
+ seed = yacyCore.seedDB.getConnected(host);
+ }
+
+ // prepare index
+ indexContainer index;
+ String result;
+ long starttime = System.currentTimeMillis();
+ index = sb.wordIndex.getContainer(keyhash, null);
+ // built urlCache
+ Iterator urlIter = index.entries();
+ HashMap knownURLs = new HashMap();
+ HashSet unknownURLEntries = new HashSet();
+ indexRWIEntry iEntry;
+ indexURLEntry lurl;
+ while (urlIter.hasNext()) {
+ iEntry = (indexRWIEntry) urlIter.next();
+ lurl = sb.wordIndex.loadedURL.load(iEntry.urlHash(), null);
+ if (lurl == null) {
+ unknownURLEntries.add(iEntry.urlHash());
+ urlIter.remove();
+ } else {
+ knownURLs.put(iEntry.urlHash(), lurl);
+ }
+ }
+
+ // transport to other peer
+ String gzipBody = sb.getConfig("indexControl.gzipBody","false");
+ int timeout = (int) sb.getConfigLong("indexControl.timeout",60000);
+ HashMap resultObj = yacyClient.transferIndex(
+ seed,
+ new indexContainer[]{index},
+ knownURLs,
+ "true".equalsIgnoreCase(gzipBody),
+ timeout);
+ result = (String) resultObj.get("result");
+ prop.put("result", (result == null) ? ("Successfully transferred " + knownURLs.size() + " words in " + ((System.currentTimeMillis() - starttime) / 1000) + " seconds, " + unknownURLEntries + " URL not found") : result);
+ index = null;
+ }
+
+ // generate list
+ if (post.containsKey("keyhashsimilar")) {
+ final Iterator containerIt = sb.wordIndex.indexContainerSet(keyhash, false, true, 256).iterator();
+ indexContainer container;
+ int i = 0;
+ int rows = 0, cols = 0;
+ prop.put("keyhashsimilar", "1");
+ while (containerIt.hasNext() && i < 256) {
+ container = (indexContainer) containerIt.next();
+ prop.put("keyhashsimilar_rows_"+rows+"_cols_"+cols+"_wordHash", container.getWordHash());
+ cols++;
+ if (cols==8) {
+ prop.put("keyhashsimilar_rows_"+rows+"_cols", cols);
+ cols = 0;
+ rows++;
+ }
+ i++;
+ }
+ prop.put("keyhashsimilar_rows_"+rows+"_cols", cols);
+ prop.put("keyhashsimilar_rows", rows + 1);
+ prop.put("result", "");
+ }
+
+ if (post.containsKey("blacklist")) {
+ String blacklist = post.get("blacklist", "");
+ Set urlHashes = new HashSet();
+ if (post.containsKey("blacklisturls")) {
+ PrintWriter pw;
+ try {
+ String[] supportedBlacklistTypes = env.getConfig("BlackLists.types", "").split(",");
+ pw = new PrintWriter(new FileWriter(new File(listManager.listsPath, blacklist), true));
+ yacyURL url;
+ for (int i=0; i 0) {
+ Iterator e = yacyCore.dhtAgent.getAcceptRemoteIndexSeeds(startHash);
+ while (e.hasNext()) {
+ seed = (yacySeed) e.next();
+ if (seed != null) {
+ prop.put("searchresult_hosts_" + hc + "_hosthash", seed.hash);
+ prop.putHTML("searchresult_hosts_" + hc + "_hostname", seed.hash + " " + seed.get(yacySeed.NAME, "nameless"));
+ hc++;
+ }
+ }
+ prop.put("searchresult_hosts", hc);
+ } else {
+ prop.put("searchresult_hosts", "0");
+ }
+ }
+
+ private static plasmaWordIndex.Finding genSearchresult(serverObjects prop, plasmaSwitchboard sb, String keyhash, kelondroBitfield filter, boolean urlfetch, int sortorder) {
+ final plasmaWordIndex.Finding finding = sb.wordIndex.retrieveURLs(keyhash, filter, false, -1, urlfetch, sortorder);
+ if (finding.size() == 0) {
+ prop.put("searchresult", 2);
+ prop.put("searchresult_wordhash", keyhash);
+ } else {
+ prop.put("searchresult", 3);
+ prop.put("searchresult_allurl", finding.size());
+ prop.put("searchresult_reference", finding.flagcount()[indexRWIEntry.flag_app_reference]);
+ prop.put("searchresult_description", finding.flagcount()[indexRWIEntry.flag_app_descr]);
+ prop.put("searchresult_author", finding.flagcount()[indexRWIEntry.flag_app_author]);
+ prop.put("searchresult_tag", finding.flagcount()[indexRWIEntry.flag_app_tags]);
+ prop.put("searchresult_url", finding.flagcount()[indexRWIEntry.flag_app_url]);
+ prop.put("searchresult_emphasized", finding.flagcount()[indexRWIEntry.flag_app_emphasized]);
+ prop.put("searchresult_image", finding.flagcount()[plasmaCondenser.flag_cat_hasimage]);
+ prop.put("searchresult_audio", finding.flagcount()[plasmaCondenser.flag_cat_hasaudio]);
+ prop.put("searchresult_video", finding.flagcount()[plasmaCondenser.flag_cat_hasvideo]);
+ prop.put("searchresult_app", finding.flagcount()[plasmaCondenser.flag_cat_hasapp]);
+ prop.put("searchresult_indexof", finding.flagcount()[plasmaCondenser.flag_cat_indexof]);
+ }
+ return finding;
+ }
+
+ /**
+  * Fills the "genUrlList" section of the template with the URLs that a
+  * finding holds for a word hash.
+  *
+  * For an empty finding, alternative 1 of "genUrlList" is selected and the
+  * count is set to 0. Otherwise alternative 2 is selected, one list row is
+  * written per hit (stopping once <code>maxlines</code> rows were written, if
+  * <code>maxlines</code> is not negative), followed by one row per URL hash
+  * reported by <code>finding.miss()</code>, and finally the blacklist selection.
+  *
+  * @param prop      template properties to fill
+  * @param keyhash   word hash the list belongs to
+  * @param keystring word the hash was computed from (HTML-escaped on output)
+  * @param finding   result of the index lookup
+  * @param flags     flag filter that was used; exported into the template
+  * @param maxlines  maximum number of hit rows, or a negative value for no limit
+  * @param ordering  ordering selector, only echoed into the template here
+  */
+ private static void genURLList(serverObjects prop, String keyhash, String keystring, plasmaWordIndex.Finding finding, kelondroBitfield flags, int maxlines, int ordering) {
+     // search for a word hash and generate a list of url links
+     prop.put("genUrlList_keyHash", keyhash);
+
+     if (finding.size() == 0) {
+         prop.put("genUrlList", 1);
+         prop.put("genUrlList_count", 0);
+         prop.put("searchresult", 2);
+     } else {
+         prop.put("genUrlList", 2);
+         prop.put("searchresult", 3);
+         prop.put("genUrlList_flags", flags.exportB64());
+         prop.put("genUrlList_lines", maxlines);
+         prop.put("genUrlList_ordering", ordering);
+         int i = 0;
+         yacyURL url;
+         Iterator iter = finding.hit();
+         plasmaWordIndex.Item entry;
+         String us;
+         // rows for references whose URL entry is available
+         while (iter.hasNext()) {
+             entry = (plasmaWordIndex.Item) iter.next();
+             us = entry.url().comp().url().toNormalform(false, false);
+             prop.put("genUrlList_urlList_"+i+"_urlExists", "1");
+             prop.put("genUrlList_urlList_"+i+"_urlExists_urlhxCount", i);
+             prop.putHTML("genUrlList_urlList_"+i+"_urlExists_urlhxValue", entry.index().urlHash());
+             prop.putHTML("genUrlList_urlList_"+i+"_urlExists_keyString", keystring);
+             prop.put("genUrlList_urlList_"+i+"_urlExists_keyHash", keyhash);
+             prop.putHTML("genUrlList_urlList_"+i+"_urlExists_urlString", us);
+             prop.putHTML("genUrlList_urlList_"+i+"_urlExists_urlStringShort", (us.length() > 60) ? (us.substring(0, 60) + "...") : us);
+             prop.put("genUrlList_urlList_"+i+"_urlExists_pos", entry.index().posintext());
+             prop.put("genUrlList_urlList_"+i+"_urlExists_phrase", entry.index().posofphrase());
+             prop.put("genUrlList_urlList_"+i+"_urlExists_urlcomps", entry.index().urlcomps());
+             prop.put("genUrlList_urlList_"+i+"_urlExists_urllength", entry.index().urllength());
+             prop.put("genUrlList_urlList_"+i+"_urlExists_props",
+                 ((entry.index().flags().get(plasmaCondenser.flag_cat_hasimage)) ? "contains images, " : "") +
+                 ((entry.index().flags().get(plasmaCondenser.flag_cat_hasaudio)) ? "contains audio, " : "") +
+                 ((entry.index().flags().get(plasmaCondenser.flag_cat_hasvideo)) ? "contains video, " : "") +
+                 ((entry.index().flags().get(plasmaCondenser.flag_cat_hasapp)) ? "contains applications, " : "") +
+                 ((entry.index().flags().get(indexRWIEntry.flag_app_url)) ? "appears in url, " : "") +
+                 ((entry.index().flags().get(indexRWIEntry.flag_app_descr)) ? "appears in description, " : "") +
+                 ((entry.index().flags().get(indexRWIEntry.flag_app_author)) ? "appears in author, " : "") +
+                 ((entry.index().flags().get(indexRWIEntry.flag_app_tags)) ? "appears in tags, " : "") +
+                 ((entry.index().flags().get(indexRWIEntry.flag_app_reference)) ? "appears in reference, " : "") +
+                 ((entry.index().flags().get(indexRWIEntry.flag_app_emphasized)) ? "appears emphasized" : "")
+             );
+             try {
+                 url = new yacyURL(us, null);
+             } catch (MalformedURLException e) {
+                 // an unparsable URL cannot be matched against the blacklist
+                 url = null;
+             }
+             // pre-check the row's checkbox if the URL is already on the DHT blacklist;
+             // the null guard keeps a malformed URL from reaching the blacklist lookup
+             if ((url != null) && plasmaSwitchboard.urlBlacklist.isListed(plasmaURLPattern.BLACKLIST_DHT, url)) {
+                 prop.put("genUrlList_urlList_"+i+"_urlExists_urlhxChecked", "1");
+             }
+             i++;
+             if ((maxlines >= 0) && (i >= maxlines)) break;
+         }
+         // rows for url hashes reported as missing by the finding; only the hash is shown
+         iter = finding.miss().iterator();
+         while (iter.hasNext()) {
+             us = (String) iter.next();
+             prop.put("genUrlList_urlList_"+i+"_urlExists", "0");
+             prop.put("genUrlList_urlList_"+i+"_urlExists_urlhxCount", i);
+             prop.putHTML("genUrlList_urlList_"+i+"_urlExists_urlhxValue", us);
+             i++;
+         }
+         prop.put("genUrlList_urlList", i);
+         prop.putHTML("genUrlList_keyString", keystring);
+         prop.put("genUrlList_count", i);
+         putBlacklists(prop, listManager.getDirListing(listManager.listsPath));
+     }
+ }
+
+ private static void putBlacklists(serverObjects prop, String[] lists) {
+ prop.put("genUrlList_blacklists", lists.length);
+ for (int i=0; i
+
+
+ YaCy '#[clientname]#': Index Control
+ #%env/templates/metas.template%#
+
+
+ #%env/templates/header.template%#
+ #%env/templates/submenuIndexControl.template%#
+ Index Administration
+ The local index currently contains #[ucount]# URL references
+
+
+ #(urlhashsimilar)#::Sequential List of URL-Hashes:
+ #{rows}#
+ #{cols}##[urlHash]# #{/cols}#
+ #{/rows}#
+ #(/urlhashsimilar)#
+
+ #(genUrlProfile)#
+ ::No entry found for URL-hash #[urlhash]#
+ ::
+ URL String #[urlNormalform]#
+ Hash #[urlhash]#
+ Description #[urlDescr]#
+ Modified-Date #[moddate]#
+ Loaded-Date #[loaddate]#
+ Referrer #[referrer]#
+ Doctype #[doctype]#
+ Language #[language]#
+ Size #[size]#
+ Words #[wordCount]#
+
+
+ #(/genUrlProfile)#
+ #[result]#
+
+ #%env/templates/footer.template%#
+
+
\ No newline at end of file
diff --git a/htroot/IndexControlURLs_p.java b/htroot/IndexControlURLs_p.java
new file mode 100644
index 000000000..dfc46c03f
--- /dev/null
+++ b/htroot/IndexControlURLs_p.java
@@ -0,0 +1,200 @@
+// IndexControlURLs_p.java
+// -----------------------
+// (C) 2004-2007 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
+// first published 2004 on http://yacy.net
+//
+// This is a part of YaCy, a peer-to-peer based web search engine
+//
+// $LastChangedDate: 2007-11-14 01:15:28 +0000 (Mi, 14 Nov 2007) $
+// $LastChangedRevision: 4216 $
+// $LastChangedBy: orbiter $
+//
+// LICENSE
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.util.Iterator;
+
+import de.anomic.http.httpHeader;
+import de.anomic.index.indexURLEntry;
+import de.anomic.kelondro.kelondroBase64Order;
+import de.anomic.kelondro.kelondroRotateIterator;
+import de.anomic.plasma.plasmaSwitchboard;
+import de.anomic.server.serverObjects;
+import de.anomic.server.serverSwitch;
+import de.anomic.yacy.yacyURL;
+
+public class IndexControlURLs_p {
+
+ public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) {
+ // return variable that accumulates replacements
+ plasmaSwitchboard sb = (plasmaSwitchboard) env;
+
+ serverObjects prop = new serverObjects();
+
+ if (post == null || env == null) {
+ prop.put("urlstring", "");
+ prop.put("urlhash", "");
+ prop.put("result", "");
+ prop.put("ucount", Integer.toString(sb.wordIndex.loadedURL.size()));
+ prop.put("otherHosts", "");
+ return prop; // be save
+ }
+
+ // default values
+ String urlstring = post.get("urlstring", "").trim();
+ String urlhash = post.get("urlhash", "").trim();
+
+ if (!urlstring.startsWith("http://") &&
+ !urlstring.startsWith("https://")) { urlstring = "http://" + urlstring; }
+
+ prop.putHTML("urlstring", urlstring);
+ prop.put("urlhash", urlhash);
+ prop.put("result", " ");
+
+ if (post.containsKey("urlhashdeleteall")) {
+ //try {
+ int i = sb.removeAllUrlReferences(urlhash, true);
+ prop.put("result", "Deleted URL and " + i + " references from " + i + " word indexes.");
+ //} catch (IOException e) {
+ // prop.put("result", "Deleted nothing because the url-hash could not be resolved");
+ //}
+ }
+
+ if (post.containsKey("urlhashdelete")) {
+ indexURLEntry entry = sb.wordIndex.loadedURL.load(urlhash, null);
+ if (entry == null) {
+ prop.put("result", "No Entry for URL hash " + urlhash + "; nothing deleted.");
+ } else {
+ urlstring = entry.comp().url().toNormalform(false, true);
+ prop.put("urlstring", "");
+ sb.urlRemove(urlhash);
+ prop.putHTML("result", "Removed URL " + urlstring);
+ }
+ }
+
+ if (post.containsKey("urldelete")) {
+ try {
+ urlhash = (new yacyURL(urlstring, null)).hash();
+ } catch (MalformedURLException e) {
+ urlhash = null;
+ }
+ if ((urlhash == null) || (urlstring == null)) {
+ prop.put("result", "No input given; nothing deleted.");
+ } else {
+ sb.urlRemove(urlhash);
+ prop.putHTML("result", "Removed URL " + urlstring);
+ }
+ }
+
+ if (post.containsKey("urlstringsearch")) {
+ try {
+ yacyURL url = new yacyURL(urlstring, null);
+ urlhash = url.hash();
+ prop.put("urlhash", urlhash);
+ indexURLEntry entry = sb.wordIndex.loadedURL.load(urlhash, null);
+ if (entry == null) {
+ prop.putHTML("urlstring", "unknown url: " + urlstring);
+ prop.put("urlhash", "");
+ } else {
+ prop.putAll(genUrlProfile(sb, entry, urlhash));
+ }
+ } catch (MalformedURLException e) {
+ prop.putHTML("urlstring", "bad url: " + urlstring);
+ prop.put("urlhash", "");
+ }
+ }
+
+ if (post.containsKey("urlhashsearch")) {
+ indexURLEntry entry = sb.wordIndex.loadedURL.load(urlhash, null);
+ if (entry == null) {
+ prop.put("result", "No Entry for URL hash " + urlhash);
+ } else {
+ prop.putHTML("urlstring", entry.comp().url().toNormalform(false, true));
+ prop.putAll(genUrlProfile(sb, entry, urlhash));
+ }
+ }
+
+ // generate list
+ if (post.containsKey("urlhashsimilar")) {
+ try {
+ final Iterator entryIt = new kelondroRotateIterator(sb.wordIndex.loadedURL.entries(true, urlhash), new String(kelondroBase64Order.zero(urlhash.length())));
+ StringBuffer result = new StringBuffer("Sequential List of URL-Hashes: ");
+ indexURLEntry entry;
+ int i = 0;
+ int rows = 0, cols = 0;
+ prop.put("urlhashsimilar", "1");
+ while (entryIt.hasNext() && i < 256) {
+ entry = (indexURLEntry) entryIt.next();
+ if (entry == null) break;
+ prop.put("urlhashsimilar_rows_"+rows+"_cols_"+cols+"_urlHash", entry.hash());
+ cols++;
+ if (cols==8) {
+ prop.put("urlhashsimilar_rows_"+rows+"_cols", cols);
+ cols = 0;
+ rows++;
+ }
+ i++;
+ }
+ prop.put("urlhashsimilar_rows", rows);
+ prop.put("result", result.toString());
+ } catch (IOException e) {
+ prop.put("result", "No Entries for URL hash " + urlhash);
+ }
+ }
+
+ // insert constants
+ prop.putNum("ucount", sb.wordIndex.loadedURL.size());
+ // return rewrite properties
+ return prop;
+ }
+
+ private static serverObjects genUrlProfile(plasmaSwitchboard switchboard, indexURLEntry entry, String urlhash) {
+ serverObjects prop = new serverObjects();
+ if (entry == null) {
+ prop.put("genUrlProfile", "1");
+ prop.put("genUrlProfile_urlhash", urlhash);
+ return prop;
+ }
+ indexURLEntry.Components comp = entry.comp();
+ String referrer = null;
+ indexURLEntry le = (entry.referrerHash() == null) ? null : switchboard.wordIndex.loadedURL.load(entry.referrerHash(), null);
+ if (le == null) {
+ referrer = "";
+ } else {
+ referrer = le.comp().url().toNormalform(false, true);
+ }
+ if (comp.url() == null) {
+ prop.put("genUrlProfile", "1");
+ prop.put("genUrlProfile_urlhash", urlhash);
+ return prop;
+ }
+ prop.put("genUrlProfile", "2");
+ prop.putHTML("genUrlProfile_urlNormalform", comp.url().toNormalform(false, true));
+ prop.put("genUrlProfile_urlhash", urlhash);
+ prop.put("genUrlProfile_urlDescr", comp.title());
+ prop.put("genUrlProfile_moddate", entry.moddate());
+ prop.put("genUrlProfile_loaddate", entry.loaddate());
+ prop.putHTML("genUrlProfile_referrer", referrer);
+ prop.put("genUrlProfile_doctype", ""+entry.doctype());
+ prop.put("genUrlProfile_language", entry.language());
+ prop.put("genUrlProfile_size", entry.size());
+ prop.put("genUrlProfile_wordCount", entry.wordCount());
+ return prop;
+ }
+
+}
diff --git a/htroot/IndexControl_p.html b/htroot/IndexControl_p.html
deleted file mode 100644
index 4a61bb0c6..000000000
--- a/htroot/IndexControl_p.html
+++ /dev/null
@@ -1,178 +0,0 @@
-
-
-
- YaCy '#[clientname]#': Index Control
- #%env/templates/metas.template%#
-
-
- #%env/templates/header.template%#
- #%env/templates/submenuIndexControl.template%#
- Index Administration
-
-
- #(keyhashsimilar)#::Sequential List of Word-Hashes:
- #{rows}#
- #{cols}#
- #[wordHash]# #{/cols}#
- #{/rows}#
- #(/keyhashsimilar)#
- #(urlhashsimilar)#::Sequential List of URL-Hashes:
- #{rows}#
- #{cols}##[urlHash]# #{/cols}#
- #{/rows}#
- #(/urlhashsimilar)#
- #(genUrlList)#
- ::
- No URL entries related to this word hash #[keyHash]# .
- ::
- #[count]# URL entries related to this word hash #[keyHash]#
-
- #(/genUrlList)#
- #(genUrlProfile)#
- ::No entry found for URL-hash #[urlhash]#
- ::
- URL String #[urlNormalform]#
- Hash #[urlhash]#
- Description #[urlDescr]#
- Modified-Date #[moddate]#
- Loaded-Date #[loaddate]#
- Referrer #[referrer]#
- Doctype #[doctype]#
- Language #[language]#
- Size #[size]#
- Words #[wordCount]#
-
-
- #(/genUrlProfile)#
- #[result]#
-
- #%env/templates/footer.template%#
-
-
\ No newline at end of file
diff --git a/htroot/IndexControl_p.java b/htroot/IndexControl_p.java
deleted file mode 100644
index 2f83a4e49..000000000
--- a/htroot/IndexControl_p.java
+++ /dev/null
@@ -1,589 +0,0 @@
-// IndexControl_p.java
-// -----------------------
-// part of the AnomicHTTPD caching proxy
-// (C) by Michael Peter Christen; mc@anomic.de
-// first published on http://www.anomic.de
-// Frankfurt, Germany, 2004
-//
-// $LastChangedDate$
-// $LastChangedRevision$
-// $LastChangedBy$
-//
-// This program is free software; you can redistribute it and/or modify
-// it under the terms of the GNU General Public License as published by
-// the Free Software Foundation; either version 2 of the License, or
-// (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-// GNU General Public License for more details.
-//
-// You should have received a copy of the GNU General Public License
-// along with this program; if not, write to the Free Software
-// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-//
-// Using this software in any meaning (reading, learning, copying, compiling,
-// running) means that you agree that the Author(s) is (are) not responsible
-// for cost, loss of data or any harm that may be caused directly or indirectly
-// by usage of this softare or this documentation. The usage of this software
-// is on your own risk. The installation and usage (starting/running) of this
-// software may allow other people or application to access your computer and
-// any attached devices and is highly dependent on the configuration of the
-// software which must be done by the user of the software; the author(s) is
-// (are) also not responsible for proper configuration and usage of the
-// software, even if provoked by documentation provided together with
-// the software.
-//
-// Any changes to this file according to the GPL as documented in the file
-// gpl.txt aside this file in the shipment you received can be done to the
-// lines that follows this copyright notice here, but changes must not be
-// done inside the copyright notive above. A re-distribution must contain
-// the intact and unchanged copyright notice.
-// Contributions and changes to the program code must be marked as such.
-
-// You must compile this file with
-// javac -classpath .:../classes IndexControl_p.java
-// if the shell's current path is HTROOT
-
-import java.io.File;
-import java.io.FileWriter;
-import java.io.IOException;
-import java.io.PrintWriter;
-import java.net.MalformedURLException;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.Map;
-import java.util.Set;
-import java.util.TreeMap;
-
-import de.anomic.data.listManager;
-import de.anomic.http.httpHeader;
-import de.anomic.index.indexContainer;
-import de.anomic.index.indexRWIEntry;
-import de.anomic.index.indexURLEntry;
-import de.anomic.kelondro.kelondroBase64Order;
-import de.anomic.kelondro.kelondroRotateIterator;
-import de.anomic.plasma.plasmaCondenser;
-import de.anomic.plasma.plasmaSwitchboard;
-import de.anomic.plasma.urlPattern.abstractURLPattern;
-import de.anomic.plasma.urlPattern.plasmaURLPattern;
-import de.anomic.server.serverObjects;
-import de.anomic.server.serverSwitch;
-import de.anomic.yacy.yacyClient;
-import de.anomic.yacy.yacyCore;
-import de.anomic.yacy.yacySeed;
-import de.anomic.yacy.yacyURL;
-
-public class IndexControl_p {
-
- public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) {
- // return variable that accumulates replacements
- plasmaSwitchboard switchboard = (plasmaSwitchboard) env;
-
- serverObjects prop = new serverObjects();
-
- if (post == null || env == null) {
- prop.put("keystring", "");
- prop.put("keyhash", "");
- prop.put("urlstring", "");
- prop.put("urlhash", "");
- prop.put("result", "");
- prop.put("wcount", Integer.toString(switchboard.wordIndex.size()));
- prop.put("ucount", Integer.toString(switchboard.wordIndex.loadedURL.size()));
- prop.put("otherHosts", "");
- listHosts(prop, "");
- return prop; // be save
- }
-
- // default values
- String keystring = post.get("keystring", "").trim();
- String keyhash = post.get("keyhash", "").trim();
- String urlstring = post.get("urlstring", "").trim();
- String urlhash = post.get("urlhash", "").trim();
-
- if (!urlstring.startsWith("http://") &&
- !urlstring.startsWith("https://")) { urlstring = "http://" + urlstring; }
-
- prop.putHTML("keystring", keystring);
- prop.put("keyhash", keyhash);
- prop.putHTML("urlstring", urlstring);
- prop.put("urlhash", urlhash);
- prop.put("result", " ");
-
- // read values from checkboxes
- String[] urlx = post.getAll("urlhx.*");
- boolean delurl = post.containsKey("delurl");
- boolean delurlref = post.containsKey("delurlref");
-
- // delete word
- if (post.containsKey("keyhashdeleteall")) {
- if (delurl || delurlref) {
- // generate an urlx array
- indexContainer index = null;
- index = switchboard.wordIndex.getContainer(keyhash, null);
- Iterator en = index.entries();
- int i = 0;
- urlx = new String[index.size()];
- while (en.hasNext()) {
- urlx[i++] = ((indexRWIEntry) en.next()).urlHash();
- }
- index = null;
- }
- if (delurlref) {
- for (int i = 0; i < urlx.length; i++) switchboard.removeAllUrlReferences(urlx[i], true);
- }
- if (delurl || delurlref) {
- for (int i = 0; i < urlx.length; i++) {
- switchboard.urlRemove(urlx[i]);
- }
- }
- switchboard.wordIndex.deleteContainer(keyhash);
- post.remove("keyhashdeleteall");
- if (keystring.length() > 0 &&
- plasmaCondenser.word2hash(keystring).equals(keyhash)) {
- post.put("keystringsearch", "generated");
- } else {
- post.put("keyhashsearch", "generated");
- }
- }
-
- // delete selected URLs
- if (post.containsKey("keyhashdelete")) {
- if (delurlref) {
- for (int i = 0; i < urlx.length; i++) switchboard.removeAllUrlReferences(urlx[i], true);
- }
- if (delurl || delurlref) {
- for (int i = 0; i < urlx.length; i++) {
- switchboard.urlRemove(urlx[i]);
- }
- }
- Set urlHashes = new HashSet();
- for (int i = 0; i < urlx.length; i++) urlHashes.add(urlx[i]);
- switchboard.wordIndex.removeEntries(keyhash, urlHashes);
- // this shall lead to a presentation of the list; so handle that the remaining program
- // thinks that it was called for a list presentation
- post.remove("keyhashdelete");
- if (keystring.length() > 0 && plasmaCondenser.word2hash(keystring).equals(keyhash)) {
- post.put("keystringsearch", "generated");
- } else {
- post.put("keyhashsearch", "generated");
-// prop.put("result", "Delete of relation of url hashes " + result + " to key hash " + keyhash);
- }
- }
-
- if (post.containsKey("urlhashdeleteall")) {
- //try {
- int i = switchboard.removeAllUrlReferences(urlhash, true);
- prop.put("result", "Deleted URL and " + i + " references from " + i + " word indexes.");
- //} catch (IOException e) {
- // prop.put("result", "Deleted nothing because the url-hash could not be resolved");
- //}
- }
-
- if (post.containsKey("urlhashdelete")) {
- indexURLEntry entry = switchboard.wordIndex.loadedURL.load(urlhash, null);
- if (entry == null) {
- prop.put("result", "No Entry for URL hash " + urlhash + "; nothing deleted.");
- } else {
- urlstring = entry.comp().url().toNormalform(false, true);
- prop.put("urlstring", "");
- switchboard.urlRemove(urlhash);
- prop.putHTML("result", "Removed URL " + urlstring);
- }
- }
-
- if (post.containsKey("urldelete")) {
- try {
- urlhash = (new yacyURL(urlstring, null)).hash();
- } catch (MalformedURLException e) {
- urlhash = null;
- }
- if ((urlhash == null) || (urlstring == null)) {
- prop.put("result", "No input given; nothing deleted.");
- } else {
- switchboard.urlRemove(urlhash);
- prop.putHTML("result", "Removed URL " + urlstring);
- }
- }
-
- if (post.containsKey("keystringsearch")) {
- keyhash = plasmaCondenser.word2hash(keystring);
- prop.put("keyhash", keyhash);
- prop.put("urlstring", "");
- prop.put("urlhash", "");
- prop.putAll(genUrlList(switchboard, keyhash, keystring));
- }
-
- if (post.containsKey("keyhashsearch")) {
- if (keystring.length() == 0 || !plasmaCondenser.word2hash(keystring).equals(keyhash)) {
- prop.put("keystring", "<not possible to compute word from hash>");
- }
- prop.put("urlstring", "");
- prop.put("urlhash", "");
- prop.putAll(genUrlList(switchboard, keyhash, ""));
- }
-
- // transfer to other peer
- if (post.containsKey("keyhashtransfer")) {
- if (keystring.length() == 0 || !plasmaCondenser.word2hash(keystring).equals(keyhash)) {
- prop.put("keystring", "<not possible to compute word from hash>");
- }
-
- // find host & peer
- String host = post.get("host", ""); // get host from input field
- yacySeed seed = null;
- if (host.length() != 0) {
- if (host.length() == 12) {
- // the host string is a peer hash
- seed = yacyCore.seedDB.getConnected(host);
- } else {
- // the host string can be a host name
- seed = yacyCore.seedDB.lookupByName(host);
- }
- } else {
- host = post.get("hostHash", ""); // if input field is empty, get from select box
- seed = yacyCore.seedDB.getConnected(host);
- }
-
- // prepare index
- prop.put("urlstring", "");
- prop.put("urlhash", "");
- indexContainer index;
- String result;
- long starttime = System.currentTimeMillis();
- index = switchboard.wordIndex.getContainer(keyhash, null);
- // built urlCache
- Iterator urlIter = index.entries();
- HashMap knownURLs = new HashMap();
- HashSet unknownURLEntries = new HashSet();
- indexRWIEntry iEntry;
- indexURLEntry lurl;
- while (urlIter.hasNext()) {
- iEntry = (indexRWIEntry) urlIter.next();
- lurl = switchboard.wordIndex.loadedURL.load(iEntry.urlHash(), null);
- if (lurl == null) {
- unknownURLEntries.add(iEntry.urlHash());
- urlIter.remove();
- } else {
- knownURLs.put(iEntry.urlHash(), lurl);
- }
- }
-
- // transport to other peer
- String gzipBody = switchboard.getConfig("indexControl.gzipBody","false");
- int timeout = (int) switchboard.getConfigLong("indexControl.timeout",60000);
- HashMap resultObj = yacyClient.transferIndex(
- seed,
- new indexContainer[]{index},
- knownURLs,
- "true".equalsIgnoreCase(gzipBody),
- timeout);
- result = (String) resultObj.get("result");
- prop.put("result", (result == null) ? ("Successfully transferred " + knownURLs.size() + " words in " + ((System.currentTimeMillis() - starttime) / 1000) + " seconds, " + unknownURLEntries + " URL not found") : result);
- index = null;
- }
-
- // generate list
- if (post.containsKey("keyhashsimilar")) {
- final Iterator containerIt = switchboard.wordIndex.indexContainerSet(keyhash, false, true, 256).iterator();
- indexContainer container;
- int i = 0;
- int rows = 0, cols = 0;
- prop.put("keyhashsimilar", "1");
- while (containerIt.hasNext() && i < 256) {
- container = (indexContainer) containerIt.next();
- prop.put("keyhashsimilar_rows_"+rows+"_cols_"+cols+"_wordHash", container.getWordHash());
- cols++;
- if (cols==8) {
- prop.put("keyhashsimilar_rows_"+rows+"_cols", cols);
- cols = 0;
- rows++;
- }
- i++;
- }
- prop.put("keyhashsimilar_rows_"+rows+"_cols", cols);
- prop.put("keyhashsimilar_rows", rows + 1);
- prop.put("result", "");
- }
-
- if (post.containsKey("urlstringsearch")) {
- try {
- yacyURL url = new yacyURL(urlstring, null);
- urlhash = url.hash();
- prop.put("urlhash", urlhash);
- indexURLEntry entry = switchboard.wordIndex.loadedURL.load(urlhash, null);
- if (entry == null) {
- prop.putHTML("urlstring", "unknown url: " + urlstring);
- prop.put("urlhash", "");
- } else {
- prop.putAll(genUrlProfile(switchboard, entry, urlhash));
- }
- } catch (MalformedURLException e) {
- prop.putHTML("urlstring", "bad url: " + urlstring);
- prop.put("urlhash", "");
- }
- }
-
- if (post.containsKey("urlhashsearch")) {
- indexURLEntry entry = switchboard.wordIndex.loadedURL.load(urlhash, null);
- if (entry == null) {
- prop.put("result", "No Entry for URL hash " + urlhash);
- } else {
- prop.putHTML("urlstring", entry.comp().url().toNormalform(false, true));
- prop.putAll(genUrlProfile(switchboard, entry, urlhash));
- }
- }
-
- // generate list
- if (post.containsKey("urlhashsimilar")) {
- try {
- final Iterator entryIt = new kelondroRotateIterator(switchboard.wordIndex.loadedURL.entries(true, urlhash), new String(kelondroBase64Order.zero(urlhash.length())));
- StringBuffer result = new StringBuffer("Sequential List of URL-Hashes: ");
- indexURLEntry entry;
- int i = 0;
- int rows = 0, cols = 0;
- prop.put("urlhashsimilar", "1");
- while (entryIt.hasNext() && i < 256) {
- entry = (indexURLEntry) entryIt.next();
- if (entry == null) break;
- prop.put("urlhashsimilar_rows_"+rows+"_cols_"+cols+"_urlHash", entry.hash());
- cols++;
- if (cols==8) {
- prop.put("urlhashsimilar_rows_"+rows+"_cols", cols);
- cols = 0;
- rows++;
- }
- i++;
- }
- prop.put("urlhashsimilar_rows", rows);
- prop.put("result", result.toString());
- } catch (IOException e) {
- prop.put("result", "No Entries for URL hash " + urlhash);
- }
- }
-
- if (post.containsKey("blacklist")) {
- String blacklist = post.get("blacklist", "");
- Set urlHashes = new HashSet();
- if (post.containsKey("blacklisturls")) {
- PrintWriter pw;
- try {
- String[] supportedBlacklistTypes = env.getConfig("BlackLists.types", "").split(",");
- pw = new PrintWriter(new FileWriter(new File(listManager.listsPath, blacklist), true));
- yacyURL url;
- for (int i=0; i 0) {
- Iterator e = yacyCore.dhtAgent.getAcceptRemoteIndexSeeds(startHash);
- while (e.hasNext()) {
- seed = (yacySeed) e.next();
- if (seed != null) {
- prop.put("hosts_" + hc + "_hosthash", seed.hash);
- prop.putHTML("hosts_" + hc + "_hostname", seed.hash + " " + seed.get(yacySeed.NAME, "nameless"));
- hc++;
- }
- }
- prop.put("hosts", hc);
- } else {
- prop.put("hosts", "0");
- }
- }
-
- public static serverObjects genUrlProfile(plasmaSwitchboard switchboard, indexURLEntry entry, String urlhash) {
- serverObjects prop = new serverObjects();
- if (entry == null) {
- prop.put("genUrlProfile", "1");
- prop.put("genUrlProfile_urlhash", urlhash);
- return prop;
- }
- indexURLEntry.Components comp = entry.comp();
- String referrer = null;
- indexURLEntry le = (entry.referrerHash() == null) ? null : switchboard.wordIndex.loadedURL.load(entry.referrerHash(), null);
- if (le == null) {
- referrer = "";
- } else {
- referrer = le.comp().url().toNormalform(false, true);
- }
- if (comp.url() == null) {
- prop.put("genUrlProfile", "1");
- prop.put("genUrlProfile_urlhash", urlhash);
- return prop;
- }
- prop.put("genUrlProfile", "2");
- prop.putHTML("genUrlProfile_urlNormalform", comp.url().toNormalform(false, true));
- prop.put("genUrlProfile_urlhash", urlhash);
- prop.put("genUrlProfile_urlDescr", comp.title());
- prop.put("genUrlProfile_moddate", entry.moddate());
- prop.put("genUrlProfile_loaddate", entry.loaddate());
- prop.putHTML("genUrlProfile_referrer", referrer);
- prop.put("genUrlProfile_doctype", ""+entry.doctype());
- prop.put("genUrlProfile_language", entry.language());
- prop.put("genUrlProfile_size", entry.size());
- prop.put("genUrlProfile_wordCount", entry.wordCount());
- return prop;
- }
-
- public static serverObjects genUrlList(plasmaSwitchboard switchboard, String keyhash, String keystring) {
- // search for a word hash and generate a list of url links
- serverObjects prop = new serverObjects();
- indexContainer index = null;
- try {
- index = switchboard.wordIndex.getContainer(keyhash, null);
-
- prop.put("genUrlList_keyHash", keyhash);
-
- if ((index == null) || (index.size() == 0)) {
- prop.put("genUrlList", "1");
- prop.put("genUrlList_count", "0");
- } else {
- final Iterator en = index.entries();
- prop.put("genUrlList", "2");
- int i = 0;
- // first generate a new map where the urls are sorted (not by hash but by the url text)
- final TreeMap tm = new TreeMap();
- indexRWIEntry xi;
- indexURLEntry le;
- Object[] wu;
- while (en.hasNext()) {
- xi = (indexRWIEntry) en.next();
- le = switchboard.wordIndex.loadedURL.load(xi.urlHash(), null);
- wu = new Object[]{xi, le};
- if (le == null) {
- tm.put(xi.urlHash(), wu);
- } else {
- tm.put(le.comp().url().toNormalform(false, true), wu);
- }
- }
-
- yacyURL url;
- final Iterator iter = tm.entrySet().iterator();
- Map.Entry entry;
- String us;
- while (iter.hasNext()) {
- entry = (Map.Entry) iter.next();
- us = (String) entry.getKey();
- wu = (Object[]) entry.getValue();
- xi = (indexRWIEntry) wu[0];
- le = (indexURLEntry) wu[1];
- if (us.equals(xi.urlHash())) {
- prop.put("genUrlList_urlList_"+i+"_urlExists", "0");
- prop.put("genUrlList_urlList_"+i+"_urlExists_urlhxCount", i);
- prop.putHTML("genUrlList_urlList_"+i+"_urlExists_urlhxValue", xi.urlHash());
- } else {
- prop.put("genUrlList_urlList_"+i+"_urlExists", "1");
- prop.put("genUrlList_urlList_"+i+"_urlExists_urlhxCount", i);
- prop.putHTML("genUrlList_urlList_"+i+"_urlExists_urlhxValue", xi.urlHash());
- prop.putHTML("genUrlList_urlList_"+i+"_urlExists_keyString", keystring);
- prop.put("genUrlList_urlList_"+i+"_urlExists_keyHash", keyhash);
- prop.putHTML("genUrlList_urlList_"+i+"_urlExists_urlString", us);
- prop.putHTML("genUrlList_urlList_"+i+"_urlExists_urlStringShort", (us.length() > 60) ? (us.substring(0, 60) + "...") : us);
- prop.put("genUrlList_urlList_"+i+"_urlExists_pos", xi.posintext());
- prop.put("genUrlList_urlList_"+i+"_urlExists_phrase", xi.posofphrase());
- prop.put("genUrlList_urlList_"+i+"_urlExists_urlcomps", xi.urlcomps());
- prop.put("genUrlList_urlList_"+i+"_urlExists_urllength", xi.urllength());
- prop.put("genUrlList_urlList_"+i+"_urlExists_props",
- ((xi.flags().get(plasmaCondenser.flag_cat_hasimage)) ? "contains images, " : "") +
- ((xi.flags().get(plasmaCondenser.flag_cat_hasaudio)) ? "contains audio, " : "") +
- ((xi.flags().get(plasmaCondenser.flag_cat_hasvideo)) ? "contains video, " : "") +
- ((xi.flags().get(plasmaCondenser.flag_cat_hasapp)) ? "contains applications, " : "") +
- ((xi.flags().get(indexRWIEntry.flag_app_url)) ? "appears in url, " : "") +
- ((xi.flags().get(indexRWIEntry.flag_app_descr)) ? "appears in description, " : "") +
- ((xi.flags().get(indexRWIEntry.flag_app_author)) ? "appears in author, " : "") +
- ((xi.flags().get(indexRWIEntry.flag_app_tags)) ? "appears in tags, " : "") +
- ((xi.flags().get(indexRWIEntry.flag_app_reference)) ? "appears in reference, " : "") +
- ((xi.flags().get(indexRWIEntry.flag_app_emphasized)) ? "appears emphasized" : "")
- );
- prop.put("genUrlList_urlList_"+i+"_urlExists_phrase", xi.posofphrase());
- prop.put("genUrlList_urlList_"+i+"_urlExists_phrase", xi.posofphrase());
- url = new yacyURL(us, null);
- if (plasmaSwitchboard.urlBlacklist.isListed(plasmaURLPattern.BLACKLIST_DHT, url)) {
- prop.put("genUrlList_urlList_"+i+"_urlExists_urlhxChecked", "1");
- }
- }
- i++;
- }
- prop.put("genUrlList_urlList", i);
- prop.putHTML("genUrlList_keyString", keystring);
- prop.put("genUrlList_count", i);
- putBlacklists(prop, listManager.getDirListing(listManager.listsPath));
- }
- index = null;
- return prop;
- } catch (IOException e) {
- return prop;
- } finally {
- if (index != null) index = null;
- }
- }
-
- private static void putBlacklists(serverObjects prop, String[] lists) {
- prop.put("genUrlList_blacklists", lists.length);
- for (int i=0; i