From 55da87121136bb5edc22c063207cc663d1582824 Mon Sep 17 00:00:00 2001 From: orbiter Date: Thu, 15 Nov 2007 03:03:18 +0000 Subject: [PATCH] preparations for better ranking: better debugging of index properties to do this, the index administration interface was extended. It is now possible to select parts of a index. See properties shown in interface after a word search for details. git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4218 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- build.properties | 2 +- htroot/IndexControlRWIs_p.html | 208 +++++++ htroot/IndexControlRWIs_p.java | 456 ++++++++++++++ htroot/IndexControlURLs_p.html | 64 ++ htroot/IndexControlURLs_p.java | 200 ++++++ htroot/IndexControl_p.html | 178 ------ htroot/IndexControl_p.java | 589 ------------------ htroot/env/templates/header.template | 2 +- .../templates/submenuIndexControl.template | 3 +- source/de/anomic/plasma/plasmaCondenser.java | 18 +- source/de/anomic/plasma/plasmaWordIndex.java | 116 ++++ 11 files changed, 1056 insertions(+), 780 deletions(-) create mode 100644 htroot/IndexControlRWIs_p.html create mode 100644 htroot/IndexControlRWIs_p.java create mode 100644 htroot/IndexControlURLs_p.html create mode 100644 htroot/IndexControlURLs_p.java delete mode 100644 htroot/IndexControl_p.html delete mode 100644 htroot/IndexControl_p.java diff --git a/build.properties b/build.properties index 7984715a0..f1d02eeae 100644 --- a/build.properties +++ b/build.properties @@ -3,7 +3,7 @@ javacSource=1.4 javacTarget=1.4 # Release Configuration -releaseVersion=0.552 +releaseVersion=0.553 releaseFile=yacy_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz proReleaseFile=yacy_pro_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz releaseFileParentDir=yacy diff --git a/htroot/IndexControlRWIs_p.html b/htroot/IndexControlRWIs_p.html new file mode 100644 index 000000000..1600bcd31 --- /dev/null +++ b/htroot/IndexControlRWIs_p.html @@ -0,0 +1,208 @@ + + + + YaCy '#[clientname]#': Index Control + 
#%env/templates/metas.template%# + + + #%env/templates/header.template%# + #%env/templates/submenuIndexControl.template%# +

Index Administration

+

The local index currently contains #[wcount]# reverse word indexes

+
+
RWI Retrieval +
+
Retrieve by Word:
+
+ +
+ +
Retrieve by Word-Hash:
+
+ + +
+
+
+
+ + #(searchresult)#:: +

No entry for word '#[word]#'

:: +

No entry for word hash #[wordhash]#

:: +

Search result: +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
 total URLsappearance inin link typedocument type
 referencedescriptionauthortagsurlemphasizedimageaudiovideoappindex of
 #[allurl]##[reference]##[description]##[author]##[tag]##[url]##[emphasized]##[image]##[audio]##[video]##[app]##[indexof]#
Selection
+

+
Display URL List +
+
Number of lines:
+
all lines   + 100   + 1000   +
+
Ordering of list:
+
by URL   + by URL Hash   + +
+
+
+
+
+
+
Transfer RWI to other Peer +
+
Transfer by Word-Hash:
+
+ +
+
to Peer:
+
select or enter a hash: +
+
+
+
+ #(/searchresult)# + + #(keyhashsimilar)#::Sequential List of Word-Hashes:
+ #{rows}# + #{cols}# + #[wordHash]##{/cols}#
+ #{/rows}# + #(/keyhashsimilar)# + + #(genUrlList)# + :: + No URL entries related to this word hash #[keyHash]#. + :: +

#[count]# URL entries related to this word hash #[keyHash]#

+
+

+ + + #{urlList}# + + #(urlExists)# + + + + + + + + + + :: + + + + + + + + + #(/urlExists)# + + #{/urlList}# +
 hashurlposphraseurlcompsurllengthprops
+ <unresolved URL Hash> + #[urlhxValue]##[urlStringShort]##[pos]##[phrase]##[urlcomps]##[urllength]##[props]#
+ + + + + + +
+

Reference Deletion +
+
+
delete also the referenced URL itself (reasonable and recommended, may produce unresolved references
+ at other word indexes but they do not harm) +
+
+
for every resolvable and deleted URL reference, delete the same reference at every other word where
+ the reference exists (very extensive, but prevents further unresolved references) +
+
 
+
+
+   (= delete Word) +
+
+
+
Blacklist Extension +
+
+ +
+
+
+ +
+
+
+
+
#(/genUrlList)# + #[result]# + + #%env/templates/footer.template%# + + \ No newline at end of file diff --git a/htroot/IndexControlRWIs_p.java b/htroot/IndexControlRWIs_p.java new file mode 100644 index 000000000..47f823e6d --- /dev/null +++ b/htroot/IndexControlRWIs_p.java @@ -0,0 +1,456 @@ +// IndexControlRWIs_p.java +// ----------------------- +// (C) 2004-2007 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany +// first published 2004 on http://yacy.net +// +// This is a part of YaCy, a peer-to-peer based web search engine +// +// $LastChangedDate: 2007-11-14 01:15:28 +0000 (Mi, 14 Nov 2007) $ +// $LastChangedRevision: 4216 $ +// $LastChangedBy: orbiter $ +// +// LICENSE +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. 
+// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.io.PrintWriter; +import java.net.MalformedURLException; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.Set; + +import de.anomic.data.listManager; +import de.anomic.http.httpHeader; +import de.anomic.index.indexContainer; +import de.anomic.index.indexRWIEntry; +import de.anomic.index.indexURLEntry; +import de.anomic.kelondro.kelondroBitfield; +import de.anomic.plasma.plasmaCondenser; +import de.anomic.plasma.plasmaSwitchboard; +import de.anomic.plasma.plasmaWordIndex; +import de.anomic.plasma.urlPattern.abstractURLPattern; +import de.anomic.plasma.urlPattern.plasmaURLPattern; +import de.anomic.server.serverObjects; +import de.anomic.server.serverSwitch; +import de.anomic.yacy.yacyClient; +import de.anomic.yacy.yacyCore; +import de.anomic.yacy.yacySeed; +import de.anomic.yacy.yacyURL; + +public class IndexControlRWIs_p { + + public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) { + // return variable that accumulates replacements + plasmaSwitchboard sb = (plasmaSwitchboard) env; + serverObjects prop = new serverObjects(); + + prop.putHTML("keystring", ""); + prop.put("keyhash", ""); + prop.put("result", ""); + + // switch off all optional forms/lists + prop.put("searchresult", 0); + prop.put("keyhashsimilar", 0); + prop.put("genUrlList", 0); + + if (post != null) { + // default values + String keystring = post.get("keystring", "").trim(); + String keyhash = post.get("keyhash", "").trim(); + int sortorder = post.getInt("ordering", 0); + prop.putHTML("keystring", keystring); + prop.put("keyhash", keyhash); + + // read values from checkboxes + String[] urlx = 
post.getAll("urlhx.*"); + boolean delurl = post.containsKey("delurl"); + boolean delurlref = post.containsKey("delurlref"); + + if (post.containsKey("keystringsearch")) { + keyhash = plasmaCondenser.word2hash(keystring); + prop.put("keyhash", keyhash); + final plasmaWordIndex.Finding finding = genSearchresult(prop, sb, keyhash, null, false, sortorder); + if (finding.size() == 0) { + prop.put("searchresult", 1); + prop.put("searchresult_word", keystring); + } + } + + if (post.containsKey("keyhashsearch")) { + if (keystring.length() == 0 || !plasmaCondenser.word2hash(keystring).equals(keyhash)) { + prop.put("keystring", "<not possible to compute word from hash>"); + } + final plasmaWordIndex.Finding finding = genSearchresult(prop, sb, keyhash, null, false, sortorder); + if (finding.size() == 0) { + prop.put("searchresult", 2); + prop.put("searchresult_wordhash", keyhash); + } + } + + // delete word + if (post.containsKey("keyhashdeleteall")) { + if (delurl || delurlref) { + // generate an urlx array + indexContainer index = null; + index = sb.wordIndex.getContainer(keyhash, null); + Iterator en = index.entries(); + int i = 0; + urlx = new String[index.size()]; + while (en.hasNext()) { + urlx[i++] = ((indexRWIEntry) en.next()).urlHash(); + } + index = null; + } + if (delurlref) { + for (int i = 0; i < urlx.length; i++) sb.removeAllUrlReferences(urlx[i], true); + } + if (delurl || delurlref) { + for (int i = 0; i < urlx.length; i++) { + sb.urlRemove(urlx[i]); + } + } + sb.wordIndex.deleteContainer(keyhash); + post.remove("keyhashdeleteall"); + post.put("urllist", "generated"); + } + + // delete selected URLs + if (post.containsKey("keyhashdelete")) { + if (delurlref) { + for (int i = 0; i < urlx.length; i++) sb.removeAllUrlReferences(urlx[i], true); + } + if (delurl || delurlref) { + for (int i = 0; i < urlx.length; i++) { + sb.urlRemove(urlx[i]); + } + } + Set urlHashes = new HashSet(); + for (int i = 0; i < urlx.length; i++) urlHashes.add(urlx[i]); + 
sb.wordIndex.removeEntries(keyhash, urlHashes); + // this shall lead to a presentation of the list; so handle that the remaining program + // thinks that it was called for a list presentation + post.remove("keyhashdelete"); + post.put("urllist", "generated"); + } + + if (post.containsKey("urllist")) { + if (keystring.length() == 0 || !plasmaCondenser.word2hash(keystring).equals(keyhash)) { + prop.put("keystring", "<not possible to compute word from hash>"); + } + kelondroBitfield flags = compileFlags(post); + int count = (post.get("lines", "all").equals("all")) ? -1 : post.getInt("lines", -1); + final plasmaWordIndex.Finding finding = genSearchresult(prop, sb, keyhash, flags, true, sortorder); + genURLList(prop, keyhash, keystring, finding, flags, count, sortorder); + } + + // transfer to other peer + if (post.containsKey("keyhashtransfer")) { + if (keystring.length() == 0 || !plasmaCondenser.word2hash(keystring).equals(keyhash)) { + prop.put("keystring", "<not possible to compute word from hash>"); + } + + // find host & peer + String host = post.get("host", ""); // get host from input field + yacySeed seed = null; + if (host.length() != 0) { + if (host.length() == 12) { + // the host string is a peer hash + seed = yacyCore.seedDB.getConnected(host); + } else { + // the host string can be a host name + seed = yacyCore.seedDB.lookupByName(host); + } + } else { + host = post.get("hostHash", ""); // if input field is empty, get from select box + seed = yacyCore.seedDB.getConnected(host); + } + + // prepare index + indexContainer index; + String result; + long starttime = System.currentTimeMillis(); + index = sb.wordIndex.getContainer(keyhash, null); + // built urlCache + Iterator urlIter = index.entries(); + HashMap knownURLs = new HashMap(); + HashSet unknownURLEntries = new HashSet(); + indexRWIEntry iEntry; + indexURLEntry lurl; + while (urlIter.hasNext()) { + iEntry = (indexRWIEntry) urlIter.next(); + lurl = sb.wordIndex.loadedURL.load(iEntry.urlHash(), null); + 
if (lurl == null) { + unknownURLEntries.add(iEntry.urlHash()); + urlIter.remove(); + } else { + knownURLs.put(iEntry.urlHash(), lurl); + } + } + + // transport to other peer + String gzipBody = sb.getConfig("indexControl.gzipBody","false"); + int timeout = (int) sb.getConfigLong("indexControl.timeout",60000); + HashMap resultObj = yacyClient.transferIndex( + seed, + new indexContainer[]{index}, + knownURLs, + "true".equalsIgnoreCase(gzipBody), + timeout); + result = (String) resultObj.get("result"); + prop.put("result", (result == null) ? ("Successfully transferred " + knownURLs.size() + " words in " + ((System.currentTimeMillis() - starttime) / 1000) + " seconds, " + unknownURLEntries + " URL not found") : result); + index = null; + } + + // generate list + if (post.containsKey("keyhashsimilar")) { + final Iterator containerIt = sb.wordIndex.indexContainerSet(keyhash, false, true, 256).iterator(); + indexContainer container; + int i = 0; + int rows = 0, cols = 0; + prop.put("keyhashsimilar", "1"); + while (containerIt.hasNext() && i < 256) { + container = (indexContainer) containerIt.next(); + prop.put("keyhashsimilar_rows_"+rows+"_cols_"+cols+"_wordHash", container.getWordHash()); + cols++; + if (cols==8) { + prop.put("keyhashsimilar_rows_"+rows+"_cols", cols); + cols = 0; + rows++; + } + i++; + } + prop.put("keyhashsimilar_rows_"+rows+"_cols", cols); + prop.put("keyhashsimilar_rows", rows + 1); + prop.put("result", ""); + } + + if (post.containsKey("blacklist")) { + String blacklist = post.get("blacklist", ""); + Set urlHashes = new HashSet(); + if (post.containsKey("blacklisturls")) { + PrintWriter pw; + try { + String[] supportedBlacklistTypes = env.getConfig("BlackLists.types", "").split(","); + pw = new PrintWriter(new FileWriter(new File(listManager.listsPath, blacklist), true)); + yacyURL url; + for (int i=0; i 0) { + Iterator e = yacyCore.dhtAgent.getAcceptRemoteIndexSeeds(startHash); + while (e.hasNext()) { + seed = (yacySeed) e.next(); + if (seed != 
null) { + prop.put("searchresult_hosts_" + hc + "_hosthash", seed.hash); + prop.putHTML("searchresult_hosts_" + hc + "_hostname", seed.hash + " " + seed.get(yacySeed.NAME, "nameless")); + hc++; + } + } + prop.put("searchresult_hosts", hc); + } else { + prop.put("searchresult_hosts", "0"); + } + } + + private static plasmaWordIndex.Finding genSearchresult(serverObjects prop, plasmaSwitchboard sb, String keyhash, kelondroBitfield filter, boolean urlfetch, int sortorder) { + final plasmaWordIndex.Finding finding = sb.wordIndex.retrieveURLs(keyhash, filter, false, -1, urlfetch, sortorder); + if (finding.size() == 0) { + prop.put("searchresult", 2); + prop.put("searchresult_wordhash", keyhash); + } else { + prop.put("searchresult", 3); + prop.put("searchresult_allurl", finding.size()); + prop.put("searchresult_reference", finding.flagcount()[indexRWIEntry.flag_app_reference]); + prop.put("searchresult_description", finding.flagcount()[indexRWIEntry.flag_app_descr]); + prop.put("searchresult_author", finding.flagcount()[indexRWIEntry.flag_app_author]); + prop.put("searchresult_tag", finding.flagcount()[indexRWIEntry.flag_app_tags]); + prop.put("searchresult_url", finding.flagcount()[indexRWIEntry.flag_app_url]); + prop.put("searchresult_emphasized", finding.flagcount()[indexRWIEntry.flag_app_emphasized]); + prop.put("searchresult_image", finding.flagcount()[plasmaCondenser.flag_cat_hasimage]); + prop.put("searchresult_audio", finding.flagcount()[plasmaCondenser.flag_cat_hasaudio]); + prop.put("searchresult_video", finding.flagcount()[plasmaCondenser.flag_cat_hasvideo]); + prop.put("searchresult_app", finding.flagcount()[plasmaCondenser.flag_cat_hasapp]); + prop.put("searchresult_indexof", finding.flagcount()[plasmaCondenser.flag_cat_indexof]); + } + return finding; + } + + private static void genURLList(serverObjects prop, String keyhash, String keystring, plasmaWordIndex.Finding finding, kelondroBitfield flags, int maxlines, int ordering) { + // search for a word hash 
and generate a list of url links + prop.put("genUrlList_keyHash", keyhash); + + if (finding.size() == 0) { + prop.put("genUrlList", 1); + prop.put("genUrlList_count", 0); + prop.put("searchresult", 2); + } else { + prop.put("genUrlList", 2); + prop.put("searchresult", 3); + prop.put("genUrlList_flags", flags.exportB64()); + prop.put("genUrlList_lines", maxlines); + prop.put("genUrlList_ordering", ordering); + int i = 0; + yacyURL url; + Iterator iter = finding.hit(); + plasmaWordIndex.Item entry; + String us; + while (iter.hasNext()) { + entry = (plasmaWordIndex.Item) iter.next(); + us = entry.url().comp().url().toNormalform(false, false); + prop.put("genUrlList_urlList_"+i+"_urlExists", "1"); + prop.put("genUrlList_urlList_"+i+"_urlExists_urlhxCount", i); + prop.putHTML("genUrlList_urlList_"+i+"_urlExists_urlhxValue", entry.index().urlHash()); + prop.putHTML("genUrlList_urlList_"+i+"_urlExists_keyString", keystring); + prop.put("genUrlList_urlList_"+i+"_urlExists_keyHash", keyhash); + prop.putHTML("genUrlList_urlList_"+i+"_urlExists_urlString", us); + prop.putHTML("genUrlList_urlList_"+i+"_urlExists_urlStringShort", (us.length() > 60) ? (us.substring(0, 60) + "...") : us); + prop.put("genUrlList_urlList_"+i+"_urlExists_pos", entry.index().posintext()); + prop.put("genUrlList_urlList_"+i+"_urlExists_phrase", entry.index().posofphrase()); + prop.put("genUrlList_urlList_"+i+"_urlExists_urlcomps", entry.index().urlcomps()); + prop.put("genUrlList_urlList_"+i+"_urlExists_urllength", entry.index().urllength()); + prop.put("genUrlList_urlList_"+i+"_urlExists_props", + ((entry.index().flags().get(plasmaCondenser.flag_cat_hasimage)) ? "contains images, " : "") + + ((entry.index().flags().get(plasmaCondenser.flag_cat_hasaudio)) ? "contains audio, " : "") + + ((entry.index().flags().get(plasmaCondenser.flag_cat_hasvideo)) ? "contains video, " : "") + + ((entry.index().flags().get(plasmaCondenser.flag_cat_hasapp)) ? 
"contains applications, " : "") + + ((entry.index().flags().get(indexRWIEntry.flag_app_url)) ? "appears in url, " : "") + + ((entry.index().flags().get(indexRWIEntry.flag_app_descr)) ? "appears in description, " : "") + + ((entry.index().flags().get(indexRWIEntry.flag_app_author)) ? "appears in author, " : "") + + ((entry.index().flags().get(indexRWIEntry.flag_app_tags)) ? "appears in tags, " : "") + + ((entry.index().flags().get(indexRWIEntry.flag_app_reference)) ? "appears in reference, " : "") + + ((entry.index().flags().get(indexRWIEntry.flag_app_emphasized)) ? "appears emphasized" : "") + ); + prop.put("genUrlList_urlList_"+i+"_urlExists_phrase", entry.index().posofphrase()); + prop.put("genUrlList_urlList_"+i+"_urlExists_phrase", entry.index().posofphrase()); + try { + url = new yacyURL(us, null); + } catch (MalformedURLException e) { + url = null; + } + if (plasmaSwitchboard.urlBlacklist.isListed(plasmaURLPattern.BLACKLIST_DHT, url)) { + prop.put("genUrlList_urlList_"+i+"_urlExists_urlhxChecked", "1"); + } + i++; + if ((maxlines >= 0) && (i >= maxlines)) break; + } + iter = finding.miss().iterator(); + while (iter.hasNext()) { + us = (String) iter.next(); + prop.put("genUrlList_urlList_"+i+"_urlExists", "0"); + prop.put("genUrlList_urlList_"+i+"_urlExists_urlhxCount", i); + prop.putHTML("genUrlList_urlList_"+i+"_urlExists_urlhxValue", us); + i++; + } + prop.put("genUrlList_urlList", i); + prop.putHTML("genUrlList_keyString", keystring); + prop.put("genUrlList_count", i); + putBlacklists(prop, listManager.getDirListing(listManager.listsPath)); + } + } + + private static void putBlacklists(serverObjects prop, String[] lists) { + prop.put("genUrlList_blacklists", lists.length); + for (int i=0; i + + + YaCy '#[clientname]#': Index Control + #%env/templates/metas.template%# + + + #%env/templates/header.template%# + #%env/templates/submenuIndexControl.template%# +

Index Administration

+

The local index currently contains #[ucount]# URL references

+
+
URL Retrieval +
+
Retrieve by URL:
+
+ +
+ +
Retrieve by URL-Hash:
+
+ + +
+
+
+
+ + #(urlhashsimilar)#::Sequential List of URL-Hashes:
+ #{rows}# + #{cols}##[urlHash]# #{/cols}#
+ #{/rows}# + #(/urlhashsimilar)# + + #(genUrlProfile)# + ::No entry found for URL-hash #[urlhash]# + :: + + + + + + + + + + +
URL String#[urlNormalform]#
Hash#[urlhash]#
Description#[urlDescr]#
Modified-Date#[moddate]#
Loaded-Date#[loaddate]#
Referrer#[referrer]#
Doctype#[doctype]#
Language#[language]#
Size#[size]#
Words#[wordCount]#

+
+ + + + +
+  this may produce unresolved references at other word indexes but they do not harm

+
+  delete the reference to this url at every other word where the reference exists (very extensive, but prevents unresolved references)
+
+ #(/genUrlProfile)# + #[result]# + + #%env/templates/footer.template%# + + \ No newline at end of file diff --git a/htroot/IndexControlURLs_p.java b/htroot/IndexControlURLs_p.java new file mode 100644 index 000000000..dfc46c03f --- /dev/null +++ b/htroot/IndexControlURLs_p.java @@ -0,0 +1,200 @@ +// IndexControlRWIs_p.java +// ----------------------- +// (C) 2004-2007 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany +// first published 2004 on http://yacy.net +// +// This is a part of YaCy, a peer-to-peer based web search engine +// +// $LastChangedDate: 2007-11-14 01:15:28 +0000 (Mi, 14 Nov 2007) $ +// $LastChangedRevision: 4216 $ +// $LastChangedBy: orbiter $ +// +// LICENSE +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. 
+// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +import java.io.IOException; +import java.net.MalformedURLException; +import java.util.Iterator; + +import de.anomic.http.httpHeader; +import de.anomic.index.indexURLEntry; +import de.anomic.kelondro.kelondroBase64Order; +import de.anomic.kelondro.kelondroRotateIterator; +import de.anomic.plasma.plasmaSwitchboard; +import de.anomic.server.serverObjects; +import de.anomic.server.serverSwitch; +import de.anomic.yacy.yacyURL; + +public class IndexControlURLs_p { + + public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) { + // return variable that accumulates replacements + plasmaSwitchboard sb = (plasmaSwitchboard) env; + + serverObjects prop = new serverObjects(); + + if (post == null || env == null) { + prop.put("urlstring", ""); + prop.put("urlhash", ""); + prop.put("result", ""); + prop.put("ucount", Integer.toString(sb.wordIndex.loadedURL.size())); + prop.put("otherHosts", ""); + return prop; // be save + } + + // default values + String urlstring = post.get("urlstring", "").trim(); + String urlhash = post.get("urlhash", "").trim(); + + if (!urlstring.startsWith("http://") && + !urlstring.startsWith("https://")) { urlstring = "http://" + urlstring; } + + prop.putHTML("urlstring", urlstring); + prop.put("urlhash", urlhash); + prop.put("result", " "); + + if (post.containsKey("urlhashdeleteall")) { + //try { + int i = sb.removeAllUrlReferences(urlhash, true); + prop.put("result", "Deleted URL and " + i + " references from " + i + " word indexes."); + //} catch (IOException e) { + // prop.put("result", "Deleted nothing because the url-hash could not be resolved"); + //} + } + + if (post.containsKey("urlhashdelete")) { + indexURLEntry entry = sb.wordIndex.loadedURL.load(urlhash, null); + if (entry == null) { + 
prop.put("result", "No Entry for URL hash " + urlhash + "; nothing deleted."); + } else { + urlstring = entry.comp().url().toNormalform(false, true); + prop.put("urlstring", ""); + sb.urlRemove(urlhash); + prop.putHTML("result", "Removed URL " + urlstring); + } + } + + if (post.containsKey("urldelete")) { + try { + urlhash = (new yacyURL(urlstring, null)).hash(); + } catch (MalformedURLException e) { + urlhash = null; + } + if ((urlhash == null) || (urlstring == null)) { + prop.put("result", "No input given; nothing deleted."); + } else { + sb.urlRemove(urlhash); + prop.putHTML("result", "Removed URL " + urlstring); + } + } + + if (post.containsKey("urlstringsearch")) { + try { + yacyURL url = new yacyURL(urlstring, null); + urlhash = url.hash(); + prop.put("urlhash", urlhash); + indexURLEntry entry = sb.wordIndex.loadedURL.load(urlhash, null); + if (entry == null) { + prop.putHTML("urlstring", "unknown url: " + urlstring); + prop.put("urlhash", ""); + } else { + prop.putAll(genUrlProfile(sb, entry, urlhash)); + } + } catch (MalformedURLException e) { + prop.putHTML("urlstring", "bad url: " + urlstring); + prop.put("urlhash", ""); + } + } + + if (post.containsKey("urlhashsearch")) { + indexURLEntry entry = sb.wordIndex.loadedURL.load(urlhash, null); + if (entry == null) { + prop.put("result", "No Entry for URL hash " + urlhash); + } else { + prop.putHTML("urlstring", entry.comp().url().toNormalform(false, true)); + prop.putAll(genUrlProfile(sb, entry, urlhash)); + } + } + + // generate list + if (post.containsKey("urlhashsimilar")) { + try { + final Iterator entryIt = new kelondroRotateIterator(sb.wordIndex.loadedURL.entries(true, urlhash), new String(kelondroBase64Order.zero(urlhash.length()))); + StringBuffer result = new StringBuffer("Sequential List of URL-Hashes:
"); + indexURLEntry entry; + int i = 0; + int rows = 0, cols = 0; + prop.put("urlhashsimilar", "1"); + while (entryIt.hasNext() && i < 256) { + entry = (indexURLEntry) entryIt.next(); + if (entry == null) break; + prop.put("urlhashsimilar_rows_"+rows+"_cols_"+cols+"_urlHash", entry.hash()); + cols++; + if (cols==8) { + prop.put("urlhashsimilar_rows_"+rows+"_cols", cols); + cols = 0; + rows++; + } + i++; + } + prop.put("urlhashsimilar_rows", rows); + prop.put("result", result.toString()); + } catch (IOException e) { + prop.put("result", "No Entries for URL hash " + urlhash); + } + } + + // insert constants + prop.putNum("ucount", sb.wordIndex.loadedURL.size()); + // return rewrite properties + return prop; + } + + private static serverObjects genUrlProfile(plasmaSwitchboard switchboard, indexURLEntry entry, String urlhash) { + serverObjects prop = new serverObjects(); + if (entry == null) { + prop.put("genUrlProfile", "1"); + prop.put("genUrlProfile_urlhash", urlhash); + return prop; + } + indexURLEntry.Components comp = entry.comp(); + String referrer = null; + indexURLEntry le = (entry.referrerHash() == null) ? 
null : switchboard.wordIndex.loadedURL.load(entry.referrerHash(), null); + if (le == null) { + referrer = ""; + } else { + referrer = le.comp().url().toNormalform(false, true); + } + if (comp.url() == null) { + prop.put("genUrlProfile", "1"); + prop.put("genUrlProfile_urlhash", urlhash); + return prop; + } + prop.put("genUrlProfile", "2"); + prop.putHTML("genUrlProfile_urlNormalform", comp.url().toNormalform(false, true)); + prop.put("genUrlProfile_urlhash", urlhash); + prop.put("genUrlProfile_urlDescr", comp.title()); + prop.put("genUrlProfile_moddate", entry.moddate()); + prop.put("genUrlProfile_loaddate", entry.loaddate()); + prop.putHTML("genUrlProfile_referrer", referrer); + prop.put("genUrlProfile_doctype", ""+entry.doctype()); + prop.put("genUrlProfile_language", entry.language()); + prop.put("genUrlProfile_size", entry.size()); + prop.put("genUrlProfile_wordCount", entry.wordCount()); + return prop; + } + +} diff --git a/htroot/IndexControl_p.html b/htroot/IndexControl_p.html deleted file mode 100644 index 4a61bb0c6..000000000 --- a/htroot/IndexControl_p.html +++ /dev/null @@ -1,178 +0,0 @@ - - - - YaCy '#[clientname]#': Index Control - #%env/templates/metas.template%# - - - #%env/templates/header.template%# - #%env/templates/submenuIndexControl.template%# -

Index Administration

- -
-

The local index currently consists of (at least) #[wcount]# reverse word indexes and #[ucount]# URL references

- - - - - - - - - - - - - - - - - - - - - -
Word:
Word-Hash: - -

-
- -
- - - - - - - - - - - - - - - - - -
URL:
URL-Hash: - -
-
- #(keyhashsimilar)#::Sequential List of Word-Hashes:
- #{rows}# - #{cols}# - #[wordHash]##{/cols}#
- #{/rows}# - #(/keyhashsimilar)# - #(urlhashsimilar)#::Sequential List of URL-Hashes:
- #{rows}# - #{cols}##[urlHash]# #{/cols}#
- #{/rows}# - #(/urlhashsimilar)# - #(genUrlList)# - :: - No URL entries related to this word hash #[keyHash]#. - :: -

#[count]# URL entries related to this word hash #[keyHash]#

-
-

- - - #{urlList}# - - #(urlExists)# - - - - - - - - - - :: - - - - - - - - - #(/urlExists)# - - #{/urlList}# -
 hashurlposphraseurlcompsurllengthprops
- <unresolved URL Hash> - #[urlhxValue]##[urlStringShort]##[pos]##[phrase]##[urlcomps]##[urllength]##[props]#
- - - -
-

Reference Deletion -
-
-
delete also the referenced URL itself (reasonable and recommended, may produce unresolved references
- at other word indexes but they do not harm) -
-
-
for every resolvable and deleted URL reference, delete the same reference at every other word where
- the reference exists (very extensive, but prevents further unresolved references) -
-
 
-
-
-   (= delete Word) -
-
-
-
Blacklist Extension -
-
- -
-
-
- -
-
-
-
-
#(/genUrlList)# - #(genUrlProfile)# - ::No entry found for URL-hash #[urlhash]# - :: - - - - - - - - - - -
URL String#[urlNormalform]#
Hash#[urlhash]#
Description#[urlDescr]#
Modified-Date#[moddate]#
Loaded-Date#[loaddate]#
Referrer#[referrer]#
Doctype#[doctype]#
Language#[language]#
Size#[size]#
Words#[wordCount]#

-
- - - - -
-  this may produce unresolved references at other word indexes but they do not harm

-
-  delete the reference to this url at every other word where the reference exists (very extensive, but prevents unresolved references)
-
- #(/genUrlProfile)# - #[result]# - - #%env/templates/footer.template%# - - \ No newline at end of file diff --git a/htroot/IndexControl_p.java b/htroot/IndexControl_p.java deleted file mode 100644 index 2f83a4e49..000000000 --- a/htroot/IndexControl_p.java +++ /dev/null @@ -1,589 +0,0 @@ -// IndexControl_p.java -// ----------------------- -// part of the AnomicHTTPD caching proxy -// (C) by Michael Peter Christen; mc@anomic.de -// first published on http://www.anomic.de -// Frankfurt, Germany, 2004 -// -// $LastChangedDate$ -// $LastChangedRevision$ -// $LastChangedBy$ -// -// This program is free software; you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation; either version 2 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU General Public License -// along with this program; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -// -// Using this software in any meaning (reading, learning, copying, compiling, -// running) means that you agree that the Author(s) is (are) not responsible -// for cost, loss of data or any harm that may be caused directly or indirectly -// by usage of this softare or this documentation. The usage of this software -// is on your own risk. 
The installation and usage (starting/running) of this -// software may allow other people or application to access your computer and -// any attached devices and is highly dependent on the configuration of the -// software which must be done by the user of the software; the author(s) is -// (are) also not responsible for proper configuration and usage of the -// software, even if provoked by documentation provided together with -// the software. -// -// Any changes to this file according to the GPL as documented in the file -// gpl.txt aside this file in the shipment you received can be done to the -// lines that follows this copyright notice here, but changes must not be -// done inside the copyright notive above. A re-distribution must contain -// the intact and unchanged copyright notice. -// Contributions and changes to the program code must be marked as such. - -// You must compile this file with -// javac -classpath .:../classes IndexControl_p.java -// if the shell's current path is HTROOT - -import java.io.File; -import java.io.FileWriter; -import java.io.IOException; -import java.io.PrintWriter; -import java.net.MalformedURLException; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Iterator; -import java.util.Map; -import java.util.Set; -import java.util.TreeMap; - -import de.anomic.data.listManager; -import de.anomic.http.httpHeader; -import de.anomic.index.indexContainer; -import de.anomic.index.indexRWIEntry; -import de.anomic.index.indexURLEntry; -import de.anomic.kelondro.kelondroBase64Order; -import de.anomic.kelondro.kelondroRotateIterator; -import de.anomic.plasma.plasmaCondenser; -import de.anomic.plasma.plasmaSwitchboard; -import de.anomic.plasma.urlPattern.abstractURLPattern; -import de.anomic.plasma.urlPattern.plasmaURLPattern; -import de.anomic.server.serverObjects; -import de.anomic.server.serverSwitch; -import de.anomic.yacy.yacyClient; -import de.anomic.yacy.yacyCore; -import de.anomic.yacy.yacySeed; -import 
de.anomic.yacy.yacyURL; - -public class IndexControl_p { - - public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) { - // return variable that accumulates replacements - plasmaSwitchboard switchboard = (plasmaSwitchboard) env; - - serverObjects prop = new serverObjects(); - - if (post == null || env == null) { - prop.put("keystring", ""); - prop.put("keyhash", ""); - prop.put("urlstring", ""); - prop.put("urlhash", ""); - prop.put("result", ""); - prop.put("wcount", Integer.toString(switchboard.wordIndex.size())); - prop.put("ucount", Integer.toString(switchboard.wordIndex.loadedURL.size())); - prop.put("otherHosts", ""); - listHosts(prop, ""); - return prop; // be save - } - - // default values - String keystring = post.get("keystring", "").trim(); - String keyhash = post.get("keyhash", "").trim(); - String urlstring = post.get("urlstring", "").trim(); - String urlhash = post.get("urlhash", "").trim(); - - if (!urlstring.startsWith("http://") && - !urlstring.startsWith("https://")) { urlstring = "http://" + urlstring; } - - prop.putHTML("keystring", keystring); - prop.put("keyhash", keyhash); - prop.putHTML("urlstring", urlstring); - prop.put("urlhash", urlhash); - prop.put("result", " "); - - // read values from checkboxes - String[] urlx = post.getAll("urlhx.*"); - boolean delurl = post.containsKey("delurl"); - boolean delurlref = post.containsKey("delurlref"); - - // delete word - if (post.containsKey("keyhashdeleteall")) { - if (delurl || delurlref) { - // generate an urlx array - indexContainer index = null; - index = switchboard.wordIndex.getContainer(keyhash, null); - Iterator en = index.entries(); - int i = 0; - urlx = new String[index.size()]; - while (en.hasNext()) { - urlx[i++] = ((indexRWIEntry) en.next()).urlHash(); - } - index = null; - } - if (delurlref) { - for (int i = 0; i < urlx.length; i++) switchboard.removeAllUrlReferences(urlx[i], true); - } - if (delurl || delurlref) { - for (int i = 0; i < 
urlx.length; i++) { - switchboard.urlRemove(urlx[i]); - } - } - switchboard.wordIndex.deleteContainer(keyhash); - post.remove("keyhashdeleteall"); - if (keystring.length() > 0 && - plasmaCondenser.word2hash(keystring).equals(keyhash)) { - post.put("keystringsearch", "generated"); - } else { - post.put("keyhashsearch", "generated"); - } - } - - // delete selected URLs - if (post.containsKey("keyhashdelete")) { - if (delurlref) { - for (int i = 0; i < urlx.length; i++) switchboard.removeAllUrlReferences(urlx[i], true); - } - if (delurl || delurlref) { - for (int i = 0; i < urlx.length; i++) { - switchboard.urlRemove(urlx[i]); - } - } - Set urlHashes = new HashSet(); - for (int i = 0; i < urlx.length; i++) urlHashes.add(urlx[i]); - switchboard.wordIndex.removeEntries(keyhash, urlHashes); - // this shall lead to a presentation of the list; so handle that the remaining program - // thinks that it was called for a list presentation - post.remove("keyhashdelete"); - if (keystring.length() > 0 && plasmaCondenser.word2hash(keystring).equals(keyhash)) { - post.put("keystringsearch", "generated"); - } else { - post.put("keyhashsearch", "generated"); -// prop.put("result", "Delete of relation of url hashes " + result + " to key hash " + keyhash); - } - } - - if (post.containsKey("urlhashdeleteall")) { - //try { - int i = switchboard.removeAllUrlReferences(urlhash, true); - prop.put("result", "Deleted URL and " + i + " references from " + i + " word indexes."); - //} catch (IOException e) { - // prop.put("result", "Deleted nothing because the url-hash could not be resolved"); - //} - } - - if (post.containsKey("urlhashdelete")) { - indexURLEntry entry = switchboard.wordIndex.loadedURL.load(urlhash, null); - if (entry == null) { - prop.put("result", "No Entry for URL hash " + urlhash + "; nothing deleted."); - } else { - urlstring = entry.comp().url().toNormalform(false, true); - prop.put("urlstring", ""); - switchboard.urlRemove(urlhash); - prop.putHTML("result", "Removed URL " 
+ urlstring); - } - } - - if (post.containsKey("urldelete")) { - try { - urlhash = (new yacyURL(urlstring, null)).hash(); - } catch (MalformedURLException e) { - urlhash = null; - } - if ((urlhash == null) || (urlstring == null)) { - prop.put("result", "No input given; nothing deleted."); - } else { - switchboard.urlRemove(urlhash); - prop.putHTML("result", "Removed URL " + urlstring); - } - } - - if (post.containsKey("keystringsearch")) { - keyhash = plasmaCondenser.word2hash(keystring); - prop.put("keyhash", keyhash); - prop.put("urlstring", ""); - prop.put("urlhash", ""); - prop.putAll(genUrlList(switchboard, keyhash, keystring)); - } - - if (post.containsKey("keyhashsearch")) { - if (keystring.length() == 0 || !plasmaCondenser.word2hash(keystring).equals(keyhash)) { - prop.put("keystring", "<not possible to compute word from hash>"); - } - prop.put("urlstring", ""); - prop.put("urlhash", ""); - prop.putAll(genUrlList(switchboard, keyhash, "")); - } - - // transfer to other peer - if (post.containsKey("keyhashtransfer")) { - if (keystring.length() == 0 || !plasmaCondenser.word2hash(keystring).equals(keyhash)) { - prop.put("keystring", "<not possible to compute word from hash>"); - } - - // find host & peer - String host = post.get("host", ""); // get host from input field - yacySeed seed = null; - if (host.length() != 0) { - if (host.length() == 12) { - // the host string is a peer hash - seed = yacyCore.seedDB.getConnected(host); - } else { - // the host string can be a host name - seed = yacyCore.seedDB.lookupByName(host); - } - } else { - host = post.get("hostHash", ""); // if input field is empty, get from select box - seed = yacyCore.seedDB.getConnected(host); - } - - // prepare index - prop.put("urlstring", ""); - prop.put("urlhash", ""); - indexContainer index; - String result; - long starttime = System.currentTimeMillis(); - index = switchboard.wordIndex.getContainer(keyhash, null); - // built urlCache - Iterator urlIter = index.entries(); - HashMap 
knownURLs = new HashMap(); - HashSet unknownURLEntries = new HashSet(); - indexRWIEntry iEntry; - indexURLEntry lurl; - while (urlIter.hasNext()) { - iEntry = (indexRWIEntry) urlIter.next(); - lurl = switchboard.wordIndex.loadedURL.load(iEntry.urlHash(), null); - if (lurl == null) { - unknownURLEntries.add(iEntry.urlHash()); - urlIter.remove(); - } else { - knownURLs.put(iEntry.urlHash(), lurl); - } - } - - // transport to other peer - String gzipBody = switchboard.getConfig("indexControl.gzipBody","false"); - int timeout = (int) switchboard.getConfigLong("indexControl.timeout",60000); - HashMap resultObj = yacyClient.transferIndex( - seed, - new indexContainer[]{index}, - knownURLs, - "true".equalsIgnoreCase(gzipBody), - timeout); - result = (String) resultObj.get("result"); - prop.put("result", (result == null) ? ("Successfully transferred " + knownURLs.size() + " words in " + ((System.currentTimeMillis() - starttime) / 1000) + " seconds, " + unknownURLEntries + " URL not found") : result); - index = null; - } - - // generate list - if (post.containsKey("keyhashsimilar")) { - final Iterator containerIt = switchboard.wordIndex.indexContainerSet(keyhash, false, true, 256).iterator(); - indexContainer container; - int i = 0; - int rows = 0, cols = 0; - prop.put("keyhashsimilar", "1"); - while (containerIt.hasNext() && i < 256) { - container = (indexContainer) containerIt.next(); - prop.put("keyhashsimilar_rows_"+rows+"_cols_"+cols+"_wordHash", container.getWordHash()); - cols++; - if (cols==8) { - prop.put("keyhashsimilar_rows_"+rows+"_cols", cols); - cols = 0; - rows++; - } - i++; - } - prop.put("keyhashsimilar_rows_"+rows+"_cols", cols); - prop.put("keyhashsimilar_rows", rows + 1); - prop.put("result", ""); - } - - if (post.containsKey("urlstringsearch")) { - try { - yacyURL url = new yacyURL(urlstring, null); - urlhash = url.hash(); - prop.put("urlhash", urlhash); - indexURLEntry entry = switchboard.wordIndex.loadedURL.load(urlhash, null); - if (entry == null) { 
- prop.putHTML("urlstring", "unknown url: " + urlstring); - prop.put("urlhash", ""); - } else { - prop.putAll(genUrlProfile(switchboard, entry, urlhash)); - } - } catch (MalformedURLException e) { - prop.putHTML("urlstring", "bad url: " + urlstring); - prop.put("urlhash", ""); - } - } - - if (post.containsKey("urlhashsearch")) { - indexURLEntry entry = switchboard.wordIndex.loadedURL.load(urlhash, null); - if (entry == null) { - prop.put("result", "No Entry for URL hash " + urlhash); - } else { - prop.putHTML("urlstring", entry.comp().url().toNormalform(false, true)); - prop.putAll(genUrlProfile(switchboard, entry, urlhash)); - } - } - - // generate list - if (post.containsKey("urlhashsimilar")) { - try { - final Iterator entryIt = new kelondroRotateIterator(switchboard.wordIndex.loadedURL.entries(true, urlhash), new String(kelondroBase64Order.zero(urlhash.length()))); - StringBuffer result = new StringBuffer("Sequential List of URL-Hashes:
"); - indexURLEntry entry; - int i = 0; - int rows = 0, cols = 0; - prop.put("urlhashsimilar", "1"); - while (entryIt.hasNext() && i < 256) { - entry = (indexURLEntry) entryIt.next(); - if (entry == null) break; - prop.put("urlhashsimilar_rows_"+rows+"_cols_"+cols+"_urlHash", entry.hash()); - cols++; - if (cols==8) { - prop.put("urlhashsimilar_rows_"+rows+"_cols", cols); - cols = 0; - rows++; - } - i++; - } - prop.put("urlhashsimilar_rows", rows); - prop.put("result", result.toString()); - } catch (IOException e) { - prop.put("result", "No Entries for URL hash " + urlhash); - } - } - - if (post.containsKey("blacklist")) { - String blacklist = post.get("blacklist", ""); - Set urlHashes = new HashSet(); - if (post.containsKey("blacklisturls")) { - PrintWriter pw; - try { - String[] supportedBlacklistTypes = env.getConfig("BlackLists.types", "").split(","); - pw = new PrintWriter(new FileWriter(new File(listManager.listsPath, blacklist), true)); - yacyURL url; - for (int i=0; i 0) { - Iterator e = yacyCore.dhtAgent.getAcceptRemoteIndexSeeds(startHash); - while (e.hasNext()) { - seed = (yacySeed) e.next(); - if (seed != null) { - prop.put("hosts_" + hc + "_hosthash", seed.hash); - prop.putHTML("hosts_" + hc + "_hostname", seed.hash + " " + seed.get(yacySeed.NAME, "nameless")); - hc++; - } - } - prop.put("hosts", hc); - } else { - prop.put("hosts", "0"); - } - } - - public static serverObjects genUrlProfile(plasmaSwitchboard switchboard, indexURLEntry entry, String urlhash) { - serverObjects prop = new serverObjects(); - if (entry == null) { - prop.put("genUrlProfile", "1"); - prop.put("genUrlProfile_urlhash", urlhash); - return prop; - } - indexURLEntry.Components comp = entry.comp(); - String referrer = null; - indexURLEntry le = (entry.referrerHash() == null) ? 
null : switchboard.wordIndex.loadedURL.load(entry.referrerHash(), null); - if (le == null) { - referrer = ""; - } else { - referrer = le.comp().url().toNormalform(false, true); - } - if (comp.url() == null) { - prop.put("genUrlProfile", "1"); - prop.put("genUrlProfile_urlhash", urlhash); - return prop; - } - prop.put("genUrlProfile", "2"); - prop.putHTML("genUrlProfile_urlNormalform", comp.url().toNormalform(false, true)); - prop.put("genUrlProfile_urlhash", urlhash); - prop.put("genUrlProfile_urlDescr", comp.title()); - prop.put("genUrlProfile_moddate", entry.moddate()); - prop.put("genUrlProfile_loaddate", entry.loaddate()); - prop.putHTML("genUrlProfile_referrer", referrer); - prop.put("genUrlProfile_doctype", ""+entry.doctype()); - prop.put("genUrlProfile_language", entry.language()); - prop.put("genUrlProfile_size", entry.size()); - prop.put("genUrlProfile_wordCount", entry.wordCount()); - return prop; - } - - public static serverObjects genUrlList(plasmaSwitchboard switchboard, String keyhash, String keystring) { - // search for a word hash and generate a list of url links - serverObjects prop = new serverObjects(); - indexContainer index = null; - try { - index = switchboard.wordIndex.getContainer(keyhash, null); - - prop.put("genUrlList_keyHash", keyhash); - - if ((index == null) || (index.size() == 0)) { - prop.put("genUrlList", "1"); - prop.put("genUrlList_count", "0"); - } else { - final Iterator en = index.entries(); - prop.put("genUrlList", "2"); - int i = 0; - // first generate a new map where the urls are sorted (not by hash but by the url text) - final TreeMap tm = new TreeMap(); - indexRWIEntry xi; - indexURLEntry le; - Object[] wu; - while (en.hasNext()) { - xi = (indexRWIEntry) en.next(); - le = switchboard.wordIndex.loadedURL.load(xi.urlHash(), null); - wu = new Object[]{xi, le}; - if (le == null) { - tm.put(xi.urlHash(), wu); - } else { - tm.put(le.comp().url().toNormalform(false, true), wu); - } - } - - yacyURL url; - final Iterator iter = 
tm.entrySet().iterator(); - Map.Entry entry; - String us; - while (iter.hasNext()) { - entry = (Map.Entry) iter.next(); - us = (String) entry.getKey(); - wu = (Object[]) entry.getValue(); - xi = (indexRWIEntry) wu[0]; - le = (indexURLEntry) wu[1]; - if (us.equals(xi.urlHash())) { - prop.put("genUrlList_urlList_"+i+"_urlExists", "0"); - prop.put("genUrlList_urlList_"+i+"_urlExists_urlhxCount", i); - prop.putHTML("genUrlList_urlList_"+i+"_urlExists_urlhxValue", xi.urlHash()); - } else { - prop.put("genUrlList_urlList_"+i+"_urlExists", "1"); - prop.put("genUrlList_urlList_"+i+"_urlExists_urlhxCount", i); - prop.putHTML("genUrlList_urlList_"+i+"_urlExists_urlhxValue", xi.urlHash()); - prop.putHTML("genUrlList_urlList_"+i+"_urlExists_keyString", keystring); - prop.put("genUrlList_urlList_"+i+"_urlExists_keyHash", keyhash); - prop.putHTML("genUrlList_urlList_"+i+"_urlExists_urlString", us); - prop.putHTML("genUrlList_urlList_"+i+"_urlExists_urlStringShort", (us.length() > 60) ? (us.substring(0, 60) + "...") : us); - prop.put("genUrlList_urlList_"+i+"_urlExists_pos", xi.posintext()); - prop.put("genUrlList_urlList_"+i+"_urlExists_phrase", xi.posofphrase()); - prop.put("genUrlList_urlList_"+i+"_urlExists_urlcomps", xi.urlcomps()); - prop.put("genUrlList_urlList_"+i+"_urlExists_urllength", xi.urllength()); - prop.put("genUrlList_urlList_"+i+"_urlExists_props", - ((xi.flags().get(plasmaCondenser.flag_cat_hasimage)) ? "contains images, " : "") + - ((xi.flags().get(plasmaCondenser.flag_cat_hasaudio)) ? "contains audio, " : "") + - ((xi.flags().get(plasmaCondenser.flag_cat_hasvideo)) ? "contains video, " : "") + - ((xi.flags().get(plasmaCondenser.flag_cat_hasapp)) ? "contains applications, " : "") + - ((xi.flags().get(indexRWIEntry.flag_app_url)) ? "appears in url, " : "") + - ((xi.flags().get(indexRWIEntry.flag_app_descr)) ? "appears in description, " : "") + - ((xi.flags().get(indexRWIEntry.flag_app_author)) ? 
"appears in author, " : "") + - ((xi.flags().get(indexRWIEntry.flag_app_tags)) ? "appears in tags, " : "") + - ((xi.flags().get(indexRWIEntry.flag_app_reference)) ? "appears in reference, " : "") + - ((xi.flags().get(indexRWIEntry.flag_app_emphasized)) ? "appears emphasized" : "") - ); - prop.put("genUrlList_urlList_"+i+"_urlExists_phrase", xi.posofphrase()); - prop.put("genUrlList_urlList_"+i+"_urlExists_phrase", xi.posofphrase()); - url = new yacyURL(us, null); - if (plasmaSwitchboard.urlBlacklist.isListed(plasmaURLPattern.BLACKLIST_DHT, url)) { - prop.put("genUrlList_urlList_"+i+"_urlExists_urlhxChecked", "1"); - } - } - i++; - } - prop.put("genUrlList_urlList", i); - prop.putHTML("genUrlList_keyString", keystring); - prop.put("genUrlList_count", i); - putBlacklists(prop, listManager.getDirListing(listManager.listsPath)); - } - index = null; - return prop; - } catch (IOException e) { - return prop; - } finally { - if (index != null) index = null; - } - } - - private static void putBlacklists(serverObjects prop, String[] lists) { - prop.put("genUrlList_blacklists", lists.length); - for (int i=0; i
  • Crawl Start & Monitoring
  • Crawl Results
  • -
  • Index Administration
  • +
  • Index Administration
  • Filter & Blacklists
  • Indexing with Proxy
  • diff --git a/htroot/env/templates/submenuIndexControl.template b/htroot/env/templates/submenuIndexControl.template index 000009c18..3c2ab2ca3 100644 --- a/htroot/env/templates/submenuIndexControl.template +++ b/htroot/env/templates/submenuIndexControl.template @@ -1,7 +1,8 @@