From 85f3617835a76bac0f8ca0b513ad73156161d7b5 Mon Sep 17 00:00:00 2001 From: hydrox Date: Thu, 21 Sep 2006 20:36:46 +0000 Subject: [PATCH] *) moved HTML from class-file to template-file (please check if it is valid HTML) git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2644 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- htroot/IndexControl_p.html | 68 ++++++++++++++++- htroot/IndexControl_p.java | 149 +++++++++++++++++++------------------ 2 files changed, 143 insertions(+), 74 deletions(-) diff --git a/htroot/IndexControl_p.html b/htroot/IndexControl_p.html index 9aa96f913..8a9fe7e45 100644 --- a/htroot/IndexControl_p.html +++ b/htroot/IndexControl_p.html @@ -107,7 +107,73 @@ - + #(keyhashsimilar)# + ::Sequential List of Word-Hashes:
#{rows}# #{cols}##[wordHash]# #{/cols}#
#{/rows}# + #(/keyhashsimilar)# + #(urlhashsimilar)# + ::Sequential List of URL-Hashes:
#{rows}# #{cols}##[urlHash]# #{/cols}#
#{/rows}# + #(/urlhashsimilar)# + #(genUrlList)# + :: + No URL entries related to this word hash #[keyHash]#. + :: + URL entries related to this word hash #[keyHash]#

+
+ #{urlList}# #(urlExists)# + #[urlhxValue]# <unresolved URL Hash>
+ :: + #[urlhxValue]# #[urlString]#, pos=#[pos]#
+ #(/urlExists)# #{/urlList}# + + + +
+
Reference Deletion + + + + + + +
+

+


  (= delete Word)
+
+

+ delete also the referenced URL itself (reasonable and recommended, may produce unresolved references at other word indexes but they do not harm) +
+

+ for every resolveable and deleted URL reference, delete the same reference at every other word where the reference exists (very extensive, but prevents further unresolved references) +
+

#(/genUrlList)# + #(genUrlProfile)# + ::No entry found for URL-hash #[urlhash]# + :: + + + + + + + + + + + + + +
URL String#[urlNormalform]#
Hash#[urlhash]#
Description#[urlDescr]#
Modified-Date#[moddate]#
Loaded-Date#[loaddate]#
Referrer#[referrer]#
Doctype#[doctype]#
Copy-Count#[copyCount]#
Local-Flag#[local]#
Quality#[quality]#
Language#[language]#
Size#[size]#
Words#[wordCount]#

+
+ + + + +
+  this may produce unresolved references at other word indexes but they do not harm

+
+  delete the reference to this url at every other word where the reference exists (very extensive, but prevents unresolved references)
+
+ #(/genUrlProfile)# #[result]# #%env/templates/footer.template%# diff --git a/htroot/IndexControl_p.java b/htroot/IndexControl_p.java index 0e52897ce..d4b7850dd 100644 --- a/htroot/IndexControl_p.java +++ b/htroot/IndexControl_p.java @@ -76,6 +76,7 @@ public class IndexControl_p { public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) { // return variable that accumulates replacements plasmaSwitchboard switchboard = (plasmaSwitchboard) env; + serverObjects prop = new serverObjects(); if (post == null || env == null) { @@ -107,7 +108,7 @@ public class IndexControl_p { prop.put("keyhash", keyhash); prop.put("urlstring", urlstring); prop.put("urlhash", urlhash); - prop.put("result", ""); + prop.put("result", " "); // read values from checkboxes String[] urlx = post.getAll("urlhx.*"); @@ -228,7 +229,7 @@ public class IndexControl_p { prop.put("keyhash", keyhash); prop.put("urlstring", ""); prop.put("urlhash", ""); - prop.put("result", genUrlList(switchboard, keyhash, keystring)); + prop.putAll(genUrlList(switchboard, keyhash, keystring)); } if (post.containsKey("keyhashsearch")) { @@ -238,7 +239,7 @@ public class IndexControl_p { } prop.put("urlstring", ""); prop.put("urlhash", ""); - prop.put("result", genUrlList(switchboard, keyhash, "")); + prop.putAll(genUrlList(switchboard, keyhash, "")); } // transfer to other peer @@ -287,16 +288,23 @@ public class IndexControl_p { if (post.containsKey("keyhashsimilar")) { try { final Iterator containerIt = switchboard.wordIndex.indexContainerSet(keyhash, plasmaWordIndex.RL_WORDFILES, true, 256).iterator(); - StringBuffer result = new StringBuffer("Sequential List of Word-Hashes:
"); indexContainer container; int i = 0; + int rows = 0, cols = 0; + prop.put("keyhashsimilar", 1); while (containerIt.hasNext() && i < 256) { container = (indexContainer) containerIt.next(); - result.append("").append(container.getWordHash()).append(" ").append(((i + 1) % 8 == 0) ? "
" : ""); + prop.put("keyhashsimilar_rows_"+rows+"_cols_"+cols+"_wordHash", container.getWordHash()); + cols++; + if (cols==8) { + prop.put("keyhashsimilar_rows_"+rows+"_cols", cols); + cols = 0; + rows++; + } i++; } - prop.put("result", result); + prop.put("keyhashsimilar_rows", rows); + prop.put("result", ""); } catch (IOException e) { prop.put("result", "unknown keys: " + e.getMessage()); } @@ -312,7 +320,7 @@ public class IndexControl_p { prop.put("urlstring", "unknown url: " + urlstring); prop.put("urlhash", ""); } else { - prop.put("result", genUrlProfile(switchboard, entry, urlhash)); + prop.putAll(genUrlProfile(switchboard, entry, urlhash)); } } catch (MalformedURLException e) { prop.put("urlstring", "bad url: " + urlstring); @@ -328,7 +336,7 @@ public class IndexControl_p { URL url = entry.url(); urlstring = url.toString(); prop.put("urlstring", urlstring); - prop.put("result", genUrlProfile(switchboard, entry, urlhash)); + prop.putAll(genUrlProfile(switchboard, entry, urlhash)); } } @@ -339,12 +347,20 @@ public class IndexControl_p { StringBuffer result = new StringBuffer("Sequential List of URL-Hashes:
"); plasmaCrawlLURL.Entry entry; int i = 0; + int rows = 0, cols = 0; + prop.put("urlhashsimilar", 1); while (entryIt.hasNext() && i < 256) { entry = (plasmaCrawlLURL.Entry) entryIt.next(); - result.append("").append(entry.hash()).append(" ").append(((i + 1) % 8 == 0) ? "
" : ""); + prop.put("urlhashsimilar_rows_"+rows+"_cols_"+cols+"_urlHash", entry.hash()); + cols++; + if (cols==8) { + prop.put("urlhashsimilar_rows_"+rows+"_cols", cols); + cols = 0; + rows++; + } i++; } + prop.put("urlhashsimilar_rows", rows); prop.put("result", result.toString()); } catch (IOException e) { prop.put("result", "No Entries for URL hash " + urlhash); @@ -380,8 +396,13 @@ public class IndexControl_p { return prop; } - public static String genUrlProfile(plasmaSwitchboard switchboard, plasmaCrawlLURL.Entry entry, String urlhash) { - if (entry == null) { return "No entry found for URL-hash " + urlhash; } + public static serverObjects genUrlProfile(plasmaSwitchboard switchboard, plasmaCrawlLURL.Entry entry, String urlhash) { + serverObjects prop = new serverObjects(); + if (entry == null) { + prop.put("genUrlProfile", 1); + prop.put("genUrlProfile_urlhash", urlhash); + return prop; + } URL url = entry.url(); String referrer = null; plasmaCrawlLURL.Entry le = switchboard.urlPool.loadedURL.load(entry.referrerHash(), null); @@ -390,49 +411,42 @@ public class IndexControl_p { } else { referrer = le.url().toString(); } - if (url == null) { return "No entry found for URL-hash " + urlhash; } - String result = "" + - "" + - "" + - "" + - "" + - "" + - "" + - "" + - "" + - "" + - "" + - "" + - "" + - "" + - "
URL String" + url.toNormalform() + "
Hash" + urlhash + "
Description" + entry.descr() + "
Modified-Date" + entry.moddate() + "
Loaded-Date" + entry.loaddate() + "
Referrer" + referrer + "
Doctype" + entry.doctype() + "
Copy-Count" + entry.copyCount() + "
Local-Flag" + entry.local() + "
Quality" + entry.quality() + "
Language" + entry.language() + "
Size" + entry.size() + "
Words" + entry.wordCount() + "

"; - result += - "
" + - "" + - "" + - "" + - "" + - "
" + - " this may produce unresolved references at other word indexes but they do not harm

" + - "
" + - " delete the reference to this url at every other word where the reference exists (very extensive, but prevents unresolved references)
" + - "
"; - return result; + if (url == null) { + prop.put("genUrlProfile", 1); + prop.put("genUrlProfile_urlhash", urlhash); + return prop; + } + prop.put("genUrlProfile", 2); + prop.put("genUrlProfile_urlNormalform", url.toNormalform()); + prop.put("genUrlProfile_urlhash", urlhash); + prop.put("genUrlProfile_urlDescr", entry.descr()); + prop.put("genUrlProfile_moddate", entry.moddate()); + prop.put("genUrlProfile_loaddate", entry.loaddate()); + prop.put("genUrlProfile_referrer", referrer); + prop.put("genUrlProfile_doctype", ""+entry.doctype()); + prop.put("genUrlProfile_copyCount", entry.copyCount()); + prop.put("genUrlProfile_local", ""+entry.local()); + prop.put("genUrlProfile_quality", entry.quality()); + prop.put("genUrlProfile_language", entry.language()); + prop.put("genUrlProfile_size", entry.size()); + prop.put("genUrlProfile_wordCount", entry.wordCount()); + return prop; } - public static String genUrlList(plasmaSwitchboard switchboard, String keyhash, String keystring) { + public static serverObjects genUrlList(plasmaSwitchboard switchboard, String keyhash, String keystring) { // search for a word hash and generate a list of url links + serverObjects prop = new serverObjects(); indexContainer index = null; try { index = switchboard.wordIndex.getContainer(keyhash, null, true, -1); - final StringBuffer result = new StringBuffer(1024); + prop.put("genUrlList_keyHash", keyhash); + if (index.size() == 0) { - result.append("No URL entries related to this word hash ").append(keyhash).append("."); + prop.put("genUrlList", 1); } else { final Iterator en = index.entries(); - result.append("URL entries related to this word hash ").append(keyhash).append("

"); - result.append("
"); + prop.put("genUrlList", 2); String us; String uh[] = new String[2]; int i = 0; @@ -454,46 +468,35 @@ public class IndexControl_p { URL url; final Iterator iter = tm.keySet().iterator(); - result.ensureCapacity((tm.size() + 2) * 384); while (iter.hasNext()) { us = iter.next().toString(); uh = (String[]) tm.get(us); if (us.equals(uh[0])) { - result.append("") - .append("").append(uh[0]).append(" <unresolved URL Hash>
"); - + prop.put("genUrlList_urlList_"+i+"_urlExists", 0); + prop.put("genUrlList_urlList_"+i+"_urlExists_urlhxCount", i); + prop.put("genUrlList_urlList_"+i+"_urlExists_urlhxValue", uh[0]); } else { + prop.put("genUrlList_urlList_"+i+"_urlExists", 1); + prop.put("genUrlList_urlList_"+i+"_urlExists_urlhxCount", i); + prop.put("genUrlList_urlList_"+i+"_urlExists_urlhxValue", uh[0]); + prop.put("genUrlList_urlList_"+i+"_urlExists_keyString", keystring); + prop.put("genUrlList_urlList_"+i+"_urlExists_keyHash", keyhash); + prop.put("genUrlList_urlList_"+i+"_urlExists_urlString", us); + prop.put("genUrlList_urlList_"+i+"_urlExists_pos", uh[1]); url = new URL(us); if (plasmaSwitchboard.urlBlacklist.isListed(plasmaURLPattern.BLACKLIST_DHT, url)) { - result.append(""); - } else { - result.append(""); + prop.put("genUrlList_urlList_"+i+"_urlExists_urlhxChecked", 1); } - result.append("") - .append(uh[0]).append(" ").append(us).append(", pos=").append(uh[1]).append("
"); } + i++; } - result.append("") - .append("") - .append("") - .append("") - .append("
Reference Deletion


") - .append("

") - .append("
  (= delete Word)
") - .append("
") - .append("

") - .append("delete also the referenced URL itself (reasonable and recommended, may produce unresolved references at other word indexes but they do not harm)") - .append("
") - .append("

") - .append("for every resolveable and deleted URL reference, delete the same reference at every other word where the reference exists (very extensive, but prevents further unresolved references)") - .append("

"); + prop.put("genUrlList_urlList", i); + prop.put("genUrlList_keyString", keystring); } index = null; - return result.toString(); + return prop; } catch (IOException e) { - return ""; + return prop; } finally { if (index != null) index = null; }