- more Dublin Core naming of page metadata

- better presentation of result counters in search results

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4420 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 17 years ago
parent fa3b8f0ae1
commit a8a5df4a51

@ -203,10 +203,10 @@ public class Bookmarks {
document = plasmaSnippetCache.retrieveDocument(comp.url(), true, 5000, true);
prop.put("mode_edit", "0"); // create mode
prop.put("mode_url", comp.url().toNormalform(false, true));
prop.putHTML("mode_title", comp.title());
prop.putHTML("mode_description", (document == null) ? comp.title(): document.dc_title());
prop.putHTML("mode_author", comp.author());
prop.putHTML("mode_tags", (document == null) ? comp.tags() : document.dc_subject(','));
prop.putHTML("mode_title", comp.dc_title());
prop.putHTML("mode_description", (document == null) ? comp.dc_title(): document.dc_title());
prop.putHTML("mode_author", comp.dc_creator());
prop.putHTML("mode_tags", (document == null) ? comp.dc_subject() : document.dc_subject(','));
prop.putHTML("mode_path","");
prop.put("mode_public", "0");
prop.put("mode_feed", "0"); //TODO: check if it IS a feed

@ -217,11 +217,11 @@ public class CrawlResults {
} else {
prop.put("table_indexed_" + cnt + "_showTitle_available", "1");
if (comp.title() == null || comp.title().trim().length() == 0)
if (comp.dc_title() == null || comp.dc_title().trim().length() == 0)
prop.put("table_indexed_" + cnt + "_showTitle_available_nodescr", "0");
else
prop.put("table_indexed_" + cnt + "_showTitle_available_nodescr", "1");
prop.putHTML("table_indexed_" + cnt + "_showTitle_available_nodescr_urldescr", comp.title());
prop.putHTML("table_indexed_" + cnt + "_showTitle_available_nodescr_urldescr", comp.dc_title());
prop.put("table_indexed_" + cnt + "_showTitle_available_cachepath", cachepath);
prop.putHTML("table_indexed_" + cnt + "_showTitle_available_urltitle", urlstr);

@ -89,7 +89,7 @@ public class IndexControlRWIs_p {
if (post.containsKey("keystringsearch")) {
keyhash = plasmaCondenser.word2hash(keystring);
prop.put("keyhash", keyhash);
final plasmaSearchRankingProcess ranking = plasmaSearchAPI.genSearchresult(prop, sb, keyhash, null, sortorder, false);
final plasmaSearchRankingProcess ranking = plasmaSearchAPI.genSearchresult(prop, sb, keyhash, null, sortorder);
if (ranking.filteredCount() == 0) {
prop.put("searchresult", 1);
prop.put("searchresult_word", keystring);
@ -100,7 +100,7 @@ public class IndexControlRWIs_p {
if (keystring.length() == 0 || !plasmaCondenser.word2hash(keystring).equals(keyhash)) {
prop.put("keystring", "<not possible to compute word from hash>");
}
final plasmaSearchRankingProcess ranking = plasmaSearchAPI.genSearchresult(prop, sb, keyhash, null, sortorder, false);
final plasmaSearchRankingProcess ranking = plasmaSearchAPI.genSearchresult(prop, sb, keyhash, null, sortorder);
if (ranking.filteredCount() == 0) {
prop.put("searchresult", 2);
prop.put("searchresult_wordhash", keyhash);
@ -159,7 +159,7 @@ public class IndexControlRWIs_p {
}
kelondroBitfield flags = plasmaSearchAPI.compileFlags(post);
int count = (post.get("lines", "all").equals("all")) ? -1 : post.getInt("lines", -1);
final plasmaSearchRankingProcess ranking = plasmaSearchAPI.genSearchresult(prop, sb, keyhash, flags, sortorder, true);
final plasmaSearchRankingProcess ranking = plasmaSearchAPI.genSearchresult(prop, sb, keyhash, flags, sortorder);
plasmaSearchAPI.genURLList(prop, keyhash, keystring, ranking, flags, count, sortorder);
}

@ -241,7 +241,7 @@ public class IndexControlURLs_p {
prop.put("genUrlProfile", "2");
prop.putHTML("genUrlProfile_urlNormalform", comp.url().toNormalform(false, true));
prop.put("genUrlProfile_urlhash", urlhash);
prop.put("genUrlProfile_urlDescr", comp.title());
prop.put("genUrlProfile_urlDescr", comp.dc_title());
prop.put("genUrlProfile_moddate", entry.moddate().toString());
prop.put("genUrlProfile_loaddate", entry.loaddate().toString());
prop.put("genUrlProfile_referrer", (le == null) ? 0 : 1);

@ -124,7 +124,7 @@ public class ViewFile {
return prop;
}
url = comp.url();
descr = comp.title();
descr = comp.dc_title();
urlEntry.wordCount();
size = urlEntry.size();
pre = urlEntry.flags().get(plasmaCondenser.flag_cat_indexof);

@ -70,9 +70,12 @@ function addHover() {
}
}
function statistics(offset, items, global, total) {
function statistics(offset, itemscount, totalcount, localResourceSize, remoteResourceSize, remoteIndexCount, remotePeerCount) {
document.getElementById("resultsOffset").firstChild.nodeValue = offset;
document.getElementById("itemscount").firstChild.nodeValue = items;
document.getElementById("globalcount").firstChild.nodeValue = global;
document.getElementById("totalcount").firstChild.nodeValue = total;
document.getElementById("itemscount").firstChild.nodeValue = itemscount;
document.getElementById("totalcount").firstChild.nodeValue = totalcount;
document.getElementById("localResourceSize").firstChild.nodeValue = localResourceSize;
document.getElementById("remoteResourceSize").firstChild.nodeValue = remoteResourceSize;
document.getElementById("remoteIndexCount").firstChild.nodeValue = remoteIndexCount;
document.getElementById("remotePeerCount").firstChild.nodeValue = remotePeerCount;
}

@ -183,7 +183,7 @@ public final class search {
snippetComputationAllTime = theSearch.getSnippetComputationTime();
// set statistic details of search result and find best result index set
if (theSearch.getLocalCount() == 0) {
if (theSearch.getRankingResult().getLocalResourceSize() == 0) {
prop.put("indexcount", "");
prop.put("joincount", "0");
} else {
@ -207,11 +207,11 @@ public final class search {
}
prop.put("indexcount", indexcount.toString());
if (theSearch.getLocalCount() == 0) {
if (theSearch.getRankingResult().getLocalResourceSize() == 0) {
joincount = 0;
prop.put("joincount", "0");
} else {
joincount = theSearch.getLocalCount();
joincount = theSearch.getRankingResult().getLocalResourceSize();
prop.put("joincount", Integer.toString(joincount));
accu = theSearch.completeResults(duetime);
}

@ -104,11 +104,11 @@ public class urls {
referrer = sb.getURL(entry.referrerHash());
// create RSS entry
comp = entry.comp();
prop.put("item_" + c + "_title", comp.title());
prop.put("item_" + c + "_title", comp.dc_title());
prop.putHTML("item_" + c + "_link", comp.url().toNormalform(true, false));
prop.putHTML("item_" + c + "_referrer", (referrer == null) ? "" : referrer.toNormalform(true, false));
prop.putHTML("item_" + c + "_description", comp.title());
prop.put("item_" + c + "_author", comp.author());
prop.putHTML("item_" + c + "_description", comp.dc_title());
prop.put("item_" + c + "_author", comp.dc_creator());
prop.put("item_" + c + "_pubDate", serverDate.formatShortSecond(entry.moddate()));
prop.put("item_" + c + "_guid", entry.hash());
c++;

@ -99,7 +99,7 @@ document.getElementById("Enter").value = "search again - catch up more links";
<p>No Results. (length of search words must be at least 3 characters)</p>
::
<div id="results"></div>
<span id="resCounter" style="display: inline;"><strong id="resultsOffset">#[offset]#</strong>-<strong id="itemscount">#[linkcount]#</strong> results from a total number of <strong id="totalcount">#[totalcount]#</strong> known#(globalresults)#.::, <strong id="globalcount">#[globalcount]#</strong> links from other YaCy peers.#(/globalresults)#</span>
<span id="resCounter" style="display: inline;"><strong id="resultsOffset">#[offset]#</strong>-<strong id="itemscount">#[itemscount]#</strong> results from a total number of <strong id="totalcount">#[totalcount]#</strong> known#(globalresults)#.:: (<strong id="localResourceSize">#[localResourceSize]#</strong> local, <strong id="remoteResourceSize">#[remoteResourceSize]#</strong> remote), <strong id="remoteIndexCount">#[remoteIndexCount]#</strong> links from <strong id="remotePeerCount">#[remotePeerCount]#</strong> other YaCy peers.#(/globalresults)#</span>
<span id="resNav" style="display: inline;">#[resnav]#</span>
::
<p>Searching the web with this peer is disabled for unauthorized users. Please <a href="Status.html?login=">log in</a> as administrator to use the search function</p>

@ -240,8 +240,8 @@ public class yacysearch {
// create a news message
HashMap<String, String> map = new HashMap<String, String>();
map.put("url", comp.url().toNormalform(false, true).replace(',', '|'));
map.put("title", comp.title().replace(',', ' '));
map.put("description", ((document == null) ? comp.title() : document.dc_title()).replace(',', ' '));
map.put("title", comp.dc_title().replace(',', ' '));
map.put("description", ((document == null) ? comp.dc_title() : document.dc_title()).replace(',', ' '));
map.put("author", ((document == null) ? "" : document.dc_creator()));
map.put("tags", ((document == null) ? "" : document.dc_subject(' ')));
yacyCore.newsPool.publishMyNews(yacyNewsRecord.newRecord(yacyNewsPool.CATEGORY_SURFTIPP_ADD, map));
@ -306,12 +306,12 @@ public class yacysearch {
// log
serverLog.logInfo("LOCAL_SEARCH", "EXIT WORD SEARCH: " + theQuery.queryString + " - " +
(theSearch.getLocalCount() + theSearch.getGlobalCount()) + " links found, " +
(theSearch.getRankingResult().getLocalResourceSize() + theSearch.getRankingResult().getRemoteResourceSize()) + " links found, " +
((System.currentTimeMillis() - timestamp) / 1000) + " seconds");
// prepare search statistics
Long trackerHandle = new Long(System.currentTimeMillis());
HashMap<String, Object> searchProfile = theQuery.resultProfile(theSearch.getLocalCount() + theSearch.getGlobalCount(), System.currentTimeMillis() - timestamp, theSearch.getURLRetrievalTime(), theSearch.getSnippetComputationTime());
HashMap<String, Object> searchProfile = theQuery.resultProfile(theSearch.getRankingResult().getLocalResourceSize() + theSearch.getRankingResult().getRemoteResourceSize(), System.currentTimeMillis() - timestamp, theSearch.getURLRetrievalTime(), theSearch.getSnippetComputationTime());
searchProfile.put("querystring", theQuery.queryString);
searchProfile.put("time", trackerHandle);
searchProfile.put("host", client);
@ -323,13 +323,16 @@ public class yacysearch {
sb.localSearchTracker.put(client, handles);
prop = new serverObjects();
prop.put("num-results_totalcount", yFormatter.number(theSearch.getLocalCount() + theSearch.getGlobalCount(), !rss));
prop.put("num-results_globalresults", "1");
prop.put("num-results_globalresults_globalcount", yFormatter.number(theSearch.getGlobalCount(), !rss));
prop.put("num-results_offset", offset);
prop.put("num-results_linkcount", "0");
prop.put("num-results_itemscount", "0");
prop.put("num-results_itemsPerPage", itemsPerPage);
prop.put("num-results_totalcount", yFormatter.number(theSearch.getRankingResult().getLocalResourceSize() + theSearch.getRankingResult().getRemoteResourceSize(), !rss));
prop.put("num-results_globalresults", (globalsearch) ? "1" : "0");
prop.put("num-results_globalresults_localResourceSize", yFormatter.number(theSearch.getRankingResult().getLocalResourceSize(), !rss));
prop.put("num-results_globalresults_remoteResourceSize", yFormatter.number(theSearch.getRankingResult().getRemoteResourceSize(), !rss));
prop.put("num-results_globalresults_remoteIndexCount", yFormatter.number(theSearch.getRankingResult().getRemoteIndexCount(), !rss));
prop.put("num-results_globalresults_remotePeerCount", yFormatter.number(theSearch.getRankingResult().getRemotePeerCount(), !rss));
// compose page navigation
StringBuffer resnav = new StringBuffer();
int thispage = offset / theQuery.displayResults();
@ -337,7 +340,7 @@ public class yacysearch {
resnav.append(navurla(thispage - 1, display, theQuery));
resnav.append("<strong>&lt;</strong></a>&nbsp;");
}
int numberofpages = Math.min(10, Math.min(thispage + 2, (theSearch.getGlobalCount() + theSearch.getLocalCount()) / theQuery.displayResults()));
int numberofpages = Math.min(10, Math.min(thispage + 2, (theSearch.getRankingResult().getRemoteResourceSize() + theSearch.getRankingResult().getLocalResourceSize()) / theQuery.displayResults()));
for (int i = 0; i < numberofpages; i++) {
if (i == thispage) {
resnav.append("<strong>");

@ -62,7 +62,7 @@
#(/rssreferences)#
#(dynamic)#::
<script type="text/javascript">
statistics("#[offset]#", "#[items]#", "#[global]#", "#[total]#");
statistics("#[offset]#", "#[itemscount]#", "#[totalcount]#", "#[localResourceSize]#", "#[remoteResourceSize]#", "#[remoteIndexCount]#", "#[remotePeerCount]#");
progressbar.step(1);
</script>
#(/dynamic)#

@ -44,6 +44,7 @@ import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
import de.anomic.tools.crypt;
import de.anomic.tools.nxTools;
import de.anomic.tools.yFormatter;
import de.anomic.yacy.yacyCore;
import de.anomic.yacy.yacyNewsPool;
import de.anomic.yacy.yacySeed;
@ -85,9 +86,12 @@ public class yacysearchitem {
// dynamically update count values
if (!rss) {
prop.put("dynamic_offset", theQuery.neededResults() - theQuery.displayResults() + 1);
prop.put("dynamic_global", theSearch.getGlobalCount());
prop.put("dynamic_total", theSearch.getGlobalCount() + theSearch.getLocalCount());
prop.put("dynamic_items", (item < 0) ? theQuery.neededResults() : item + 1);
prop.put("dynamic_itemscount", (item < 0) ? theQuery.neededResults() : item + 1);
prop.put("dynamic_totalcount", yFormatter.number(theSearch.getRankingResult().getLocalResourceSize() + theSearch.getRankingResult().getRemoteResourceSize(), !rss));
prop.put("dynamic_localResourceSize", yFormatter.number(theSearch.getRankingResult().getLocalResourceSize(), !rss));
prop.put("dynamic_remoteResourceSize", yFormatter.number(theSearch.getRankingResult().getRemoteResourceSize(), !rss));
prop.put("dynamic_remoteIndexCount", yFormatter.number(theSearch.getRankingResult().getRemoteIndexCount(), !rss));
prop.put("dynamic_remotePeerCount", yFormatter.number(theSearch.getRankingResult().getRemotePeerCount(), !rss));
prop.put("dynamic", "1");
}

@ -120,9 +120,9 @@ public class indexURLEntry {
public indexURLEntry(
yacyURL url,
String descr,
String author,
String tags,
String dc_title,
String dc_creator,
String dc_subject,
String ETag,
Date mod,
Date load,
@ -143,7 +143,7 @@ public class indexURLEntry {
// create new entry and store it into database
this.entry = rowdef.newEntry();
this.entry.setCol(col_hash, url.hash(), null);
this.entry.setCol(col_comp, encodeComp(url, descr, author, tags, ETag));
this.entry.setCol(col_comp, encodeComp(url, dc_title, dc_creator, dc_subject, ETag));
encodeDate(col_mod, mod);
encodeDate(col_load, load);
encodeDate(col_fresh, fresh);
@ -175,12 +175,12 @@ public class indexURLEntry {
return new Date(86400000 * this.entry.getColLong(col));
}
public static byte[] encodeComp(yacyURL url, String descr, String author, String tags, String ETag) {
public static byte[] encodeComp(yacyURL url, String dc_title, String dc_creator, String dc_subject, String ETag) {
serverCharBuffer s = new serverCharBuffer(200);
s.append(url.toNormalform(false, true)).append(10);
s.append(descr).append(10);
s.append(author).append(10);
s.append(tags).append(10);
s.append(dc_title).append(10);
s.append(dc_creator).append(10);
s.append(dc_subject).append(10);
s.append(ETag).append(10);
return s.toString().getBytes();
}
@ -203,13 +203,13 @@ public class indexURLEntry {
url = null;
}
String descr = crypt.simpleDecode(prop.getProperty("descr", ""), null); if (descr == null) descr = "";
String author = crypt.simpleDecode(prop.getProperty("author", ""), null); if (author == null) author = "";
String dc_creator = crypt.simpleDecode(prop.getProperty("author", ""), null); if (dc_creator == null) dc_creator = "";
String tags = crypt.simpleDecode(prop.getProperty("tags", ""), null); if (tags == null) tags = "";
String ETag = crypt.simpleDecode(prop.getProperty("ETag", ""), null); if (ETag == null) ETag = "";
this.entry = rowdef.newEntry();
this.entry.setCol(col_hash, url.hash(), null);
this.entry.setCol(col_comp, encodeComp(url, descr, author, tags, ETag));
this.entry.setCol(col_comp, encodeComp(url, descr, dc_creator, tags, ETag));
try {
encodeDate(col_mod, serverDate.parseShortDay(prop.getProperty("mod", "20000101")));
} catch (ParseException e) {
@ -256,9 +256,9 @@ public class indexURLEntry {
try {
s.append("hash=").append(hash());
s.append(",url=").append(crypt.simpleEncode(comp.url().toNormalform(false, true)));
s.append(",descr=").append(crypt.simpleEncode(comp.title()));
s.append(",author=").append(crypt.simpleEncode(comp.author()));
s.append(",tags=").append(crypt.simpleEncode(comp.tags()));
s.append(",descr=").append(crypt.simpleEncode(comp.dc_title()));
s.append(",author=").append(crypt.simpleEncode(comp.dc_creator()));
s.append(",tags=").append(crypt.simpleEncode(comp.dc_subject()));
s.append(",ETag=").append(crypt.simpleEncode(comp.ETag()));
s.append(",mod=").append(serverDate.formatShortDay(moddate()));
s.append(",load=").append(serverDate.formatShortDay(loaddate()));
@ -429,7 +429,7 @@ public class indexURLEntry {
null,
comp().url(),
referrerHash(),
comp().title(),
comp().dc_title(),
loaddate(),
null,
0,
@ -455,7 +455,7 @@ public class indexURLEntry {
public class Components {
private yacyURL url;
private String title, author, tags, ETag;
private String dc_title, dc_creator, dc_subject, ETag;
public Components(String url, String urlhash, String title, String author, String tags, String ETag) {
try {
@ -463,22 +463,22 @@ public class indexURLEntry {
} catch (MalformedURLException e) {
this.url = null;
}
this.title = title;
this.author = author;
this.tags = tags;
this.dc_title = title;
this.dc_creator = author;
this.dc_subject = tags;
this.ETag = ETag;
}
public Components(yacyURL url, String descr, String author, String tags, String ETag) {
this.url = url;
this.title = descr;
this.author = author;
this.tags = tags;
this.dc_title = descr;
this.dc_creator = author;
this.dc_subject = tags;
this.ETag = ETag;
}
public yacyURL url() { return this.url; }
public String title() { return this.title; }
public String author() { return this.author; }
public String tags() { return this.tags; }
public String dc_title() { return this.dc_title; }
public String dc_creator() { return this.dc_creator; }
public String dc_subject() { return this.dc_subject; }
public String ETag() { return this.ETag; }
}

@ -343,6 +343,7 @@ public class kelondroEcoTable implements kelondroIndex {
} else {
// read old value
kelondroRow.Entry v = table.get(i);
assert v != null;
System.arraycopy(row.getPrimaryKeyBytes(), 0, b, 0, rowdef.primaryKeyLength);
System.arraycopy(v.bytes(), 0, b, rowdef.primaryKeyLength, rowdef.objectsize - rowdef.primaryKeyLength);
// write new value

@ -72,10 +72,10 @@ import de.anomic.kelondro.kelondroBase64Order;
import de.anomic.kelondro.kelondroCache;
import de.anomic.kelondro.kelondroCloneableIterator;
import de.anomic.kelondro.kelondroException;
import de.anomic.kelondro.kelondroSplitTable;
import de.anomic.kelondro.kelondroIndex;
import de.anomic.kelondro.kelondroRow;
import de.anomic.kelondro.kelondroRowSet;
import de.anomic.kelondro.kelondroSplitTable;
import de.anomic.plasma.urlPattern.plasmaURLPattern;
import de.anomic.server.serverCodings;
import de.anomic.server.logging.serverLog;
@ -623,14 +623,14 @@ public final class plasmaCrawlLURL {
pw.println(url);
}
if (format == 1) {
pw.println("<a href=\"" + url + "\">" + htmlTools.encodeUnicode2html(comp.title(), true, true) + "</a><br>");
pw.println("<a href=\"" + url + "\">" + htmlTools.encodeUnicode2html(comp.dc_title(), true, true) + "</a><br>");
}
if (format == 2) {
pw.println("<item>");
pw.println("<title>" + htmlTools.encodeUnicode2html(comp.title(), true, true) + "</title>");
pw.println("<title>" + htmlTools.encodeUnicode2html(comp.dc_title(), true, true) + "</title>");
pw.println("<link>" + yacyURL.escape(url) + "</link>");
if (comp.author().length() > 0) pw.println("<author>" + htmlTools.encodeUnicode2html(comp.author(), true, true) + "</author>");
if (comp.tags().length() > 0) pw.println("<description>" + htmlTools.encodeUnicode2html(comp.tags(), true, true) + "</description>");
if (comp.dc_creator().length() > 0) pw.println("<author>" + htmlTools.encodeUnicode2html(comp.dc_creator(), true, true) + "</author>");
if (comp.dc_subject().length() > 0) pw.println("<description>" + htmlTools.encodeUnicode2html(comp.dc_subject(), true, true) + "</description>");
pw.println("<pubDate>" + entry.moddate().toString() + "</pubDate>");
pw.println("<guid isPermaLink=\"false\">" + entry.hash() + "</guid>");
pw.println("</item>");

@ -88,10 +88,10 @@ public class plasmaSearchAPI {
}
}
public static plasmaSearchRankingProcess genSearchresult(serverObjects prop, plasmaSwitchboard sb, String keyhash, kelondroBitfield filter, int sortorder, boolean fetchURLs) {
public static plasmaSearchRankingProcess genSearchresult(serverObjects prop, plasmaSwitchboard sb, String keyhash, kelondroBitfield filter, int sortorder) {
plasmaSearchQuery query = new plasmaSearchQuery(keyhash, -1, sb.getRanking(), filter);
plasmaSearchRankingProcess ranked = new plasmaSearchRankingProcess(sb.wordIndex, query, sortorder, Integer.MAX_VALUE);
ranked.execQuery(fetchURLs);
ranked.execQuery();
if (ranked.filteredCount() == 0) {
prop.put("searchresult", 2);

@ -76,7 +76,6 @@ public final class plasmaSearchEvent {
public TreeMap<String, String> IAResults;
public TreeMap<String, Integer> IACount;
public String IAmaxcounthash, IAneardhthash;
private int localcount;
private resultWorker[] workerThreads;
private ArrayList<ResultEntry> resultList;
//private int resultListLock; // a pointer that shows that all elements below this pointer are fixed and may not be changed again
@ -101,7 +100,6 @@ public final class plasmaSearchEvent {
this.IACount = new TreeMap<String, Integer>();
this.IAmaxcounthash = null;
this.IAneardhthash = null;
this.localcount = 0;
this.urlRetrievalAllTime = 0;
this.snippetComputationAllTime = 0;
this.workerThreads = null;
@ -157,8 +155,7 @@ public final class plasmaSearchEvent {
} else {
// do a local search
this.rankedCache = new plasmaSearchRankingProcess(wordIndex, query, 2, max_results_preparation);
this.rankedCache.execQuery(true);
this.localcount = this.rankedCache.filteredCount();
this.rankedCache.execQuery();
//plasmaWordIndex.Finding finding = wordIndex.retrieveURLs(query, false, 2, ranking, process);
if (generateAbstracts) {
@ -249,8 +246,7 @@ public final class plasmaSearchEvent {
// sort the local containers and truncate it to a limited count,
// so following sortings together with the global results will be fast
synchronized (rankedCache) {
rankedCache.execQuery(true);
localcount = rankedCache.filteredCount();
rankedCache.execQuery();
}
}
}
@ -291,13 +287,13 @@ public final class plasmaSearchEvent {
long startTime = System.currentTimeMillis();
indexURLEntry.Components comp = page.comp();
String pagetitle = comp.title().toLowerCase();
String pagetitle = comp.dc_title().toLowerCase();
if (comp.url() == null) {
registerFailure(page.hash(), "url corrupted (null)");
return null; // rare case where the url is corrupted
}
String pageurl = comp.url().toString().toLowerCase();
String pageauthor = comp.author().toLowerCase();
String pageauthor = comp.dc_creator().toLowerCase();
long dbRetrievalTime = System.currentTimeMillis() - startTime;
// check exclusion
@ -315,7 +311,7 @@ public final class plasmaSearchEvent {
// check constraints
if ((query.constraint != null) &&
(query.constraint.get(plasmaCondenser.flag_cat_indexof)) &&
(!(comp.title().startsWith("Index of")))) {
(!(comp.dc_title().startsWith("Index of")))) {
final Iterator<String> wi = query.queryHashes.iterator();
while (wi.hasNext()) wordIndex.removeEntry((String) wi.next(), page.hash());
registerFailure(page.hash(), "index-of constraint not fullfilled");
@ -423,12 +419,8 @@ public final class plasmaSearchEvent {
return secondarySearchThreads;
}
public int getLocalCount() {
return this.localcount;
}
public int getGlobalCount() {
return this.rankedCache.getGlobalCount();
public plasmaSearchRankingProcess getRankingResult() {
return this.rankedCache;
}
public long getURLRetrievalTime() {
@ -465,7 +457,7 @@ public final class plasmaSearchEvent {
if ((query.onlineSnippetFetch) &&
(!event.anyWorkerAlive()) &&
(event.resultList.size() < query.neededResults() + 10) &&
((event.getLocalCount() + event.getGlobalCount()) > event.resultList.size())) {
((event.getRankingResult().getLocalResourceSize() + event.getRankingResult().getRemoteResourceSize()) > event.resultList.size())) {
// set new timeout
event.eventTime = System.currentTimeMillis();
// start worker threads to fetch urls and snippets
@ -764,7 +756,7 @@ public final class plasmaSearchEvent {
("yacyshare " +
filename.replace('?', ' ') +
" " +
urlcomps.title()).getBytes(), "UTF-8").keySet(),
urlcomps.dc_title()).getBytes(), "UTF-8").keySet(),
urlentry.hash());
wordIndex.loadedURL.remove(urlentry.hash()); // clean up
throw new RuntimeException("index void");
@ -794,7 +786,7 @@ public final class plasmaSearchEvent {
return (alternative_urlname == null) ? urlcomps.url().toNormalform(false, true) : alternative_urlname;
}
public String title() {
return urlcomps.title();
return urlcomps.dc_title();
}
public plasmaSnippetCache.TextSnippet textSnippet() {
return this.textSnippet;

@ -57,9 +57,8 @@ public final class plasmaSearchRankingProcess {
private HashMap<String, String> handover; // key = urlhash, value = urlstring; used for double-check of urls that had been handed over to search process
private plasmaSearchQuery query;
private int sortorder;
private int filteredCount;
private int maxentries;
private int globalcount;
private int remote_peerCount, remote_indexCount, remote_resourceSize, local_resourceSize;
private indexRWIEntryOrder order;
private HashMap<String, Object> urlhashes; // map for double-check; String/Long relation, addresses ranking number (backreference for deletion)
private kelondroMScoreCluster<String> ref; // reference score computation for the commonSense heuristic
@ -76,11 +75,13 @@ public final class plasmaSearchRankingProcess {
this.sortedRWIEntries = new TreeMap<Object, indexRWIRowEntry>();
this.doubleDomCache = new HashMap<String, TreeMap<Object, indexRWIRowEntry>>();
this.handover = new HashMap<String, String>();
this.filteredCount = 0;
this.order = null;
this.query = query;
this.maxentries = maxentries;
this.globalcount = 0;
this.remote_peerCount = 0;
this.remote_indexCount = 0;
this.remote_resourceSize = 0;
this.local_resourceSize = 0;
this.urlhashes = new HashMap<String, Object>();
this.ref = new kelondroMScoreCluster<String>();
this.misses = new TreeSet<String>();
@ -90,7 +91,7 @@ public final class plasmaSearchRankingProcess {
for (int i = 0; i < 32; i++) {this.flagcount[i] = 0;}
}
public void execQuery(boolean fetchURLs) {
public void execQuery() {
long timer = System.currentTimeMillis();
this.localSearchContainerMaps = wordIndex.localSearchContainers(query, null);
@ -113,16 +114,24 @@ public final class plasmaSearchRankingProcess {
}
if (sortorder == 2) {
insertRanked(index, true);
insertRanked(index, true, index.size());
} else {
insertNoOrder(index, fetchURLs);
insertNoOrder(index, true, index.size());
}
}
private void insertNoOrder(indexContainer index, boolean local) {
private void insertNoOrder(indexContainer index, boolean local, int fullResource) {
final Iterator<indexRWIRowEntry> en = index.entries();
// generate a new map where the urls are sorted (not by hash but by the url text)
if (local) {
this.local_resourceSize += fullResource;
} else {
this.remote_resourceSize += fullResource;
this.remote_peerCount++;
this.remote_indexCount += index.size();
}
indexRWIRowEntry ientry;
indexURLEntry uentry;
String u;
@ -141,20 +150,14 @@ public final class plasmaSearchRankingProcess {
if (sortorder == 0) {
this.sortedRWIEntries.put(ientry.urlHash(), ientry);
this.urlhashes.put(ientry.urlHash(), ientry.urlHash());
filteredCount++;
} else {
if (local) {
uentry = wordIndex.loadedURL.load(ientry.urlHash(), ientry, 0);
if (uentry == null) {
this.misses.add(ientry.urlHash());
} else {
u = uentry.comp().url().toNormalform(false, true);
this.sortedRWIEntries.put(u, ientry);
this.urlhashes.put(ientry.urlHash(), u);
filteredCount++;
}
uentry = wordIndex.loadedURL.load(ientry.urlHash(), ientry, 0);
if (uentry == null) {
this.misses.add(ientry.urlHash());
} else {
filteredCount++;
u = uentry.comp().url().toNormalform(false, true);
this.sortedRWIEntries.put(u, ientry);
this.urlhashes.put(ientry.urlHash(), u);
}
}
@ -163,12 +166,18 @@ public final class plasmaSearchRankingProcess {
} // end loop
}
public void insertRanked(indexContainer index, boolean local) {
public void insertRanked(indexContainer index, boolean local, int fullResource) {
// we collect the urlhashes and construct a list with urlEntry objects
// attention: if minEntries is too high, this method will not terminate within the maxTime
assert (index != null);
if (index.size() == 0) return;
if (local) {
this.local_resourceSize += fullResource;
} else {
this.remote_resourceSize += fullResource;
this.remote_peerCount++;
}
long timer = System.currentTimeMillis();
if (this.order == null) {
@ -224,11 +233,8 @@ public final class plasmaSearchRankingProcess {
}
// increase counter for statistics
if (!local) this.globalcount++;
if (!local) this.remote_indexCount++;
}
this.filteredCount = sortedRWIEntries.size();
//long sc = Math.max(1, System.currentTimeMillis() - s0);
//System.out.println("###DEBUG### time to sort " + container.size() + " entries to " + this.filteredCount + ": " + sc + " milliseconds, " + (container.size() / sc) + " entries/millisecond, ranking = " + tc);
//if ((query.neededResults() > 0) && (container.size() > query.neededResults())) remove(true, true);
serverProfiling.update("SEARCH", new plasmaProfiling.searchEvent(query.id(true), plasmaSearchEvent.PRESORT, index.size(), System.currentTimeMillis() - timer));
@ -350,14 +356,34 @@ public final class plasmaSearchRankingProcess {
return flagcount;
}
// "results from a total number of <remote_resourceSize + local_resourceSize> known (<local_resourceSize> local, <remote_resourceSize> remote), <remote_indexCount> links from <remote_peerCount> other YaCy peers."
public int filteredCount() {
return this.filteredCount;
// the number of index entries that are considered as result set
return this.sortedRWIEntries.size();
}
public int getGlobalCount() {
return this.globalcount;
/**
 * Reports how many result contributions have been received from all remote peers.
 *
 * @return the current value of the remote index counter
 */
public int getRemoteIndexCount() {
    return remote_indexCount;
}
/**
 * Reports how many remote peers have contributed to this result.
 *
 * @return the current value of the remote peer counter
 */
public int getRemotePeerCount() {
    return remote_peerCount;
}
/**
 * Reports the number of all hits in all the remote peers.
 *
 * @return the accumulated remote resource size
 */
public int getRemoteResourceSize() {
    return remote_resourceSize;
}
/**
 * Reports the number of hits in the local peer, i.e. the index size
 * (size of the collection in the own index).
 *
 * @return the accumulated local resource size
 */
public int getLocalResourceSize() {
    return local_resourceSize;
}
public indexRWIEntry remove(String urlHash) {
Object r = (Long) urlhashes.get(urlHash);
if (r == null) return null;

@ -874,7 +874,7 @@ public class plasmaSnippetCache {
plasmaSearchEvent event = plasmaSearchEvent.getEvent(eventID);
assert plasmaSwitchboard.getSwitchboard() != null;
assert plasmaSwitchboard.getSwitchboard().wordIndex != null;
assert event != null;
assert event != null : "eventID = " + eventID;
assert event.getQuery() != null;
plasmaSwitchboard.getSwitchboard().wordIndex.removeEntryMultiple(event.getQuery().queryHashes, urlHash);
event.remove(urlHash);

@ -2205,7 +2205,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
/* =========================================================================
* CREATE INDEX
* ========================================================================= */
String docDescription = document.dc_title();
String dc_title = document.dc_title();
yacyURL referrerURL = entry.referrerURL();
String noIndexReason = plasmaCrawlEURL.DENIED_UNSPECIFIED_INDEXING_ERROR;
@ -2236,9 +2236,9 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
long ldate = System.currentTimeMillis();
indexURLEntry newEntry = new indexURLEntry(
entry.url(), // URL
docDescription, // document description
document.dc_creator(), // author
document.dc_subject(' '), // tags
dc_title, // document description
document.dc_creator(), // author
document.dc_subject(' '), // tags
"", // ETag
docDate, // modification date
new Date(), // loaded date
@ -2406,7 +2406,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
// of string concatenation
log.logInfo("*Indexed " + words + " words in URL " + entry.url() +
" [" + entry.urlHash() + "]" +
"\n\tDescription: " + docDescription +
"\n\tDescription: " + dc_title +
"\n\tMimeType: " + document.dc_format() + " | Charset: " + document.getCharset() + " | " +
"Size: " + document.getTextLength() + " bytes | " +
"Anchors: " + ((document.getAnchors() == null) ? 0 : document.getAnchors().size()) +
@ -2430,7 +2430,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
}
} else {
log.logFine("Not Indexed Resource '" + entry.url().toNormalform(false, true) + "': process case=" + processCase);
addURLtoErrorDB(entry.url(), referrerURL.hash(), initiatorPeerHash, docDescription, plasmaCrawlEURL.DENIED_UNKNOWN_INDEXING_PROCESS_CASE, new kelondroBitfield());
addURLtoErrorDB(entry.url(), referrerURL.hash(), initiatorPeerHash, dc_title, plasmaCrawlEURL.DENIED_UNKNOWN_INDEXING_PROCESS_CASE, new kelondroBitfield());
}
} catch (Exception ee) {
if (ee instanceof InterruptedException) throw (InterruptedException)ee;
@ -2443,7 +2443,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
if (clusterhashes != null) initiatorPeer.setAlternativeAddress((String) clusterhashes.get(initiatorPeer.hash));
yacyClient.crawlReceipt(initiatorPeer, "crawl", "exception", ee.getMessage(), null, "");
}
addURLtoErrorDB(entry.url(), (referrerURL == null) ? null : referrerURL.hash(), initiatorPeerHash, docDescription, plasmaCrawlEURL.DENIED_UNSPECIFIED_INDEXING_ERROR, new kelondroBitfield());
addURLtoErrorDB(entry.url(), (referrerURL == null) ? null : referrerURL.hash(), initiatorPeerHash, dc_title, plasmaCrawlEURL.DENIED_UNSPECIFIED_INDEXING_ERROR, new kelondroBitfield());
}
} else {
@ -2451,7 +2451,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
checkInterruption();
log.logInfo("Not indexed any word in URL " + entry.url() + "; cause: " + noIndexReason);
addURLtoErrorDB(entry.url(), (referrerURL == null) ? null : referrerURL.hash(), initiatorPeerHash, docDescription, noIndexReason, new kelondroBitfield());
addURLtoErrorDB(entry.url(), (referrerURL == null) ? null : referrerURL.hash(), initiatorPeerHash, dc_title, noIndexReason, new kelondroBitfield());
if ((processCase == PROCESSCASE_6_GLOBAL_CRAWLING) && (initiatorPeer != null)) {
if (clusterhashes != null) initiatorPeer.setAlternativeAddress((String) clusterhashes.get(initiatorPeer.hash));
yacyClient.crawlReceipt(initiatorPeer, "crawl", "rejected", noIndexReason, null, "");

@ -477,9 +477,10 @@ public final class yacyClient {
// now create a plasmaIndex out of this result
// System.out.println("yacyClient: " + ((urlhashes.length() == 0) ? "primary" : "secondary")+ " search result = " + result.toString()); // debug
int results = 0;
int results = 0, joincount = 0;
try {
results = Integer.parseInt((String) result.get("count"));
results = Integer.parseInt(result.get("count"));
joincount = Integer.parseInt(result.get("joincount"));
} catch (NumberFormatException e) {
yacyCore.log.logFine("SEARCH failed FROM " + target.hash + ":" + target.getName() + ", wrong output format");
yacyCore.peerActions.peerDeparture(target, "search request to peer created number format exception");
@ -557,7 +558,7 @@ public final class yacyClient {
// store remote result to local result container
synchronized (containerCache) {
// insert one container into the search result buffer
containerCache.insertRanked(container[0], false); // one is enough
containerCache.insertRanked(container[0], false, joincount); // one is enough
// integrate remote topwords
String references = (String) result.get("references");

Loading…
Cancel
Save