enhanced navigation

- fixed navigation being computed too early
- moved navigation rendering to yacysearchtrailer
- added more asserts

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6006 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 16 years ago
parent 3ca1f109c4
commit a5d481eab1

@ -3,7 +3,7 @@ javacSource=1.5
javacTarget=1.5
# Release Configuration
releaseVersion=0.82
releaseVersion=0.83
stdReleaseFile=yacy_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
embReleaseFile=yacy_emb_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
proReleaseFile=yacy_pro_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz

@ -126,28 +126,9 @@ var progressbar = new Progressbar(#[results]#, document.getElementById("results"
#(resultTable)#::</table>#(/resultTable)#
<!-- linklist end -->
<!-- attach the bottomline -->
<!--#include virtual="yacysearchtrailer.html?eventID=#[eventID]#&display=#[display]#" -->
</div>
<!--#include virtual="yacysearchtrailer.html?eventID=#[eventID]#&display=#[display]#" -->
#(navigation)#
::
<div id="sidebar" style="position:fixed; right:8px; margin-top:5px; width: 220px;">
<h3><a href="#">Navigation</a></h3>
<div>
First attempt to add 'real' Navigation to yacy search results:<br />
After a search is started, it is analysed how many hits are in each site.
If you click on a domain the search is modified using the efficient site: - operator!
</div>
<h3><a href="#">Domains</a></h3>
<div><ul style="padding-left: 0px;">
#{domains}#
<li>#[domain]#</li>
#{/domains}#
</ul></div>
<h3><a href="#">Authors</a></h3>
<div>Text...</div>
</div>
#(/navigation)#
</div>
</body>
</html>

@ -28,7 +28,6 @@
// if the shell's current path is HTROOT
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.TreeSet;
@ -49,7 +48,6 @@ import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.plasma.plasmaSwitchboardConstants;
import de.anomic.plasma.parser.Word;
import de.anomic.plasma.parser.Condenser;
import de.anomic.plasma.plasmaSearchRankingProcess.hostnaventry;
import de.anomic.server.serverCore;
import de.anomic.server.serverDomains;
import de.anomic.server.serverObjects;
@ -466,7 +464,7 @@ public class yacysearch {
resnav.append(navurla(thispage - 1, display, theQuery, originalUrlMask));
resnav.append("<strong>&lt;</strong></a>&nbsp;");
*/
resnav.append(navurla(thispage - 1, display, theQuery, originalUrlMask, null));
resnav.append(plasmaSearchQuery.navurla(thispage - 1, display, theQuery, originalUrlMask, null));
resnav.append("<img src=\"env/grafics/navdl.gif\" width=\"16\" height=\"16\"></a>&nbsp;");
}
final int numberofpages = Math.min(10, Math.max(thispage + 2, totalcount / theQuery.displayResults()));
@ -486,7 +484,7 @@ public class yacysearch {
resnav.append(i + 1);
resnav.append("</a>&nbsp;");
*/
resnav.append(navurla(i, display, theQuery, originalUrlMask, null));
resnav.append(plasmaSearchQuery.navurla(i, display, theQuery, originalUrlMask, null));
resnav.append("<img src=\"env/grafics/navd");
resnav.append(i + 1);
resnav.append(".gif\" width=\"16\" height=\"16\"></a>&nbsp;");
@ -500,24 +498,10 @@ public class yacysearch {
resnav.append(navurla(thispage + 1, display, theQuery, originalUrlMask));
resnav.append("<strong>&gt;</strong></a>");
*/
resnav.append(navurla(thispage + 1, display, theQuery, originalUrlMask, null));
resnav.append(plasmaSearchQuery.navurla(thispage + 1, display, theQuery, originalUrlMask, null));
resnav.append("<img src=\"env/grafics/navdr.gif\" width=\"16\" height=\"16\"></a>");
}
prop.put("num-results_resnav", resnav.toString());
// compose search navigation
ArrayList<hostnaventry> hostNavigator = theSearch.getHostNavigator(10);
if (hostNavigator == null) {
prop.put("navigation", 0);
} else {
prop.put("navigation", 1);
hostnaventry entry;
for (int i = 0; i < hostNavigator.size(); i++) {
entry = hostNavigator.get(i);
prop.put("navigation_domains_" + i + "_domain", navurla(thispage, display, theQuery, originalUrlMask, "site:" + entry.host) + entry.host + " (" + entry.count + ")</a>");
}
prop.put("navigation_domains", hostNavigator.size());
}
// generate the search result lines; the content will be produced by another servlet
for (int i = 0; i < theQuery.displayResults(); i++) {
@ -585,22 +569,4 @@ public class yacysearch {
// return rewrite properties
return prop;
}
/**
 * Builds the opening anchor tag of a link back to yacysearch.html that
 * carries the settings of the given query as URL parameters.
 * Only the opening tag is produced; the caller has to append the link
 * content and the closing &lt;/a&gt; itself.
 *
 * @param page            page index; multiplied with the query's displayResults() to form startRecord
 * @param display         value written to the 'display' URL parameter
 * @param theQuery        the query whose settings are written into the link
 * @param originalUrlMask value written to the 'urlmaskfilter' URL parameter
 * @param addToQuery      additional term that is attached to the search string with a '+', or null to attach nothing
 * @return the opening anchor tag as string
 */
private static String navurla(final int page, final int display, final plasmaSearchQuery theQuery, final String originalUrlMask, String addToQuery) {
    // the parts are appended in exactly the order in which they show up in the URL
    final StringBuilder anchor = new StringBuilder("<a href=\"yacysearch.html?display=");
    anchor.append(display);

    // search term, optionally extended (i.e. by a 'site:' modifier)
    anchor.append("&amp;search=").append(theQuery.queryString(true));
    if (addToQuery != null) {
        anchor.append("+").append(addToQuery);
    }

    // paging
    anchor.append("&amp;maximumRecords=").append(theQuery.displayResults());
    anchor.append("&amp;startRecord=").append(page * theQuery.displayResults());

    // search domain and snippet verification
    anchor.append("&amp;resource=").append(theQuery.isLocal() ? "local" : "global");
    anchor.append("&amp;verify=").append(theQuery.onlineSnippetFetch ? "true" : "false");

    // filters
    anchor.append("&amp;urlmaskfilter=").append(originalUrlMask);
    anchor.append("&amp;prefermaskfilter=").append(theQuery.prefer);

    // the constraint value stays empty if the query has no constraint
    anchor.append("&amp;cat=href&amp;constraint=");
    if (theQuery.constraint != null) {
        anchor.append(theQuery.constraint.exportB64());
    }

    anchor.append("&amp;contentdom=").append(theQuery.contentdom());
    anchor.append("&amp;former=").append(theQuery.queryString(true));
    anchor.append("\">");
    return anchor.toString();
}
}

@ -1,3 +1,20 @@
<p><strong>Topwords</strong>:
#{words}#&nbsp;<a href="yacysearch.html?search=#[newsearch]#&amp;Enter=Search&amp;count=#[count]#&amp;offset=#[offset]#&amp;resource=#[resource]#&amp;contentdom=#[contentdom]#">#[word]#</a>&nbsp;|#{/words}#
</p>
<div id="sidebar" style="position:fixed; right:8px; margin-top:5px; width: 220px;">
<h2>Navigation</h2>
#(navigation)#
::
<h3>Domains</h3>
<div>
After a search is started, it is analysed how many hits are in each site.
If you click on a domain the search is modified using the 'site:' operator.
</div>
<div><ul style="padding-left: 0px;">
#{domains}#
<li>#[domain]#</li>
#{/domains}#
</ul></div>
#(/navigation)#
<h3>Topics</h3>
#{words}#
<a href="yacysearch.html?search=#[newsearch]#&amp;Enter=Search&amp;count=#[count]#&amp;offset=#[offset]#&amp;resource=#[resource]#&amp;contentdom=#[contentdom]#&amp;display=#[display]#">#[word]#</a>
#{/words}#
</div>

@ -24,6 +24,7 @@
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
import java.util.ArrayList;
import java.util.Iterator;
import java.util.Set;
import java.util.TreeSet;
@ -35,6 +36,7 @@ import de.anomic.plasma.plasmaProfiling;
import de.anomic.plasma.plasmaSearchEvent;
import de.anomic.plasma.plasmaSearchQuery;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.plasma.plasmaSearchRankingProcess.hostnaventry;
import de.anomic.server.serverObjects;
import de.anomic.server.serverProfiling;
import de.anomic.server.serverSwitch;
@ -48,6 +50,7 @@ public class yacysearchtrailer {
final serverObjects prop = new serverObjects();
final String eventID = post.get("eventID", "");
final int display = post.getInt("display", 0);
// default settings for blank item
prop.put("words", "0");
@ -60,6 +63,20 @@ public class yacysearchtrailer {
}
final plasmaSearchQuery theQuery = theSearch.getQuery();
// compose search navigation
ArrayList<hostnaventry> hostNavigator = theSearch.getHostNavigator(10);
if (hostNavigator == null) {
prop.put("navigation", 0);
} else {
prop.put("navigation", 1);
hostnaventry entry;
for (int i = 0; i < hostNavigator.size(); i++) {
entry = hostNavigator.get(i);
prop.put("navigation_domains_" + i + "_domain", plasmaSearchQuery.navurla(0, display, theQuery, theQuery.urlMask, "site:" + entry.host) + entry.host + " (" + entry.count + ")</a>");
}
prop.put("navigation_domains", hostNavigator.size());
}
// attach the bottom line with search references (topwords)
final Set<String> references = theSearch.references(20);
@ -99,6 +116,7 @@ public class yacysearchtrailer {
prop.putHTML("words_" + hintcount + "_newsearch", theQuery.queryString.replace(' ', '+') + "+" + word);
prop.put("words_" + hintcount + "_count", theQuery.displayResults());
prop.put("words_" + hintcount + "_offset", "0");
prop.put("words_" + hintcount + "_display", display);
prop.put("words_" + hintcount + "_contentdom", theQuery.contentdom());
prop.put("words_" + hintcount + "_resource", ((theQuery.isLocal()) ? "local" : "global"));
prop.put("words_" + hintcount + "_nl", (iter.hasNext() && hintcount < MAX_TOPWORDS) ? 1 : 0);

@ -618,7 +618,7 @@ public class RowCollection implements Iterable<Row.Entry> {
*/
final int partition(final int L, final int R, int S, final byte[] swapspace) {
assert (L < R - 1): "L = " + L + ", R = " + R + ", S = " + S;
assert (R - L >= isortlimit): "L = " + L + ", R = " + R + ", S = " + S;
assert (R - L >= isortlimit): "L = " + L + ", R = " + R + ", S = " + S + ", isortlimit = " + isortlimit;
int p = L;
int q = R - 1;

@ -99,7 +99,7 @@ public final class plasmaSearchEvent {
long urlRetrievalAllTime;
long snippetComputationAllTime;
public ResultURLs crawlResults;
public ArrayList<hostnaventry> hostNavigator;
private ArrayList<hostnaventry> hostNavigator;
@SuppressWarnings("unchecked")
private plasmaSearchEvent(final plasmaSearchQuery query,
@ -125,6 +125,7 @@ public final class plasmaSearchEvent {
this.snippetComputationAllTime = 0;
this.workerThreads = null;
this.localSearchThread = null;
this.hostNavigator = null;
this.result = new SortStore<ResultEntry>(-1); // this is the result, enriched with snippets, ranked and ordered by ranking
this.images = new SortStore<plasmaSnippetCache.MediaSnippet>(-1);
this.failedURLs = new HashMap<String, String>(); // a map of urls to reason strings where a worker thread tried to work on, but failed.
@ -142,7 +143,6 @@ public final class plasmaSearchEvent {
(query.domType == plasmaSearchQuery.SEARCHDOM_CLUSTERALL)) {
// do a global search
this.rankedCache = new plasmaSearchRankingProcess(indexSegment, query, max_results_preparation, 16);
this.hostNavigator = null;
final int fetchpeers = 12;
@ -180,7 +180,6 @@ public final class plasmaSearchEvent {
// do a local search
this.rankedCache = new plasmaSearchRankingProcess(indexSegment, query, max_results_preparation, 2);
this.rankedCache.execQuery();
this.hostNavigator = rankedCache.getHostNavigator(10);
//CrawlSwitchboard.Finding finding = wordIndex.retrieveURLs(query, false, 2, ranking, process);
if (generateAbstracts) {
@ -189,6 +188,8 @@ public final class plasmaSearchEvent {
int maxcount = -1;
long mindhtdistance = Long.MAX_VALUE, l;
byte[] wordhash;
assert this.rankedCache.searchContainerMaps() != null;
assert this.rankedCache.searchContainerMaps()[0] != null;
for (Map.Entry<byte[], ReferenceContainer<WordReference>> entry : this.rankedCache.searchContainerMaps()[0].entrySet()) {
wordhash = entry.getKey();
final ReferenceContainer container = entry.getValue();
@ -240,7 +241,6 @@ public final class plasmaSearchEvent {
// so following sortings together with the global results will be fast
try {
rankedCache.execQuery();
hostNavigator = rankedCache.getHostNavigator(10);
} catch (final Exception e) {
e.printStackTrace();
}

@ -217,10 +217,14 @@ public final class plasmaSearchQuery {
}
public static String hashSet2hashString(final TreeSet<byte[]> hashes) {
final Iterator<byte[]> i = hashes.iterator();
final StringBuilder sb = new StringBuilder(hashes.size() * yacySeedDB.commonHashLength);
while (i.hasNext()) sb.append(new String(i.next()));
return new String(sb);
final byte[] bb = new byte[hashes.size() * yacySeedDB.commonHashLength];
int p = 0;
for (byte[] b : hashes) {
assert b.length == yacySeedDB.commonHashLength : "hash = " + new String(b);
System.arraycopy(b, 0, bb, p, yacySeedDB.commonHashLength);
p += yacySeedDB.commonHashLength;
}
return new String(bb);
}
public static String anonymizedQueryHashes(final TreeSet<byte[]> hashes) {
@ -326,4 +330,27 @@ public final class plasmaSearchQuery {
return hashSet2hashString(this.queryHashes) + "-" + hashSet2hashString(this.excludeHashes) + context;
}
/**
 * make a query anchor tag: the opening &lt;a href="yacysearch.html?..."&gt; tag
 * with the settings of the given query written as URL parameters.
 * The tag is not closed here; the link content and the closing &lt;/a&gt;
 * must be appended by the caller.
 *
 * @param page            page index; startRecord is computed as page * theQuery.displayResults()
 * @param display         value for the 'display' URL parameter
 * @param theQuery        the query that provides the search string and all further settings
 * @param originalUrlMask value for the 'urlmaskfilter' URL parameter
 * @param addToQuery      term that is added to the search string with a leading '+'; null adds nothing
 * @return the opening anchor tag
 */
public static String navurla(final int page, final int display, final plasmaSearchQuery theQuery, final String originalUrlMask, String addToQuery) {
    final StringBuilder tag = new StringBuilder();

    // link target and display mode
    tag.append("<a href=\"yacysearch.html?display=").append(display);

    // the search string; an additional term is attached only if one was given
    tag.append("&amp;search=").append(theQuery.queryString(true));
    if (addToQuery != null) {
        tag.append("+").append(addToQuery);
    }

    // page size and offset of the first record on the requested page
    tag.append("&amp;maximumRecords=").append(theQuery.displayResults());
    tag.append("&amp;startRecord=").append(page * theQuery.displayResults());

    tag.append("&amp;resource=");
    if (theQuery.isLocal()) {
        tag.append("local");
    } else {
        tag.append("global");
    }

    tag.append("&amp;verify=");
    if (theQuery.onlineSnippetFetch) {
        tag.append("true");
    } else {
        tag.append("false");
    }

    tag.append("&amp;urlmaskfilter=").append(originalUrlMask);
    tag.append("&amp;prefermaskfilter=").append(theQuery.prefer);

    // without a constraint the parameter is written with an empty value
    tag.append("&amp;cat=href&amp;constraint=");
    if (theQuery.constraint != null) {
        tag.append(theQuery.constraint.exportB64());
    }

    tag.append("&amp;contentdom=").append(theQuery.contentdom());
    tag.append("&amp;former=").append(theQuery.queryString(true));

    // close the opening tag only
    tag.append("\">");
    return tag.toString();
}
}

@ -131,10 +131,10 @@ public final class yacyClient {
break;
} catch (final Exception e) {
if (Thread.currentThread().isInterrupted()) {
yacyCore.log.logWarning("yacyClient.publishMySeed thread '" + Thread.currentThread().getName() + "' interrupted.");
yacyCore.log.logInfo("yacyClient.publishMySeed thread '" + Thread.currentThread().getName() + "' interrupted.");
return -1;
}
yacyCore.log.logWarning("yacyClient.publishMySeed thread '" + Thread.currentThread().getName() + "', peer " + address + "; exception: " + e.getMessage() + "; retry = " + retry);
yacyCore.log.logInfo("yacyClient.publishMySeed thread '" + Thread.currentThread().getName() + "', peer " + address + "; exception: " + e.getMessage() + "; retry = " + retry);
// try again (go into loop)
result = null;
}
@ -527,6 +527,7 @@ public final class yacyClient {
// create containers
final int words = wordhashes.length() / yacySeedDB.commonHashLength;
assert words > 0 : "wordhashes = " + wordhashes;
final ReferenceContainer<WordReference>[] container = new ReferenceContainer[words];
for (int i = 0; i < words; i++) {
container[i] = ReferenceContainer.emptyContainer(Segment.wordReferenceFactory, wordhashes.substring(i * yacySeedDB.commonHashLength, (i + 1) * yacySeedDB.commonHashLength).getBytes(), count);
@ -996,7 +997,7 @@ public final class yacyClient {
result.put("indexPayloadSize", Integer.toString(entrypost.length()));
return result;
} catch (final Exception e) {
yacyCore.log.logSevere("yacyClient.transferRWI error:" + e.getMessage());
yacyCore.log.logInfo("yacyClient.transferRWI error:" + e.getMessage());
return null;
}
}

@ -78,6 +78,7 @@ public class yacySearch extends Thread {
final Bitfield constraint) {
super("yacySearch_" + targetPeer.getName());
//System.out.println("DEBUG - yacySearch thread " + this.getName() + " initialized " + ((urlhashes.length() == 0) ? "(primary)" : "(secondary)"));
assert wordhashes.length() >= 12;
this.wordhashes = wordhashes;
this.excludehashes = excludehashes;
this.urlhashes = urlhashes;
@ -256,6 +257,7 @@ public class yacySearch extends Thread {
// prepare seed targets and threads
assert language != null;
assert wordhashes.length() >= 12 : "wordhashes = " + wordhashes;
final yacySeed[] targetPeers =
(clusterselection == null) ?
selectSearchTargets(
@ -287,6 +289,8 @@ public class yacySearch extends Thread {
final String targethash, final Blacklist blacklist,
final plasmaSearchRankingProfile rankingProfile,
final Bitfield constraint, final TreeMap<byte[], String> clusterselection) {
assert wordhashes.length() >= 12;
// check own peer status
if (peers.mySeed() == null || peers.mySeed().getPublicAddress() == null) { return null; }

Loading…
Cancel
Save