Finished refactoring of the search templates.

plasmaSwitchboard.searchFromLocal now computes a searchResults structure,
which is then evaluated by the yacysearch and DetailedSearch servlets.

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@3244 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
allo 18 years ago
parent 35039982da
commit c39dda2374

@ -7,6 +7,7 @@ version 0.50
* CHANGED: Migrated to the new Database Structure
* ADDED: XSS protection for all pages as default.
* ADDED: searchResults structure.
* CHANGED: the searchtemplates are now generated from a searchResults structure in the servlets.
version 0.49
* CHANGED: New Database Structure for Index and URL Storage

@ -47,22 +47,32 @@
// javac -classpath .:../classes index.java
// if the shell's current path is HTROOT
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.util.HashMap;
import java.util.Iterator;
import java.util.TreeSet;
import java.util.Map;
import de.anomic.data.searchResults;
import de.anomic.http.httpHeader;
import de.anomic.kelondro.kelondroMSetTools;
import de.anomic.net.URL;
import de.anomic.plasma.plasmaSearchPreOrder;
import de.anomic.plasma.plasmaSearchQuery;
import de.anomic.plasma.plasmaSearchRankingProfile;
import de.anomic.plasma.plasmaSearchTimingProfile;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.plasma.plasmaURL;
import de.anomic.server.serverCore;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
import de.anomic.yacy.yacyCore;
import de.anomic.yacy.yacyNewsPool;
import de.anomic.yacy.yacySeed;
import de.anomic.tools.crypt;
import de.anomic.tools.nxTools;
public class DetailedSearch {
@ -235,7 +245,58 @@ public class DetailedSearch {
plasmaSearchRankingProfile localRanking = new plasmaSearchRankingProfile("local", post.toString());
plasmaSearchTimingProfile localTiming = new plasmaSearchTimingProfile(4 * thisSearch.maximumTime / 10, thisSearch.wantedResults);
plasmaSearchTimingProfile remoteTiming = new plasmaSearchTimingProfile(6 * thisSearch.maximumTime / 10, thisSearch.wantedResults);
final serverObjects prop = sb.searchFromLocal(thisSearch, localRanking, localTiming, remoteTiming, postsort, (String) header.get("CLIENTIP"));
final serverObjects prop = new serverObjects();//sb.searchFromLocal(thisSearch, localRanking, localTiming, remoteTiming, postsort, (String) header.get("CLIENTIP"));
searchResults results = sb.searchFromLocal(thisSearch, localRanking, localTiming, remoteTiming, postsort, (String) header.get("CLIENTIP"));
//prop.put("references", 0);
URL wordURL=null;
prop.put("num-results_totalcount", results.getTotalcount());
prop.put("num-results_filteredcount", results.getFilteredcount());
prop.put("num-results_orderedcount", results.getOrderedcount());
prop.put("num-results_linkcount", results.getLinkcount());
prop.put("type_results", 0);
if(results.numResults()!=0){
//we've got results
prop.put("num-results_totalcount", results.getTotalcount());
prop.put("num-results_filteredcount", results.getFilteredcount());
prop.put("num-results_orderedcount", Integer.toString(results.getOrderedcount())); //why toString?
prop.put("num-results_globalresults", results.getGlobalresults());
for(int i=0;i<results.numResults();i++){
searchResults.searchResult result=results.getResult(i);
try {
prop.put("type_results_" + i + "_authorized_recommend", (yacyCore.newsPool.getSpecific(yacyNewsPool.OUTGOING_DB, "stippadd", "url", result.getUrl()) == null) ? 1 : 0);
} catch (IOException e) {}
prop.put("type_results_" + i + "_authorized_recommend_deletelink", "/yacysearch.html?search=" + results.getFormerSearch() + "&amp;Enter=Search&amp;count=" + results.getQuery().wantedResults + "&amp;order=" + crypt.simpleEncode(results.getRanking().toExternalString()) + "&amp;resource=local&amp;time=3&amp;deleteref=" + result.getUrlhash() + "&amp;urlmaskfilter=.*");
prop.put("type_results_" + i + "_authorized_recommend_recommendlink", "/yacysearch.html?search=" + results.getFormerSearch() + "&amp;Enter=Search&amp;count=" + results.getQuery().wantedResults + "&amp;order=" + crypt.simpleEncode(results.getRanking().toExternalString()) + "&amp;resource=local&amp;time=3&amp;recommendref=" + result.getUrlhash() + "&amp;urlmaskfilter=.*");
prop.put("type_results_" + i + "_authorized_urlhash", result.getUrlhash());
prop.put("type_results_" + i + "_description", result.getUrlentry().comp().descr());
prop.put("type_results_" + i + "_url", result.getUrl());
prop.put("type_results_" + i + "_urlhash", result.getUrlhash());
prop.put("type_results_" + i + "_urlhexhash", yacySeed.b64Hash2hexHash(result.getUrlhash()));
prop.put("type_results_" + i + "_urlname", nxTools.shortenURLString(result.getUrlname(), 120));
prop.put("type_results_" + i + "_date", plasmaSwitchboard.dateString(result.getUrlentry().moddate()));
prop.put("type_results_" + i + "_ybr", plasmaSearchPreOrder.ybr(result.getUrlentry().hash()));
prop.put("type_results_" + i + "_size", Long.toString(result.getUrlentry().size()));
try {
prop.put("type_results_" + i + "_words", URLEncoder.encode(results.getQuery().queryWords.toString(),"UTF-8"));
} catch (UnsupportedEncodingException e) {}
prop.put("type_results_" + i + "_former", results.getFormerSearch());
prop.put("type_results_" + i + "_rankingprops", result.getUrlentry().word().toPropertyForm() + ", domLengthEstimated=" + plasmaURL.domLengthEstimation(result.getUrlhash()) +
((plasmaURL.probablyRootURL(result.getUrlhash())) ? ", probablyRootURL" : "") +
(((wordURL = plasmaURL.probablyWordURL(result.getUrlhash(), results.getQuery().words(""))) != null) ? ", probablyWordURL=" + wordURL.toNormalform() : ""));
// adding snippet if available
if (result.hasSnippet()) {
prop.put("type_results_" + i + "_snippet", 1);
prop.putASIS("type_results_" + i + "_snippet_text", result.getSnippet().getLineMarked(results.getQuery().queryHashes));//FIXME: the ASIS should not be needed, if there is no html in .java
} else {
prop.put("type_results_" + i + "_snippet", 0);
prop.put("type_results_" + i + "_snippet_text", "");
}
prop.put("type_results", results.numResults());
prop.put("references", results.getReferences());
prop.put("num-results_linkcount", Integer.toString(results.numResults()));
}
}
putRanking(prop, localRanking, "local");
// remember the last search expression

@ -47,12 +47,16 @@
// if the shell's current path is HTROOT
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URLEncoder;
import java.util.HashMap;
import java.util.Iterator;
import java.util.TreeSet;
import de.anomic.data.searchResults;
import de.anomic.data.wikiCode;
import de.anomic.data.searchResults.searchResult;
import de.anomic.htmlFilter.htmlFilterImageEntry;
import de.anomic.http.httpHeader;
import de.anomic.index.indexURLEntry;
@ -68,13 +72,17 @@ import de.anomic.plasma.plasmaSearchQuery;
import de.anomic.plasma.plasmaSearchRankingProfile;
import de.anomic.plasma.plasmaSearchTimingProfile;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.plasma.plasmaURL;
import de.anomic.server.serverCore;
import de.anomic.server.serverDate;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
import de.anomic.tools.crypt;
import de.anomic.tools.nxTools;
import de.anomic.yacy.yacyCore;
import de.anomic.yacy.yacyNewsPool;
import de.anomic.yacy.yacyNewsRecord;
import de.anomic.yacy.yacySeed;
public class yacysearch {
@ -258,7 +266,59 @@ public class yacysearch {
plasmaSearchRankingProfile ranking = (sb.getConfig("rankingProfile", "").length() == 0) ? new plasmaSearchRankingProfile(contentdomString) : new plasmaSearchRankingProfile("", crypt.simpleDecode(sb.getConfig("rankingProfile", ""), null));
plasmaSearchTimingProfile localTiming = new plasmaSearchTimingProfile(4 * thisSearch.maximumTime / 10, thisSearch.wantedResults);
plasmaSearchTimingProfile remoteTiming = new plasmaSearchTimingProfile(6 * thisSearch.maximumTime / 10, thisSearch.wantedResults);
prop = sb.searchFromLocal(thisSearch, ranking, localTiming, remoteTiming, true, (String) header.get("CLIENTIP"));
searchResults results = sb.searchFromLocal(thisSearch, ranking, localTiming, remoteTiming, true, (String) header.get("CLIENTIP"));
//prop=sb.searchFromLocal(thisSearch, ranking, localTiming, remoteTiming, true, (String) header.get("CLIENTIP"));
prop=new serverObjects();
//prop.put("references", 0);
URL wordURL=null;
prop.put("num-results_totalcount", results.getTotalcount());
prop.put("num-results_filteredcount", results.getFilteredcount());
prop.put("num-results_orderedcount", results.getOrderedcount());
prop.put("num-results_linkcount", results.getLinkcount());
prop.put("type_results", 0);
if(results.numResults()!=0){
//we've got results
prop.put("num-results_totalcount", results.getTotalcount());
prop.put("num-results_filteredcount", results.getFilteredcount());
prop.put("num-results_orderedcount", Integer.toString(results.getOrderedcount())); //why toString?
prop.put("num-results_globalresults", results.getGlobalresults());
for(int i=0;i<results.numResults();i++){
searchResults.searchResult result=results.getResult(i);
try {
prop.put("type_results_" + i + "_authorized_recommend", (yacyCore.newsPool.getSpecific(yacyNewsPool.OUTGOING_DB, "stippadd", "url", result.getUrl()) == null) ? 1 : 0);
} catch (IOException e) {}
prop.put("type_results_" + i + "_authorized_recommend_deletelink", "/yacysearch.html?search=" + results.getFormerSearch() + "&amp;Enter=Search&amp;count=" + results.getQuery().wantedResults + "&amp;order=" + crypt.simpleEncode(results.getRanking().toExternalString()) + "&amp;resource=local&amp;time=3&amp;deleteref=" + result.getUrlhash() + "&amp;urlmaskfilter=.*");
prop.put("type_results_" + i + "_authorized_recommend_recommendlink", "/yacysearch.html?search=" + results.getFormerSearch() + "&amp;Enter=Search&amp;count=" + results.getQuery().wantedResults + "&amp;order=" + crypt.simpleEncode(results.getRanking().toExternalString()) + "&amp;resource=local&amp;time=3&amp;recommendref=" + result.getUrlhash() + "&amp;urlmaskfilter=.*");
prop.put("type_results_" + i + "_authorized_urlhash", result.getUrlhash());
prop.put("type_results_" + i + "_description", result.getUrlentry().comp().descr());
prop.put("type_results_" + i + "_url", result.getUrl());
prop.put("type_results_" + i + "_urlhash", result.getUrlhash());
prop.put("type_results_" + i + "_urlhexhash", yacySeed.b64Hash2hexHash(result.getUrlhash()));
prop.put("type_results_" + i + "_urlname", nxTools.shortenURLString(result.getUrlname(), 120));
prop.put("type_results_" + i + "_date", plasmaSwitchboard.dateString(result.getUrlentry().moddate()));
prop.put("type_results_" + i + "_ybr", plasmaSearchPreOrder.ybr(result.getUrlentry().hash()));
prop.put("type_results_" + i + "_size", Long.toString(result.getUrlentry().size()));
try {
prop.put("type_results_" + i + "_words", URLEncoder.encode(results.getQuery().queryWords.toString(),"UTF-8"));
} catch (UnsupportedEncodingException e) {}
prop.put("type_results_" + i + "_former", results.getFormerSearch());
prop.put("type_results_" + i + "_rankingprops", result.getUrlentry().word().toPropertyForm() + ", domLengthEstimated=" + plasmaURL.domLengthEstimation(result.getUrlhash()) +
((plasmaURL.probablyRootURL(result.getUrlhash())) ? ", probablyRootURL" : "") +
(((wordURL = plasmaURL.probablyWordURL(result.getUrlhash(), results.getQuery().words(""))) != null) ? ", probablyWordURL=" + wordURL.toNormalform() : ""));
// adding snippet if available
if (result.hasSnippet()) {
prop.put("type_results_" + i + "_snippet", 1);
prop.putASIS("type_results_" + i + "_snippet_text", result.getSnippet().getLineMarked(results.getQuery().queryHashes));//FIXME: the ASIS should not be needed, if there is no html in .java
} else {
prop.put("type_results_" + i + "_snippet", 0);
prop.put("type_results_" + i + "_snippet_text", "");
}
prop.put("type_results", results.numResults());
prop.put("references", results.getReferences());
prop.put("num-results_linkcount", Integer.toString(results.numResults()));
}
}
// remember the last search expression
env.setConfig("last-search", querystring + contentdomString);

@ -40,6 +40,7 @@ public class searchResults {
private String formerSearch="";
private plasmaSearchQuery query=null;
private ArrayList results=null;
private Object[] references=null;
public searchResults(){
this.results=new ArrayList();
@ -52,8 +53,19 @@ public class searchResults {
this.linkcount=linkcount;
}
/**
 * Adds a single result to the end of the result list.
 * The backing list is created on demand if it does not exist yet.
 *
 * @param result the result entry to append
 */
public void appendResult(searchResult result){
    if (this.results == null) {
        this.results = new ArrayList();
    }
    this.results.add(result);
}
/**
 * Reports how many results have been collected so far.
 *
 * @return the size of the result list, or 0 when no list exists
 */
public int numResults(){
    return (this.results == null) ? 0 : this.results.size();
}
/**
 * Returns the result stored at the given position.
 *
 * Any index outside the valid range yields null instead of an exception.
 * Previously only the upper bound was checked, so a negative index fell
 * through to the list lookup and threw.
 *
 * @param index zero-based position of the wanted result
 * @return the result at that position, or null if there is no result list
 *         or the index is out of range
 */
public searchResult getResult(int index){
    if (results == null || index < 0 || index >= results.size()) {
        return null;
    }
    // the list is untyped, so the element has to be cast back
    return (searchResult) results.get(index);
}
/**
 * Stores the total count for this search result set.
 *
 * @param totalcount the value to keep in the totalcount field
 */
public void setTotalcount(int totalcount) {
this.totalcount = totalcount;
}
@ -105,6 +117,12 @@ public class searchResults {
/**
 * Returns the search query attached to this result set.
 *
 * @return the stored query; the field is declared with a null initializer,
 *         so this is presumably null until a query has been set
 */
public plasmaSearchQuery getQuery() {
return query;
}
/**
 * Stores the references array for this result set.
 * The array is kept as passed in; no copy is made.
 *
 * @param references the array to keep in the references field
 */
public void setReferences(Object[] references) {
this.references = references;
}
/**
 * Returns the references array stored in this result set.
 * The internal array itself is returned, not a copy.
 *
 * @return the stored references; the field is declared with a null
 *         initializer, so this is presumably null until references are set
 */
public Object[] getReferences() {
return references;
}
public class searchResult{
private String url="";
private String urlname="";

@ -2080,7 +2080,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
if (date == null) return ""; else return DateFormatter.format(date);
}
public serverObjects searchFromLocal(plasmaSearchQuery query,
public searchResults searchFromLocal(plasmaSearchQuery query,
plasmaSearchRankingProfile ranking,
plasmaSearchTimingProfile localTiming,
plasmaSearchTimingProfile remoteTiming,
@ -2090,7 +2090,6 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
// tell all threads to do nothing for a specific time
intermissionAllThreads(2 * query.maximumTime);
serverObjects prop = new serverObjects();
searchResults results=new searchResults();
results.setRanking(ranking);
results.setQuery(query);
@ -2126,12 +2125,6 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
results.setFilteredcount(0);
results.setOrderedcount(0);
results.setLinkcount(0);
prop.put("num-results_totalcount", results.getTotalcount());
prop.put("num-results_filteredcount", results.getFilteredcount());
prop.put("num-results_orderedcount", results.getOrderedcount());
prop.put("num-results_linkcount", results.getLinkcount());
prop.put("references", 0);
prop.put("type_results", 0);
} else {
results.setTotalcount(acc.globalContributions + acc.localContributions);
results.setFilteredcount(acc.filteredResults);
@ -2139,10 +2132,6 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
results.setGlobalresults(acc.globalContributions);
results.setRanking(ranking);
prop.put("num-results_totalcount", results.getTotalcount());
prop.put("num-results_filteredcount", results.getFilteredcount());
prop.put("num-results_orderedcount", Integer.toString(results.getOrderedcount())); //why toString?
prop.put("num-results_globalresults", results.getGlobalresults());
int i = 0;
int p;
indexURLEntry urlentry;
@ -2192,7 +2181,6 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
*/
//addScoreForked(ref, gs, descr.split(" "));
//addScoreForked(ref, gs, urlstring.split("/"));
URL wordURL;
searchResults.searchResult result=results.createSearchResult();
result.setUrl(urlstring);
result.setUrlname(urlname);
@ -2209,33 +2197,6 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
if ((snippet != null) && (snippet.getSource() == plasmaSnippetCache.ERROR_NO_MATCH)) {
// suppress line: there is no match in that resource
} else {*/
prop.put("type_results_" + i + "_authorized_recommend", (yacyCore.newsPool.getSpecific(yacyNewsPool.OUTGOING_DB, "stippadd", "url", result.getUrl()) == null) ? 1 : 0);
prop.put("type_results_" + i + "_authorized_recommend_deletelink", "/yacysearch.html?search=" + results.getFormerSearch() + "&amp;Enter=Search&amp;count=" + results.getQuery().wantedResults + "&amp;order=" + crypt.simpleEncode(results.getRanking().toExternalString()) + "&amp;resource=local&amp;time=3&amp;deleteref=" + result.getUrlhash() + "&amp;urlmaskfilter=.*");
prop.put("type_results_" + i + "_authorized_recommend_recommendlink", "/yacysearch.html?search=" + results.getFormerSearch() + "&amp;Enter=Search&amp;count=" + results.getQuery().wantedResults + "&amp;order=" + crypt.simpleEncode(results.getRanking().toExternalString()) + "&amp;resource=local&amp;time=3&amp;recommendref=" + result.getUrlhash() + "&amp;urlmaskfilter=.*");
prop.put("type_results_" + i + "_authorized_urlhash", result.getUrlhash());
prop.put("type_results_" + i + "_description", result.getUrlentry().comp().descr());
prop.put("type_results_" + i + "_url", result.getUrl());
prop.put("type_results_" + i + "_urlhash", result.getUrlhash());
prop.put("type_results_" + i + "_urlhexhash", yacySeed.b64Hash2hexHash(result.getUrlhash()));
prop.put("type_results_" + i + "_urlname", nxTools.shortenURLString(result.getUrlname(), 120));
prop.put("type_results_" + i + "_date", dateString(result.getUrlentry().moddate()));
prop.put("type_results_" + i + "_ybr", plasmaSearchPreOrder.ybr(result.getUrlentry().hash()));
prop.put("type_results_" + i + "_size", Long.toString(result.getUrlentry().size()));
prop.put("type_results_" + i + "_words", URLEncoder.encode(results.getQuery().queryWords.toString(),"UTF-8"));
prop.put("type_results_" + i + "_former", results.getFormerSearch());
prop.put("type_results_" + i + "_rankingprops", result.getUrlentry().word().toPropertyForm() + ", domLengthEstimated=" + plasmaURL.domLengthEstimation(result.getUrlhash()) +
((plasmaURL.probablyRootURL(result.getUrlhash())) ? ", probablyRootURL" : "") +
(((wordURL = plasmaURL.probablyWordURL(urlhash, query.words(""))) != null) ? ", probablyWordURL=" + wordURL.toNormalform() : ""));
// adding snippet if available
if (result.hasSnippet()) {
prop.put("type_results_" + i + "_snippet", 1);
prop.putASIS("type_results_" + i + "_snippet_text", result.getSnippet().getLineMarked(query.queryHashes));//FIXME: the ASIS should not be needed, if there is no html in .java
} else {
prop.put("type_results_" + i + "_snippet", 0);
prop.put("type_results_" + i + "_snippet_text", "");
}
i++;
results.appendResult(result);
//}
@ -2258,6 +2219,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
**/
//Object[] ws = ref.getScores(16, false, 2, Integer.MAX_VALUE);
Object[] ws = acc.getReferences(16);
results.setReferences(ws);
log.logFine("SEARCH TIME AFTER XREF PREPARATION: " + ((System.currentTimeMillis() - timestamp) / 1000) + " seconds");
/*
@ -2265,18 +2227,15 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
for (int ii = 0; ii < ws.length; ii++) System.out.print(ws[ii] + ", ");
System.out.println(" all words = " + ref.getElementCount() + ", total count = " + ref.getTotalCount());
*/
prop.put("references", ws);
prop.put("num-results_linkcount", Integer.toString(i));
prop.put("type_results", Integer.toString(i));
}
// log
log.logInfo("EXIT WORD SEARCH: " + query.queryWords + " - " +
prop.get("num-results_totalcount", "0") + " links found, " +
prop.get("num-results_filteredcount", "0") + " links filtered, " +
prop.get("num-results_orderedcount", "0") + " links ordered, " +
prop.get("num-results_linkcount", "?") + " links selected, " +
((System.currentTimeMillis() - timestamp) / 1000) + " seconds");
results.getTotalcount() + " links found, " +
results.getFilteredcount() + " links filtered, " +
results.getOrderedcount() + " links ordered, " +
results.getLinkcount() + " links selected, " +
((System.currentTimeMillis() - timestamp) / 1000) + " seconds");
// prepare search statistics
@ -2289,7 +2248,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
handles.add(trackerHandle);
this.localSearchTracker.put(client, handles);
return prop;
return results;
} catch (IOException e) {
return null;
}

Loading…
Cancel
Save