diff --git a/build.properties b/build.properties
index f7aae6a4b..04675e6c1 100644
--- a/build.properties
+++ b/build.properties
@@ -3,7 +3,7 @@ javacSource=1.4
javacTarget=1.4
# Release Configuration
-releaseVersion=0.443
+releaseVersion=0.444
releaseFile=yacy_dev_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
#releaseFile=yacy_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
releaseDir=yacy_dev_v${releaseVersion}_${DSTAMP}_${releaseNr}
diff --git a/htroot/DetailedSearch.java b/htroot/DetailedSearch.java
index cd90d365d..6bea34c98 100644
--- a/htroot/DetailedSearch.java
+++ b/htroot/DetailedSearch.java
@@ -132,7 +132,7 @@ public class DetailedSearch {
}
// do the search
- plasmaSearchQuery thisSearch = new plasmaSearchQuery(query, wdist, count, searchtime, urlmask,
+ plasmaSearchQuery thisSearch = new plasmaSearchQuery(query, wdist, "", count, searchtime, urlmask,
((global) && (yacyonline) && (!(env.getConfig("last-search","").equals(querystring)))) ? plasmaSearchQuery.SEARCHDOM_GLOBALDHT : plasmaSearchQuery.SEARCHDOM_LOCAL,
"", 20);
plasmaSearchRankingProfile localRanking = new plasmaSearchRankingProfile("local", post.toString());
diff --git a/htroot/IndexCreate_p.java b/htroot/IndexCreate_p.java
index 89addb84b..5a4928ded 100644
--- a/htroot/IndexCreate_p.java
+++ b/htroot/IndexCreate_p.java
@@ -81,8 +81,6 @@ public class IndexCreate_p {
prop.put("error", 0);
prop.put("info", 0);
prop.put("refreshbutton", 0);
-
- switchboard.cleanProfiles();
if (post != null) {
if (post.containsKey("crawlingstart")) {
@@ -395,6 +393,7 @@ public class IndexCreate_p {
// sed crawl profiles
int count = 0;
+ int domlistlength = (post == null) ? 160 : post.getInt("domlistlength", 160);
//try{
Iterator it = switchboard.profiles.profiles(true);
plasmaCrawlProfile.entry profile;
@@ -410,7 +409,7 @@ public class IndexCreate_p {
prop.put("crawlProfiles_"+count+"_filter", profile.generalFilter());
prop.put("crawlProfiles_"+count+"_crawlingIfOlder", (profile.recrawlIfOlder() == Long.MAX_VALUE) ? "no re-crawl" : ""+profile.recrawlIfOlder());
prop.put("crawlProfiles_"+count+"_crawlingDomFilterDepth", (profile.domFilterDepth() == Integer.MAX_VALUE) ? "inactive" : ""+profile.domFilterDepth());
- prop.put("crawlProfiles_"+count+"_crawlingDomFilterContent", profile.domNames(true, 160));
+ prop.put("crawlProfiles_"+count+"_crawlingDomFilterContent", profile.domNames(true, domlistlength));
prop.put("crawlProfiles_"+count+"_crawlingDomMaxPages", (profile.domMaxPages() == Integer.MAX_VALUE) ? "unlimited" : ""+profile.domMaxPages());
prop.put("crawlProfiles_"+count+"_withQuery", ((profile.crawlingQ()) ? 1 : 0));
prop.put("crawlProfiles_"+count+"_storeCache", ((profile.storeHTCache()) ? 1 : 0));
diff --git a/htroot/index.html b/htroot/index.html
index 4dac23c85..f388059c2 100644
--- a/htroot/index.html
+++ b/htroot/index.html
@@ -34,6 +34,8 @@
+
+
|
more options... |
@@ -106,6 +108,19 @@
#(/urlmaskoptions)#
+
+
+ Prefer mask:
+ |
+
+ #(prefermaskoptions)#
+
+ ::
+ restrict on
+ show all
+ #(/prefermaskoptions)#
+ |
+
#(/searchoptions)#
diff --git a/htroot/index.java b/htroot/index.java
index 24e54be80..4a41f9afd 100644
--- a/htroot/index.java
+++ b/htroot/index.java
@@ -102,8 +102,8 @@ public class index {
prop.put("combine", 0);
prop.put("resultbottomline", 0);
prop.put("searchoptions", searchoptions);
- prop.put("searchoptions_count-10", 1);
- prop.put("searchoptions_count-50", 0);
+ prop.put("searchoptions_count-10", 0);
+ prop.put("searchoptions_count-50", 1);
prop.put("searchoptions_count-100", 0);
prop.put("searchoptions_count-1000", 0);
prop.put("searchoptions_order-ybr-date-quality", plasmaSearchPreOrder.canUseYBR() ? 1 : 0);
@@ -122,6 +122,8 @@ public class index {
prop.put("searchoptions_time-60", 0);
prop.put("searchoptions_urlmaskoptions", 0);
prop.put("searchoptions_urlmaskoptions_urlmaskfilter", ".*");
+ prop.put("searchoptions_prefermaskoptions", 0);
+ prop.put("searchoptions_prefermaskoptions_prefermaskfilter", "");
prop.put("results", "");
prop.put("cat", "href");
prop.put("type", "0");
diff --git a/htroot/yacy/search.java b/htroot/yacy/search.java
index dd44fe9de..2e8752d96 100644
--- a/htroot/yacy/search.java
+++ b/htroot/yacy/search.java
@@ -83,6 +83,7 @@ public final class search {
final long duetime= post.getLong("duetime", 3000);
final int count = post.getInt("count", 10); // maximum number of wanted results
final int maxdist= post.getInt("maxdist", Integer.MAX_VALUE);
+ final String prefer = post.get("prefer", "");
// final boolean global = ((String) post.get("resource", "global")).equals("global"); // if true, then result may consist of answers from other peers
// Date remoteTime = yacyCore.parseUniversalDate((String) post.get(yacySeed.MYTIME)); // read remote time
@@ -104,7 +105,7 @@ public final class search {
}
final long timestamp = System.currentTimeMillis();
- plasmaSearchQuery squery = new plasmaSearchQuery(keyhashes, maxdist, count, duetime, ".*");
+ plasmaSearchQuery squery = new plasmaSearchQuery(keyhashes, maxdist, prefer, count, duetime, ".*");
squery.domType = plasmaSearchQuery.SEARCHDOM_LOCAL;
serverObjects prop = new serverObjects();
diff --git a/htroot/yacysearch.html b/htroot/yacysearch.html
index 7319dd62a..1a1181d8c 100644
--- a/htroot/yacysearch.html
+++ b/htroot/yacysearch.html
@@ -55,6 +55,7 @@ picPlus.src = "/env/grafics/plus.gif";
+
diff --git a/htroot/yacysearch.java b/htroot/yacysearch.java
index a876bebc0..bd769a35d 100644
--- a/htroot/yacysearch.java
+++ b/htroot/yacysearch.java
@@ -109,6 +109,7 @@ public class yacysearch {
prop.put("resource", "global");
prop.put("time", 6);
prop.put("urlmaskfilter", ".*");
+ prop.put("prefermaskfilter", "");
prop.put("cat", "href");
prop.put("depth", "0");
prop.put("type", 0);
@@ -144,7 +145,8 @@ public class yacysearch {
} else {
urlmask = (post.containsKey("urlmaskfilter")) ? (String) post.get("urlmaskfilter") : ".*";
}
- String prefer = post.get("prefer", ".*");
+ String prefermask = post.get("prefermaskfilter", "");
+ if ((prefermask.length() > 0) && (prefermask.indexOf(".*") < 0)) prefermask = ".*" + prefermask + ".*";
serverObjects prop = new serverObjects();
@@ -189,6 +191,7 @@ public class yacysearch {
plasmaSearchQuery thisSearch = new plasmaSearchQuery(
query,
maxDistance,
+ prefermask,
count,
searchtime,
urlmask,
@@ -351,7 +354,7 @@ public class yacysearch {
prop.put("resource", (global) ? "global" : "local");
prop.put("time", searchtime / 1000);
prop.put("urlmaskfilter", urlmask);
- prop.put("prefer", prefer);
+ prop.put("prefermaskfilter", prefermask);
prop.put("display", display);
// return rewrite properties
diff --git a/source/de/anomic/plasma/plasmaCrawlProfile.java b/source/de/anomic/plasma/plasmaCrawlProfile.java
index 21e711ea4..fa14483c1 100644
--- a/source/de/anomic/plasma/plasmaCrawlProfile.java
+++ b/source/de/anomic/plasma/plasmaCrawlProfile.java
@@ -456,7 +456,7 @@ public class plasmaCrawlProfile {
while (domnamesi.hasNext()) {
ey = (Map.Entry) domnamesi.next();
dp = (DomProfile) ey.getValue();
- domnames += ((String) ey.getKey()) + ((attr) ? ("/d=" + dp.depth + ",c=" + dp.count + " ") : " ");
+ domnames += ((String) ey.getKey()) + ((attr) ? ("/r=" + dp.referrer + ", d=" + dp.depth + ", c=" + dp.count + " ") : " ") + "<br>";
if ((maxlength > 0) && (domnames.length() >= maxlength)) {
domnames = domnames.substring(0, maxlength-3) + "...";
break;
diff --git a/source/de/anomic/plasma/plasmaCrawlStacker.java b/source/de/anomic/plasma/plasmaCrawlStacker.java
index acb2e77bf..735ae2331 100644
--- a/source/de/anomic/plasma/plasmaCrawlStacker.java
+++ b/source/de/anomic/plasma/plasmaCrawlStacker.java
@@ -318,7 +318,7 @@ public final class plasmaCrawlStacker {
}
// add domain to profile domain list
- if (currentdepth <= profile.domFilterDepth()) {
+ if ((profile.domFilterDepth() != Integer.MAX_VALUE) || (profile.domMaxPages() != Integer.MAX_VALUE)) {
profile.domInc(nexturl.getHost(), (referrerURL == null) ? null : referrerURL.getHost().toLowerCase(), currentdepth);
}
diff --git a/source/de/anomic/plasma/plasmaSearchQuery.java b/source/de/anomic/plasma/plasmaSearchQuery.java
index 55a2e31c0..0f68e887b 100644
--- a/source/de/anomic/plasma/plasmaSearchQuery.java
+++ b/source/de/anomic/plasma/plasmaSearchQuery.java
@@ -61,6 +61,7 @@ public final class plasmaSearchQuery {
public Set queryWords;
public Set queryHashes;
public int wantedResults;
+ public String prefer;
public long maximumTime;
public String urlMask;
public int domType;
@@ -68,11 +69,12 @@ public final class plasmaSearchQuery {
public int domMaxTargets;
public int maxDistance;
- public plasmaSearchQuery(Set queryWords, int maxDistance,
+ public plasmaSearchQuery(Set queryWords, int maxDistance, String prefer,
int wantedResults, long maximumTime, String urlMask,
int domType, String domGroupName, int domMaxTargets) {
this.queryWords = queryWords;
this.maxDistance = maxDistance;
+ this.prefer = prefer;
this.queryHashes = words2hashes(queryWords);
this.wantedResults = wantedResults;
this.maximumTime = maximumTime;
@@ -82,10 +84,11 @@ public final class plasmaSearchQuery {
this.domMaxTargets = domMaxTargets;
}
- public plasmaSearchQuery(Set queryHashes, int maxDistance,
+ public plasmaSearchQuery(Set queryHashes, int maxDistance, String prefer,
int wantedResults, long maximumTime, String urlMask) {
this.queryWords = null;
this.maxDistance = maxDistance;
+ this.prefer = prefer;
this.queryHashes = queryHashes;
this.wantedResults = wantedResults;
this.maximumTime = maximumTime;
diff --git a/source/de/anomic/plasma/plasmaSearchRankingProfile.java b/source/de/anomic/plasma/plasmaSearchRankingProfile.java
index e172bdb0f..b898bd0bb 100644
--- a/source/de/anomic/plasma/plasmaSearchRankingProfile.java
+++ b/source/de/anomic/plasma/plasmaSearchRankingProfile.java
@@ -73,6 +73,7 @@ public class plasmaSearchRankingProfile {
public static final String QUERYINDESCR = "queryindescr";
public static final String URLCOMPINTOPLIST = "urlcompintoplist";
public static final String DESCRCOMPINTOPLIST = "descrcompintoplist";
+ public static final String PREFER = "prefer";
public String[] order;
private HashMap coeff;
@@ -96,6 +97,7 @@ public class plasmaSearchRankingProfile {
coeff.put(QUERYINDESCR, new Integer(8));
coeff.put(URLCOMPINTOPLIST, new Integer(3));
coeff.put(DESCRCOMPINTOPLIST, new Integer(2));
+ coeff.put(PREFER, new Integer(15));
}
public plasmaSearchRankingProfile(String prefix, String profile) {
@@ -183,6 +185,10 @@ public class plasmaSearchRankingProfile {
// apply pre-calculated order attributes
long ranking = this.preRanking(normalizedEntry);
+ // boost the ranking of hits whose URL or description matches the query's 'prefer' pattern
+ if (page.url().toString().matches(query.prefer)) ranking += 256 << ((Integer) coeff.get(PREFER)).intValue();
+ if (page.descr().toString().matches(query.prefer)) ranking += 256 << ((Integer) coeff.get(PREFER)).intValue();
+
// apply 'common-sense' heuristic using references
for (int j = 0; j < urlcomps.length; j++) {
if (topwords.contains(urlcomps[j])) ranking += 256 << ((Integer) coeff.get(URLCOMPINTOPLIST)).intValue();
@@ -210,6 +216,7 @@ public class plasmaSearchRankingProfile {
ranking += (255 * page.descr().length() / 80) << ((Integer) coeff.get(DESCRLENGTH)).intValue();
ranking += (255 * (12 - Math.abs(12 - Math.min(12, descrcomps.length))) / 12) << ((Integer) coeff.get(DESCRCOMPS)).intValue();
+
return ranking;
}