From c7f130030037fbfbc7ab4e5821389434074e2c5e Mon Sep 17 00:00:00 2001 From: orbiter Date: Tue, 28 Mar 2006 15:37:45 +0000 Subject: [PATCH] -fixes for last commit -some more ranking attributes (comments only) git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@1979 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- htroot/IndexCreate_p.java | 6 +++-- .../anomic/plasma/plasmaWordIndexEntry.java | 23 ++++++++++--------- 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/htroot/IndexCreate_p.java b/htroot/IndexCreate_p.java index 731558f5d..fbb36f7d5 100644 --- a/htroot/IndexCreate_p.java +++ b/htroot/IndexCreate_p.java @@ -108,7 +108,7 @@ public class IndexCreate_p { env.setConfig("crawlingDomFilterDepth", Integer.toString(crawlingDomFilterDepth)); boolean crawlingDomMaxCheck = post.get("crawlingDomMaxCheck", "off").equals("on"); - int crawlingDomMaxPages = (crawlingDomMaxCheck) ? Integer.parseInt(post.get("crawlingDomFilterDepth", "-1")) : -1; + int crawlingDomMaxPages = (crawlingDomMaxCheck) ? 
Integer.parseInt(post.get("crawlingDomMaxPages", "-1")) : -1; env.setConfig("crawlingDomMaxPages", Integer.toString(crawlingDomMaxPages)); boolean crawlingQ = post.get("crawlingQ", "off").equals("on"); @@ -325,7 +325,9 @@ public class IndexCreate_p { prop.put("crawlingIfOlderUnitDayCheck", 0); prop.put("crawlingIfOlderUnitHourCheck", 0); prop.put("crawlingIfOlderUnitMinuteCheck", 0); - if (crawlingIfOlder == Integer.MAX_VALUE) { + if (crawlingIfOlder == -1) { + prop.put("crawlingIfOlderNumber", 1); + prop.put("crawlingIfOlderUnitYearCheck", 1); } else if (crawlingIfOlder >= 60*24*365) { prop.put("crawlingIfOlderNumber", crawlingIfOlder / 60*24*365); prop.put("crawlingIfOlderUnitYearCheck", 1); diff --git a/source/de/anomic/plasma/plasmaWordIndexEntry.java b/source/de/anomic/plasma/plasmaWordIndexEntry.java index 07404351c..b99b3780d 100644 --- a/source/de/anomic/plasma/plasmaWordIndexEntry.java +++ b/source/de/anomic/plasma/plasmaWordIndexEntry.java @@ -112,14 +112,15 @@ public final class plasmaWordIndexEntry implements Cloneable { public static final int AP_PATH = 9; // word inside an url: in path public static final int AP_IMG = 10; // tag inside image references public static final int AP_ANCHOR = 11; // anchor description - public static final int AP_BOLD = 12; // may be interpreted as emphasized - public static final int AP_ITALICS = 13; // may be interpreted as emphasized - public static final int AP_WEAK = 14; // for Text that is small or bareley visible - public static final int AP_INVISIBLE = 15; // good for spam detection - public static final int AP_TAG = 16; // for tagged indexeing (i.e. using mp3 tags) - public static final int AP_AUTHOR = 17; // word appears in author name - public static final int AP_OPUS = 18; // word appears in name of opus, which may be an album name (in mp3 tags) - public static final int AP_TRACK = 19; // word appears in track name (i.e. 
in mp3 tags) + public static final int AP_ENV = 12; // word appears in environment (similar to anchor appearance) + public static final int AP_BOLD = 13; // may be interpreted as emphasized + public static final int AP_ITALICS = 14; // may be interpreted as emphasized + public static final int AP_WEAK = 15; // for Text that is small or barely visible + public static final int AP_INVISIBLE = 16; // good for spam detection + public static final int AP_TAG = 17; // for tagged indexing (i.e. using mp3 tags) + public static final int AP_AUTHOR = 18; // word appears in author name + public static final int AP_OPUS = 19; // word appears in name of opus, which may be an album name (in mp3 tags) + public static final int AP_TRACK = 20; // word appears in track name (i.e. in mp3 tags) // URL attributes public static final int UA_LOCAL = 0; // URL was crawled locally @@ -165,9 +166,9 @@ public final class plasmaWordIndexEntry implements Cloneable { char doctype = DT_UNKNOWN; if (mime == null) doctype = DT_UNKNOWN; else if (mime.startsWith("image/")) doctype = DT_IMAGE; -/* else if (mime.endsWith("/gif")) doctype = DT_IMAGE; + else if (mime.endsWith("/gif")) doctype = DT_IMAGE; else if (mime.endsWith("/jpeg")) doctype = DT_IMAGE; else if (mime.endsWith("/png")) doctype = DT_IMAGE; */ + else if (mime.endsWith("/png")) doctype = DT_IMAGE; else if (mime.endsWith("/html")) doctype = DT_HTML; else if (mime.endsWith("/rtf")) doctype = DT_DOC; else if (mime.endsWith("/pdf")) doctype = DT_PDFPS; @@ -177,7 +178,7 @@ public final class plasmaWordIndexEntry implements Cloneable { else if (mime.endsWith("/mspowerpoint")) doctype = DT_DOC; else if (mime.endsWith("/postscript")) doctype = DT_PDFPS; else if (mime.startsWith("text/")) doctype = DT_TEXT; -// else if (mime.startsWith("image/")) doctype = DT_IMAGE; + else if (mime.startsWith("image/")) doctype = DT_IMAGE; else if (mime.startsWith("audio/")) doctype = DT_AUDIO; else if (mime.startsWith("video/")) doctype = DT_MOVIE; //bz2 = 
application/x-bzip2