From d8425e6809017be6c5d2843b294256f59fbbafba Mon Sep 17 00:00:00 2001 From: Michael Peter Christen Date: Tue, 4 Sep 2012 14:47:53 +0200 Subject: [PATCH] added collections to crawl monitor --- htroot/CrawlResults.html | 2 ++ htroot/CrawlResults.java | 10 ++++++++++ htroot/ViewFile.html | 1 + htroot/ViewFile.java | 3 +++ source/de/anomic/crawler/ResultURLs.java | 2 +- source/net/yacy/kelondro/data/meta/URIMetadata.java | 4 +++- .../yacy/kelondro/data/meta/URIMetadataNode.java | 6 ++++++ .../net/yacy/kelondro/data/meta/URIMetadataRow.java | 13 ++++++++++++- source/net/yacy/search/index/Segment.java | 3 ++- 9 files changed, 40 insertions(+), 4 deletions(-) diff --git a/htroot/CrawlResults.html b/htroot/CrawlResults.html index bc773e020..c2e9d5047 100644 --- a/htroot/CrawlResults.html +++ b/htroot/CrawlResults.html @@ -111,6 +111,7 @@ + #(showCollection)#::Collection#(/showCollection)# #(showInit)#::Initiator#(/showInit)# #(showExec)#::Executor#(/showExec)# #(showDate)#::Modified#(/showDate)# @@ -131,6 +132,7 @@ + #(showCollection)#::#[collection]##(/showCollection)# #(showInit)#::#[initiatorSeed]##(/showInit)# #(showExec)#::#[executorSeed]##(/showExec)# diff --git a/htroot/CrawlResults.java b/htroot/CrawlResults.java index 47fa832c4..2e09e3c2c 100644 --- a/htroot/CrawlResults.java +++ b/htroot/CrawlResults.java @@ -26,6 +26,7 @@ import java.io.IOException; import java.text.SimpleDateFormat; +import java.util.Arrays; import java.util.Date; import java.util.Iterator; import java.util.Locale; @@ -39,6 +40,7 @@ import net.yacy.kelondro.data.meta.URIMetadata; import net.yacy.kelondro.logging.Log; import net.yacy.peers.Seed; import net.yacy.search.Switchboard; +import net.yacy.search.index.YaCySchema; import de.anomic.crawler.ResultURLs; import de.anomic.crawler.ResultURLs.EventOrigin; import de.anomic.crawler.ResultURLs.InitExecEntry; @@ -54,6 +56,7 @@ public class CrawlResults { final serverObjects prop = new serverObjects(); int lines = 500; + boolean showCollection = 
sb.index.fulltext().getSolrScheme().isEmpty() || sb.index.fulltext().getSolrScheme().contains(YaCySchema.collection_sxt); boolean showInit = env.getConfigBool("IndexMonitorInit", false); boolean showExec = env.getConfigBool("IndexMonitorExec", false); boolean showDate = env.getConfigBool("IndexMonitorDate", true); @@ -166,6 +169,7 @@ public class CrawlResults { prop.putHTML("table_feedbackpage", "CrawlResults.html"); prop.put("table_tabletype", tabletype.getCode()); + prop.put("table_showCollection", (showCollection) ? "1" : "0"); prop.put("table_showInit", (showInit) ? "1" : "0"); prop.put("table_showExec", (showExec) ? "1" : "0"); prop.put("table_showDate", (showDate) ? "1" : "0"); @@ -204,6 +208,12 @@ public class CrawlResults { prop.put("table_indexed_" + cnt + "_tabletype", tabletype.getCode()); prop.put("table_indexed_" + cnt + "_urlhash", entry.getKey()); + if (showCollection) { + prop.put("table_indexed_" + cnt + "_showCollection", "1"); + prop.put("table_indexed_" + cnt + "_showCollection_collection", Arrays.toString(urle.collections())); + } else + prop.put("table_indexed_" + cnt + "_showCollection", "0"); + if (showInit) { prop.put("table_indexed_" + cnt + "_showInit", "1"); prop.put("table_indexed_" + cnt + "_showInit_initiatorSeed", (initiatorSeed == null) ? "unknown" : initiatorSeed.getName()); diff --git a/htroot/ViewFile.html b/htroot/ViewFile.html index 1406692a5..a44d67c6d 100644 --- a/htroot/ViewFile.html +++ b/htroot/ViewFile.html @@ -63,6 +63,7 @@
Audio Links:
#[laudio]#
Video Links:
#[lvideo]#
App Links:
#[lapp]#
+
Collections:
#[collections]#
Triplestore:
#[triples]#
:
diff --git a/htroot/ViewFile.java b/htroot/ViewFile.java index 66450f9b7..048fa3405 100644 --- a/htroot/ViewFile.java +++ b/htroot/ViewFile.java @@ -29,6 +29,7 @@ import java.io.IOException; import java.net.MalformedURLException; +import java.util.Arrays; import java.util.Collection; import java.util.Enumeration; import java.util.Iterator; @@ -371,6 +372,7 @@ public class ViewFile { prop.put("error_laudio", ""); prop.put("error_lvideo", ""); prop.put("error_lapp", ""); + prop.put("error_collections", ""); } else { prop.put("error_referrerHash", urlEntry.referrerHash()); prop.put("error_moddate", urlEntry.moddate()); @@ -393,6 +395,7 @@ public class ViewFile { prop.put("error_laudio", urlEntry.laudio()); prop.put("error_lvideo", urlEntry.lvideo()); prop.put("error_lapp", urlEntry.lapp()); + prop.put("error_collections", Arrays.toString(urlEntry.collections())); } return prop; diff --git a/source/de/anomic/crawler/ResultURLs.java b/source/de/anomic/crawler/ResultURLs.java index 2593d40b1..a5f05af2f 100644 --- a/source/de/anomic/crawler/ResultURLs.java +++ b/source/de/anomic/crawler/ResultURLs.java @@ -223,7 +223,7 @@ public final class ResultURLs { public static void main(final String[] args) { try { final DigestURI url = new DigestURI("http", "www.yacy.net", 80, "/"); - final URIMetadata urlRef = new URIMetadataRow(url, "YaCy Homepage", "", "", "", 0.0d, 0.0d, new Date(), new Date(), new Date(), "", new byte[] {}, 123, 42, '?', new Bitfield(), UTF8.getBytes("de"), 0, 0, 0, 0, 0, 0); + final URIMetadata urlRef = new URIMetadataRow(url, "YaCy Homepage", "", "", "", 0.0d, 0.0d, new Date(), new Date(), new Date(), "", new byte[] {}, 123, 42, '?', new Bitfield(), UTF8.getBytes("de"), 0, 0, 0, 0, 0, 0, new String[0]); final EventOrigin stackNo = EventOrigin.LOCAL_CRAWLING; System.out.println("valid test:\n======="); // add diff --git a/source/net/yacy/kelondro/data/meta/URIMetadata.java b/source/net/yacy/kelondro/data/meta/URIMetadata.java index 1d839a64a..419cf59ee 
100644 --- a/source/net/yacy/kelondro/data/meta/URIMetadata.java +++ b/source/net/yacy/kelondro/data/meta/URIMetadata.java @@ -72,7 +72,7 @@ public interface URIMetadata { */ @Override public String toString(); - + public String dc_title(); public String dc_creator(); @@ -117,6 +117,8 @@ public interface URIMetadata { public String snippet(); + public String[] collections(); + public WordReference word(); public boolean isOlder(final URIMetadata other); diff --git a/source/net/yacy/kelondro/data/meta/URIMetadataNode.java b/source/net/yacy/kelondro/data/meta/URIMetadataNode.java index ea6f6d26e..7592e260f 100644 --- a/source/net/yacy/kelondro/data/meta/URIMetadataNode.java +++ b/source/net/yacy/kelondro/data/meta/URIMetadataNode.java @@ -310,6 +310,12 @@ public class URIMetadataNode implements URIMetadata { return this.snippet; } + @Override + public String[] collections() { + ArrayList<String> a = getArrayList(YaCySchema.collection_sxt); + return a.toArray(new String[a.size()]); + } + @Override public WordReference word() { return this.word; diff --git a/source/net/yacy/kelondro/data/meta/URIMetadataRow.java b/source/net/yacy/kelondro/data/meta/URIMetadataRow.java index 6d5f1bc06..dbe02ddf0 100644 --- a/source/net/yacy/kelondro/data/meta/URIMetadataRow.java +++ b/source/net/yacy/kelondro/data/meta/URIMetadataRow.java @@ -104,6 +104,7 @@ public class URIMetadataRow implements URIMetadata { private final Row.Entry entry; private final String snippet; + private final String[] collections; private WordReference word; // this is only used if the url is transported via remote search requests private final long ranking; // during generation of a search result this value is set private Components comp; @@ -112,6 +113,7 @@ public class URIMetadataRow implements URIMetadata { // create a dummy entry, good to produce poison objects this.entry = rowdef.newEntry(); this.snippet = ""; + this.collections = new String[0]; this.word = null; this.ranking = 0; this.comp = null; @@ -139,7 
+141,8 @@ public class URIMetadataRow implements URIMetadata { final int laudio, final int limage, final int lvideo, - final int lapp) { + final int lapp, + final String[] collections) { // create new entry this.entry = rowdef.newEntry(); this.entry.setCol(col_hash, url.hash()); @@ -162,6 +165,7 @@ public class URIMetadataRow implements URIMetadata { this.entry.setCol(col_lapp, lapp); //System.out.println("===DEBUG=== " + load.toString() + ", " + decodeDate(col_load).toString()); this.snippet = ""; + this.collections = collections; this.word = null; this.ranking = 0; this.comp = null; @@ -211,6 +215,7 @@ public class URIMetadataRow implements URIMetadata { this.word = searchedWord; this.ranking = ranking; this.comp = null; + this.collections = new String[0]; } public URIMetadataRow(final Properties prop) throws kelondroException { @@ -277,6 +282,7 @@ public class URIMetadataRow implements URIMetadata { } this.ranking = 0; this.comp = null; + this.collections = new String[0]; } public static URIMetadataRow importEntry(final String propStr) { @@ -478,6 +484,11 @@ public class URIMetadataRow implements URIMetadata { return this.snippet; } + @Override + public String[] collections() { + return this.collections; + } + @Override public WordReference word() { return this.word; diff --git a/source/net/yacy/search/index/Segment.java b/source/net/yacy/search/index/Segment.java index 84a146394..6d1318fcc 100644 --- a/source/net/yacy/search/index/Segment.java +++ b/source/net/yacy/search/index/Segment.java @@ -389,7 +389,8 @@ public class Segment { document.getAudiolinks().size(), // laudio document.getImages().size(), // limage document.getVideolinks().size(), // lvideo - document.getApplinks().size() // lapp + document.getApplinks().size(), // lapp + profile.collections() // collections ); // STORE TO SOLR