diff --git a/defaults/solr.keys.list b/defaults/solr.keys.list index ae2faf5b3..65d7a70b3 100644 --- a/defaults/solr.keys.list +++ b/defaults/solr.keys.list @@ -91,7 +91,7 @@ attr_inboundlinks_tag #attr_inboundlinks_rel ## internal links, the rel property of the a-tag, coded binary -#attr_inboundlinks_relcode +#attr_inboundlinks_relflags ## internal links, the text content of the a-tag #attr_inboundlinks_text @@ -117,6 +117,9 @@ attr_outboundlinks_tag ## external links, the rel property of the a-tag #attr_outboundlinks_rel +## external links, the rel property of the a-tag, coded binary +#attr_outboundlinks_relflags + ## external links, the text content of the a-tag #attr_outboundlinks_text diff --git a/htroot/IndexControlRWIs_p.java b/htroot/IndexControlRWIs_p.java index f7a6c1331..473e4ef5a 100644 --- a/htroot/IndexControlRWIs_p.java +++ b/htroot/IndexControlRWIs_p.java @@ -64,9 +64,9 @@ import net.yacy.search.SwitchboardConstants; import net.yacy.search.index.Segment; import net.yacy.search.index.Segments; import net.yacy.search.query.QueryParams; +import net.yacy.search.query.RWIProcess; import net.yacy.search.query.SearchEventCache; import net.yacy.search.ranking.BlockRank; -import net.yacy.search.ranking.RankingProcess; import net.yacy.search.ranking.ReferenceOrder; import de.anomic.crawler.ResultURLs; import de.anomic.data.ListManager; @@ -134,7 +134,7 @@ public class IndexControlRWIs_p { if (post.containsKey("keystringsearch")) { keyhash = Word.word2hash(keystring); prop.put("keyhash", keyhash); - final RankingProcess ranking = genSearchresult(prop, sb, segment, keyhash, null); + final RWIProcess ranking = genSearchresult(prop, sb, segment, keyhash, null); if (ranking.filteredCount() == 0) { prop.put("searchresult", 1); prop.putHTML("searchresult_word", keystring); @@ -145,7 +145,7 @@ public class IndexControlRWIs_p { if (keystring.length() == 0 || !ByteBuffer.equals(Word.word2hash(keystring), keyhash)) { prop.put("keystring", "<not possible to compute word 
from hash>"); } - final RankingProcess ranking = genSearchresult(prop, sb, segment, keyhash, null); + final RWIProcess ranking = genSearchresult(prop, sb, segment, keyhash, null); if (ranking.filteredCount() == 0) { prop.put("searchresult", 2); prop.putHTML("searchresult_wordhash", ASCII.String(keyhash)); @@ -240,7 +240,7 @@ public class IndexControlRWIs_p { } final Bitfield flags = compileFlags(post); final int count = (post.get("lines", "all").equals("all")) ? -1 : post.getInt("lines", -1); - final RankingProcess ranking = genSearchresult(prop, sb, segment, keyhash, flags); + final RWIProcess ranking = genSearchresult(prop, sb, segment, keyhash, flags); genURLList(prop, keyhash, keystring, ranking, flags, count); } @@ -425,7 +425,7 @@ public class IndexControlRWIs_p { return prop; } - public static void genURLList(final serverObjects prop, final byte[] keyhash, final String keystring, final RankingProcess ranked, final Bitfield flags, final int maxlines) { + public static void genURLList(final serverObjects prop, final byte[] keyhash, final String keystring, final RWIProcess ranked, final Bitfield flags, final int maxlines) { // search for a word hash and generate a list of url links final String keyhashs = ASCII.String(keyhash); prop.put("genUrlList_keyHash", keyhashs); @@ -557,10 +557,10 @@ public class IndexControlRWIs_p { prop.put("searchresult_hosts", hc); } - public static RankingProcess genSearchresult(final serverObjects prop, final Switchboard sb, final Segment segment, final byte[] keyhash, final Bitfield filter) { + public static RWIProcess genSearchresult(final serverObjects prop, final Switchboard sb, final Segment segment, final byte[] keyhash, final Bitfield filter) { final QueryParams query = new QueryParams(ASCII.String(keyhash), -1, filter, segment, sb.getRanking(), "IndexControlRWIs_p"); final ReferenceOrder order = new ReferenceOrder(query.ranking, UTF8.getBytes(query.targetlang)); - final RankingProcess ranked = new RankingProcess(query, 
order, Integer.MAX_VALUE); + final RWIProcess ranked = new RWIProcess(query, order, Integer.MAX_VALUE); ranked.run(); if (ranked.filteredCount() == 0) { diff --git a/source/net/yacy/cora/services/federated/solr/SolrScheme.java b/source/net/yacy/cora/services/federated/solr/SolrScheme.java index b191f24ec..6b2a95ff2 100644 --- a/source/net/yacy/cora/services/federated/solr/SolrScheme.java +++ b/source/net/yacy/cora/services/federated/solr/SolrScheme.java @@ -162,6 +162,7 @@ public class SolrScheme extends ConfigurationSet { if (isEmpty() || contains("attr_inboundlinks_urlstub")) addSolr(solrdoc, "attr_inboundlinks_urlstub", inboundlinksURLStub); if (isEmpty() || contains("attr_inboundlinks_name")) addSolr(solrdoc, "attr_inboundlinks_name", inboundlinksName); if (isEmpty() || contains("attr_inboundlinks_rel")) addSolr(solrdoc, "attr_inboundlinks_rel", inboundlinksRel); + if (isEmpty() || contains("attr_inboundlinks_relflags")) addSolr(solrdoc, "attr_inboundlinks_relflags", relEval(inboundlinksRel)); if (isEmpty() || contains("attr_inboundlinks_text")) addSolr(solrdoc, "attr_inboundlinks_text", inboundlinksText); c = 0; @@ -198,6 +199,7 @@ public class SolrScheme extends ConfigurationSet { if (isEmpty() || contains("attr_outboundlinks_urlstub")) addSolr(solrdoc, "attr_outboundlinks_urlstub", outboundlinksURLStub); if (isEmpty() || contains("attr_outboundlinks_name")) addSolr(solrdoc, "attr_outboundlinks_name", outboundlinksName); if (isEmpty() || contains("attr_outboundlinks_rel")) addSolr(solrdoc, "attr_outboundlinks_rel", outboundlinksRel); + if (isEmpty() || contains("attr_outboundlinks_relflags")) addSolr(solrdoc, "attr_outboundlinks_relflags", relEval(outboundlinksRel)); if (isEmpty() || contains("attr_outboundlinks_text")) addSolr(solrdoc, "attr_outboundlinks_text", outboundlinksText); @@ -352,6 +354,16 @@ public class SolrScheme extends ConfigurationSet { return solrdoc; } + private int relEval(String[] rel) { // folds all given rel values into one bit mask: 1 = "me", 2 = "nofollow" + int i = 0; + for (String s: rel) { + String s0 = 
s.toLowerCase().trim(); + if ("me".equals(s0)) i |= 1; // OR instead of add: a repeated "me" must not carry into the "nofollow" bit + if ("nofollow".equals(s0)) i |= 2; // OR keeps the result within 0..3 however many links match + } + return i; + } + public String solrGetID(final SolrDocument solr) { return (String) solr.getFieldValue("id"); } diff --git a/source/net/yacy/peers/yacyClient.java b/source/net/yacy/peers/yacyClient.java index aa843bda9..efc4f589d 100644 --- a/source/net/yacy/peers/yacyClient.java +++ b/source/net/yacy/peers/yacyClient.java @@ -97,8 +97,8 @@ import net.yacy.repository.Blacklist; import net.yacy.search.Switchboard; import net.yacy.search.index.Segment; import net.yacy.search.query.QueryParams; +import net.yacy.search.query.RWIProcess; import net.yacy.search.query.SearchEvent; -import net.yacy.search.ranking.RankingProcess; import net.yacy.search.ranking.RankingProfile; import net.yacy.search.snippet.ContentDomain; import net.yacy.search.snippet.TextSnippet; @@ -440,7 +440,7 @@ public final class yacyClient { final int partitions, final yacySeed target, final Segment indexSegment, - final RankingProcess containerCache, + final RWIProcess containerCache, final SearchEvent.SecondarySearchSuperviser secondarySearchSuperviser, final Blacklist blacklist, final RankingProfile rankingProfile, diff --git a/source/net/yacy/peers/yacySearch.java b/source/net/yacy/peers/yacySearch.java index 63fed05f9..d0c69f932 100644 --- a/source/net/yacy/peers/yacySearch.java +++ b/source/net/yacy/peers/yacySearch.java @@ -36,8 +36,8 @@ import net.yacy.peers.dht.PeerSelection; import net.yacy.repository.Blacklist; import net.yacy.search.index.Segment; import net.yacy.search.query.QueryParams; +import net.yacy.search.query.RWIProcess; import net.yacy.search.query.SearchEvent; -import net.yacy.search.ranking.RankingProcess; import net.yacy.search.ranking.RankingProfile; @@ -49,7 +49,7 @@ public class yacySearch extends Thread { final private boolean global; final private int partitions; final private Segment indexSegment; - final private RankingProcess containerCache; + final private RWIProcess 
containerCache; final private SearchEvent.SecondarySearchSuperviser secondarySearchSuperviser; final private Blacklist blacklist; final private yacySeed targetPeer; @@ -75,7 +75,7 @@ public class yacySearch extends Thread { final yacySeed targetPeer, final Segment indexSegment, final yacySeedDB peers, - final RankingProcess containerCache, + final RWIProcess containerCache, final SearchEvent.SecondarySearchSuperviser secondarySearchSuperviser, final Blacklist blacklist, final RankingProfile rankingProfile, @@ -162,7 +162,7 @@ public class yacySearch extends Thread { final int count, long time, final int maxDist, final Segment indexSegment, final yacySeedDB peers, - final RankingProcess containerCache, + final RWIProcess containerCache, final SearchEvent.SecondarySearchSuperviser secondarySearchSuperviser, final Blacklist blacklist, final RankingProfile rankingProfile, @@ -211,7 +211,7 @@ public class yacySearch extends Thread { final long time, final Segment indexSegment, final yacySeedDB peers, - final RankingProcess containerCache, + final RWIProcess containerCache, final String targethash, final Blacklist blacklist, final RankingProfile rankingProfile, final Bitfield constraint, final SortedMap clusterselection) { diff --git a/source/net/yacy/search/index/DocumentIndex.java b/source/net/yacy/search/index/DocumentIndex.java index 822d41370..e6606b075 100644 --- a/source/net/yacy/search/index/DocumentIndex.java +++ b/source/net/yacy/search/index/DocumentIndex.java @@ -45,8 +45,8 @@ import net.yacy.kelondro.data.meta.URIMetadataRow; import net.yacy.kelondro.data.meta.URIMetadataRow.Components; import net.yacy.kelondro.logging.Log; import net.yacy.search.query.QueryParams; +import net.yacy.search.query.RWIProcess; import net.yacy.search.query.SearchEvent; -import net.yacy.search.ranking.RankingProcess; import net.yacy.search.ranking.RankingProfile; import net.yacy.search.ranking.ReferenceOrder; import net.yacy.search.snippet.ContentDomain; @@ -207,7 +207,7 @@ public 
class DocumentIndex extends Segment { // make a query and start a search final QueryParams query = new QueryParams(querystring, count, null, this, textRankingDefault, "DocumentIndex"); final ReferenceOrder order = new ReferenceOrder(query.ranking, UTF8.getBytes(query.targetlang)); - final RankingProcess rankedCache = new RankingProcess(query, order, SearchEvent.max_results_preparation); + final RWIProcess rankedCache = new RWIProcess(query, order, SearchEvent.max_results_preparation); rankedCache.start(); // search is running; retrieve results diff --git a/source/net/yacy/search/index/Segment.java b/source/net/yacy/search/index/Segment.java index d61bbf66c..1b6dd2fbc 100644 --- a/source/net/yacy/search/index/Segment.java +++ b/source/net/yacy/search/index/Segment.java @@ -63,8 +63,8 @@ import net.yacy.kelondro.util.ISO639; import net.yacy.repository.Blacklist; import net.yacy.repository.LoaderDispatcher; import net.yacy.search.Switchboard; +import net.yacy.search.query.RWIProcess; import net.yacy.search.query.SearchEvent; -import net.yacy.search.ranking.RankingProcess; import de.anomic.crawler.retrieval.Response; public class Segment { @@ -219,7 +219,7 @@ public class Segment { final int outlinksOther, final SearchEvent searchEvent, final String sourceName) { - final RankingProcess rankingProcess = (searchEvent == null) ? null : searchEvent.getRankingResult(); + final RWIProcess rankingProcess = (searchEvent == null) ? 
null : searchEvent.getRankingResult(); if (rankingProcess != null) rankingProcess.moreFeeders(1); int wordCount = 0; final int urlLength = url.toNormalform(true, true).length(); diff --git a/source/net/yacy/search/ranking/RankingProcess.java b/source/net/yacy/search/query/RWIProcess.java similarity index 96% rename from source/net/yacy/search/ranking/RankingProcess.java rename to source/net/yacy/search/query/RWIProcess.java index c0b62b9c7..7aea35a1f 100644 --- a/source/net/yacy/search/ranking/RankingProcess.java +++ b/source/net/yacy/search/query/RWIProcess.java @@ -24,7 +24,7 @@ // along with this program; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -package net.yacy.search.ranking; +package net.yacy.search.query; import java.util.Comparator; import java.util.ConcurrentModificationException; @@ -60,13 +60,11 @@ import net.yacy.kelondro.util.EventTracker; import net.yacy.peers.graphics.ProfilingGraph; import net.yacy.search.Switchboard; import net.yacy.search.index.Segment; -import net.yacy.search.query.QueryParams; -import net.yacy.search.query.SearchEvent; -import net.yacy.search.query.SearchEvent.Type; +import net.yacy.search.ranking.ReferenceOrder; import net.yacy.search.snippet.ContentDomain; import net.yacy.search.snippet.ResultEntry; -public final class RankingProcess extends Thread { +public final class RWIProcess extends Thread { private static final int maxDoubleDomAll = 1000, maxDoubleDomSpecial = 10000; @@ -79,7 +77,7 @@ public final class RankingProcess extends Thread { private SortedMap> localSearchInclusion; private int remote_resourceSize, remote_indexCount, remote_peerCount; - private int local_resourceSize, local_indexCount; + private int local_indexCount; private final WeakPriorityBlockingQueue stack; private int feeders; private final ConcurrentHashMap> doubleDomCache; // key = domhash (6 bytes); value = like stack @@ -99,7 +97,7 @@ public final class RankingProcess extends 
Thread { private final ScoreMap filetypeNavigator; // a counter for file types - public RankingProcess(final QueryParams query, final ReferenceOrder order, final int maxentries) { + public RWIProcess(final QueryParams query, final ReferenceOrder order, final int maxentries) { // we collect the urlhashes and construct a list with urlEntry objects // attention: if minEntries is too high, this method will not terminate within the maxTime // sortorder: 0 = hash, 1 = url, 2 = ranking @@ -112,7 +110,6 @@ public final class RankingProcess extends Thread { this.remote_peerCount = 0; this.remote_resourceSize = 0; this.remote_indexCount = 0; - this.local_resourceSize = 0; this.local_indexCount = 0; this.urlhashes = new HandleSet(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, 100); this.misses = new HandleSet(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, 100); @@ -181,9 +178,7 @@ public final class RankingProcess extends Thread { assert (index != null); if (index.isEmpty()) return; - if (local) { - this.local_resourceSize += index.size(); - } else { + if (!local) { assert fullResource >= 0 : "fullResource = " + fullResource; this.remote_resourceSize += fullResource; this.remote_peerCount++; diff --git a/source/net/yacy/search/query/SearchEvent.java b/source/net/yacy/search/query/SearchEvent.java index 99940e6df..3594ee92b 100644 --- a/source/net/yacy/search/query/SearchEvent.java +++ b/source/net/yacy/search/query/SearchEvent.java @@ -53,8 +53,7 @@ import net.yacy.peers.dht.FlatWordPartitionScheme; import net.yacy.peers.graphics.ProfilingGraph; import net.yacy.repository.LoaderDispatcher; import net.yacy.search.Switchboard; -import net.yacy.search.query.ResultFetcher.Worker; -import net.yacy.search.ranking.RankingProcess; +import net.yacy.search.query.SnippetProcess.Worker; import net.yacy.search.ranking.ReferenceOrder; import net.yacy.search.snippet.ResultEntry; import de.anomic.data.WorkTables; @@ -73,8 +72,8 @@ 
public final class SearchEvent { private QueryParams query; private final yacySeedDB peers; private final WorkTables workTables; - private RankingProcess rankingProcess; // ordered search results, grows dynamically as all the query threads enrich this container - private ResultFetcher resultFetcher; + private RWIProcess rankingProcess; // ordered search results, grows dynamically as all the query threads enrich this container + private SnippetProcess resultFetcher; private final SecondarySearchSuperviser secondarySearchSuperviser; @@ -122,7 +121,7 @@ public final class SearchEvent { if (remote) { // initialize a ranking process that is the target for data // that is generated concurrently from local and global search threads - this.rankingProcess = new RankingProcess(this.query, this.order, max_results_preparation); + this.rankingProcess = new RWIProcess(this.query, this.order, max_results_preparation); // start a local search concurrently this.rankingProcess.start(); @@ -163,10 +162,10 @@ public final class SearchEvent { } // start worker threads to fetch urls and snippets - this.resultFetcher = new ResultFetcher(loader, this.rankingProcess, query, this.peers, this.workTables, 3000, deleteIfSnippetFail); + this.resultFetcher = new SnippetProcess(loader, this.rankingProcess, query, this.peers, this.workTables, 3000, deleteIfSnippetFail); } else { // do a local search - this.rankingProcess = new RankingProcess(this.query, this.order, max_results_preparation); + this.rankingProcess = new RWIProcess(this.query, this.order, max_results_preparation); if (generateAbstracts) { this.rankingProcess.run(); // this is not started concurrently here on purpose! 
@@ -207,7 +206,7 @@ public final class SearchEvent { } // start worker threads to fetch urls and snippets - this.resultFetcher = new ResultFetcher(loader, this.rankingProcess, query, this.peers, this.workTables, 500, deleteIfSnippetFail); + this.resultFetcher = new SnippetProcess(loader, this.rankingProcess, query, this.peers, this.workTables, 500, deleteIfSnippetFail); } // clean up events @@ -330,7 +329,7 @@ public final class SearchEvent { return this.secondarySearchThreads; } - public RankingProcess getRankingResult() { + public RWIProcess getRankingResult() { return this.rankingProcess; } @@ -571,7 +570,7 @@ public final class SearchEvent { } - public ResultFetcher result() { + public SnippetProcess result() { return this.resultFetcher; } diff --git a/source/net/yacy/search/query/ResultFetcher.java b/source/net/yacy/search/query/SnippetProcess.java similarity index 93% rename from source/net/yacy/search/query/ResultFetcher.java rename to source/net/yacy/search/query/SnippetProcess.java index ddfd18327..cca0e5eef 100644 --- a/source/net/yacy/search/query/ResultFetcher.java +++ b/source/net/yacy/search/query/SnippetProcess.java @@ -51,7 +51,6 @@ import net.yacy.peers.yacySeedDB; import net.yacy.peers.graphics.ProfilingGraph; import net.yacy.repository.LoaderDispatcher; import net.yacy.search.Switchboard; -import net.yacy.search.ranking.RankingProcess; import net.yacy.search.snippet.ContentDomain; import net.yacy.search.snippet.MediaSnippet; import net.yacy.search.snippet.ResultEntry; @@ -63,10 +62,10 @@ import org.apache.solr.common.SolrDocumentList; import de.anomic.data.WorkTables; import de.anomic.http.client.Cache; -public class ResultFetcher { +public class SnippetProcess { // input values - final RankingProcess rankingProcess; // ordered search results, grows dynamically as all the query threads enrich this container + final RWIProcess rankingProcess; // ordered search results, grows dynamically as all the query threads enrich this container QueryParams 
query; private final yacySeedDB peers; private final WorkTables workTables; @@ -83,9 +82,9 @@ public class ResultFetcher { private final boolean deleteIfSnippetFail; private boolean cleanupState; - public ResultFetcher( + public SnippetProcess( final LoaderDispatcher loader, - final RankingProcess rankedCache, + final RWIProcess rankedCache, final QueryParams query, final yacySeedDB peers, final WorkTables workTables, @@ -355,7 +354,7 @@ public class ResultFetcher { this.timeout = System.currentTimeMillis() + Math.max(1000, maxlifetime); this.neededResults = neededResults; this.shallrun = true; - this.solr = ResultFetcher.this.rankingProcess.getQuery().getSegment().getSolr(); + this.solr = SnippetProcess.this.rankingProcess.getQuery().getSegment().getSolr(); } @Override @@ -365,7 +364,7 @@ public class ResultFetcher { URIMetadataRow page; ResultEntry resultEntry; //final int fetchAhead = snippetMode == 0 ? 0 : 10; - final boolean nav_topics = ResultFetcher.this.query.navigators.equals("all") || ResultFetcher.this.query.navigators.indexOf("topics") >= 0; + final boolean nav_topics = SnippetProcess.this.query.navigators.equals("all") || SnippetProcess.this.query.navigators.indexOf("topics") >= 0; try { //System.out.println("DEPLOYED WORKER " + id + " FOR " + this.neededResults + " RESULTS, timeoutd = " + (this.timeout - System.currentTimeMillis())); int loops = 0; @@ -377,25 +376,25 @@ public class ResultFetcher { } // check if we have enough - if (ResultFetcher.this.result.sizeAvailable() >= this.neededResults) { + if (SnippetProcess.this.result.sizeAvailable() >= this.neededResults) { //Log.logWarning("ResultFetcher", ResultFetcher.this.result.sizeAvailable() + " = result.sizeAvailable() >= this.neededResults = " + this.neededResults); break; } // check if we can succeed if we try to take another url - if (ResultFetcher.this.rankingProcess.feedingIsFinished() && ResultFetcher.this.rankingProcess.sizeQueue() == 0) { + if 
(SnippetProcess.this.rankingProcess.feedingIsFinished() && SnippetProcess.this.rankingProcess.sizeQueue() == 0) { //Log.logWarning("ResultFetcher", "rankingProcess.feedingIsFinished() && rankingProcess.sizeQueue() == 0"); break; } // get next entry - page = ResultFetcher.this.rankingProcess.takeURL(true, Math.min(100, this.timeout - System.currentTimeMillis())); + page = SnippetProcess.this.rankingProcess.takeURL(true, Math.min(100, this.timeout - System.currentTimeMillis())); //if (page == null) page = rankedCache.takeURL(false, this.timeout - System.currentTimeMillis()); if (page == null) { //System.out.println("page == null"); break; // no more available } - if (ResultFetcher.this.query.filterfailurls && ResultFetcher.this.workTables.failURLsContains(page.hash())) continue; + if (SnippetProcess.this.query.filterfailurls && SnippetProcess.this.workTables.failURLsContains(page.hash())) continue; // in case that we have an attached solr, we load also the solr document String solrContent = null; @@ -415,16 +414,16 @@ public class ResultFetcher { //if (rawLine != null && !this.snippetPattern.matcher(rawLine).matches()) continue; //if (result.contains(resultEntry)) continue; - ResultFetcher.this.urlRetrievalAllTime += resultEntry.dbRetrievalTime; - ResultFetcher.this.snippetComputationAllTime += resultEntry.snippetComputationTime; + SnippetProcess.this.urlRetrievalAllTime += resultEntry.dbRetrievalTime; + SnippetProcess.this.snippetComputationAllTime += resultEntry.snippetComputationTime; // place the result to the result vector // apply post-ranking - long ranking = Long.valueOf(ResultFetcher.this.rankingProcess.getOrder().cardinal(resultEntry.word())); - ranking += postRanking(resultEntry, ResultFetcher.this.rankingProcess.getTopicNavigator(10)); + long ranking = Long.valueOf(SnippetProcess.this.rankingProcess.getOrder().cardinal(resultEntry.word())); + ranking += postRanking(resultEntry, SnippetProcess.this.rankingProcess.getTopicNavigator(10)); resultEntry.ranking 
= ranking; - ResultFetcher.this.result.put(new ReverseElement(resultEntry, ranking)); // remove smallest in case of overflow - if (nav_topics) ResultFetcher.this.rankingProcess.addTopics(resultEntry); + SnippetProcess.this.result.put(new ReverseElement(resultEntry, ranking)); // remove smallest in case of overflow + if (nav_topics) SnippetProcess.this.rankingProcess.addTopics(resultEntry); } //System.out.println("FINISHED WORKER " + id + " FOR " + this.neededResults + " RESULTS, loops = " + loops); } catch (final Exception e) {