From 27fa6a66ad194d68b6a5ad8cac86d5ba29e9cbaa Mon Sep 17 00:00:00 2001 From: orbiter Date: Mon, 8 Jun 2009 23:30:12 +0000 Subject: [PATCH] - completed the author navigation - removed some unused variables git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6037 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- htroot/yacy/search.java | 5 ++- htroot/yacysearch.java | 23 ++++++++++++ htroot/yacysearchtrailer.html | 14 ++++--- htroot/yacysearchtrailer.java | 6 ++- htroot/yacysearchtrailer.json | 15 +++++++- source/de/anomic/data/blogBoard.java | 2 +- source/de/anomic/data/blogBoardComments.java | 2 +- source/de/anomic/data/bookmarksDB.java | 6 +-- source/de/anomic/data/messageBoard.java | 2 +- source/de/anomic/data/userDB.java | 2 +- source/de/anomic/data/wiki/wikiBoard.java | 4 +- source/de/anomic/kelondro/blob/BLOBTree.java | 19 +++------- .../anomic/plasma/plasmaRankingCRProcess.java | 2 +- .../de/anomic/plasma/plasmaSearchQuery.java | 5 +++ .../plasma/plasmaSearchRankingProcess.java | 37 +++++++++++-------- 15 files changed, 95 insertions(+), 49 deletions(-) diff --git a/htroot/yacy/search.java b/htroot/yacy/search.java index c8d9b72b3..a755ca8d8 100644 --- a/htroot/yacy/search.java +++ b/htroot/yacy/search.java @@ -90,6 +90,7 @@ public final class search { final String contentdom = post.get("contentdom", "text"); final String filter = post.get("filter", ".*"); String sitehash = post.get("sitehash", ""); if (sitehash.length() == 0) sitehash = null; + String authorhash = post.get("authorhash", ""); if (authorhash.length() == 0) authorhash = null; String language = post.get("language", ""); if (!iso639.exists(language)) { // take language from the user agent @@ -204,6 +205,7 @@ public final class search { null, false, sitehash, + authorhash, yacyURL.TLD_any_zone_filter, client, false); @@ -255,7 +257,8 @@ public final class search { -1, constraint, false, - sitehash, + sitehash, + authorhash, yacyURL.TLD_any_zone_filter, client, false); diff --git a/htroot/yacysearch.java b/htroot/yacysearch.java index c048b983b..b1d7aa6fe 100644 --- a/htroot/yacysearch.java +++ b/htroot/yacysearch.java @@ -282,6 +282,28 @@ public class yacysearch { while(domain.endsWith(".")) domain = domain.substring(0, domain.length() - 1); sitehash = yacyURL.domhash(domain); } + int authori = querystring.indexOf("author:"); + String authorhash = null; + if (authori >= 0) { + // check if the author was given with single quotes or without + boolean quotes = false; + if (querystring.charAt(authori + 7) == (char) 39) { + quotes = true; + } + String author; + if (quotes) { + int ftb = querystring.indexOf((char) 39, authori + 8); + if (ftb == -1) ftb = querystring.length() + 1; + author = querystring.substring(authori + 8, ftb); + querystring = querystring.replace("author:'" + author + "'", ""); + } else { + int ftb = querystring.indexOf(' ', authori); + if (ftb == -1) ftb = querystring.length(); + author = querystring.substring(authori + 7, ftb); + querystring = querystring.replace("author:" + author, ""); + } + authorhash = new String(Word.word2hash(author)); + } int tld = querystring.indexOf("tld:"); if (tld >= 0) { int ftb = querystring.indexOf(' ', tld); @@ -401,6 +423,7 @@ public class yacysearch { constraint, true, sitehash, + authorhash, yacyURL.TLD_any_zone_filter, client, authenticated); diff --git a/htroot/yacysearchtrailer.html b/htroot/yacysearchtrailer.html index 62d3d190f..c6f458d1b 100644 --- a/htroot/yacysearchtrailer.html +++ b/htroot/yacysearchtrailer.html @@ -4,12 +4,7 @@
  • #[url]#
  • #{/element}# #(/nav-domains)# -#(nav-topics)#:: -

    Topics

    -
    -#(/nav-topics)# + #(nav-authors)#::

    Authors

    #(/nav-authors)# +#(nav-topics)#:: +

    Topics

    +
    +#(/nav-topics)# +

    Timeline

    diff --git a/htroot/yacysearchtrailer.java b/htroot/yacysearchtrailer.java index 0fc758602..d13cb07a8 100644 --- a/htroot/yacysearchtrailer.java +++ b/htroot/yacysearchtrailer.java @@ -115,11 +115,13 @@ public class yacysearchtrailer { prop.put("nav-authors", 1); NavigatorEntry entry; int i; + String anav; for (i = 0; i < authorNavigator.size(); i++) { entry = authorNavigator.get(i); + anav = (entry.name.indexOf(' ') < 0) ? "author:" + entry.name : "author:'" + entry.name + "'"; prop.put("nav-authors_element_" + i + "_name", entry.name); - prop.put("nav-authors_element_" + i + "_url", "" + entry.name + " (" + entry.count + ")"); - prop.putJSON("nav-authors_element_" + i + "_url-json", plasmaSearchQuery.navurl("json", 0, display, theQuery, theQuery.urlMask, "author:'" + entry.name + "'", theQuery.navigators)); + prop.put("nav-authors_element_" + i + "_url", "" + entry.name + " (" + entry.count + ")"); + prop.putJSON("nav-authors_element_" + i + "_url-json", plasmaSearchQuery.navurl("json", 0, display, theQuery, theQuery.urlMask, anav, theQuery.navigators)); prop.put("nav-authors_element_" + i + "_count", entry.count); prop.put("nav-authors_element_" + i + "_modifier", "author:'" + entry.name + "'"); prop.put("nav-authors_element_" + i + "_nl", 1); diff --git a/htroot/yacysearchtrailer.json b/htroot/yacysearchtrailer.json index a621522ad..3a394b505 100644 --- a/htroot/yacysearchtrailer.json +++ b/htroot/yacysearchtrailer.json @@ -11,7 +11,20 @@ {"name": "#[name]#", "count": "#[count]#", "modifier": "#[modifier]#", "url": "#[url-json]#"}#(nl)#::,#(/nl)# #{/element}# ] - },#(/nav-domains)##(nav-topics)#:: + },#(/nav-domains)##(nav-authors)#:: + { + "facetname": "authors", + "displayname": "Authors", + "type": "String", + "min": "0", + "max": "0", + "mean": "0", + "elements": [ +#{element}# + {"name": "#[name]#", "count": "#[count]#", "modifier": "#[modifier]#", "url": "#[url-json]#"}#(nl)#::,#(/nl)# +#{/element}# + ] + }#(/nav-authors)##(nav-topics)#:: { "facetname": "topwords", "displayname": "Topics", diff --git a/source/de/anomic/data/blogBoard.java b/source/de/anomic/data/blogBoard.java index c36d08e96..46095a265 100644 --- a/source/de/anomic/data/blogBoard.java +++ b/source/de/anomic/data/blogBoard.java @@ -67,7 +67,7 @@ public class blogBoard { new File(actpath.getParent()).mkdir(); new File(newFile.getParent()).mkdir(); if (database == null) { - database = new MapView(BLOBTree.toHeap(actpath, true, true, keyLength, recordSize, '_', NaturalOrder.naturalOrder, false, false, newFile), 500, '_'); + database = new MapView(BLOBTree.toHeap(actpath, true, true, keyLength, recordSize, '_', NaturalOrder.naturalOrder, newFile), 500, '_'); } } diff --git a/source/de/anomic/data/blogBoardComments.java b/source/de/anomic/data/blogBoardComments.java index 69f070c42..911d400d8 100644 --- a/source/de/anomic/data/blogBoardComments.java +++ b/source/de/anomic/data/blogBoardComments.java @@ -70,7 +70,7 @@ public class blogBoardComments { new File(actpath.getParent()).mkdir(); new File(newFile.getParent()).mkdir(); if (database == null) { - database = new MapView(BLOBTree.toHeap(actpath, true, true, keyLength, recordSize, '_', NaturalOrder.naturalOrder, false, false, newFile), 500, '_'); + database = new MapView(BLOBTree.toHeap(actpath, true, true, keyLength, recordSize, '_', NaturalOrder.naturalOrder, newFile), 500, '_'); } } diff --git a/source/de/anomic/data/bookmarksDB.java b/source/de/anomic/data/bookmarksDB.java index 9d373f486..b7fbf0932 100644 --- a/source/de/anomic/data/bookmarksDB.java +++ b/source/de/anomic/data/bookmarksDB.java @@ -116,17 +116,17 @@ public class bookmarksDB { tagCache=new TreeMap(); bookmarksFile.getParentFile().mkdirs(); //this.bookmarksTable = new kelondroMap(kelondroDyn.open(bookmarksFile, bufferkb * 1024, preloadTime, 12, 256, '_', true, false)); - this.bookmarksTable = new MapView(BLOBTree.toHeap(bookmarksFile, true, true, 12, 256, '_', NaturalOrder.naturalOrder, false, false, bookmarksFileNew), 1000, '_'); + this.bookmarksTable = new MapView(BLOBTree.toHeap(bookmarksFile, true, true, 12, 256, '_', NaturalOrder.naturalOrder, bookmarksFileNew), 1000, '_'); // tags tagsFile.getParentFile().mkdirs(); final boolean tagsFileExisted = tagsFile.exists(); - this.tagsTable = new MapView(BLOBTree.toHeap(tagsFile, true, true, 12, 256, '_', NaturalOrder.naturalOrder, false, false, tagsFileNew), 500, '_'); + this.tagsTable = new MapView(BLOBTree.toHeap(tagsFile, true, true, 12, 256, '_', NaturalOrder.naturalOrder, tagsFileNew), 500, '_'); if (!tagsFileExisted) rebuildTags(); // dates final boolean datesExisted = datesFile.exists(); - this.datesTable = new MapView(BLOBTree.toHeap(datesFile, true, true, 20, 256, '_', NaturalOrder.naturalOrder, false, false, datesFileNew), 500, '_'); + this.datesTable = new MapView(BLOBTree.toHeap(datesFile, true, true, 20, 256, '_', NaturalOrder.naturalOrder, datesFileNew), 500, '_'); if (!datesExisted) rebuildDates(); // autoReCrawl diff --git a/source/de/anomic/data/messageBoard.java b/source/de/anomic/data/messageBoard.java index 7f4b86f91..a7645f614 100644 --- a/source/de/anomic/data/messageBoard.java +++ b/source/de/anomic/data/messageBoard.java @@ -55,7 +55,7 @@ public class messageBoard { new File(path.getParent()).mkdir(); new File(pathNew.getParent()).mkdir(); if (database == null) { - database = new MapView(BLOBTree.toHeap(path, true, true, categoryLength + dateFormat.length() + 2, recordSize, '_', NaturalOrder.naturalOrder, false, false, pathNew), 500, '_'); + database = new MapView(BLOBTree.toHeap(path, true, true, categoryLength + dateFormat.length() + 2, recordSize, '_', NaturalOrder.naturalOrder, pathNew), 500, '_'); } sn = 0; } diff --git a/source/de/anomic/data/userDB.java b/source/de/anomic/data/userDB.java index ba48bdf9a..0091c4fb3 100644 --- a/source/de/anomic/data/userDB.java +++ b/source/de/anomic/data/userDB.java @@ -60,7 +60,7 @@ public final class userDB { this.userTableFile = userTableFileNew; userTableFile.getParentFile().mkdirs(); userTableFileNew.getParentFile().mkdirs(); - this.userTable = new MapView(BLOBTree.toHeap(userTableFile, true, true, 128, 256, '_', NaturalOrder.naturalOrder, false, false, userTableFile), 10, '_'); + this.userTable = new MapView(BLOBTree.toHeap(userTableFile, true, true, 128, 256, '_', NaturalOrder.naturalOrder, userTableFile), 10, '_'); } void resetDatabase() { diff --git a/source/de/anomic/data/wiki/wikiBoard.java b/source/de/anomic/data/wiki/wikiBoard.java index f472339e5..13bd3cd1c 100644 --- a/source/de/anomic/data/wiki/wikiBoard.java +++ b/source/de/anomic/data/wiki/wikiBoard.java @@ -57,11 +57,11 @@ public class wikiBoard { final File bkppath, final File bkppathNew) throws IOException { new File(actpath.getParent()).mkdirs(); if (datbase == null) { - datbase = new MapView(BLOBTree.toHeap(actpath, true, true, keyLength, recordSize, '_', NaturalOrder.naturalOrder, false, false, actpathNew), 500, '_'); + datbase = new MapView(BLOBTree.toHeap(actpath, true, true, keyLength, recordSize, '_', NaturalOrder.naturalOrder, actpathNew), 500, '_'); } new File(bkppath.getParent()).mkdirs(); if (bkpbase == null) { - bkpbase = new MapView(BLOBTree.toHeap(bkppath, true, true, keyLength + dateFormat.length(), recordSize, '_', NaturalOrder.naturalOrder, false, false, bkppathNew), 500, '_'); + bkpbase = new MapView(BLOBTree.toHeap(bkppath, true, true, keyLength + dateFormat.length(), recordSize, '_', NaturalOrder.naturalOrder, bkppathNew), 500, '_'); } } diff --git a/source/de/anomic/kelondro/blob/BLOBTree.java b/source/de/anomic/kelondro/blob/BLOBTree.java index 2e38d1741..178432473 100644 --- a/source/de/anomic/kelondro/blob/BLOBTree.java +++ b/source/de/anomic/kelondro/blob/BLOBTree.java @@ -70,7 +70,7 @@ public class BLOBTree { * Deprecated Class. Please use kelondroBLOBHeap instead */ private BLOBTree(final File file, final boolean useNodeCache, final boolean useObjectCache, final int key, - final int nodesize, final char fillChar, final ByteOrder objectOrder, final boolean writebuffer, final boolean resetOnFail) { + final int nodesize, final char fillChar, final ByteOrder objectOrder) { // creates or opens a dynamic tree rowdef = new Row("byte[] key-" + (key + counterlen) + ", byte[] node-" + nodesize, objectOrder); ObjectIndex fbi; @@ -78,17 +78,8 @@ public class BLOBTree { fbi = new Tree(file, useNodeCache, 0, rowdef, 1, 8); } catch (final IOException e) { e.printStackTrace(); - if (resetOnFail) { - FileUtils.deletedelete(file); - try { - fbi = new Tree(file, useNodeCache, -1, rowdef, 1, 8); - } catch (final IOException e1) { - e1.printStackTrace(); - throw new kelondroException(e.getMessage()); - } - } else { - throw new kelondroException(e.getMessage()); - } + FileUtils.deletedelete(file); + throw new kelondroException(e.getMessage()); } this.index = ((useObjectCache) && (!(fbi instanceof EcoTable))) ? (ObjectIndex) new Cache(fbi) : fbi; this.keylen = key; @@ -100,13 +91,13 @@ public class BLOBTree { } public static BLOBHeap toHeap(final File file, final boolean useNodeCache, final boolean useObjectCache, final int key, - final int nodesize, final char fillChar, final ByteOrder objectOrder, final boolean writebuffer, final boolean resetOnFail, final File blob) throws IOException { + final int nodesize, final char fillChar, final ByteOrder objectOrder, final File blob) throws IOException { if (blob.exists() || !file.exists()) { // open the blob file and ignore the tree return new BLOBHeap(blob, key, objectOrder, 1024 * 64); } // open a Tree and migrate everything to a Heap - BLOBTree tree = new BLOBTree(file, useNodeCache, useObjectCache, key, nodesize, fillChar, objectOrder, writebuffer, resetOnFail); + BLOBTree tree = new BLOBTree(file, useNodeCache, useObjectCache, key, nodesize, fillChar, objectOrder); BLOBHeap heap = new BLOBHeap(blob, key, objectOrder, 1024 * 64); Iterator i = tree.keys(true, false); byte[] k, kk = new byte[key], v; diff --git a/source/de/anomic/plasma/plasmaRankingCRProcess.java b/source/de/anomic/plasma/plasmaRankingCRProcess.java index 1de71aeae..e5b6aaf00 100644 --- a/source/de/anomic/plasma/plasmaRankingCRProcess.java +++ b/source/de/anomic/plasma/plasmaRankingCRProcess.java @@ -144,7 +144,7 @@ public class plasmaRankingCRProcess { return true; } - private static boolean accumulate_upd(final File f, final ObjectIndex acc, final IndexCell seq) throws IOException { + public static boolean accumulate_upd(final File f, final ObjectIndex acc, final IndexCell seq) throws IOException { // open file AttrSeq source_cr = null; try { diff --git a/source/de/anomic/plasma/plasmaSearchQuery.java b/source/de/anomic/plasma/plasmaSearchQuery.java index 3c74186f6..e61292f16 100644 --- a/source/de/anomic/plasma/plasmaSearchQuery.java +++ b/source/de/anomic/plasma/plasmaSearchQuery.java @@ -74,6 +74,7 @@ public final class plasmaSearchQuery { public plasmaSearchRankingProfile ranking; public String host; // this is the client host that starts the query, not a site operator public String sitehash; // this is a domain hash, 6 bytes long or null + public String authorhash; public yacySeed remotepeer; public Long handle; // values that are set after a search: @@ -113,6 +114,7 @@ public final class plasmaSearchQuery { this.onlineSnippetFetch = false; this.host = null; this.sitehash = null; + this.authorhash = null; this.remotepeer = null; this.handle = Long.valueOf(System.currentTimeMillis()); this.specialRights = false; @@ -132,6 +134,7 @@ public final class plasmaSearchQuery { final int domType, final String domGroupName, final int domMaxTargets, final Bitfield constraint, final boolean allofconstraint, final String site, + final String authorhash, final int domainzone, final String host, final boolean specialRights) { @@ -155,6 +158,7 @@ public final class plasmaSearchQuery { this.constraint = constraint; this.allofconstraint = allofconstraint; this.sitehash = site; assert site == null || site.length() == 6; + this.authorhash = authorhash; assert authorhash == null || authorhash.length() > 0; this.onlineSnippetFetch = onlineSnippetFetch; this.host = host; this.remotepeer = null; @@ -325,6 +329,7 @@ public final class plasmaSearchQuery { "*" + this.prefer + "*" + this.urlMask + "*" + this.sitehash + + "*" + this.authorhash + "*" + this.targetlang + "*" + this.constraint + "*" + this.maxDistance; diff --git a/source/de/anomic/plasma/plasmaSearchRankingProcess.java b/source/de/anomic/plasma/plasmaSearchRankingProcess.java index 7da3d6540..612bce706 100644 --- a/source/de/anomic/plasma/plasmaSearchRankingProcess.java +++ b/source/de/anomic/plasma/plasmaSearchRankingProcess.java @@ -35,13 +35,11 @@ import java.util.HashMap; import java.util.Iterator; import java.util.Map; import java.util.Set; -import java.util.TreeMap; import java.util.TreeSet; import java.util.concurrent.ConcurrentHashMap; import de.anomic.htmlFilter.htmlFilterContentScraper; import de.anomic.kelondro.index.BinSearch; -import de.anomic.kelondro.order.Base64Order; import de.anomic.kelondro.order.Digest; import de.anomic.kelondro.text.Reference; import de.anomic.kelondro.text.ReferenceContainer; @@ -78,9 +76,9 @@ public final class plasmaSearchRankingProcess { private final Segment indexSegment; private HashMap> localSearchInclusion; private final int[] domZones; - private final ConcurrentHashMap hostNavigator; private final ConcurrentHashMap ref; // reference score computation for the commonSense heuristic - private final TreeMap authorNavigator; + private final ConcurrentHashMap hostNavigator; + private final ConcurrentHashMap authorNavigator; public plasmaSearchRankingProcess( final Segment indexSegment, @@ -107,7 +105,7 @@ public final class plasmaSearchRankingProcess { this.flagcount = new int[32]; for (int i = 0; i < 32; i++) {this.flagcount[i] = 0;} this.hostNavigator = new ConcurrentHashMap(); - this.authorNavigator = new TreeMap(Base64Order.enhancedCoder); + this.authorNavigator = new ConcurrentHashMap(); this.ref = new ConcurrentHashMap(); this.domZones = new int[8]; for (int i = 0; i < 8; i++) {this.domZones[i] = 0;} @@ -330,16 +328,25 @@ public final class plasmaSearchRankingProcess { // author navigation: String author = metadata.dc_creator(); if (author != null && author.length() > 0) { - byte[] authorhash = Word.word2hash(author); - //synchronized (this.authorNavigator) { - AuthorInfo in = this.authorNavigator.get(authorhash); - if (in == null) { - this.authorNavigator.put(authorhash, new AuthorInfo(author)); - } else { - in.inc(); - this.authorNavigator.put(authorhash, in); - } - //} + // add author to the author navigator + String authorhash = new String(Word.word2hash(author)); + System.out.println("*** DEBUG authorhash = " + authorhash + ", query.authorhash = " + this.query.authorhash + ", author = " + author); + + // check if we already are filtering for authors + if (this.query.authorhash != null && !this.query.authorhash.equals(authorhash)) { + continue; + } + + // add author to the author navigator + AuthorInfo in = this.authorNavigator.get(authorhash); + if (in == null) { + this.authorNavigator.put(authorhash, new AuthorInfo(author)); + } else { + in.inc(); + this.authorNavigator.put(authorhash, in); + } + } else if (this.query.authorhash != null) { + continue; } // get the url