patch for media search (audio, video apps)

pull/1/head
Michael Peter Christen 13 years ago
parent 7860c1df80
commit 5f5ed33ed8

@ -42,17 +42,11 @@
--> -->
</div>#(/item)# </div>#(/item)#
:: ::
#{items}# #(item)#::<tr class="#(col)#TableCellLight::TableCellDark#(/col)#"><td>#[name]#</td><td><a href="#[href]#" target="#[target]#">#[hrefshort]#</a></tr>#(/item)#
<tr class="#(col)#TableCellLight::TableCellDark#(/col)#"><td>#[name]#</td><td><a href="#[href]#" target="#[target]#">#[hrefshort]#</a></tr>
#{/items}#
:: ::
#{items}# #(item)#::<tr class="#(col)#TableCellLight::TableCellDark#(/col)#"><td>#[name]#</td><td><a href="#[href]#" target="#[target]#">#[hrefshort]#</a></tr>#(/item)#
<tr class="#(col)#TableCellLight::TableCellDark#(/col)#"><td>#[name]#</td><td><a href="#[href]#" target="#[target]#">#[hrefshort]#</a></tr>
#{/items}#
:: ::
#{items}# #(item)#::<tr class="#(col)#TableCellLight::TableCellDark#(/col)#"><td>#[name]#</td><td><a href="#[href]#" target="#[target]#">#[hrefshort]#</a></tr>#(/item)#
<tr class="#(col)#TableCellLight::TableCellDark#(/col)#"><td>#[name]#</td><td><a href="#[href]#" target="#[target]#">#[hrefshort]#</a></tr>
#{/items}#
#(/content)# #(/content)#
<script type="text/javascript"> <script type="text/javascript">

@ -26,7 +26,6 @@
import java.net.MalformedURLException; import java.net.MalformedURLException;
import java.util.Collection; import java.util.Collection;
import java.util.List;
import net.yacy.cora.date.GenericFormatter; import net.yacy.cora.date.GenericFormatter;
import net.yacy.cora.document.ASCII; import net.yacy.cora.document.ASCII;
@ -47,7 +46,6 @@ import net.yacy.search.SwitchboardConstants;
import net.yacy.search.query.QueryParams; import net.yacy.search.query.QueryParams;
import net.yacy.search.query.SearchEvent; import net.yacy.search.query.SearchEvent;
import net.yacy.search.query.SearchEventCache; import net.yacy.search.query.SearchEventCache;
import net.yacy.search.snippet.MediaSnippet;
import net.yacy.search.snippet.ResultEntry; import net.yacy.search.snippet.ResultEntry;
import net.yacy.search.snippet.TextSnippet; import net.yacy.search.snippet.TextSnippet;
import de.anomic.server.serverObjects; import de.anomic.server.serverObjects;
@ -252,29 +250,20 @@ public class yacysearchitem {
// any other media content // any other media content
// generate result object // generate result object
final ResultEntry result = theSearch.oneResult(item, 500); final ResultEntry ms = theSearch.oneResult(item, theQuery.isLocal() ? 1000 : 5000);
if (result == null) return prop; // no content
prop.put("content", theQuery.contentdom.getCode() + 1); // switch on specific content prop.put("content", theQuery.contentdom.getCode() + 1); // switch on specific content
final List<MediaSnippet> media = result.mediaSnippets(); if (ms == null) {
if (item == 0) col = true; prop.put("content_item", "0");
if (media != null) {
int c = 0;
for (final MediaSnippet ms : media) {
final String resultUrlstring = ms.href.toNormalform(true, false);
final String target = sb.getConfig(resultUrlstring.matches(target_special_pattern) ? SwitchboardConstants.SEARCH_TARGET_SPECIAL : SwitchboardConstants.SEARCH_TARGET_DEFAULT, "_self");
prop.putHTML("content_items_" + c + "_href", resultUrlstring);
prop.putHTML("content_items_" + c + "_hrefshort", nxTools.shortenURLString(resultUrlstring, MAX_URL_LENGTH));
prop.putHTML("content_items_" + c + "_target", target);
prop.putHTML("content_items_" + c + "_name", shorten(ms.name, MAX_NAME_LENGTH));
prop.put("content_items_" + c + "_col", (col) ? "0" : "1");
c++;
col = !col;
}
prop.put("content_items", c);
} else { } else {
prop.put("content_items", "0"); final String resultUrlstring = ms.url().toNormalform(true, false);
final String target = sb.getConfig(resultUrlstring.matches(target_special_pattern) ? SwitchboardConstants.SEARCH_TARGET_SPECIAL : SwitchboardConstants.SEARCH_TARGET_DEFAULT, "_self");
prop.putHTML("content_item_href", resultUrlstring);
prop.putHTML("content_item_hrefshort", nxTools.shortenURLString(resultUrlstring, MAX_URL_LENGTH));
prop.putHTML("content_item_target", target);
prop.putHTML("content_item_name", shorten(ms.title(), MAX_NAME_LENGTH));
prop.put("content_item_col", (item % 2 == 0) ? "0" : "1");
prop.put("content_item_nl", (item == theQuery.offset) ? 0 : 1);
prop.put("content_item", 1);
} }
theQuery.transmitcount = item + 1; theQuery.transmitcount = item + 1;
return prop; return prop;

@ -57,6 +57,7 @@ import net.yacy.kelondro.index.HandleSet;
import net.yacy.kelondro.index.RowSpaceExceededException; import net.yacy.kelondro.index.RowSpaceExceededException;
import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.order.Base64Order; import net.yacy.kelondro.order.Base64Order;
import net.yacy.kelondro.order.Bitfield;
import net.yacy.kelondro.rwi.IndexCell; import net.yacy.kelondro.rwi.IndexCell;
import net.yacy.kelondro.rwi.ReferenceContainer; import net.yacy.kelondro.rwi.ReferenceContainer;
import net.yacy.kelondro.rwi.ReferenceFactory; import net.yacy.kelondro.rwi.ReferenceFactory;
@ -70,6 +71,16 @@ import de.anomic.crawler.retrieval.Response;
public class Segment { public class Segment {
// catchall word
final static String catchallString = "yacyall"; // a word that is always in all indexes; can be used for zero-word searches to find ALL documents
final static byte[] catchallHash;
final static Word catchallWord = new Word(0, 0, 0);
static {
catchallHash = Word.word2hash(catchallString); // "KZzU-Vf6h5k-"
catchallWord.flags = new Bitfield(4);
for (int i = 0; i < catchallWord.flags.length(); i++) catchallWord.flags.set(i, true);
}
// environment constants // environment constants
public static final long wCacheMaxAge = 1000 * 60 * 30; // milliseconds; 30 minutes public static final long wCacheMaxAge = 1000 * 60 * 30; // milliseconds; 30 minutes
public static final int wCacheMaxChunk = 800; // maximum number of references for each urlhash public static final int wCacheMaxChunk = 800; // maximum number of references for each urlhash
@ -227,7 +238,7 @@ public class Segment {
UTF8.getBytes(language), UTF8.getBytes(language),
doctype, doctype,
outlinksSame, outlinksOther); outlinksSame, outlinksOther);
Word wprop; Word wprop = null;
byte[] wordhash; byte[] wordhash;
while (i.hasNext()) { while (i.hasNext()) {
wentry = i.next(); wentry = i.next();
@ -259,17 +270,26 @@ public class Segment {
} }
} }
} }
// assign the catchall word
ientry.setWord(wprop == null ? catchallWord : wprop); // we use one of the word properties as template to get the document characteristics
try {
this.termIndex.add(catchallHash, ientry);
} catch (final Exception e) {
Log.logException(e);
}
return wordCount; return wordCount;
} }
private int addCitationIndex(final DigestURI url, final Date urlModified, final Document document) { private int addCitationIndex(final DigestURI url, final Date urlModified, final Map<MultiProtocolURI, Properties> anchors) {
if (document.getAnchors() == null) return 0; if (anchors == null) return 0;
int refCount = 0; int refCount = 0;
// iterate over all outgoing links, this will create a context for those links // iterate over all outgoing links, this will create a context for those links
final byte[] urlhash = url.hash(); final byte[] urlhash = url.hash();
final long urldate = urlModified.getTime(); final long urldate = urlModified.getTime();
for (Map.Entry<MultiProtocolURI, Properties> anchorEntry: document.getAnchors().entrySet()) { for (Map.Entry<MultiProtocolURI, Properties> anchorEntry: anchors.entrySet()) {
MultiProtocolURI anchor = anchorEntry.getKey(); MultiProtocolURI anchor = anchorEntry.getKey();
byte[] refhash = new DigestURI(anchor).hash(); byte[] refhash = new DigestURI(anchor).hash();
//System.out.println("*** addCitationIndex: urlhash = " + ASCII.String(urlhash) + ", refhash = " + ASCII.String(refhash) + ", urldate = " + urlModified.toString()); //System.out.println("*** addCitationIndex: urlhash = " + ASCII.String(urlhash) + ", refhash = " + ASCII.String(refhash) + ", urldate = " + urlModified.toString());
@ -396,7 +416,7 @@ public class Segment {
); );
// STORE PAGE REFERENCES INTO CITATION INDEX // STORE PAGE REFERENCES INTO CITATION INDEX
final int refs = addCitationIndex(url, modDate, document); final int refs = addCitationIndex(url, modDate, document.getAnchors());
// finish index time // finish index time
final long indexingEndTime = System.currentTimeMillis(); final long indexingEndTime = System.currentTimeMillis();

Loading…
Cancel
Save