package xml; import java.net.MalformedURLException; import java.util.ArrayList; import java.util.Set; import java.util.TreeSet; import de.anomic.http.httpHeader; import de.anomic.plasma.plasmaURL; import de.anomic.kelondro.kelondroMSetTools; import de.anomic.net.URL; import de.anomic.plasma.plasmaCondenser; import de.anomic.plasma.plasmaSearchQuery; import de.anomic.plasma.plasmaSnippetCache; import de.anomic.plasma.plasmaSwitchboard; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; public class snippet { public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) throws MalformedURLException { // return variable that accumulates replacements plasmaSwitchboard switchboard = (plasmaSwitchboard) env; serverObjects prop = new serverObjects(); //get the timeout for snippet-fetching int mediasnippet_timeout = 15000; int textsnippet_timeout = 10000; mediasnippet_timeout = Integer.parseInt(env.getConfig("timeout_text", "15000")); textsnippet_timeout = Integer.parseInt(env.getConfig("timeout_media", "10000")); // getting url String urlString = post.get("url", ""); URL url = new URL(urlString); prop.put("urlHash",plasmaURL.urlHash(url)); // if 'remove' is set to true, then RWI references to URLs that do not have the snippet are removed boolean remove = post.get("remove", "false").equals("true"); // boolean line_end_with_punctuation boolean pre = post.get("pre", "false").equals("true"); // type of media String media = post.get("media", "text"); String querystring = post.get("search", "").trim(); if ((querystring.length() > 2) && (querystring.charAt(0) == '"') && (querystring.charAt(querystring.length() - 1) == '"')) { querystring = querystring.substring(1, querystring.length() - 1).trim(); } final TreeSet[] query = plasmaSearchQuery.cleanQuery(querystring); Set queryHashes = plasmaCondenser.words2hashes(query[0]); // filter out stopwords final TreeSet filtered = kelondroMSetTools.joinConstructive(query[0], plasmaSwitchboard.stopwords); if (filtered.size() > 0) { kelondroMSetTools.excludeDestructive(query[0], plasmaSwitchboard.stopwords); } // find snippet if (media.equals("text")) { // attach text snippet plasmaSnippetCache.TextSnippet snippet = plasmaSnippetCache.retrieveTextSnippet(url, queryHashes, true, pre, 260, textsnippet_timeout); prop.put("status",snippet.getErrorCode()); if (snippet.getErrorCode() < 11) { // no problems occurred //prop.put("text", (snippet.exists()) ? snippet.getLineMarked(queryHashes) : "unknown"); prop.putASIS("text", (snippet.exists()) ? snippet.getLineMarked(queryHashes) : "unknown"); //FIXME: the ASIS should not be needed, but we have still htmlcode in .java files } else { // problems with snippet fetch prop.put("text", (remove) ? plasmaSnippetCache.failConsequences(snippet, queryHashes) : snippet.getError()); } prop.put("link", 0); prop.put("links", 0); prop.put("favicon",snippet.getFavicon()==null?"":snippet.getFavicon().toString()); } else { // attach media information ArrayList mediaSnippets = plasmaSnippetCache.retrieveMediaSnippets(url, queryHashes, media, true, mediasnippet_timeout); plasmaSnippetCache.MediaSnippet ms; for (int i = 0; i < mediaSnippets.size(); i++) { ms = (plasmaSnippetCache.MediaSnippet) mediaSnippets.get(i); try { url = new URL(ms.href); } catch (MalformedURLException e) { continue; } prop.put("link_" + i + "_type", ms.type); prop.put("link_" + i + "_href", ms.href); prop.put("link_" + i + "_code", switchboard.licensedURLs.aquireLicense(url)); prop.put("link_" + i + "_name", ms.name); prop.put("link_" + i + "_attr", ms.attr); } //System.out.println("DEBUG: " + mediaSnippets.size() + " ENTRIES IN MEDIA SNIPPET LINKS for url " + urlString); prop.put("text", ""); prop.put("link", mediaSnippets.size()); prop.put("links", mediaSnippets.size()); prop.put("favicon",""); } // return rewrite properties return prop; } }