small changes in surrogate reader, wiki code and portal test

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5894 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 16 years ago
parent 07f09742bb
commit d079d6dfdb

@ -5,31 +5,36 @@
</head> </head>
<body> <body>
<script src="http://localhost:8080/yacy/ui/js/jquery-1.3.1.min.js" type="text/javascript"></script> <script src="http://localhost:8080/yacy/ui/js/jquery-1.3.1.min.js" type="text/javascript"></script>
<script> <script>
$(document).ready(function() { $(document).ready(function() {
yconf = { yconf = {
url : 'http://localhost:8080', url : 'http://localhost:8080',
global: true, logo : '/yacy/ui/img/yacy-logo.png',
theme : 'start', link : 'http://www.yacy.net',
title : 'YaCy Portal Search' global : false,
}; width : 500,
$.getScript(yconf.url+'/yacy/ui/js/yacyui-portalsearch.js', function(){}); height : 620,
}); position : ['top',30],
</script> theme : 'start',
<h3>YaCy Portal Search:</h3> title : 'YaCy Portal Search'
<div id="yacylivesearch"> };
<form id="ysearch" method="get" accept-charset="UTF-8" action="http://localhost:8080/yacysearch.html"> $.getScript(yconf.url+'/yacy/ui/js/yacyui-portalsearch.js', function(){});
<input name="search" id="yquery" class="fancy" type="text" size="15" maxlength="80" value=""/> });
<input type="hidden" name="verify" value="true" /> </script>
<input type="hidden" name="maximumRecords" value="10" /> <div id="yacylivesearch">
<input type="hidden" name="resource" value="local" /> <h3>YaCy Portal Search:</h3>
<input type="hidden" name="urlmaskfilter" value=".*" /> <form id="ysearch" method="get" accept-charset="UTF-8" action="http://localhost:8080/yacysearch.html">
<input type="hidden" name="prefermaskfilter" value="" /> Live Search <input name="query" id="yquery" class="fancy" type="text" size="15" maxlength="80" value=""/>
<input type="hidden" name="former" value="" /> <input type="hidden" name="verify" value="false" />
<input type="hidden" name="display" value="2" /> <input type="hidden" name="maximumRecords" value="10" />
<input type="submit" name="Enter" value="Suchen" /> <input type="hidden" name="resource" value="local" />
</form> <input type="hidden" name="urlmaskfilter" value=".*" />
</div> <input type="hidden" name="prefermaskfilter" value="" />
<input type="hidden" name="former" value="" />
<input type="hidden" name="display" value="2" />
<input type="submit" name="Enter" value="Search" />
</form>
</div>
<h4>Code Snippet:</h4> <h4>Code Snippet:</h4>
<pre> <pre>
&lt;script src="http://localhost:8080/yacy/ui/js/jquery-1.3.1.min.js" type="text/javascript"&gt;&lt;/script&gt; &lt;script src="http://localhost:8080/yacy/ui/js/jquery-1.3.1.min.js" type="text/javascript"&gt;&lt;/script&gt;
@ -37,7 +42,12 @@
$(document).ready(function() { $(document).ready(function() {
yconf = { yconf = {
url : 'http://localhost:8080', url : 'http://localhost:8080',
global : true, logo : '/yacy/ui/img/yacy-logo.png',
link : 'http://www.yacy.net',
global : false,
width : 500,
height : 620,
position : ['top',30],
theme : 'start', theme : 'start',
title : 'YaCy Portal Search' title : 'YaCy Portal Search'
}; };
@ -46,15 +56,15 @@
&lt;/script&gt; &lt;/script&gt;
&lt;div id="yacylivesearch"&gt; &lt;div id="yacylivesearch"&gt;
&lt;form id="ysearch" method="get" accept-charset="UTF-8" action="http://localhost:8080/yacysearch.html"&gt; &lt;form id="ysearch" method="get" accept-charset="UTF-8" action="http://localhost:8080/yacysearch.html"&gt;
&lt;input name="query" id="yquery" class="fancy" type="text" size="15" maxlength="80" value=""/&gt; Live Search &lt;input name="query" id="yquery" class="fancy" type="text" size="15" maxlength="80" value=""/&gt;
&lt;input type="hidden" name="verify" value="true" /&gt; &lt;input type="hidden" name="verify" value="false" /&gt;
&lt;input type="hidden" name="maximumRecords" value="10" /&gt; &lt;input type="hidden" name="maximumRecords" value="10" /&gt;
&lt;input type="hidden" name="resource" value="local" /&gt; &lt;input type="hidden" name="resource" value="local" /&gt;
&lt;input type="hidden" name="urlmaskfilter" value=".*" /&gt; &lt;input type="hidden" name="urlmaskfilter" value=".*" /&gt;
&lt;input type="hidden" name="prefermaskfilter" value="" /&gt; &lt;input type="hidden" name="prefermaskfilter" value="" /&gt;
&lt;input type="hidden" name="former" value="" /&gt; &lt;input type="hidden" name="former" value="" /&gt;
&lt;input type="hidden" name="display" value="2" /&gt; &lt;input type="hidden" name="display" value="2" /&gt;
&lt;input type="submit" name="Enter" value="Suchen" /&gt; &lt;input type="submit" name="Enter" value="Search" /&gt;
&lt;/form&gt; &lt;/form&gt;
&lt;/div&gt; &lt;/div&gt;
</pre> </pre>

@ -437,11 +437,7 @@ public class wikiCode extends abstractWikiParser implements wikiParser {
} else { } else {
kv = kl; kv = kl;
} }
//if (switchboard != null && switchboard.wikiDB.read(kl) != null) { result = result.substring(0, p0) + "<a class=\"known\" href=\"Wiki.html?page=" + kl + "\">" + kv + "</a>" + result.substring(p1 + 2); // oob exception in append() !
result = result.substring(0, p0) + "<a class=\"known\" href=\"Wiki.html?page=" + kl + "\">" + kv + "</a>" + result.substring(p1 + 2);
//} else {
// result = result.substring(0, p0) + "<a class=\"unknown\" href=\"Wiki.html?page=" + kl + "&edit=Edit\">" + kv + "</a>" + result.substring(p1 + 2);
//}
} }
} }

@ -85,8 +85,8 @@ public class mediawikiIndex {
wparser = new wikiCode(u.getHost()); wparser = new wikiCode(u.getHost());
hparser = new plasmaParser(); hparser = new plasmaParser();
// must be called before usage: // must be called before usage:
//plasmaParser.initHTMLParsableMimeTypes("text/html"); plasmaParser.initHTMLParsableMimeTypes("text/html");
//plasmaParser.initParseableMimeTypes(plasmaParser.PARSER_MODE_CRAWLER, "text/html"); plasmaParser.initParseableMimeTypes(plasmaParser.PARSER_MODE_CRAWLER, "text/html");
} }
public static void checkIndex(File wikimediaxml) { public static void checkIndex(File wikimediaxml) {
@ -309,9 +309,13 @@ public class mediawikiIndex {
this.title = title; this.title = title;
this.source = sb; this.source = sb;
} }
public void genHTML() throws MalformedURLException { public void genHTML() throws IOException {
html = wparser.transform(source.toString()); try {
url = new yacyURL("http://de.wikipedia.org/wiki/" + title, null); html = wparser.transform(source.toString());
url = new yacyURL("http://de.wikipedia.org/wiki/" + title, null);
} catch (Exception e) {
throw new IOException(e.getMessage());
}
} }
public void genDocument() throws InterruptedException, ParserException { public void genDocument() throws InterruptedException, ParserException {
document = hparser.parseSource(url, "text/html", "utf-8", html.getBytes()); document = hparser.parseSource(url, "text/html", "utf-8", html.getBytes());
@ -444,7 +448,7 @@ public class mediawikiIndex {
out.put(record); out.put(record);
} catch (RuntimeException e) { } catch (RuntimeException e) {
e.printStackTrace(); e.printStackTrace();
} catch (MalformedURLException e) { } catch (IOException e) {
e.printStackTrace(); e.printStackTrace();
} catch (ParserException e) { } catch (ParserException e) {
e.printStackTrace(); e.printStackTrace();
@ -500,7 +504,6 @@ public class mediawikiIndex {
this.osw = new OutputStreamWriter(new BufferedOutputStream(new FileOutputStream(new File(targetdir, outputfilename))), "UTF-8"); this.osw = new OutputStreamWriter(new BufferedOutputStream(new FileOutputStream(new File(targetdir, outputfilename))), "UTF-8");
osw.write("<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<surrogates xmlns:dc=\"http://purl.org/dc/elements/1.1/\">\n"); osw.write("<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<surrogates xmlns:dc=\"http://purl.org/dc/elements/1.1/\">\n");
} }
System.out.println("[CONSUME] Title: " + record.title); System.out.println("[CONSUME] Title: " + record.title);
record.document.writeXML(osw, new Date()); record.document.writeXML(osw, new Date());
rc++; rc++;
@ -562,12 +565,12 @@ public class mediawikiIndex {
plasmaParser.initParseableMimeTypes(plasmaParser.PARSER_MODE_CRAWLER, "text/html"); plasmaParser.initParseableMimeTypes(plasmaParser.PARSER_MODE_CRAWLER, "text/html");
mediawikiIndex mi = new mediawikiIndex(urlStub); mediawikiIndex mi = new mediawikiIndex(urlStub);
wikiparserrecord poison = mi.newRecord(); wikiparserrecord poison = mi.newRecord();
int threads = Math.max(1, Runtime.getRuntime().availableProcessors() - 1); int threads = Math.max(2, Runtime.getRuntime().availableProcessors() - 1);
BlockingQueue<wikiparserrecord> in = new ArrayBlockingQueue<wikiparserrecord>(threads * 10); BlockingQueue<wikiparserrecord> in = new ArrayBlockingQueue<wikiparserrecord>(threads * 10);
BlockingQueue<wikiparserrecord> out = new ArrayBlockingQueue<wikiparserrecord>(threads * 10); BlockingQueue<wikiparserrecord> out = new ArrayBlockingQueue<wikiparserrecord>(threads * 10);
ExecutorService service = Executors.newFixedThreadPool(threads + 1); ExecutorService service = Executors.newFixedThreadPool(threads + 1);
convertConsumer[] consumers = new convertConsumer[threads]; convertConsumer[] consumers = new convertConsumer[threads];
Future<Integer>[] consumerResults = new Future[threads]; Future<?>[] consumerResults = new Future[threads];
for (int i = 0; i < threads; i++) { for (int i = 0; i < threads; i++) {
consumers[i] = new convertConsumer(in, out, poison); consumers[i] = new convertConsumer(in, out, poison);
consumerResults[i] = service.submit(consumers[i]); consumerResults[i] = service.submit(consumers[i]);

@ -29,6 +29,7 @@ import java.io.File;
import java.io.FileInputStream; import java.io.FileInputStream;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.io.UTFDataFormatException;
import java.util.concurrent.ArrayBlockingQueue; import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue; import java.util.concurrent.BlockingQueue;

Loading…
Cancel
Save