small changes in surrogate reader, wiki code and portal test

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5894 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 16 years ago
parent 07f09742bb
commit d079d6dfdb

@ -5,31 +5,36 @@
</head>
<body>
<script src="http://localhost:8080/yacy/ui/js/jquery-1.3.1.min.js" type="text/javascript"></script>
<script>
$(document).ready(function() {
yconf = {
url : 'http://localhost:8080',
global: true,
theme : 'start',
title : 'YaCy Portal Search'
};
$.getScript(yconf.url+'/yacy/ui/js/yacyui-portalsearch.js', function(){});
});
</script>
<h3>YaCy Portal Search:</h3>
<div id="yacylivesearch">
<form id="ysearch" method="get" accept-charset="UTF-8" action="http://localhost:8080/yacysearch.html">
<input name="search" id="yquery" class="fancy" type="text" size="15" maxlength="80" value=""/>
<input type="hidden" name="verify" value="true" />
<input type="hidden" name="maximumRecords" value="10" />
<input type="hidden" name="resource" value="local" />
<input type="hidden" name="urlmaskfilter" value=".*" />
<input type="hidden" name="prefermaskfilter" value="" />
<input type="hidden" name="former" value="" />
<input type="hidden" name="display" value="2" />
<input type="submit" name="Enter" value="Suchen" />
</form>
</div>
<script>
$(document).ready(function() {
yconf = {
url : 'http://localhost:8080',
logo : '/yacy/ui/img/yacy-logo.png',
link : 'http://www.yacy.net',
global : false,
width : 500,
height : 620,
position : ['top',30],
theme : 'start',
title : 'YaCy Portal Search'
};
$.getScript(yconf.url+'/yacy/ui/js/yacyui-portalsearch.js', function(){});
});
</script>
<div id="yacylivesearch">
<h3>YaCy Portal Search:</h3>
<form id="ysearch" method="get" accept-charset="UTF-8" action="http://localhost:8080/yacysearch.html">
Live Search <input name="query" id="yquery" class="fancy" type="text" size="15" maxlength="80" value=""/>
<input type="hidden" name="verify" value="false" />
<input type="hidden" name="maximumRecords" value="10" />
<input type="hidden" name="resource" value="local" />
<input type="hidden" name="urlmaskfilter" value=".*" />
<input type="hidden" name="prefermaskfilter" value="" />
<input type="hidden" name="former" value="" />
<input type="hidden" name="display" value="2" />
<input type="submit" name="Enter" value="Search" />
</form>
</div>
<h4>Code Snippet:</h4>
<pre>
&lt;script src="http://localhost:8080/yacy/ui/js/jquery-1.3.1.min.js" type="text/javascript"&gt;&lt;/script&gt;
@ -37,7 +42,12 @@
$(document).ready(function() {
yconf = {
url : 'http://localhost:8080',
global : true,
logo : '/yacy/ui/img/yacy-logo.png',
link : 'http://www.yacy.net',
global : false,
width : 500,
height : 620,
position : ['top',30],
theme : 'start',
title : 'YaCy Portal Search'
};
@ -46,15 +56,15 @@
&lt;/script&gt;
&lt;div id="yacylivesearch"&gt;
&lt;form id="ysearch" method="get" accept-charset="UTF-8" action="http://localhost:8080/yacysearch.html"&gt;
&lt;input name="query" id="yquery" class="fancy" type="text" size="15" maxlength="80" value=""/&gt;
&lt;input type="hidden" name="verify" value="true" /&gt;
Live Search &lt;input name="query" id="yquery" class="fancy" type="text" size="15" maxlength="80" value=""/&gt;
&lt;input type="hidden" name="verify" value="false" /&gt;
&lt;input type="hidden" name="maximumRecords" value="10" /&gt;
&lt;input type="hidden" name="resource" value="local" /&gt;
&lt;input type="hidden" name="urlmaskfilter" value=".*" /&gt;
&lt;input type="hidden" name="prefermaskfilter" value="" /&gt;
&lt;input type="hidden" name="former" value="" /&gt;
&lt;input type="hidden" name="display" value="2" /&gt;
&lt;input type="submit" name="Enter" value="Suchen" /&gt;
&lt;input type="submit" name="Enter" value="Search" /&gt;
&lt;/form&gt;
&lt;/div&gt;
</pre>

@ -437,11 +437,7 @@ public class wikiCode extends abstractWikiParser implements wikiParser {
} else {
kv = kl;
}
//if (switchboard != null && switchboard.wikiDB.read(kl) != null) {
result = result.substring(0, p0) + "<a class=\"known\" href=\"Wiki.html?page=" + kl + "\">" + kv + "</a>" + result.substring(p1 + 2);
//} else {
// result = result.substring(0, p0) + "<a class=\"unknown\" href=\"Wiki.html?page=" + kl + "&edit=Edit\">" + kv + "</a>" + result.substring(p1 + 2);
//}
result = result.substring(0, p0) + "<a class=\"known\" href=\"Wiki.html?page=" + kl + "\">" + kv + "</a>" + result.substring(p1 + 2); // oob exception in append() !
}
}

@ -85,8 +85,8 @@ public class mediawikiIndex {
wparser = new wikiCode(u.getHost());
hparser = new plasmaParser();
// must be called before usage:
//plasmaParser.initHTMLParsableMimeTypes("text/html");
//plasmaParser.initParseableMimeTypes(plasmaParser.PARSER_MODE_CRAWLER, "text/html");
plasmaParser.initHTMLParsableMimeTypes("text/html");
plasmaParser.initParseableMimeTypes(plasmaParser.PARSER_MODE_CRAWLER, "text/html");
}
public static void checkIndex(File wikimediaxml) {
@ -309,9 +309,13 @@ public class mediawikiIndex {
this.title = title;
this.source = sb;
}
public void genHTML() throws MalformedURLException {
html = wparser.transform(source.toString());
url = new yacyURL("http://de.wikipedia.org/wiki/" + title, null);
public void genHTML() throws IOException {
try {
html = wparser.transform(source.toString());
url = new yacyURL("http://de.wikipedia.org/wiki/" + title, null);
} catch (Exception e) {
throw new IOException(e.getMessage());
}
}
public void genDocument() throws InterruptedException, ParserException {
document = hparser.parseSource(url, "text/html", "utf-8", html.getBytes());
@ -444,7 +448,7 @@ public class mediawikiIndex {
out.put(record);
} catch (RuntimeException e) {
e.printStackTrace();
} catch (MalformedURLException e) {
} catch (IOException e) {
e.printStackTrace();
} catch (ParserException e) {
e.printStackTrace();
@ -500,7 +504,6 @@ public class mediawikiIndex {
this.osw = new OutputStreamWriter(new BufferedOutputStream(new FileOutputStream(new File(targetdir, outputfilename))), "UTF-8");
osw.write("<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<surrogates xmlns:dc=\"http://purl.org/dc/elements/1.1/\">\n");
}
System.out.println("[CONSUME] Title: " + record.title);
record.document.writeXML(osw, new Date());
rc++;
@ -562,12 +565,12 @@ public class mediawikiIndex {
plasmaParser.initParseableMimeTypes(plasmaParser.PARSER_MODE_CRAWLER, "text/html");
mediawikiIndex mi = new mediawikiIndex(urlStub);
wikiparserrecord poison = mi.newRecord();
int threads = Math.max(1, Runtime.getRuntime().availableProcessors() - 1);
int threads = Math.max(2, Runtime.getRuntime().availableProcessors() - 1);
BlockingQueue<wikiparserrecord> in = new ArrayBlockingQueue<wikiparserrecord>(threads * 10);
BlockingQueue<wikiparserrecord> out = new ArrayBlockingQueue<wikiparserrecord>(threads * 10);
ExecutorService service = Executors.newFixedThreadPool(threads + 1);
convertConsumer[] consumers = new convertConsumer[threads];
Future<Integer>[] consumerResults = new Future[threads];
Future<?>[] consumerResults = new Future[threads];
for (int i = 0; i < threads; i++) {
consumers[i] = new convertConsumer(in, out, poison);
consumerResults[i] = service.submit(consumers[i]);

@ -29,6 +29,7 @@ import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.UTFDataFormatException;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;

Loading…
Cancel
Save