small changes in surrogate reader, wiki code and portal test

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5894 6c8d7289-2bf4-0310-a012-ef5d649a1542
16 years ago · d079d6dfdb
parent 07f09742bb
commit d079d6dfdb
4 changed files with 53 additions and 43 deletions
--- a/htroot/yacy/ui/yacyui-portaltest.html
+++ b/htroot/yacy/ui/yacyui-portaltest.html
@@ -5,31 +5,36 @@
 	</head>
 	<body>		
 		<script src="http://localhost:8080/yacy/ui/js/jquery-1.3.1.min.js" type="text/javascript"></script>
-		<script>			
+<script>			
-			$(document).ready(function() {
+	$(document).ready(function() {
-				yconf = {
+		yconf = {
-					url   : 'http://localhost:8080',
+			url    : 'http://localhost:8080',
-					global: true,
+			logo   : '/yacy/ui/img/yacy-logo.png',
-					theme : 'start',
+			link   : 'http://www.yacy.net',
-					title : 'YaCy Portal Search'
+			global : false,
-				};
+			width  : 500,
-				$.getScript(yconf.url+'/yacy/ui/js/yacyui-portalsearch.js', function(){});
+			height : 620,
-			});
+			position : ['top',30],
-		</script>			
+			theme  : 'start',
-		<h3>YaCy Portal Search:</h3>
+			title  : 'YaCy Portal Search'
-		<div id="yacylivesearch">
+		};
-			<form id="ysearch" method="get" accept-charset="UTF-8" action="http://localhost:8080/yacysearch.html">
+		$.getScript(yconf.url+'/yacy/ui/js/yacyui-portalsearch.js', function(){});
-				<input name="search" id="yquery" class="fancy" type="text" size="15" maxlength="80" value=""/>
+	});
-				<input type="hidden" name="verify" value="true" />
+</script>
-				<input type="hidden" name="maximumRecords" value="10" />
+<div id="yacylivesearch">
-				<input type="hidden" name="resource" value="local" />
+    <h3>YaCy Portal Search:</h3>
-				<input type="hidden" name="urlmaskfilter" value=".*" />
+	<form id="ysearch" method="get" accept-charset="UTF-8" action="http://localhost:8080/yacysearch.html">
-				<input type="hidden" name="prefermaskfilter" value="" />
+		Live Search <input name="query" id="yquery" class="fancy" type="text" size="15" maxlength="80" value=""/>
-				<input type="hidden" name="former" value="" />
+		<input type="hidden" name="verify" value="false" />
-				<input type="hidden" name="display" value="2" />
+		<input type="hidden" name="maximumRecords" value="10" />
-				<input type="submit" name="Enter" value="Suchen" />
+		<input type="hidden" name="resource" value="local" />
-			</form>
+		<input type="hidden" name="urlmaskfilter" value=".*" />
-		</div>
+		<input type="hidden" name="prefermaskfilter" value="" />
 		<input type="hidden" name="former" value="" />
 		<input type="hidden" name="display" value="2" />
 		<input type="submit" name="Enter" value="Search" />
 	</form>
 </div>
 <h4>Code Snippet:</h4>
 <pre>
 &lt;script src="http://localhost:8080/yacy/ui/js/jquery-1.3.1.min.js" type="text/javascript"&gt;&lt;/script&gt;
@@ -37,7 +42,12 @@
 	$(document).ready(function() {
 		yconf = {
 			url    : 'http://localhost:8080',
-			global : true,
+			logo   : '/yacy/ui/img/yacy-logo.png',
 			link   : 'http://www.yacy.net',
 			global : false,
 			width  : 500,
 			height : 620,
 			position : ['top',30],
 			theme  : 'start',
 			title  : 'YaCy Portal Search'
 		};
@@ -46,15 +56,15 @@
 &lt;/script&gt;
 &lt;div id="yacylivesearch"&gt;
 	&lt;form id="ysearch" method="get" accept-charset="UTF-8" action="http://localhost:8080/yacysearch.html"&gt;
-		&lt;input name="query" id="yquery" class="fancy" type="text" size="15" maxlength="80" value=""/&gt;
+		Live Search &lt;input name="query" id="yquery" class="fancy" type="text" size="15" maxlength="80" value=""/&gt;
-		&lt;input type="hidden" name="verify" value="true" /&gt;
+		&lt;input type="hidden" name="verify" value="false" /&gt;
 		&lt;input type="hidden" name="maximumRecords" value="10" /&gt;
 		&lt;input type="hidden" name="resource" value="local" /&gt;
 		&lt;input type="hidden" name="urlmaskfilter" value=".*" /&gt;
 		&lt;input type="hidden" name="prefermaskfilter" value="" /&gt;
 		&lt;input type="hidden" name="former" value="" /&gt;
 		&lt;input type="hidden" name="display" value="2" /&gt;
-		&lt;input type="submit" name="Enter" value="Suchen" /&gt;
+		&lt;input type="submit" name="Enter" value="Search" /&gt;
 	&lt;/form&gt;
 &lt;/div&gt;
 </pre>
--- a/source/de/anomic/data/wiki/wikiCode.java
+++ b/source/de/anomic/data/wiki/wikiCode.java
@@ -437,11 +437,7 @@ public class wikiCode extends abstractWikiParser implements wikiParser {
                } else {
                    kv = kl;
                }
-                //if (switchboard != null && switchboard.wikiDB.read(kl) != null) {
+                result = result.substring(0, p0) + "<a class=\"known\" href=\"Wiki.html?page=" + kl + "\">" + kv + "</a>" + result.substring(p1 + 2); // oob exception in append() !
                    result = result.substring(0, p0) + "<a class=\"known\" href=\"Wiki.html?page=" + kl + "\">" + kv + "</a>" + result.substring(p1 + 2);
                //} else {
                //    result = result.substring(0, p0) + "<a class=\"unknown\" href=\"Wiki.html?page=" + kl + "&edit=Edit\">" + kv + "</a>" + result.substring(p1 + 2);
                //}
            }
        }
--- a/source/de/anomic/tools/mediawikiIndex.java
+++ b/source/de/anomic/tools/mediawikiIndex.java
@@ -85,8 +85,8 @@ public class mediawikiIndex {
        wparser = new wikiCode(u.getHost());
        hparser = new plasmaParser();
        // must be called before usage:
-        //plasmaParser.initHTMLParsableMimeTypes("text/html");
+        plasmaParser.initHTMLParsableMimeTypes("text/html");
-        //plasmaParser.initParseableMimeTypes(plasmaParser.PARSER_MODE_CRAWLER, "text/html");
+        plasmaParser.initParseableMimeTypes(plasmaParser.PARSER_MODE_CRAWLER, "text/html");
    }
    public static void checkIndex(File wikimediaxml) {
@@ -309,9 +309,13 @@ public class mediawikiIndex {
            this.title = title;
            this.source = sb;
        }
-        public void genHTML() throws MalformedURLException {
+        public void genHTML() throws IOException {
-            html = wparser.transform(source.toString());
+            try {
-            url = new yacyURL("http://de.wikipedia.org/wiki/" + title, null);
+                html = wparser.transform(source.toString());
                url = new yacyURL("http://de.wikipedia.org/wiki/" + title, null);
            } catch (Exception e) {
                throw new IOException(e.getMessage());
            }
        }
        public void genDocument() throws InterruptedException, ParserException {
            document = hparser.parseSource(url, "text/html", "utf-8", html.getBytes());
@@ -444,7 +448,7 @@ public class mediawikiIndex {
                        out.put(record);
                    } catch (RuntimeException e) {
                        e.printStackTrace();
-                    } catch (MalformedURLException e) {
+                    } catch (IOException e) {
                        e.printStackTrace();
                    } catch (ParserException e) {
                        e.printStackTrace();
@@ -500,7 +504,6 @@ public class mediawikiIndex {
                        this.osw = new OutputStreamWriter(new BufferedOutputStream(new FileOutputStream(new File(targetdir, outputfilename))), "UTF-8");
                        osw.write("<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<surrogates xmlns:dc=\"http://purl.org/dc/elements/1.1/\">\n");
                    }
                    System.out.println("[CONSUME] Title: " + record.title);
                    record.document.writeXML(osw, new Date());
                    rc++;
@@ -562,12 +565,12 @@ public class mediawikiIndex {
        plasmaParser.initParseableMimeTypes(plasmaParser.PARSER_MODE_CRAWLER, "text/html");
        mediawikiIndex mi = new mediawikiIndex(urlStub);
        wikiparserrecord poison = mi.newRecord();
-        int threads = Math.max(1, Runtime.getRuntime().availableProcessors() - 1);
+        int threads = Math.max(2, Runtime.getRuntime().availableProcessors() - 1);
        BlockingQueue<wikiparserrecord> in = new ArrayBlockingQueue<wikiparserrecord>(threads * 10);
        BlockingQueue<wikiparserrecord> out = new ArrayBlockingQueue<wikiparserrecord>(threads * 10);
        ExecutorService service = Executors.newFixedThreadPool(threads + 1);
        convertConsumer[] consumers = new convertConsumer[threads];
-        Future<Integer>[] consumerResults = new Future[threads];
+        Future<?>[] consumerResults = new Future[threads];
        for (int i = 0; i < threads; i++) {
        	consumers[i] = new convertConsumer(in, out, poison);
            consumerResults[i] = service.submit(consumers[i]);
--- a/source/de/anomic/xml/SurrogateReader.java
+++ b/source/de/anomic/xml/SurrogateReader.java
@@ -29,6 +29,7 @@ import java.io.File;
 import java.io.FileInputStream;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.UTFDataFormatException;
 import java.util.concurrent.ArrayBlockingQueue;
 import java.util.concurrent.BlockingQueue;