augmentation

Conflicts:
	source/net/yacy/interaction/AugmentHtmlStream.java
pull/1/head
cominch 13 years ago committed by Michael Peter Christen
parent 87a3fbb3c2
commit aa0295917c

@ -0,0 +1,43 @@
<!-- Trigger tab for a toggleable panel: click handler, styling, and the trigger element itself. -->
<script type="text/javascript">
// Once the DOM is ready, wire the trigger so that each click toggles the panel.
$(document).ready(function()
{
$('#sciety_trigger').click (function() {
// NOTE(review): this selector reads "sciet_panel" while every other id in this
// snippet uses the "sciety_" prefix; the panel element is not defined here, so
// confirm that the id is not a typo for "sciety_panel".
$('#sciet_panel').toggle("fast");
// Flip the "active" class on the trigger so its state can be styled.
$(this).toggleClass("active");
// Returning false from a jQuery handler prevents the default action and stops propagation.
return false;
});
});
</script>
<style type="text/css">
/* Pins the trigger to the right edge of the viewport, 160px from the top, with a very high z-index. */
#sciety_trigger {
font-size: 16px;
font-family: sans-serif;
position: fixed;
top: 160px;
right: 0px;
z-index: 99999;
border: 1px solid #444444;
padding: 20px 40px 20px 15px;
/* White text; no background is declared in this rule (presumably supplied elsewhere - TODO confirm). */
color: #fff;
letter-spacing: -1px;
text-decoration: none;
}
</style>
<!-- The clickable trigger element targeted by the script and the style rule above. -->
<div class="sciety_trigger" id="sciety_trigger">
sciety
</div>

@ -31,423 +31,424 @@ import de.anomic.http.server.ServerSideIncludes;
public class AugmentHtmlStream { public class AugmentHtmlStream {
static RequestHeader globalrequestHeader; static RequestHeader globalrequestHeader;
/** /**
* creates a NodeVisitor which assigns a unique ID to every node * creates a NodeVisitor which assigns a unique ID to every node
* *
* @return customized NodeVisitor * @return customized NodeVisitor
*/ */
private static class VisitorAddUniqueID extends NodeVisitor { private static class VisitorAddUniqueID extends NodeVisitor {
private int counter; private int counter;
public VisitorAddUniqueID() { public VisitorAddUniqueID() {
this.setCounter(0); this.setCounter(0);
} }
@Override @Override
public void visitTag(Tag tag) { public void visitTag(Tag tag) {
if (tag.getAttribute("id") == null) { if (tag.getAttribute("id") == null) {
this.setCounter(this.getCounter() + 1); this.setCounter(this.getCounter() + 1);
tag.setAttribute("id", "\"sci" + this.getCounter() + "\""); tag.setAttribute("id", "\"sci" + this.getCounter() + "\"");
} }
if (tag instanceof org.htmlparser.tags.LinkTag) { if (tag instanceof org.htmlparser.tags.LinkTag) {
// Link // Link
Log.logInfo("AUGMENTATION", tag.getAttribute("href")); Log.logInfo("AUGMENTATION", tag.getAttribute("href"));
LinkTag lt = (LinkTag)tag; LinkTag lt = (LinkTag)tag;
} }
} }
@Override @Override
public void visitStringNode(Text string) { public void visitStringNode(Text string) {
} }
public void setCounter(int counter) { public void setCounter(int counter) {
this.counter = counter; this.counter = counter;
} }
public int getCounter() { public int getCounter() {
return this.counter; return this.counter;
} }
} }
/** /**
* creates a NodeVisitor which inspects the element if it contains useful * creates a NodeVisitor which inspects the element if it contains useful
* text * text
* *
* @return customized NodeVisitor * @return customized NodeVisitor
*/ */
private static class VisitorText extends NodeVisitor { private static class VisitorText extends NodeVisitor {
private int counter; private int counter;
public VisitorText() { public VisitorText() {
this.setCounter(0); this.setCounter(0);
} }
@Override @Override
public void visitTag(Tag tag) { public void visitTag(Tag tag) {
// tag.setText(tag.getText()+" <span>augmented</span>"); // tag.setText(tag.getText()+" <span>augmented</span>");
// Node node = new org.htmlparser.nodes.TextNode(loadInternal("interactionparts/scibutton.html", globalrequestHeader)); // Node node = new org.htmlparser.nodes.TextNode(loadInternal("interactionparts/scibutton.html", globalrequestHeader));
// NodeList nl = tag.getChildren(); // NodeList nl = tag.getChildren();
// nl.add (node); // nl.add (node);
// tag.setChildren(nl); // tag.setChildren(nl);
} }
@Override @Override
public void visitStringNode(Text string) { public void visitStringNode(Text string) {
// if (string.getParent() != null) { // if (string.getParent() != null) {
// //
// string.setText(string // string.setText(string
// .getText() // .getText()
// .replaceAll("und", // .replaceAll("und",
// "<a href=\"http://www.kit.edu/\" target=\"_blank\">KIT</a>")); // "<a href=\"http://www.kit.edu/\" target=\"_blank\">KIT</a>"));
// //
// //
// } // }
} }
public void setCounter(int counter) { public void setCounter(int counter) {
this.counter = counter; this.counter = counter;
} }
public int getCounter() { public int getCounter() {
return this.counter; return this.counter;
} }
} }
/** /**
* send web page to external REFLECT web service * send web page to external REFLECT web service
* *
* @return the web page with integrated REFLECT elements * @return the web page with integrated REFLECT elements
*/ */
private static String processExternal(String url, String fieldname, private static String processExternal(String url, String fieldname,
String data) throws IOException { String data) throws IOException {
final HTTPClient client = new HTTPClient(); final HTTPClient client = new HTTPClient();
try { try {
StringBuilder postdata = new StringBuilder(); StringBuilder postdata = new StringBuilder();
postdata.append("document="); postdata.append("document=");
postdata.append(URLEncoder.encode(data, "UTF-8")); postdata.append(URLEncoder.encode(data, "UTF-8"));
InputStream in = new ByteArrayInputStream(postdata.toString() InputStream in = new ByteArrayInputStream(postdata.toString()
.getBytes()); .getBytes());
byte[] result = client.POSTbytes(url, in, postdata.length()); byte[] result = client.POSTbytes(url, in, postdata.length());
if (result != null) { if (result != null) {
return new String(result); return new String(result);
} }
} finally { } finally {
client.finish(); client.finish();
} }
return null; return null;
} }
private static String loadInternal(String path, RequestHeader requestHeader) { private static String loadInternal(String path, RequestHeader requestHeader) {
ByteArrayOutputStream buffer = new ByteArrayOutputStream(); ByteArrayOutputStream buffer = new ByteArrayOutputStream();
String realmProp = requestHeader.get(RequestHeader.AUTHORIZATION); String realmProp = requestHeader.get(RequestHeader.AUTHORIZATION);
ServerSideIncludes.writeContent(path, buffer, realmProp, "127.0.0.1", requestHeader); // TODO: ip ServerSideIncludes.writeContent(path, buffer, realmProp, "127.0.0.1", requestHeader); // TODO: ip
return buffer.toString(); return buffer.toString();
} }
/** /**
* add DOCTYPE if necessary * add DOCTYPE if necessary
* *
* @return the web page with a leading DOCTYPE definition * @return the web page with a leading DOCTYPE definition
*/ */
private static String processAddDoctype(String data) { private static String processAddDoctype(String data) {
String result = data; String result = data;
BufferedReader reader = new BufferedReader(new StringReader(data)); BufferedReader reader = new BufferedReader(new StringReader(data));
try { try {
String firstline = reader.readLine(); String firstline = reader.readLine();
if (firstline != null) { if (firstline != null) {
if (!firstline.startsWith("<!DOCTYPE")) { if (!firstline.startsWith("<!DOCTYPE")) {
result = "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">\n" result = "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">\n"
+ data; + data;
} }
} }
} catch (IOException e1) { } catch (IOException e1) {
} }
return result; return result;
} }
/** /**
* load snippet from resource text file * load snippet from resource text file
* *
* @return text from resource text file * @return text from resource text file
*/ */
private static String loadPart(String part) { private static String loadPart(String part) {
String result = ""; String result = "";
try { try {
BufferedReader in = new BufferedReader(new FileReader(yacy.homedir + File.separatorChar + "htroot" BufferedReader in = new BufferedReader(new FileReader(yacy.homedir + File.separatorChar + "htroot"
+ File.separatorChar + "interaction" + File.separatorChar + File.separatorChar + "interaction" + File.separatorChar
+ "parts" + File.separatorChar + part)); + "parts" + File.separatorChar + part));
String str; String str;
while ((str = in.readLine()) != null) { while ((str = in.readLine()) != null) {
result += str; result += str;
} }
in.close(); in.close();
} catch (IOException e) { } catch (IOException e) {
} }
return result; return result;
} }
public static StringBuffer process (StringBuffer data, Charset charset, DigestURI url, RequestHeader requestHeader) { public static StringBuffer process (StringBuffer data, Charset charset, DigestURI url, RequestHeader requestHeader) {
globalrequestHeader = requestHeader; globalrequestHeader = requestHeader;
Switchboard sb = Switchboard.getSwitchboard(); Switchboard sb = Switchboard.getSwitchboard();
boolean augmented = false; boolean augmented = false;
try { try {
Log.logInfo("AUGMENTATION", url.getName()); Log.logInfo("AUGMENTATION", url.getName());
} catch (IOException e1) { } catch (IOException e1) {
// TODO Auto-generated catch block // TODO Auto-generated catch block
e1.printStackTrace(); e1.printStackTrace();
} }
String Doc = data.toString(); String Doc = data.toString();
// Send document to REFLECT (http://www.reflect.ws/REST_API.html) // Send document to REFLECT (http://www.reflect.ws/REST_API.html)
if (sb.getConfigBool("augmentation.reflect", false) == true) { if (sb.getConfigBool("augmentation.reflect", false) == true) {
try { try {
Doc = processExternal("http://reflect.ws/REST/GetHTML", Doc = processExternal("http://reflect.ws/REST/GetHTML",
"document", Doc); "document", Doc);
Log.logInfo("AUGMENTATION", "reflected " + url); Log.logInfo("AUGMENTATION", "reflected " + url);
augmented = true; augmented = true;
} catch (Exception e) { } catch (Exception e) {
} }
} }
// Add DOCTYPE if not present. // Add DOCTYPE if not present.
// This is required for IE to render position:absolute correctly. // This is required for IE to render position:absolute correctly.
if (sb.getConfigBool("augmentation.addDoctype", true) == true) { if (sb.getConfigBool("augmentation.addDoctype", true) == true) {
Doc = processAddDoctype(Doc); Doc = processAddDoctype(Doc);
augmented = true; augmented = true;
} }
if (sb.getConfigBool("augmentation.reparse", true) == true) { if (sb.getConfigBool("augmentation.reparse", true) == true) {
NodeList list = new NodeList(); NodeList list = new NodeList();
// Fill NodeList with parsed Document // Fill NodeList with parsed Document
try { try {
org.htmlparser.Parser par = new org.htmlparser.Parser(); org.htmlparser.Parser par = new org.htmlparser.Parser();
par.setInputHTML(Doc); par.setInputHTML(Doc);
list = par.parse(null); list = par.parse(null);
Log.logInfo ("AUGMENTATION", url.toString()); Log.logInfo ("AUGMENTATION", url.toString());
} catch (Exception e) { } catch (Exception e) {
} }
// Add Unique ID to every node element which has no id yet. // Add Unique ID to every node element which has no id yet.
// This allows consistent interaction between client (browser) and // This allows consistent interaction between client (browser) and
// back-end (data store) by providing "position awareness" in the // back-end (data store) by providing "position awareness" in the
// document. // document.
if (sb.getConfigBool("augmentation.reparse.adduniqueid", true) == true) { if (sb.getConfigBool("augmentation.reparse.adduniqueid", true) == true) {
try { try {
NodeVisitor visitorAddUniqueID = new AugmentHtmlStream.VisitorAddUniqueID(); NodeVisitor visitorAddUniqueID = new AugmentHtmlStream.VisitorAddUniqueID();
list.visitAllNodesWith(visitorAddUniqueID); list.visitAllNodesWith(visitorAddUniqueID);
} catch (Exception e) { } catch (Exception e) {
} }
} }
// Inspect on text tags // Inspect on text tags
try { try {
NodeVisitor visitorText = new AugmentHtmlStream.VisitorText(); NodeVisitor visitorText = new AugmentHtmlStream.VisitorText();
list.visitAllNodesWith(visitorText); list.visitAllNodesWith(visitorText);
} catch (Exception e) { } catch (Exception e) {
} }
String SCI_GUID = ""; String SCI_GUID = "";
String SCI_GUID_DOI = ""; String SCI_GUID_DOI = "";
String SCI_GUID_PMID = ""; String SCI_GUID_PMID = "";
String SCI_TITLE = ""; String SCI_TITLE = "";
String SCI_CREATOR = ""; String SCI_CREATOR = "";
String SCI_DESCRIPTION = ""; String SCI_DESCRIPTION = "";
String SCI_IDENTIFIER = ""; String SCI_IDENTIFIER = "";
String SCI_WHITELIST = ""; String SCI_WHITELIST = "";
String SCI_URL = ""; String SCI_URL = "";
String SCI_HASH = ""; String SCI_HASH = "";
SCI_URL = url.toString(); SCI_URL = url.toString();
// System.out.println("Starting augmentation for " + url); // System.out.println("Starting augmentation for " + url);
// System.out.println("Content: " + Doc); // System.out.println("Content: " + Doc);
if (!(list == null)) { if (!(list == null)) {
// DOCUMENT IS MANIPULABLE BY HTML REWRITER // DOCUMENT IS MANIPULABLE BY HTML REWRITER
// SO SEND IT TO YACY PARSER // SO SEND IT TO YACY PARSER
Document document = null; Document document = null;
try { try {
final StringReader stringReader = new StringReader(Doc); final StringReader stringReader = new StringReader(Doc);
InputStream inputStream = new InputStream() { InputStream inputStream = new InputStream() {
@Override @Override
public int read() throws IOException { public int read() throws IOException {
return stringReader.read(); return stringReader.read();
} }
}; };
document = Document.mergeDocuments( document = Document.mergeDocuments(
url, url,
"text/html", "text/html",
TextParser.parseSource(url, "text/html", null, TextParser.parseSource(url, "text/html", null,
data.length(), inputStream)); data.length(), inputStream));
} catch (Exception e) { } catch (Exception e) {
} }
if (document != null) { if (document != null) {
if (document.dc_format() == "text/html") { if (document.dc_format() == "text/html") {
SCI_TITLE = document.dc_title(); SCI_TITLE = document.dc_title();
SCI_CREATOR = document.dc_creator(); SCI_CREATOR = document.dc_creator();
SCI_DESCRIPTION = document.dc_description(); SCI_DESCRIPTION = document.dc_description();
SCI_IDENTIFIER = document.dc_identifier(); SCI_IDENTIFIER = document.dc_identifier();
} }
} }
SCI_HASH = "" + url.hashCode(); SCI_HASH = "" + url.hashCode();
// ADD AUGMENTED HEADER INFORMATION // ADD AUGMENTED HEADER INFORMATION
NodeList header = list.extractAllNodesThatMatch( NodeList header = list.extractAllNodesThatMatch(
new org.htmlparser.filters.NodeClassFilter( new org.htmlparser.filters.NodeClassFilter(
org.htmlparser.tags.HeadTag.class), true); org.htmlparser.tags.HeadTag.class), true);
org.htmlparser.util.SimpleNodeIterator iterHeader = header org.htmlparser.util.SimpleNodeIterator iterHeader = header
.elements(); .elements();
while (iterHeader.hasMoreNodes()) { while (iterHeader.hasMoreNodes()) {
org.htmlparser.tags.HeadTag ht = ((org.htmlparser.tags.HeadTag) iterHeader org.htmlparser.tags.HeadTag ht = ((org.htmlparser.tags.HeadTag) iterHeader
.nextNode()); .nextNode());
NodeList headchildren = ht.getChildren(); NodeList headchildren = ht.getChildren();
headchildren.add(new org.htmlparser.nodes.TextNode(loadInternal("env/templates/jqueryheader.template", requestHeader))); headchildren.add(new org.htmlparser.nodes.TextNode(loadInternal("env/templates/jqueryheader.template", requestHeader)));
// headchildren.add(new org.htmlparser.nodes.TextNode("<script type='text/javascript'>"+loadInternal("interaction/formdata.js", requestHeader)+"</script>")); headchildren.add(new org.htmlparser.nodes.TextNode("<script type='text/javascript'>"+loadInternal("interaction/interaction.js", requestHeader)+"</script>"));
headchildren.add(new org.htmlparser.nodes.TextNode("<script type='text/javascript'>"+loadInternal("interaction/interaction_sciety.js", requestHeader)+"</script>"));
headchildren.add(new org.htmlparser.nodes.TextNode("<script type='text/javascript'>"+loadInternal("interaction/interaction_metadata.js", requestHeader)+"</script>"));
// headchildren.add(new org.htmlparser.nodes.TextNode("<script type='text/javascript'>"+loadInternal("interaction/interaction_sciety.js", requestHeader)+"</script>")); augmented = true;
headchildren.add(new org.htmlparser.nodes.TextNode("<script type='text/javascript'>"+loadInternal("interaction/interaction.js", requestHeader)+"</script>")); ht.setChildren(headchildren);
}
augmented = true; // ADD AUGMENTED BODY INFORMATION
ht.setChildren(headchildren); NodeList body = list.extractAllNodesThatMatch(
} new org.htmlparser.filters.NodeClassFilter(
org.htmlparser.tags.BodyTag.class), true);
// ADD AUGMENTED BODY INFORMATION org.htmlparser.util.SimpleNodeIterator iterBody = body
.elements();
NodeList body = list.extractAllNodesThatMatch( while (iterBody.hasMoreNodes()) {
new org.htmlparser.filters.NodeClassFilter(
org.htmlparser.tags.BodyTag.class), true);
org.htmlparser.util.SimpleNodeIterator iterBody = body org.htmlparser.tags.BodyTag bt = ((org.htmlparser.tags.BodyTag) iterBody
.elements(); .nextNode());
while (iterBody.hasMoreNodes()) { NodeList bodychildren = bt.getChildren();
org.htmlparser.tags.BodyTag bt = ((org.htmlparser.tags.BodyTag) iterBody
.nextNode());
NodeList bodychildren = bt.getChildren(); // bodychildren.add(new org.htmlparser.nodes.TextNode(loadInternal("interaction/Footer.html", requestHeader)));
bodychildren.add(new org.htmlparser.nodes.TextNode(loadInternal("interaction/OverlayReview.html?link="+url.toNormalform(true, false), requestHeader)));
bodychildren.add(new org.htmlparser.nodes.TextNode(loadInternal("interaction/Footer.html", requestHeader))); bodychildren.add(new org.htmlparser.nodes.TextNode(loadInternal("interaction/Sidepanel_part.html?link="+url.toNormalform(true, false), requestHeader)));
// bodychildren.add(new org.htmlparser.nodes.TextNode(loadInternal("interaction/OverlayReview.html?link="+url.toNormalform(true, false), requestHeader))); // bodychildren.add(new org.htmlparser.nodes.TextNode(loadInternal("interaction/Overlay.html?link="+url.toNormalform(true, false), requestHeader)));
bodychildren.add(new org.htmlparser.nodes.TextNode(loadInternal("interaction/Overlay.html?link="+url.toNormalform(true, false), requestHeader)));
// ADD AUGMENTED INFO
// ADD AUGMENTED INFO org.htmlparser.tags.Div sci_aug = new org.htmlparser.tags.Div();
org.htmlparser.tags.Div sci_aug = new org.htmlparser.tags.Div(); sci_aug.setTagName("div");
sci_aug.setTagName("div"); sci_aug.setAttribute("id", "sciety_augmented");
sci_aug.setAttribute("style",
"visibility: hidden; position: absolute; overflow: hidden;");
sci_aug.setAttribute("id", "sciety_augmented"); org.htmlparser.util.NodeList childr = new org.htmlparser.util.NodeList();
sci_aug.setAttribute("style",
"visibility: hidden; position: absolute; overflow: hidden;");
org.htmlparser.util.NodeList childr = new org.htmlparser.util.NodeList();
sci_aug.setChildren(childr);
sci_aug.setChildren(childr); org.htmlparser.tags.Div sci_aug_endtag = new org.htmlparser.tags.Div();
org.htmlparser.tags.Div sci_aug_endtag = new org.htmlparser.tags.Div(); sci_aug_endtag.setTagName("/div");
sci_aug_endtag.setTagName("/div"); sci_aug.setEndTag(sci_aug_endtag);
sci_aug.setEndTag(sci_aug_endtag); bodychildren.add(sci_aug);
bodychildren.add(sci_aug); bt.setChildren(bodychildren);
bt.setChildren(bodychildren); augmented = true;
augmented = true; }
} Doc = list.toHtml(true);
Doc = list.toHtml(true); augmented = true;
augmented = true; } // not list = null
} // not list = null } // reparse
} // reparse
if (augmented) { if (augmented) {
return (new StringBuffer (Doc)); return (new StringBuffer (Doc));
} else { } else {
return (data); return (data);
} }
} }
} }
Loading…
Cancel
Save