yacy_search_server/htroot/ConfigParser_p.html

<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
  <head>
    <title>YaCy '#[clientname]#': Advanced Settings</title>
    #%env/templates/metas.template%#
	<script type="text/javascript">
    <!--
	/**
	 * Sets the checked state of every checkbox that belongs to a form.
	 *
	 * @param {string} formToCheckAll id of the form whose checkboxes are updated
	 * @param {boolean} checkStatus state that is applied to each checkbox
	 */
	function checkAll(formToCheckAll, checkStatus) {
		var form = document.getElementById(formToCheckAll);
		// Nothing to do when the id is unknown or does not refer to a form.
		if (!form || !form.elements) {
			return;
		}
		for (var i = 0; i < form.elements.length; i++) {
			var control = form.elements[i];
			// Only checkboxes have a meaningful "checked" state; text fields,
			// submit buttons and fieldsets are left untouched.
			if (control.type === "checkbox") {
				control.checked = checkStatus;
			}
		}
	}
    -->
    </script>
  </head>
  <body id="Settings">
    #%env/templates/header.template%#
    #%env/templates/submenuCrawler.template%#
    <h2>Parser Configuration</h2>
<form id="parsersettings" action="ConfigParser_p.html" method="post" enctype="multipart/form-data">
<fieldset><legend id="parser">Content Parser Settings</legend>
<p>
  With these settings you can activate or deactivate parsing of additional content-types based on their MIME-types.<br />
  For a detailed description of the various MIME-types take a look at
  <a href="http://www.iana.org/assignments/media-types/" target="_blank">http://www.iana.org/assignments/media-types/</a>.<br />
  If you want to test a specific parser you can do so using the <a href="ViewFile.html">File Viewer</a>.
</p>
<!--
  Parser table: for every parser the template emits one dark row with the parser name,
  followed by one light row per file extension and one light row per MIME type.
  Each of those rows carries a checkbox named "extension_" + extension or
  "mimename_" + MIME type. The checkbox in the header row hands the id of the
  enclosing form and its own state to checkAll.
  The checkboxes have no visible caption of their own, so each one gets an aria-label.
-->
<table border="0">
  <tr class="TableHeader" valign="bottom">
    <td class="small" width="30" align="center"><input type="checkbox" id="allswitch" aria-label="Toggle all checkboxes" onclick="checkAll(this.form.id, this.checked);"/></td>
    <td class="small" width="60">Extension</td>
    <td class="small" width="300">Mime-Type</td>
  </tr>#{parser}#
  <tr class="TableCellDark">
    <td colspan="3">#[name]#</td>
  </tr>#{ext}#
  <tr id="#[name]#" class="TableCellLight">
    <td class="small" align="center"><input type="checkbox" name="extension_#[extension]#" aria-label="#[extension]#" #(status)#::checked="checked" #(/status)#/></td>
    <td class="small">#[extension]#</td>
    <td class="small"></td>
  </tr>#{/ext}##{mime}#
  <tr class="TableCellLight">
    <td class="small" align="center"><input type="checkbox" name="mimename_#[mimetype]#" aria-label="#[mimetype]#" #(status)#::checked="checked" #(/status)#/></td>
    <td class="small"></td>
    <td class="small">#[mimetype]#</td>
  </tr>#{/mime}#
  #{/parser}#
  <tr class="TableCellDark">
    <td colspan="3" class="small" ><input type="submit" name="parserSettings" value="Submit" class="btn btn-primary"/></td>
  </tr>
</table>
</fieldset>
<fieldset><legend id="pdfparser">PDF Parser Attributes</legend>
<p>
  This is an experimental setting which makes it possible to split PDF documents into individual index entries.
  Every page will become a single index hit and the URL is artificially extended with a post/get attribute value containing
  the page number as value. When such a URL is displayed within a search result, then the post/get attribute is transformed into an anchor hash link.
  This makes it possible to view the individual page directly in the pdf.js viewer built into Firefox,
  for reference see https://github.com/mozilla/pdf.js/wiki/Viewer-options
</p>
<!--
  PDF options table: two setting rows (the individualPages checkbox and the
  individualPagePropertyname text field) followed by the submit row.
  The setting rows have two cells each, so the submit cell spans two columns.
  Each input points at its caption cell through aria-labelledby.
-->
<table border="0">
  <tr class="TableCellLight">
    <td class="small" align="right" width="90" id="individualPagesLabel">Split PDF</td>
    <td class="small" align="left" width="300"><input type="checkbox" name="individualPages" aria-labelledby="individualPagesLabel" #(individualPages)#::checked="checked" #(/individualPages)#/></td>
  </tr>
  <tr class="TableCellLight">
    <td class="small" align="right" id="individualPagePropertynameLabel">Property Name</td>
    <td class="small" align="left"><input type="text" name="individualPagePropertyname" aria-labelledby="individualPagePropertynameLabel" value="#[individualPagePropertyname]#"/></td>
  </tr>
  <tr class="TableCellDark">
    <td colspan="2" class="small" ><input type="submit" name="pdfSettings" value="Submit" class="btn btn-primary"/></td>
  </tr>
</table>
</fieldset>
</form>
    #%env/templates/footer.template%#
  </body>
</html>