commit 248077d3f0da329e3524709d8f080972851a8eaa Author: orbiter Date: Thu Apr 7 19:19:42 2005 +0000 initial load with yacy 0.36 git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@1 6c8d7289-2bf4-0310-a012-ef5d649a1542 diff --git a/addon/YACY-Search.url b/addon/YACY-Search.url new file mode 100644 index 000000000..1e3b94082 --- /dev/null +++ b/addon/YACY-Search.url @@ -0,0 +1,2 @@ +[InternetShortcut] +URL=http://localhost:8080/ diff --git a/addon/testkeys b/addon/testkeys new file mode 100755 index 000000000..123aeef27 Binary files /dev/null and b/addon/testkeys differ diff --git a/addon/yacyInit.readme b/addon/yacyInit.readme new file mode 100644 index 000000000..5aeb247c4 --- /dev/null +++ b/addon/yacyInit.readme @@ -0,0 +1,35 @@ +yacyInit.sh - start/stop script +provided by Matthias Kempka, 26.12.2004 + +INSTALLATION: +This installation is tested on Debian systems. It is likely that other +distributions have other paths. + +1. copy yacyInit.sh (the init skript) to /etc/init.d +2. edit /etc/init.d/yacyInit.sh and set the variables + DAEMON_DIR - the installation directory of yacy (the direcory + where the file startYACY.sh is). + USER - the user whose rights the proxy should have. This should not + be root. The user must have write access to DAEMON_DIR. + PATH - The default path should be ok for most systems. Ensure that java is + in the path, though. +3. Link /etc/init.d in the runlevel-directories, these are the + directories where the scripts reside that are called when entering + a runlevel. On Debian systems, these are /etc/rc0.d, /etc/rc1.d and + so on. I.e., as root issue the following commands. Note, that these + settings are ok for Debian systems and most likely have to be + adapted to work on other systems. Please write to lists@mkempka.de + for comments on that. 
+ +ln -s /etc/init.d/yacyInit.sh /etc/rc0.d/K05yacyInit.sh +ln -s /etc/init.d/yacyInit.sh /etc/rc1.d/K05yacyInit.sh +ln -s /etc/init.d/yacyInit.sh /etc/rc2.d/S95yacyInit.sh +ln -s /etc/init.d/yacyInit.sh /etc/rc3.d/S95yacyInit.sh +ln -s /etc/init.d/yacyInit.sh /etc/rc4.d/S95yacyInit.sh +ln -s /etc/init.d/yacyInit.sh /etc/rc5.d/S95yacyInit.sh +ln -s /etc/init.d/yacyInit.sh /etc/rc6.d/K05yacyInit.sh + + +TODO: +- find log possibility when starting via the start-skript +- create concept which user should start yacy diff --git a/addon/yacyInit.sh b/addon/yacyInit.sh new file mode 100755 index 000000000..189757239 --- /dev/null +++ b/addon/yacyInit.sh @@ -0,0 +1,78 @@ +#! /bin/sh +# +# init script for the HTTP Proxy: yacy +# +# Provided by Matthias Kempka, 26.12.2004 + +PATH=/sbin:/bin:/usr/sbin:/usr/local/bin:/usr/bin #ensure java is in the path +DAEMON_DIR=/opt/yacy #installation directory +USER=yacy #set to the user whose rights the proxy will gain + + +CLASSPATH=$DAEMON_DIR/classes +DAEMON=$DAEMON_DIR/startYACY.sh +NAME="yacy" +DESC="Yacy HTTP Proxy" +PID_FILE=/var/run/$NAME.pid + +# Don't run if not installed +test -f $DAEMON || exit 0 + +JAVA=$(which java) + +if [ -f $PID_FILE ]; then + pid=$(cat "$PID_FILE") + pidno=$( ps ax | grep "$pid" | awk '{ print $1 }' | grep "$pid" ) +fi + +case "$1" in + start) + if [ -n "$pidno" ]; then + echo "already running" + exit 0 + fi + echo -n "Starting $DESC: " + start-stop-daemon --start --background --make-pidfile --chuid $USER\ + --pidfile $PID_FILE --startas $JAVA\ + -- -classpath $CLASSPATH yacy $DAEMON_DIR + echo "$NAME." + ;; + + stop) + if [ -n "$pidno" ]; then + echo -n "Stopping $DESC: " + cd $DAEMON_DIR + ./stopYACY.sh + timeout=20 + while [ -n "$pidno" ] + do + let timeout=$timeout-1 + if [ $timeout -eq 0 ]; then + start-stop-daemon --stop --pidfile $PID_FILE --oknodo + break + fi + echo -n "." + sleep 1 + pidno=$( ps ax | grep $pid | awk '{ print $1 }' | grep $pid ) + done + echo "$NAME." 
+ cd - + exit 0 + fi + echo "not running." + ;; + + restart) + $0 stop + sleep 1 + $0 start + ;; + *) + N=/etc/init.d/yacyInit.sh + # echo "Usage: $N {start|stop|restart|reload|force-reload}" >&2 + echo "Usage: $N {start|stop|restart}" >&2 + exit 1 + ;; +esac + +exit 0 diff --git a/addon/yacybar.xpi b/addon/yacybar.xpi new file mode 100644 index 000000000..627fcbddd Binary files /dev/null and b/addon/yacybar.xpi differ diff --git a/doc/Contact.html b/doc/Contact.html new file mode 100644 index 000000000..fd95aa4a9 --- /dev/null +++ b/doc/Contact.html @@ -0,0 +1,34 @@ + + + +YACY: Contact + + + + + + + + + + + + + + +

Contact

+ +

YACY was developed and implemented by Michael Christen. +

You can hire me for professional consultancy, customizations or integrations, not only for the proxy, but also for a broad range of skills in professional enterprise technology. I am specialized on Network Architecture/Security and on Billing Systems in the telecommunication market. If you like to have further information about my professional work, please ask for a CV. +

Any feed-back is welcome! +

Please email me at . Please be specific in the subject, so that I can distinguish your mail from the many spam mails I get every day. +

The email-address presented here is not clickable and shown as an image to prevent that spam-senders can scan and parse the web page for address retrieval. Please re-type the address in your email application. + + + + + diff --git a/doc/Demo.html b/doc/Demo.html new file mode 100644 index 000000000..ed79ceb8c --- /dev/null +++ b/doc/Demo.html @@ -0,0 +1,44 @@ + + + +YACY: Demonstration of Search Interface + + + + + + + + + + + + + + +

Online Demo

+ +

Since YACY is a web-application, you can test it also online! +There are currently a few well-known public installations of YACY that you can use to test the search functions. +You will see there a lot of 'locked' menu's that you can only use if you are the peer-owner. +However, the search function is public.

+

Please respect the search results as a pure proof-of-concept. It's upon you to further enhance the search results by participation in the network with your own peer.

+

If you use any of the following search peers you must agree that the responsibility for the content of the web pages that these peers find does not belong to the peer-owner but belong to the owner of the web server where any found pages are stored.

+ +

Pick one of the following links: +

+ + + + + + diff --git a/doc/Details.html b/doc/Details.html new file mode 100644 index 000000000..abd0886cc --- /dev/null +++ b/doc/Details.html @@ -0,0 +1,87 @@ + + + +YACY Proxy: details + + + + + + + + + + + + + + + +

Details

+ +YACY supports the following features:
+ + +
Built-in Indexing and Search Engine +The proxy 'scrapes' the content that it passes and creates an index that can be shared between all YACY Proxy daemons. +You can use the indexing feature for intranet indexing: +you instantly have a search service at hand to index all intranet-served web pages. +You don't need to set up a separate search service. And the used PLASMA +indexing is not a naive quick-hack but a properly engineered and extremely fast algorithm; +it is capable of indexing a nearly unlimited number of pages, without slowing down the search process. + +
p2p-Based Global Search Engine +The proxy contains an index-sharing p2p-based algorithm which creates a global distributed search engine. +This spawns a world-wide global search index. +The current release is a minimum implementation of this concept and shall prove its functionality. + +
Caching HTTP and transparent HTTPS Proxy +With optional pre-fetching. HTTP 1.1 with GET/HEAD/POST/CONNECT is supported. This is sufficient for nearly all public web pages. +HTTP headers are transparently forwarded. HTTPS connections through target port 443 are transparently forwarded, non-443 connections are suppressed to enhance security. Both (HTTP and HTTPS) proxies share the same proxy port, which is by default port 8080. + +
Privacy +The proxy protects your privacy, even with index sharing switched on. Please see the +privacy section in the documentation.; + +
Security +The proxy can block unwanted access by setting IP filters and http passwords. +You can also enhance security by inspecting the source code, which is completely included. +Check the code and re-build your own proxy. + +
Web/HTTP server +The built-in HTTP server is the interface to the local and global search service; +the server may not only be used to administrate the proxy, but also to serve as an intranet/internet web server. + +
Ideal Internet Cafe Proxy Solution +Every Internet Cafe needs a caching proxy instead only a NAT to route the cafe's client traffic from the internet to maximize bandwidth. +This can only be done using a caching proxy. This is naturally provided by the YACY Proxy. Future versions may also include +billing support functions. + +
Terminal-Based +the proxy does not need to have a window-based environment and can run on a screen-less router; therefore you may run the proxy on your already existing servers, whatever they are since YACY Proxy is written in java and will run also on your platform. + +
Open-Source +This is a simple necessity for an application that implements a server. +Don't use any other server software that does not come with the source code. +Volunteers to extend the proxy are welcome! +If you think you have a great idea how to extend/enhance/fix the proxy, please let me know. + +
Easy Installation +You just need to decompress the release container with your favourite decompressor (zip, rar, sit, tar etc. will do) +and double-click the application wrapper for your OS. No restart necessary. +Just double-click the application wrapper. + +
Licence Model +This is GPL-based freeware/open-source software! The release comes with complete source code. See the license for details. +If you like the software, you may like to hire me for professional consultancy, customizations or integrations. + +
+ + + + + diff --git a/doc/Download.html b/doc/Download.html new file mode 100644 index 000000000..4d25321a8 --- /dev/null +++ b/doc/Download.html @@ -0,0 +1,78 @@ + + + +YaCy: Download + + + + + + + + + + + + + + +

Download

+ +

A first alpha version is available. Please consider that the application may not yet perform well, nor always behave correctly. The basic functionality is available, but may contain bugs. Please see also the releases' 'wishlist.txt' with the list of all not-yet implemented features and known bugs. +

download steps:


+ +

1st Step: Agree With License

+

If you download the software, you must agree to the applications GPL-based license.


+ +

2nd Step: Install Java

+

To run YaCy, you need an installation and/or support for Java2. +You can download the Java Runtime Environment "JRE" from the Sun Microsystems Java Page


+ +

3rd Step: Download YaCy

+

The Release comes in different flavours: a general one with application wrappers for Unix/Linux, Macintosh OS X and Windows, and a specialized Windows version with Windows installer. Please choose from either one. +

+ +

Latest Release: +The latest YaCy-release is 0.36. Download YaCy 0.36 here. +

+ +

+
All current and historic releases: +Please use one of the latest release for production.
+ +


+ + +

4th Step: Proceed With Installation

+

Please go to the installation page. +If you upgrade from a previous version of YaCy, please migrate your data +(simply move the DATA directory to your new application directory).


+ +

Final Step: Your Contribution is Appreciated

+

Open-Source/Freeware needs your contribution!

+Even if you are a non-programmer or first-time user of this software, you can help to + +of this distribution by +
+When you find a bug, please help to further improve the application by sending me a bug-report. +The report should describe a complete set of actions that are necessary to reproduce the error. +Please contact me here. Thank you!

+ + + + + diff --git a/doc/FAQ.html b/doc/FAQ.html new file mode 100644 index 000000000..2cbd62b07 --- /dev/null +++ b/doc/FAQ.html @@ -0,0 +1,168 @@ + + +YACY: FAQ + + + + + + + + + + + + + + +

FAQ

+ +

YACY is not only a distributed search engine, but also a caching HTTP proxy. +Both application parts benefit from each other.

+ + +

Why is this Search Engine also a Proxy?

+

+We wanted to avoid that you start a search service only for that very time when you submit a search query. +This would give the Search Engine too little online time. +So we looked for a reason why you would like to run the Search Engine during all the time that you are online. +By giving you the surplus value of a caching proxy, the reason was found. +The already built-in blacklist for the proxy is another surplus value. +

+ +

Why is this Proxy also a Search Engine?

+

YACY has a built-in caching proxy, which means that YACY has a lot of indexing information +'for free' without crawling. This may not be a very usual function of a proxy, but a very useful one: +you see a lot of information when you browse the internet and maybe you would like to search exactly +only what you have seen. Beside this interesting feature, you can use YACY to index an intranet +simply by using the proxy; you don't need to additionally set up another search/indexing process +and maybe also of databases. YACY gives you an 'instant' database and an 'instant' search service.

+ +

Can I Crawl The Web With YACY?

+

Yes! You can start your own crawl and trigger also distributed crawling, which means that your peer asks other peers to perform specific crawl tasks. You can specify many parameters that focus your crawl to a limited set of web pages.

+ +

What do you mean with 'Global Search Engine'?

+

The integrated indexing and search service can not only be used locally, but also globally. +Every proxy distributes some contact information to all other proxies that can be reached in the internet, +and proxies exchange but do not copy their indexes to each other. +This is done in such a way, that every peer knows how to address the correct other +peer to retrieve a special search index. +Therefore the community of all proxies spawn a distributed hash table (DHT) +which is used to share the reverse word index (RWI) to all operators and users of the proxies. +The applied logic of distribution and retrieval of RWI's on the DHT combines all participating proxies to +a Distributed Search Engine. +To point out that this is in contrast to local indexing and searching, +we call it a Global Search Engine. +

+ +

Is there a central server? Does the search engine network need one?

+

No. The network architecture does not need a central server, and there is none. +In fact there is a root server which is the 'first' peer, but any other peer has the same rights and tasks to perform. +We still distinguish three different classes of peers: +

+Junior peers can contribute to the network by submitting index files to senior/principal peers without being asked. (This function is currently very limited) +

+ +

Search Engines need a lot of terabytes of space, don't they? How much space do I need on my machine?

+

The global index is shared, but not copied to the peers. +If you run YACY, you need an average of the same space for the index as you need for the cache. +In fact, the global space for the index may reach the space of Terabytes, but not all of that on your machine!

+ +

Search Engines must do crawling, don't they? Do you?

+

No. They can do, but we collect information by simply using the information that passes the proxy. +If you want to crawl, you can do so and start your own crawl job with a certain search depth.

+ +

Does this proxy with search engine create much traffic?

+

No, it may create less. Because it does not need to do crawling, you don't have additional traffic. +In contrast, the proxy does caching which means that double-load of known pages is avoided and this possibly +speeds up your internet connection. Index sharing makes some traffic, but is only performed during idle-time of the proxy and of your internet usage.

+ +

Full-text indexing threads on my machine? This will slow down my internet browsing too much.

+

No, it does not, because indexing is only performed when the proxy is idle. This shifts the computing time to the moment when you read pages and you don't need computing time. Indexing is stopped automatically the next time you retrieve web pages through the proxy.

+ +

Do I need a fast machine? Search Engines need big server farms, don't they?

+

You don't need a fast machine to run YACY. You also don't need a lot of space. You can configure the amount of Megabytes that you want to spend for the cache and the index. Any time-critical task is delayed automatically and takes place when you are idle surfing. Whenever internet pages pass the proxy, any indexing (or if wanted: prefetch-crawling) is interrupted and delayed. The root server runs on a simple 500 MHz/20 GB Linux system. You don't need more.

+ +

Does the caching procedure slow down or delay my internet usage?

+

No. Any file that passes the proxy is streamed through the filter and caching process. At a certain point the information stream is duplicated; one copy is streamed to your browser, the other one to the cache. The files that pass the proxy are not delayed because they are not first stored and then passed to you, but streamed at the same time to you as it is streamed to the cache. Therefore your browser can do layout-while-loading as it would do without the proxy.

+ +

How can you ensure actuality of the search results?

+

Nobody can. How can a 'normal' search engine ensure this? By doing 'brute force crawling'? +We have a better solution for actuality: browsing results of all people who run YACY. +Many people prefer to look at news pages every day, and by passing through the proxy the latest news also arrive in the distributed search engine. +This may take place possibly faster than it happens with a normal/crawling search engine. +And the search results reflect the 'general demand' of information, because it is the average of all contributors.

+ +

I don't want to wait for search results much time. How much time takes a search?

+

Our architecture does not do peer-hopping, we also don't have a TTL (time to live). We expect that search results are instantly responded to the requester. +This can be done by asking the index-owning peer directly which is in fact possible by using DHT's (distributed hash tables). +Because we need some redundancy to catch up missing peers, we ask several peers simultaneously. To collect their responses, we wait a little time of at most 10 seconds. +The user may configure a search time different than 10 seconds, but this is our target of maximum search time.

+ +

I am scared about the fact that the browsing results are distributed. What about privacy?

+

Don't be scared. We have an architecture that hides your private browsing profile from others. For example: none of the words that are indexed from +the pages you have seen is stored in clear text on your computer. Instead, a hash is used which can not be computed back into the original word. Because +Index files travel along peers you cannot state if a specific link was visited by you or another peer-user, so this frees you from being responsible +about the index files on your machine.

+ +

Do I need to set up and run a separate database?

+

No. YACY contains its own database engine, which does not need any extra set-up or configuration.

+ +

What kind of database do you use? Is it fast enough?

+

The database stores either tables or property-lists in filed AVL-Trees. These are height-regulated binary trees. +Such a search tree ensures a logarithmic order of computation time. For example a search within an AVL tree with one million entries needs +an average of 20 comparisons, and at most 24 in the worst case. This database is therefore extremely fast. It lacks an API like +SQL or the LDAP protocol, but it does not need one because it provides a highly specialized database structure. +The missing interface pays off with a very small organization overhead, which improves the speed further in comparison with other databases +with SQL or LDAP api's. This database is fast enough +for millions of indexed web pages, maybe also for billions. The speed is sufficient for billions of pages, but not the file organization +structure at the moment, because the tree-files would become too big. We will provide a solution at the time we need such big tables.

+ +

Why do you use your own database? Why not use mySQL or openLDAP?

+

The database structure we need is very special. One demand is that the entries can be retrieved in logarithmic time and can be +enumerated in any order. Enumeration in a specific order is needed to create conjunctions of tables very fast. This is needed when someone +searches for several words. We implement the search word conjunction by pairwise and simultaneous enumeration/comparison of index trees/sequences. +This forces us to use binary trees as data structure. Another demand is that we need the ability to have many index tables, maybe millions +of tables. The size of the tables may be not big in average, but we need many of them. This is in contrast of the organization of +relational databases, where the focus is on management of very large tables, but not of many of them. A third demand is the ease of +installation and maintenance: the user shall not be forced to install a RDBMS first, care about tablespaces and such. The integrated +database is completely service-free.

+ +

What does Senior Mode mean? What is Junior Mode?

+

Junior peers are such peers that cannot be reached from other peers, while Senior peers can be contacted. +If your peer has global access, it runs in Senior Mode. If it is hidden from others, it is in Junior Mode. +If your peer is in Senior Mode, it is an access point for index sharing and distribution. It can be contacted for search requests and it collects index files +from other peers. If your peer is in Junior Mode, it collects index files from your browsing and distributes them only to other Senior peers, but does not collect index files. +

+ +

Why should I run my proxy in Senior Mode?

+

Some p2p-based file sharing software assign non-contributing peers very low priority. We think that this is not always fair since sometimes the operator +does not always have the choice of opening the firewall or configuring the router accordingly. Our idea of 'information wares' and their exchange can also be +applied to junior peers: they must contribute to the global index by submitting their index actively, while senior peers contribute passively. +Therefore we don't need to give junior peers low priority: they contribute equally, so they may participate equally. +But enough senior peers are needed to make this architecture functional. +Since any peer contributes almost equally, either actively or passively, you should +decide to run in Senior Mode if you can. +

+ +

My proxy says it runs in 'Junior Mode'. How can I run it in Senior Mode?

+

Open your firewall for port 8080 (or the port you configured) or program your router to act as a virtual server.

+ +

How can I help?

+

First of all: run YACY in senior mode. This helps to enrich the global index and to make YACY more attractive. +If you want to add your own code, you are welcome; but please contact the author first and discuss your idea to see how it may fit into the overall architecture. +You can help a lot by simply giving us feed-back or telling us about new ideas. You can also help by telling other people about this software. +And if you find an error or you see an exception, we welcome your defect report. Any feed-back is welcome.

+ + + + + diff --git a/doc/Impressum.html b/doc/Impressum.html new file mode 100644 index 000000000..89144309f --- /dev/null +++ b/doc/Impressum.html @@ -0,0 +1,44 @@ + + + +Impressum + + + + + + + + + + + + + + +

Impressum

+ +

+Dipl. Inf. Michael Christen
+Finkenhofstrasse 9
+60322 Frankfurt am Main
+Germany
+E-Mail:
+

+

+Trotz sorgfältiger inhaltlicher Kontrolle übernehme ich +keine Haftung für die Inhalte externer Links. +Für den Inhalt der verlinkten Seiten sind ausschliesslich +deren Betreiber verantwortlich. +Ich weise darauf hin, dass eine Benutzung der angegebenen Demo-Peers nur unter Beachtung der YaCy Applikationslizenz erlaubt ist. Falls sie die Demo-Peers zur Web-Suche benutzen wollen, so ist dies nur zur Recherche von legalem Inhalt erlaubt. Die Verantwortung für den Inhalt der durch eine YaCy-Suche gefundenen Webseite liegt nicht beim Betreiber des Such-Peers, sondern beim Betreiber der jeweiligen gefundenen Webseite; sie dürfen eine nicht von ihnen vorgenommene YaCy-Installation nur dann benutzen, wenn sie akzeptieren, dass der Peer-Betreiber nicht die Verantwortung für die verlinkten Webseiten übernimmt. +

+ + + + + + diff --git a/doc/Installation.html b/doc/Installation.html new file mode 100644 index 000000000..6d09e6d2e --- /dev/null +++ b/doc/Installation.html @@ -0,0 +1,117 @@ + + + +YACY: Installation + + + + + + + + + + + + + + +

Installation and Start-up of YACY

+ + +

Since we provide YACY as a generic release for all operation systems and a special 'flavour' for Windows users, we distinguish two different processes for installation. Windows users may want to switch to the Windows installation instructions, however, the following description is more general and applies to all operation systems: + +

General Instructions:

+ +

Please follow these steps:

+

+ + + + + +
1st Step:
de-compress the release
+

After downloading +the latest release, simply decompress the archive with your favourite tool +(which can be WinRar or WinZip on Windows, or Stuffit Expander on Mac OS X; Linux +users type 'gunzip <release>.tar.gz' and 'tar -xf <release>.tar') and move the result to any place you want.

+

If you upgrade from a previous version of YACY, please migrate your settings and data. +This is very easy: simply move (not copy) your DATA directory from the application root directory of the old YACY installation to the new application root directory. If done so, you don't need to do the other remaining configuration steps below again.

+
2nd Step:
Configure Network Settings
+

Change the proxy settings either in your network configuration or directly in your browser. Check the 'Use HTTP Proxy' flag and configure the IP and port according to the location of the proxy. If you do a single-user installation without changing the configuration in #2, the IP/Host shall be set to '127.0.0.1' or 'localhost', and the Port shall be set to '8080'.

+
3rd Step:
Start YACY
+

We supply some wrapper shell scripts to start the java processes: +

    +
  • on a MS-Windows system, double-click the file 'startYACY.bat'
  • +
  • on a Mac OS X system, double-click the file 'startYACY.command'
  • +
  • on a Linux system, start the file 'startYACY.sh'
  • +
+

+
4th Step:
Administrate the proxy
+

After you started YACY, a terminal window will come up. +That's the application; no windows, no user interface. +You can now access YACY's administration interface by browsing to
+http://localhost:8080
+See the 'Settings' menu: you should set an administration password and check the access rules. +The default settings are fine, so please change them only if you know what they mean. +

5th Step:
Use YACY and its search service
+

Browse the internet using your web-browser. You should notice that your actions take effect as cache fill/cache hit logs in the httpProxy's terminal window. Whenever you visited a page through the proxy, the page is indexed and can be searched using the search page at +http://localhost:8080. +Please be aware that if your settings allow to access the http-server, then anybody else can also search your index as well. If you don't want this, you must set the 'IP-Number filter' of the 'Server Access Settings' in the 'Settings' menu to a string that applies to your local network scheme, like +'localhost,127.0.0.1,192.168*,10*', which should be fine in most cases. +

+

+ +

Instructions for Windows and the Internet Explorer

+ +

+ + + + + +
1st Step:
Run Installer
+

The Windows release comes with its own Installer in a single file. Just double-click the Installer file.

+

If you upgrade from a previous version of YACY, please migrate your settings and data. +This is very easy: simply move (not copy) your DATA directory from the application root directory of the old proxy installation +to the new application root directory. If done so, you don't need to do the other remaining configuration steps below again.

+
2nd Step:
Configure Browser
+

In your Internet Explorer, open 'Extras' -> 'Internet Options':
+

+

Select 'Connections':
+

+

Click on 'Settings' of the 'LAN-Settings', even if you are using a dial-up connection:
+

+

Check the 'Proxyserver' check-box:
+

+

Enter the location of the YACY server. If YACY runs on the same machine as the Browser, set 'localhost'. If you have not changed the initial configuration, the port is '8080'. Check the 'No Proxy for local address' button. Then hit 'Extended': +

+

Un-check the 'Use the same server for all protocols' - button. Then remove the proxy setting from 'FTP', 'Gopher' and 'Socks'. In the 'Exceptions' field, enter 'localhost;192.168;10': +

+

Close all windows by clicking on 'Ok'

+
3rd Step:
Start YACY
+

The installer creates a link to the application on the desktop. Just double-click the 'YACY Console' icon.

+
4th Step:
Administrate YACY
+

After you started YACY, a terminal window will come up. +That's the application; no windows, no user interface. +You can now access YACY's administration interface by browsing to
+http://localhost:8080
+See the 'Settings' menu: you should set an administration password and check the access rules. +The default settings are fine, so please change them only if you know what they mean. +

5th Step:
Use YACY and its search service
+

Browse the internet using your web-browser. You should notice that your actions take effect as cache fill/cache hit logs in the httpProxy's terminal window. Whenever you visited a page through the proxy, the page is indexed and can be searched using the search page at +http://localhost:8080. +Please be aware that if your settings allow to access the http-server, then anybody else can also search your index as well. If you don't want this, you must set the 'IP-Number filter' of the 'Server Access Settings' in the 'Settings' menu to a string that applies to your local network scheme, like +'localhost,127.0.0.1,192.168*,10*', which should be fine in most cases. +

+

+ + + + + + diff --git a/doc/License.html b/doc/License.html new file mode 100644 index 000000000..9b0cf2fcf --- /dev/null +++ b/doc/License.html @@ -0,0 +1,65 @@ + + + +YACY: License + + + + + + + + + + + + + + +

License

+ +

The copyright for YaCy belongs to Michael Peter Christen; Frankfurt, Germany; . +

This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version.

+

This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details.

+

It is allowed to freely copy and distribute this software +as long as it is mentioned in connection with a link to the Anomic home page "http://www.anomic.de", or the YaCy home page "http://www.yacy.net". +You are allowed to use this software for any legal purpose, private or commercial.

+

You agree that Anomic/YaCy Software and the author(s) is/are not responsible for cost, loss of data or any harm that +may be caused by usage of this software or this documentation. The usage of this software is on your own +risk. The installation and usage (starting/running) of this software may allow other people or application +to access your computer and any attached devices and is highly dependent on the configuration of the software +which must be done by the user of the software; Anomic/YaCy Software and the author(s) is/are also not responsible +for proper configuration and usage of the software, even if provoked by documentation provided together with +the software.

+

This software is provided as-is, including possible bugs, errors, misbehaviours, +failures, crashes, destructive effects to your software, data or system. +Users and administrators of this software use and operate it at their own risk.

+

Attention: YaCy is a content provider for a peer-to-peer based index sharing +and distribution network. Its purpose is to provide a world-wide global search index. +This application generates and distributes an index of files that pass the proxy and of files that are generated by web crawling, and crawling jobs that can be requested by other YaCy installations. +If you run the application you must agree that this software distributes automatically certain information about your system and network configuration and it also distributes index files that are created from the internet content that passed your system. +You must also agree that your system receives index transmissions from other peers and that your system is used by other peers to load internet content, index it and pass it back to other peers. +The author(s) of YaCy cannot guarantee that a misuse of the index passing procedure, causing harm to your system or privacy or causing illegal content or harmful behavior to other internet servers, can be avoided; the author(s) take no responsibility for such cases; you must agree that you take complete responsibility for such cases. +You must agree that you take complete responsibility about the information that is stored on your system, even if this information was passed to you from other YaCy-peers without notification. +You must also agree that you are responsible to protect any users of your search peer against the use of your peer to index/search for illegal content; your local law applies. +You are not allowed to use this software to create or search for any information that is banned or illegal in your country. +If you want to search for illegal information, you are not allowed to use this software in any way, neither using it as private installation nor using any other available YaCy search peer as source for such information. +

+ +

If you like to make changes to the software, you may do it. But if you re-distribute +the software you must maintain the original copyright notice including this complete license statement.

+ + + + + diff --git a/doc/Links.html b/doc/Links.html new file mode 100644 index 000000000..ed9ea054f --- /dev/null +++ b/doc/Links.html @@ -0,0 +1,56 @@ + + + +YaCy: Related Links + + + + + + + + + + + + + + +

Links

+ +

Information related to YaCy and search engine / p2p technology


+ +

Other YaCy Project Sites +


+ +

Publications about YaCy +


+ +

Partner Sites +


+ + + + + + diff --git a/doc/News.html b/doc/News.html new file mode 100644 index 000000000..da9216136 --- /dev/null +++ b/doc/News.html @@ -0,0 +1,577 @@ + + + +YaCy: News + + + + + + + + + + + + + + +

News

+ +

This is essentially the release change-log. We have a release roadmap and releases published here will (hopefully) match the milestones from the roadmap's vision. + +

Release list in reverse order: + + + + +

v0.36_build20050326 +

+ +

v0.35_build20050306 +

+ + +

v0.34_build20050208 +

+ +

v0.33_build20050107 +

+ +

v0.32_build20041221 +

+ + +

v0.31_build20041209 +

+ +

v0.30_build20041125 +

+ + +

v0.29_build20041022 +

+ +

v0.28_build20041001 +

+ +

v0.27_build20040924 +

+ +

v0.26_build20040916 +

+ +

v0.25_build20040822 +

+ +

v0.24_build20040816 +

+ +

v0.23_build20040808 +

+ +

v0.22_build20040711 +

+ +

v0.21_build20040627 +

After an announcement on freshmeat.net we got many hits in the newly built p2p-network. We learned from the p2p-propagation behavior and +implemented a lot of new routines to stabilize the YACY network. +

+ +

v0.20_build20040614 +

The first step into the p2p-world: introduction of the YACY (yet another cyberspace) p2p network propagation and information wares distribution system. In this release YACY enables a rudimentary index exchange so that you can use YACY to bootstrap a world-wide distributed search engine.

+ + +

v0.16_build20040503 +

This release is a major step to make the proxy enterprise-ready: we introduced several security mechanisms and access +restrictions for the proxy and the server. Every security setting can be configured through a web page. Thanks to the new +HTTPS proxy, the proxy can now be considered as 'complete'. +

+ +

v0.15_build20040318 +

+ + +

v0.14-build20040213 +

+ +

v0.13-build20040210 +

+ +

v0.12-build20040204 +

+ +

v0.11-build20040124 +

+ +

v0.1-build20040119 +

+ +

build20040110 +

+ + +

build20040107 +

+ +

build20040105 +

+ +

build20031229 +

+ +

build20031218 +

+ +

build20031215 +

+ + + + + + diff --git a/doc/Platforms.html b/doc/Platforms.html new file mode 100644 index 000000000..76d80f75a --- /dev/null +++ b/doc/Platforms.html @@ -0,0 +1,50 @@ + + + +YACY: Platforms + + + + + + + + + + + + + + +

Supported Platforms

+ +

+ +

+ + + + + + + + + +

Any Java2 System
+

YACY is written entirely in Java (Version Java2 / 1.2 and up). Any system that supports Java2 can run YACY. That means it runs on almost all commercial and free platforms and operating systems that are around. This includes of course Mac OS X, Windows (NT, W2K, XP) and Linux systems. For Java support of your platform, please see the installation documentation.

+

Windows
+The Proxy runs seamlessly on any Windows System and comes with an easy-to-use installer application. Just install and use the proxy like any other Windows application. Please download the Windows Release Flavour of YACY instead of the generic one. +

Mac OS X
+The general distribution includes a Mac OS X wrapper shell, which is double-clickable. The application can be monitored and administrated through a web server that you can open with your Safari browser. +

Linux/Unix
+The proxy environment is terminal-based, not windows-based. You can start the proxy in a console, and monitor its actions through a log file. A wrapper shell script for easy startup is included. You can administrate the proxy remotely through the built-in http server with any browser. +

+ + + + + diff --git a/doc/Technology.html b/doc/Technology.html new file mode 100644 index 000000000..361c81ae3 --- /dev/null +++ b/doc/Technology.html @@ -0,0 +1,96 @@ + + + +YACY: Technology + + + + + + + + + + + + + + +

Technology

+ +

YACY consists mainly of four parts: the p2p index exchange protocol, based on http; a spider/indexer; a caching http proxy which is not only a simple surplus value but also an information provider for the indexing engine; and the built-in database engine which makes installation and maintenance of yacy very easy.

+
+

All parts of this architecture are included in the YACY distribution. The YACY search engine can be accessed through the built-in http server.

+ + +

Algorithms

+ +

For our software architecture we emphasize that always the appropriate data structure and algorithm is used +to ensure maximum performance. The right combination of structure and algorithm results in an ideal +order of computability which is the key to performant application design. We reject the myth that +the Java language is not appropriate for time-critical software; in contrast to that myth we +believe that Java with its clean and safe-to-use dynamic data structures is most notably qualified +to implement highly complex algorithms.

+ +

+ + + + + + + + + +
Transparent HTTP and HTTPS Proxy and Caching: +The proxy implementation provides fast content-passing, since every file that the proxy reads from the targeted server is streamed directly to the accessing client while the stream is copied to a RAM cache for later processing. This ensures that the proxy mode is extremely fast and does not interrupt browsing. Whenever the Proxy idles, it processes its RAM cache to perform indexing and storage to a local file of the cache. Every HTTP header that was passed along with the file is stored in a database and is re-used later on when a cache hit occurs. The proxy function has maximum priority above other tasks, like cache management or indexing functions. +
Fast Database Implementation: +We implemented a file-based AVL tree upon a random-access-file. Tree nodes can be dynamically allocated and de-allocated and an unused-node list is maintained. For the PLASMA search algorithm, an ordered access to search results is necessary, therefore we needed an indexing mechanism which stores the index in an ordered way. The database supports such access, and the resulting database tables are stored as a single file. The database does not need any set-up or maintenance tasks that must be done by an administrator. It is completely self-organizing. The AVL property ensures maximum performance in terms of algorithmic order. Any database may grow to an unthinkable number of records: with one billion records a database request needs a theoretical maximum number of only 44 comparisons. +
Sophisticated Page Indexing: +The page indexing is done by the creation of a 'reverse word index': every page is parsed, the words are extracted and for every word a database table is maintained. The database tables are held in a file-based hash-table, so accessing a word index is extremely fast, resulting in an extremely fast search. Conjunctions of search words are easily found, because the search results for each word are ordered and can be pairwise enumerated. In terms of computability: the order of the searched access effort to the word index for a single word is O(log <number of words in database>). It is always constant fast, since the data structure provides a 'pre-calculated' result. This means, the result speed is independent of the number of indexed pages! It only slows down for a page-ranking, and is multiplied by the number of words that are searched simultaneously. That means, the search effort for n words is O(n * log w). You can't do better (consider that n is always small, since you rarely search for more than 10 words). +
Massive-Parallel Distributed Search Engine: +This technology is the driving force behind the YACY implementation. A DHT (Distributed Hash Table) - like technique will be used to publish the word cache. The idea is, that word indexes travel along the peers before a search request arrives at a specific word index. A search for a specific word would be performed by computing the peer and pointing directly to the peer that hosts the index. No peer-hopping or such, since search requests are time-critical (the user usually does not want to wait long). Redundancy must be implemented as well, to cope with the (frequent) occasions of disappearing peers. Privacy is ensured, since no peer can know which word index is stored, updated or passed since word indexes are stored under a word hash, not the word itself. Search mis-use is regulated by the p2p-laws of give-and-take: every peer must contribute in the crawl/proxy-and-index - process before it is allowed to search. +

+ +

Privacy

+Sharing the index to other users may concern you about your privacy. We have made great efforts to keep and secure your privacy: + + +
Private Index and Index Movement +Your local word index does not only contain information that you created by surfing the internet, but also entries from other peers. +Word index files travel along the proxy peers to form a distributed hash table. Therefore nobody can argue that information that +is provided by your peer was also retrieved by your peer and therefore by your personal use of the internet. In fact it is very unlikely that +information that can be found on your peer was created by you, since the search process targets only peers where it is likely because +of the movement of the index to form the distributed hash table. During a test phase, all word indexes on your peer will be accessible. +The future production release will constrain searches to index entries on your peer that have been created by other peers, which will +ensure complete browsing privacy. + +
Word Index Storage and Content Responsibility +The words that are stored in your local word index are stored using a word hash. That means that not any word is stored, but only the word hash. +You cannot find any word that is indexed as clear text. You can also not re-translate the word hashes into the original word. This means that +you don't know actually which words are stored in your system. The positive effect is, that you cannot be responsible for the words that +are stored in your peer. But if you want to deny storage of specific words, you can put them into the 'bluelist' (in the file httpProxy.bluelist). +No word that is in the bluelist can be stored, searched or even viewed through the proxy. + +
Peer Communication Encryption +Information that is passed from one peer to another is encoded. That means that no information like search words, +indexed URL's or URL descriptions is transported in clear text. Network sniffers cannot see the content that is exchanged. +We also implemented an encryption method, where a temporary key, created by the requesting peer is used to encrypt the response +(not yet active in test release, but non-ascii/base64 - encoding is in place). + +
Access Restrictions +The proxy contains a two-stage access control: IP filter check and an account/password gateway that can be configured to access the proxy. +The default setting denies access to your proxy from the internet, but allows usage from the intranet. The proxy and its security settings +can be configured using the built-in web server for service pages; the access to these service pages themselves can also be restricted again by using +an IP filter and an account/password combination. + +
+ + + + + diff --git a/doc/Volunteers.html b/doc/Volunteers.html new file mode 100644 index 000000000..0a7ddf92f --- /dev/null +++ b/doc/Volunteers.html @@ -0,0 +1,49 @@ + + + +YACY: Volunteers + + + + + + + + + + + + + + +

Volunteers

+ +

YACY's architecture with the PLASMA search engine and +the P2P-based distributed index was developed and implemented by Michael Peter Christen. + +

However, this project is just at the beginning and needs contributions from other developers, since there are many ideas how this project can move on to a broad range of users. + +

There are also some long-term targets. If the index-sharing someday works fine, maybe browser producers like Opera or Konqueror would like to use the p2p-se to index the browser's cache and therefore provide each user with an open-source, free search engine. + +

At this time, some contributions already have been made. These are: +

+ +

Further volunteers are very welcome. +Please contact me if you have something that you are willing to do for this project. In any case: before you start working on something, please ask me in advance if I would like to integrate it later. Thank You!

+ + + + + diff --git a/doc/grafics/TutorialDWin1.gif b/doc/grafics/TutorialDWin1.gif new file mode 100644 index 000000000..e0dc07e33 Binary files /dev/null and b/doc/grafics/TutorialDWin1.gif differ diff --git a/doc/grafics/TutorialDWin2.gif b/doc/grafics/TutorialDWin2.gif new file mode 100644 index 000000000..808d004ef Binary files /dev/null and b/doc/grafics/TutorialDWin2.gif differ diff --git a/doc/grafics/TutorialDWin3.gif b/doc/grafics/TutorialDWin3.gif new file mode 100644 index 000000000..8632c5ca7 Binary files /dev/null and b/doc/grafics/TutorialDWin3.gif differ diff --git a/doc/grafics/TutorialDWin4.gif b/doc/grafics/TutorialDWin4.gif new file mode 100644 index 000000000..0d0f515c9 Binary files /dev/null and b/doc/grafics/TutorialDWin4.gif differ diff --git a/doc/grafics/TutorialDWin5.gif b/doc/grafics/TutorialDWin5.gif new file mode 100644 index 000000000..52ad38010 Binary files /dev/null and b/doc/grafics/TutorialDWin5.gif differ diff --git a/doc/grafics/TutorialDWin6.gif b/doc/grafics/TutorialDWin6.gif new file mode 100644 index 000000000..02bae4775 Binary files /dev/null and b/doc/grafics/TutorialDWin6.gif differ diff --git a/doc/grafics/architecture.gif b/doc/grafics/architecture.gif new file mode 100644 index 000000000..2a1b18319 Binary files /dev/null and b/doc/grafics/architecture.gif differ diff --git a/doc/grafics/mcemailh.gif b/doc/grafics/mcemailh.gif new file mode 100644 index 000000000..7f63a4c41 Binary files /dev/null and b/doc/grafics/mcemailh.gif differ diff --git a/doc/grafics/startupLinux.gif b/doc/grafics/startupLinux.gif new file mode 100644 index 000000000..24272c5a7 Binary files /dev/null and b/doc/grafics/startupLinux.gif differ diff --git a/doc/grafics/startupMac.gif b/doc/grafics/startupMac.gif new file mode 100644 index 000000000..be715cb7f Binary files /dev/null and b/doc/grafics/startupMac.gif differ diff --git a/doc/grafics/startupWin.gif b/doc/grafics/startupWin.gif new file mode 100644 index 000000000..60d82d02d Binary files 
/dev/null and b/doc/grafics/startupWin.gif differ diff --git a/doc/grafics/yacy.gif b/doc/grafics/yacy.gif new file mode 100644 index 000000000..06e845e42 Binary files /dev/null and b/doc/grafics/yacy.gif differ diff --git a/doc/index.html b/doc/index.html new file mode 100644 index 000000000..aee47e6ce --- /dev/null +++ b/doc/index.html @@ -0,0 +1,56 @@ + + + +YACY: a freeware caching HTTP Proxy in Java with integrated search engine + + + + + + + + + + + + + + +

YACY

p2p-based distributed Web Search Engine


+ +

Für eine deutsche Dokumentation sehen sie bitte hier und dort

+
+ + +
  + +The YACY project is a new approach to build a p2p-based Web indexing network.

+
    +
  • Crawl your own pages or start distributed crawling
  • +
  • Search your own or the global index
  • +

    +
  • Built-in caching http proxy, but usage of the proxy is not a requisite
  • +
  • Indexing benefits from the proxy cache; private information is not stored or indexed
  • +

    +
  • Filter unwanted content like ad- or spyware; share your web-blacklist with other peers
  • +
  • Extension to DNS: use your peer name as domain name!
  • +

    +
  • Easy to install! No additional database required!
  • +

    +
  • No central server!
  • +
  • GPL'ed, freeware
  • +
+
+Start today to contribute to the global index with your own YACY peer! + +
+ + + + + diff --git a/doc/navigation.js b/doc/navigation.js new file mode 100644 index 000000000..8d35015ef --- /dev/null +++ b/doc/navigation.js @@ -0,0 +1,122 @@ +var appname = "YACY: a Java Freeware P2P-Based Search Engine with Caching HTTP Proxy"; +var thismenu = new Array( + "index","FAQ","Details","Technology","Platforms","News","Demo","License","Download", + "Installation","Volunteers","Deutsches Forum@http://www.yacy-forum.de","English Forum@http://sourceforge.net/forum/?group_id=116142","Links","Contact","","Impressum"); +var mainmenu = new Array( + "YACY Home@http://www.yacy.net/index.html", + "Products@http://www.yacy.net/Products/index.html", + "Consulting@http://www.yacy.net/Consulting/index.html", + "Profile@http://www.yacy.net/Profile/index.html", + "Impressum@http://www.yacy.net/Impressum/index.html"); +var root = "http://www.yacy.net/"; + +function headline() { + document.writeln(""); + document.writeln( + "" + + "" + + "" + + "" + + "" + + "" + + "" + + "
" + + "

Y A C Y    -   D I S T R I B U T E D    P 2 P - B A S E D    W E B   I N D E X I N G
"); + //tmenu(); + document.writeln("
"); + document.writeln("
"); +} + +function filename() { + var p = window.location.pathname; + return p.substring(p.lastIndexOf("/") + 1); +} + +function docname() { + var f = filename() + return f.substring(0, f.indexOf(".")); +} + +function lmenu() { + document.writeln(""); + var dn = docname(); + var printname; + var pos; + for (var i = 0; i < thismenu.length; ++i) { + document.writeln(""); + if (thismenu[i] == "index") printname = "About"; else printname = thismenu[i]; + if (thismenu[i] == "") { + document.writeln(""); + } else if (dn == thismenu[i]) { + document.writeln(""); + } else { + pos = thismenu[i].indexOf("@"); + if (pos >= 0) + document.writeln(""); + else + document.writeln(""); + } + } + document.writeln("
 
 " + printname + "
 " + thismenu[i].substring(0, pos) + "
 " + printname + "
"); +} + +function tmenu() { + //document.writeln(""); + + //document.writeln(""); + //document.writeln("
"); + var linkpath; + var printname; + var pos; + pos = mainmenu[0].indexOf("@"); + linkpath = mainmenu[0].substring(pos + 1); + printname = mainmenu[0].substring(0, pos); + document.writeln("" + printname + " "); + for (var i = 1; i < mainmenu.length; ++i) { + pos = mainmenu[i].indexOf("@"); + linkpath = mainmenu[i].substring(pos + 1); + printname = mainmenu[i].substring(0, pos); + document.writeln("·  " + printname + " "); + } + //document.writeln("
"); +} + + +function globalheader() { + document.writeln(""); + + document.writeln(""); + //document.writeln(""); + document.writeln(""); + //document.writeln(""); + document.writeln(""); + + document.writeln("
"); + //tmenu(); + document.writeln("
"); headline(); document.writeln("
" + + "" + + " " + + " " + + " " + + " " + + " " + + "
"); + lmenu(); + document.writeln(" "); + document.writeln(" "); + document.writeln(" "); + + //if ((docname() != "index") && (docname() != "indexd")) { + // document.writeln(" "); + // document.writeln(" "); + //} + + document.writeln("
" + appname + "

"); +} + +function globalfooter() { + document.writeln("

"); + document.writeln("
"); + document.writeln("
" + + "
"); +} diff --git a/doc/roadmap.txt b/doc/roadmap.txt new file mode 100644 index 000000000..3d1efc80d --- /dev/null +++ b/doc/roadmap.txt @@ -0,0 +1,73 @@ +YACY Release Road Map + +The milestones listed here may change while milestones are reached. +This is just a vision of the possible evolution of the proxy. + +0.1 http proxy with cache indexing + - database + - caching http proxy + - search engine on cache entries, implementationb of reverse word indexes ("RWI"'s) + - http server to access search function + - proxy configuration through built-in httpd + - simple prefetch without prefetch schemes + - firewall for proxy and httpd access protection + +0.2 peer fundamentals + - first information 'ware': seeds, which are peer information records + - seed categories: + 'junior' - for peers without server (like 'LO-ID'), + 'senior' - for peers with server (like 'HI-ID'), + 'principal' - for senior peers with ftp upload for superseeds lists + - first p2p protocol commands: + * 'Hello' - network bootstraping and seed propagation + * 'Query' - for peer information and access granting + * 'Search' - global naive search of RWI's throughout 'some' remote (other peer) RWI's + - ftp integration for principal peers: to upload seed list-files to dedicated places, listed in superseed-files + - blacklist distribution + +0.3 distributed hash tables for reverse word indexes + - next p2p-command: + * 'Tell' - for information wares distribution + - establishment of the DHT's structure for all information wares + - performance of search of RWI's using the DHT's + - more information ware: L-URL's (loaded-url register) and DHT's + +0.4 local proxy prefetch and separate spider + - prefetch schemes and local prefetch execution + - spider order entry + interface + - blacklists / hotlists + - search patterns + - search depth + +0.5 advanced search functions + - scheduled and exceptional ware propagation and distribution + - next p2p-command: + * 'Check' - for L-URL requesting + +0.6 gloabl 
prefetching and global spidering + - non-proxy mode, using a specialized prefetch schemes for such cases + - more information wares: P-URL's (prefetch url's, not loaded), deadlinks + - next p2p-command to support standalone-mode (non-proxy) + * 'Job' - requesting of P-URL's + - global prefetch execution with P-URL's + - delegated index conjunction + - delegated description provisioning + +0.7 ? split-off of mature code + - kelondro database + - httpd/http-proxy + - plasma indexing/searching + - p2p-protocol handler + +0.8 ? more interfaces + - konqueror cache access + - mozilla cache access + - squid cache access + - telnet command interface + +0.9 ? pre-production release + - bugfixes for all known bugs + - user demands integration + - proprietary functions + +1.0 production release diff --git a/doc/style.css b/doc/style.css new file mode 100644 index 000000000..77409ec7b --- /dev/null +++ b/doc/style.css @@ -0,0 +1,63 @@ + +body { + background-color:#F8F8FF; +} + +a, body, div, li, ol, span, table, td, tr, ul { + color:#000000; + bgcolor:#000000; + font-family:Helvetica, sans-serif; + font-size:13px; + font-style:normal; + line-height:14px; + margin-top:0px; + margin-bottom:0px; +} + +body, div, li, ol, span, table, td, tr, ul { + text-decoration:none; +} + +a:hover { + color:#0000FF; +} + +a.dark:hover { + color:#0088BB; + font-weight:bold; + text-decoration:none; +} + +a.white:hover { + color:#EEFF00; + font-weight:bold; + text-decoration:none; +} + +*.white { + color:#FFFFFF; + font-weight:bold; + text-decoration:none; + font-size:11px; +} + +*.yellow { + color:#EEDD00; + font-weight:bold; + text-decoration:none; + font-size:11px; +} + +*.dark { + color:#111111; + font-weight:bold; + text-decoration:none; + font-size:11px; +} + +*.blue { + color:#556699; + font-weight:bold; + text-decoration:none; + font-size:11px; +} diff --git a/gpl.txt b/gpl.txt new file mode 100644 index 000000000..45645b4b5 --- /dev/null +++ b/gpl.txt @@ -0,0 +1,340 @@ + GNU GENERAL 
PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc. + 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Library General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. 
+ + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. 
The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. 
(Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. 
You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. 
+ +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. 
If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. 
If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + , 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Library General +Public License instead of this License. 
diff --git a/htroot/Blacklist_p.html b/htroot/Blacklist_p.html new file mode 100644 index 000000000..941f74d7a --- /dev/null +++ b/htroot/Blacklist_p.html @@ -0,0 +1,121 @@ + + + +YACY: Blacklist Manager +#[metas]# + + +#[header]# +

+

Blacklist

+

This function provides a URL filter to the proxy; any blacklisted URL is blocked +from loading. You can define several blacklists and activate them separately. +You may also provide your blacklists to other peers by sharing them; in turn you may +collect blacklist entries from other peers.

+ + + + + + + + + + + + + +
+ +
+ + + + + + +
+Edit List: + +
+
+New List: + + +
+
+
+
+
+
+ +
+
+

Active List: #[filename]#

+
+these are the domain name / path patterns in this blacklist:
+you can select them here for deletion
+
+ + +
+ +

+Enter a new domain name / path pattern in the form:
+"<domain>/<path-regexpr>":
+
+ + +

+ +Import Blacklist Items from other YACY Peers:
+
+ +Host: +
+ +
+ +

Import Blacklist items from URL:
+

+ +URL: +
+ +
+ +

Import Blacklist items from File:
+

+ +File: +
+ +
+ +
+

+#(status)# +:: +#[item]# was removed from Blacklist +:: +#[item]# was added to the BlackList +#(/status)# + + +#[footer]# + + diff --git a/htroot/Blacklist_p.java b/htroot/Blacklist_p.java new file mode 100644 index 000000000..9687ca60b --- /dev/null +++ b/htroot/Blacklist_p.java @@ -0,0 +1,259 @@ +// Blacklist_p.java +// ----------------------- +// part of the AnomicHTTPProxy +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2004 +// +// This File is contributed by Alexander Schier +// last change: 02.08.2004 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. 
The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. +// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. + +// you must compile this file with +// javac -classpath .:../Classes Blacklist_p.java +// if the shell's current path is HTROOT + +import java.util.*; +import java.io.*; +import de.anomic.tools.*; +import de.anomic.server.*; +import de.anomic.yacy.*; +import de.anomic.http.*; +import de.anomic.plasma.*; +import de.anomic.data.*; + +public class Blacklist_p { + + public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) { + // return variable that accumulates replacements + listManager.switchboard = (plasmaSwitchboard) env; + + listManager.listsPath = new File(listManager.switchboard.getRootPath(),listManager.switchboard.getConfig("listManager.listsPath", "DATA/LISTS")); + + serverObjects prop = new serverObjects(); + String line; + String HTMLout = ""; + String out = ""; + String removeItem = "removeme"; + int numItems=0; + int i=0; + + String filenames[] = listManager.getListslistArray("proxyBlackLists"); + String filename = ""; + + + if(post != null && post.containsKey("blackLists")){ //Blacklist selected + filename = (String)post.get("blackLists"); + }else if(post 
!= null && post.containsKey("filename")){ + filename = (String)post.get("filename"); + }else if(filenames.length > 0){ //first BlackList + filename = filenames[0]; + }else{ //No BlackList + //No file + filename = ""; //? + System.out.println("DEBUG: No Blacklist found"); + } + prop.put("status", 0);//nothing + + + //List Management + + //Del list + if( post != null && post.containsKey("dellistbutton") ){ + + File BlackListFile = new File(listManager.listsPath, filename); + BlackListFile.delete(); + + //Remove from all BlackLists Lists + listManager.removeListFromListslist("proxyBlackLists", filename); + listManager.removeListFromListslist("proxyBlackListsActive", filename); + listManager.removeListFromListslist("proxyBlackListsShared", filename); + + //reload Blacklists + listManager.reloadBlacklists(); + + filenames = listManager.getListslistArray("proxyBlackLists"); + if(filenames.length > 0){ + filename = filenames[0]; + } + }//del list + + if( post != null && post.containsKey("newlistbutton") ){ + + String newList = (String)post.get("newlist"); + if( !newList.endsWith(".black") ){ + newList += ".black"; + } + + filename = newList; //to select it in the returnes Document + try{ + File newFile = new File(listManager.listsPath, newList); + newFile.createNewFile(); + + listManager.addListToListslist("proxyBlackLists", newList); + listManager.addListToListslist("proxyBlackListsActive", newList); + listManager.addListToListslist("proxyBlackListsShared", newList); + + }catch(IOException e){} + + }//newlist + + if( post != null && post.containsKey("activatelistbutton") ){ + + if( listManager.ListInListslist("proxyBlackListsActive", filename) ){ + listManager.removeListFromListslist("proxyBlackListsActive", filename); + }else{ //inactive list -> enable + listManager.addListToListslist("proxyBlackListsActive", filename); + } + + listManager.reloadBlacklists(); + } + + if( post != null && post.containsKey("sharelistbutton") ){ + + if( 
listManager.ListInListslist("proxyBlackListsShared", filename) ){ + //Remove from shared BlackLists + listManager.removeListFromListslist("proxyBlackListsShared", filename); + }else{ //inactive list -> enable + listManager.addListToListslist("proxyBlackListsShared", filename); + } + } + //List Management End + + + + Vector list = listManager.getListArray(new File(listManager.listsPath, filename)); + //remove a Item? + if( post != null && post.containsKey("delbutton") && post.containsKey("Itemlist") && !((String)post.get("Itemlist")).equals("") ){ + removeItem = (String)post.get("Itemlist"); + } + + //Read the List + Iterator it = list.iterator(); + while(it.hasNext()){ + line = (String) it.next(); + + if(! (line.startsWith("#") || line.equals("") || line.equals(removeItem)) ){ //Not the item to remove + prop.put("Itemlist_"+numItems+"_item", line); + numItems++; + } + + if(! line.equals(removeItem) ){ + out += line + serverCore.crlfString; //full list + }else{ + prop.put("status", 1);//removed + prop.put("status_item", line); + if (httpdProxyHandler.blackListURLs != null) + httpdProxyHandler.blackListURLs.remove(line); + } + } + prop.put("Itemlist", numItems); + + //Add a new Item + if( post != null && post.containsKey("addbutton") && !((String)post.get("newItem")).equals("") ){ + String newItem = (String)post.get("newItem"); + + //clean http:// + if ( newItem.startsWith("http://") ){ + newItem = newItem.substring(7); + } + + //append "/.*" + int pos = newItem.indexOf("/"); + if (pos < 0) { + // add default empty path pattern + pos = newItem.length(); + newItem = newItem + "/.*"; + } + + out += newItem+"\n"; + + prop.put("Itemlist_"+numItems+"_item", newItem); + numItems++; + prop.put("Itemlist", numItems); + + prop.put("status", 2);//added + prop.put("status_item", newItem);//added + + //add to blacklist + if (httpdProxyHandler.blackListURLs != null) + httpdProxyHandler.blackListURLs.put(newItem.substring(0, pos), newItem.substring(pos + 1)); + } + 
listManager.writeList(new File(listManager.listsPath, filename), out); + + //List known hosts for BlackList retrieval + yacySeed seed; + if( yacyCore.seedDB != null && yacyCore.seedDB.sizeConnected() > 0 ){ //no nullpointer error + Enumeration e = yacyCore.seedDB.seedsConnected(true, false, null); + i=0; + while (e.hasMoreElements()) { + seed = (yacySeed) e.nextElement(); + if (seed != null) { + String Hash = seed.hash; + String Name = seed.get("Name", "nameless"); + prop.put("otherHosts_"+i+"_hash", Hash); + prop.put("otherHosts_"+i+"_name", Name); + i++; + } + } + prop.put("otherHosts", i); + }else{ + //DEBUG: System.out.println("BlackList_p: yacy seed not loaded!"); + } + String BlackLists[] = listManager.getListslistArray("proxyBlackLists"); + + //List BlackLists + for(i=0; i <= BlackLists.length -1;i++){ + prop.put("blackLists_"+i+"_name", BlackLists[i]); + prop.put("blackLists_"+i+"_active", 0); + prop.put("blackLists_"+i+"_shared", 0); + prop.put("blackLists_"+i+"_selected", 0); + if( BlackLists[i].equals(filename) ){ //current List + prop.put("blackLists_"+i+"_selected", 1); + } + if( listManager.ListInListslist("proxyBlackListsActive", BlackLists[i]) ){ + prop.put("blackLists_"+i+"_active", 1); + } + if( listManager.ListInListslist("proxyBlackListsShared", BlackLists[i]) ){ + prop.put("blackLists_"+i+"_shared", 1); + } + } + prop.put("blackLists", i); + + prop.put("filename", filename); + return prop; + } + +} diff --git a/htroot/CacheAdmin_p.html b/htroot/CacheAdmin_p.html new file mode 100644 index 000000000..e4f30099d --- /dev/null +++ b/htroot/CacheAdmin_p.html @@ -0,0 +1,26 @@ + + + +YACY: Local Cache Management +#[metas]# + + +#[header]# +

+

Local Cache


+ +

+The current cache size is #[cachesize]# KB. The maximum cache size is #[cachemax]# KB.
+ + + +
+#[tree]# + + +#[info]# +
+ +#[footer]# + + diff --git a/htroot/CacheAdmin_p.java b/htroot/CacheAdmin_p.java new file mode 100644 index 000000000..0b18d59a3 --- /dev/null +++ b/htroot/CacheAdmin_p.java @@ -0,0 +1,181 @@ +// CacheAdmin_p.java +// ----------------------- +// part of the AnomicHTTPD caching proxy +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2004 +// last major change: 28.06.2003 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. 
+// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. + + +// you must compile this file with +// javac -classpath .:../classes CacheAdmin_p.java +// if the shell's current path is HTROOT + +import java.util.*; +import java.text.*; +import java.io.*; +import java.net.*; +import de.anomic.tools.*; +import de.anomic.server.*; +import de.anomic.yacy.*; +import de.anomic.data.*; +import de.anomic.plasma.*; +import de.anomic.http.*; +import de.anomic.htmlFilter.*; + +public class CacheAdmin_p { + + private static SimpleDateFormat SimpleFormatter = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss"); + public static String dateString(Date date) { + return SimpleFormatter.format(date); + } + + + public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) { + plasmaSwitchboard switchboard = (plasmaSwitchboard) env; + serverObjects prop = new serverObjects(); + + String action = ((post == null) ? "info" : post.get("action", "info")); + String pathString = ((post == null) ? "" : post.get("path", "/")); + String fileString = pathString; + File cache = new File(switchboard.getRootPath(), switchboard.getConfig("proxyCache", "DATA/HTCACHE")); + File file = new File(cache, pathString); + File dir; + URL url = plasmaHTCache.getURL(cache, file); + + if (file.isDirectory()) { + dir = file; + } else { + dir = file.getParentFile(); + pathString = (new File(pathString)).getParent().replace('\\','/'); + } + // generate dir listing + String[] list = dir.list(); + File f; String tree = "Directory of
" + ((pathString.length() == 0) ? "domain list" : linkPathString(pathString)) + "

"; + if (list == null) + tree += "[empty]"; + else { + for (int i = 0; i < list.length; i++) { + f = new File(dir, list[i]); + if (f.isDirectory()) + tree += " " + list[i] + "
" + serverCore.crlfString; + else + tree += " " + list[i] + "
" + serverCore.crlfString; + } + } + + String info = ""; + + if (action.equals("info")) { + if (!(file.isDirectory())) { + String urls = htmlFilterContentScraper.urlNormalform(url); + info += "Info for URL " + urls + ":

"; + try { + httpHeader fileheader = switchboard.cacheManager.getCachedResponse(plasmaURL.urlHash(url)); + info += "HTTP Header:
" + formatHeader(fileheader) + "
"; + String ff = file.toString(); + int p = ff.lastIndexOf('.'); + String ext = (p >= 0) ? ff.substring(p + 1).toLowerCase() : ""; + if ((ext.equals("gif")) || (ext.equals("jpg")) || (ext.equals("jpeg")) || (ext.equals("png"))) + info += ""; + else { + htmlFilterContentScraper scraper = new htmlFilterContentScraper(url); + OutputStream os = new htmlFilterOutputStream(null, scraper, null, false); + serverFileUtils.copy(file, os); + info += "HEADLINE:
" + scraper.getHeadline() + "

"; + info += "HREF:
" + formatAnchor(scraper.getHyperlinks()) + "
"; + info += "MEDIA:
" + formatAnchor(scraper.getMedialinks()) + "
"; + info += "EMAIL:
" + formatAnchor(scraper.getEmaillinks()) + "
"; + info += "TEXT:
" + new String(scraper.getText()) + "
"; + } + } catch (Exception e) { + info += e.toString(); + e.printStackTrace(); + } + } + } + + // + prop.put("cachesize", "" + (switchboard.cacheManager.currCacheSize/1024)); + prop.put("cachemax", "" + (switchboard.cacheManager.maxCacheSize/1024)); + prop.put("tree", tree); + prop.put("info", info); + // return rewrite properties + return prop; + } + + private static String formatHeader(httpHeader header) { + String out = ""; + Iterator it = header.entrySet().iterator(); + Map.Entry entry; + while (it.hasNext()) { + entry = (Map.Entry) it.next(); + out += ""; + } + out += "
" + entry.getKey() + " = " + entry.getValue() + "
"; + return out; + } + + private static String formatAnchor(Properties a) { + String out = ""; + Enumeration e = a.keys(); + String url, descr; + while (e.hasMoreElements()) { + url = (String) e.nextElement(); + descr = a.getProperty(url).trim(); + if (descr.length() == 0) descr = "-"; + out += ""; + } + out += "
" + descr + " " + url + "
"; + return out; + } + + private static String linkPathString(String Path){ // contributed by Alexander Schier + String Elements[] = Path.split("/"); + String result = ""; + String tmpPath = ""; + for(int i=0;i<(Elements.length-1);i++){ + tmpPath += Elements[i] + "/"; + result += "" + Elements[i] + "/"; + } + if (Elements.length > 0) { + tmpPath += Elements[Elements.length - 1] + "/"; + result += "" + Elements[Elements.length - 1] + "/"; + } + return result; + } + +} diff --git a/htroot/CacheResource_p.html b/htroot/CacheResource_p.html new file mode 100644 index 000000000..fb7f97c59 --- /dev/null +++ b/htroot/CacheResource_p.html @@ -0,0 +1 @@ +#[resource]# \ No newline at end of file diff --git a/htroot/CacheResource_p.java b/htroot/CacheResource_p.java new file mode 100644 index 000000000..aa621a376 --- /dev/null +++ b/htroot/CacheResource_p.java @@ -0,0 +1,77 @@ +// CacheResource_p.java +// ----------------------- +// part of the AnomicHTTPD caching proxy +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2004 +// last major change: 10.08.2003 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. 
+// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. +// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. 
+ + +// you must compile this file with +// javac -classpath .:../Classes Message.java +// if the shell's current path is HTROOT + +import java.util.*; +import java.text.*; +import java.io.*; +import de.anomic.tools.*; +import de.anomic.server.*; +import de.anomic.yacy.*; +import de.anomic.data.*; +import de.anomic.plasma.*; +import de.anomic.http.*; + +public class CacheResource_p { + + public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) { + plasmaSwitchboard switchboard = (plasmaSwitchboard) env; + serverObjects prop = new serverObjects(); + + String path = ((post == null) ? "" : post.get("path", "")); + File cache = new File(switchboard.getRootPath(), switchboard.getConfig("proxyCache", "DATA/HTCACHE")); + File f = new File(cache, path); + byte[] resource; + + try { + resource = serverFileUtils.read(f); + prop.put("resource", resource); + } catch (IOException e) { + prop.put("resource", new byte[0]); + } + return prop; + } + +} diff --git a/htroot/Config_p.html b/htroot/Config_p.html new file mode 100644 index 000000000..3df129fa7 --- /dev/null +++ b/htroot/Config_p.html @@ -0,0 +1,36 @@ + + + +YACY advanced Config +#[metas]# + + + +#[header]# +

+

advanced Config

+

+Here are all Config Options from YaCy.
+You can change anything, but some Options need a restart, and some Options can crash YaCy, when wrong values are used. +

+
+
+: + +
+ + + +#[footer]# + + diff --git a/htroot/Config_p.java b/htroot/Config_p.java new file mode 100644 index 000000000..87d05b26e --- /dev/null +++ b/htroot/Config_p.java @@ -0,0 +1,88 @@ +// Config_p.java +// ----------------------- +// part of YaCy +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2005 +// This file created by Alexander Schier +// +// This File is contributed by Alexander Schier +// last change: 02.08.2004 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. 
The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. +// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. + +// you must compile this file with +// javac -classpath .:../Classes Config_p.java +// if the shell's current path is HTROOT + +import java.util.*; +import java.io.*; +import de.anomic.tools.*; +import de.anomic.server.*; +import de.anomic.yacy.*; +import de.anomic.http.*; +import de.anomic.plasma.*; +import de.anomic.data.*; + +public class Config_p { + + public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) { + // return variable that accumulates replacements + serverObjects prop = new serverObjects(); + int count=0; + Enumeration keys = env.configKeys(); + String key=""; + + //change a Key + if(post != null && post.containsKey("key") && post.containsKey("value")){ + key=(String)post.get("key"); + String value=(String)post.get("value"); + if(!key.equals("")){ + env.setConfig(key, value); + } + } + + while(keys.hasMoreElements()){ + key=(String)keys.nextElement(); + prop.put("options_"+count+"_key", key); + prop.put("options_"+count+"_value", env.getConfig(key, "ERROR")); + count++; + } + prop.put("options", count); + + + return prop; + } + +} diff --git 
a/htroot/CookieMonitorIncoming_p.html b/htroot/CookieMonitorIncoming_p.html new file mode 100644 index 000000000..550d4e558 --- /dev/null +++ b/htroot/CookieMonitorIncoming_p.html @@ -0,0 +1,35 @@ + + + +YaCy: Incoming Cookies Monitor +#[metas]# + + +#[header]# +#[submenuCookie]# +
+

Cookie Monitor: Incoming Cookies

+ +

This is a list of Cookies that a web server has sent to clients of the YaCy Proxy:
+Showing #[num]# entries from a total of #[total]# Cookies.
+ + + + + + + +#{list}# + + + + + + +#{/list}# +
Sending HostDateReceiving ClientCookie
#[host]##[date]##[client]##[cookie]#
+

+
+#[footer]# + + diff --git a/htroot/CookieMonitorIncoming_p.java b/htroot/CookieMonitorIncoming_p.java new file mode 100644 index 000000000..137e0a6f0 --- /dev/null +++ b/htroot/CookieMonitorIncoming_p.java @@ -0,0 +1,95 @@ +// CookieMonitorIncoming_p.java +// ----------------------- +// part of YaCy +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2004, 2005 +// last change: 25.02.2005 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. 
+// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. + +// you must compile this file with +// javac -classpath .:../classes Network.java +// if the shell's current path is HTROOT + +import java.util.*; +import de.anomic.tools.*; +import de.anomic.server.*; +import de.anomic.http.*; +import de.anomic.yacy.*; +import de.anomic.plasma.*; + +public class CookieMonitorIncoming_p { + + public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch sb) { + plasmaSwitchboard switchboard = (plasmaSwitchboard) sb; + + // return variable that accumulates replacements + serverObjects prop = new serverObjects(); + + int maxCount = 100; + int entCount = 0; + boolean dark = true; + Iterator i = switchboard.incomingCookies.entrySet().iterator(); + Map.Entry entry; + String host, client, cookie; + Date date; + Object[] oa; + while ((entCount < maxCount) && (i.hasNext())) { + // get out values + entry = (Map.Entry) i.next(); + host = (String) entry.getKey(); + oa = (Object[]) entry.getValue(); + date = (Date) oa[0]; + client = (String) oa[1]; + cookie = (String) oa[2]; + + // put values in template + prop.put("list_" + entCount + "_dark", ((dark) ? 1 : 0) ); dark =! 
dark; + prop.put("list_" + entCount + "_host", host); + prop.put("list_" + entCount + "_date", httpc.dateString(date)); + prop.put("list_" + entCount + "_client", client); + prop.put("list_" + entCount + "_cookie", cookie); + + // next + entCount++; + } + prop.put("list", entCount); + prop.put("num", entCount); + prop.put("total", switchboard.incomingCookies.size()); + // return rewrite properties + return prop; + } + +} diff --git a/htroot/CookieMonitorOutgoing_p.html b/htroot/CookieMonitorOutgoing_p.html new file mode 100644 index 000000000..b65941629 --- /dev/null +++ b/htroot/CookieMonitorOutgoing_p.html @@ -0,0 +1,35 @@ + + + +YaCy: Outgoing Cookies Monitor +#[metas]# + + +#[header]# +#[submenuCookie]# +
+

Cookie Monitor: Outgoing Cookies

+ +

This is a list of Cookies that a browser using the YaCy Proxy has sent to a web server:
+Showing #[num]# entries from a total of #[total]# Cookies.
+ + + + + + + +#{list}# + + + + + + +#{/list}# +
Receiving HostDateSending ClientCookie
#[host]##[date]##[client]##[cookie]#
+

+
+#[footer]# + + diff --git a/htroot/CookieMonitorOutgoing_p.java b/htroot/CookieMonitorOutgoing_p.java new file mode 100644 index 000000000..c7b882335 --- /dev/null +++ b/htroot/CookieMonitorOutgoing_p.java @@ -0,0 +1,95 @@ +// CookieMonitorOutgoing_p.java +// ----------------------- +// part of YaCy +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2004, 2005 +// last change: 25.02.2005 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. 
+// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. + +// you must compile this file with +// javac -classpath .:../classes Network.java +// if the shell's current path is HTROOT + +import java.util.*; +import de.anomic.tools.*; +import de.anomic.server.*; +import de.anomic.http.*; +import de.anomic.yacy.*; +import de.anomic.plasma.*; + +public class CookieMonitorOutgoing_p { + + public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch sb) { + plasmaSwitchboard switchboard = (plasmaSwitchboard) sb; + + // return variable that accumulates replacements + serverObjects prop = new serverObjects(); + + int maxCount = 100; + int entCount = 0; + boolean dark = true; + Iterator i = switchboard.outgoingCookies.entrySet().iterator(); + Map.Entry entry; + String host, client, cookie; + Date date; + Object[] oa; + while ((entCount < maxCount) && (i.hasNext())) { + // get out values + entry = (Map.Entry) i.next(); + host = (String) entry.getKey(); + oa = (Object[]) entry.getValue(); + date = (Date) oa[0]; + client = (String) oa[1]; + cookie = (String) oa[2]; + + // put values in template + prop.put("list_" + entCount + "_dark", ((dark) ? 1 : 0) ); dark =! 
dark; + prop.put("list_" + entCount + "_host", host); + prop.put("list_" + entCount + "_date", httpc.dateString(date)); + prop.put("list_" + entCount + "_client", client); + prop.put("list_" + entCount + "_cookie", cookie); + + // next + entCount++; + } + prop.put("list", entCount); + prop.put("num", entCount); + prop.put("total", switchboard.outgoingCookies.size()); + // return rewrite properties + return prop; + } + +} diff --git a/htroot/EditProfile_p.html b/htroot/EditProfile_p.html new file mode 100644 index 000000000..0cc6ee3bf --- /dev/null +++ b/htroot/EditProfile_p.html @@ -0,0 +1,76 @@ + + + +YaCy: Your Personal Profile +#[metas]# + + +#[header]# +

+

Your Personal Profile

+

+You can create a personal profile here. Other YaCy peer users can view this information using a link on the network page.
+You do not need to provide any personal data here, but if you want to distribute your contact information, you can do that here. +

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Name
Nick Name
Homepage
EMail
 
ICQ
Jabber
Yahoo!
MSN
 
Comment
+ +
+ +
+#[footer]# + + + diff --git a/htroot/EditProfile_p.java b/htroot/EditProfile_p.java new file mode 100644 index 000000000..e677b0ffd --- /dev/null +++ b/htroot/EditProfile_p.java @@ -0,0 +1,99 @@ +// EditProfile_p.java +// ----------------------- +// part of YACY +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2004, 2005 +// +// This File is contributed by Alexander Schier +// last change: 27.02.2005 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. 
+// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. + +// you must compile this file with +// javac -classpath .:../Classes Blacklist_p.java +// if the shell's current path is HTROOT + +import java.util.*; +import java.io.*; +import java.net.*; +import de.anomic.server.*; +import de.anomic.http.*; +import de.anomic.plasma.*; +import de.anomic.data.*; + +public class EditProfile_p { + + public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) { + //listManager.switchboard = (plasmaSwitchboard) env; + serverObjects prop = new serverObjects(); + Properties profile = new Properties(); + try{ + profile.load(new FileInputStream(new File("DATA/SETTINGS/profile.txt"))); + + }catch(IOException e){} + if(post != null && post.containsKey("set")){ + profile.setProperty("name", (String)post.get("name")); + profile.setProperty("nickname", (String)post.get("nickname")); + profile.setProperty("homepage", (String)post.get("homepage")); + profile.setProperty("email", (String)post.get("email")); + + profile.setProperty("icq", (String)post.get("icq")); + profile.setProperty("jabber", (String)post.get("jabber")); + profile.setProperty("yahoo", (String)post.get("yahoo")); + profile.setProperty("msn", (String)post.get("msn")); + + profile.setProperty("comment", (String)post.get("comment")); + } + prop.put("name", profile.getProperty("name", "")); + prop.put("nickname", profile.getProperty("nickname", "")); + prop.put("homepage", profile.getProperty("homepage", "")); + prop.put("email", profile.getProperty("email", "")); + + prop.put("icq", profile.getProperty("icq", "")); + 
prop.put("jabber", profile.getProperty("jabber", "")); + prop.put("yahoo", profile.getProperty("yahoo", "")); + prop.put("msn", profile.getProperty("msn", "")); + + prop.put("comment", profile.getProperty("comment", "")); + + try{ + profile.store( new FileOutputStream(new File("DATA/SETTINGS/profile.txt")), null ); + }catch(IOException e){ + } + + return prop; + } + +} diff --git a/htroot/Help.html b/htroot/Help.html new file mode 100644 index 000000000..d931c732c --- /dev/null +++ b/htroot/Help.html @@ -0,0 +1,56 @@ + + + +YACY: Help +#[metas]# + + +#[header]# +

+

Help

+ +

+This is a distributed web crawler and also a caching http proxy. You are using the online-interface of the application. You can use this interface to configure your personal settings, proxy settings, access control and crawling properties. You can also use this interface to start crawls, send messages to other peers and monitor your index, cache status and crawling processes. Most important, you can use the search page to search either your own or the global index. +

+ +

+For more detailed information, visit the YACY home page. +

+ +

Local and Global Search: Options and Functions

+The proxy provides a search interface that accesses your local index, created from web pages that passed the proxy. +The search can also be applied globally, by searching other peers. You can use the following options to enhance your search results: + + + +
Search Word List +You can search for several words simultaneously. Words must be separated by a single space. +The words are treated conjunctively; that means every word must occur in the result, not just any of them. +If you do a global search (see below) you may get different results each time you do a search. + +
Maximum Number of Results +You can select the maximum number of links you want returned. We do not yet support multiple result pages for virtually any possible link. +Instead we encourage you to enhance the search result by submitting more search words. + +
Result Order Options +The search engine provides an experimental 'Quality' ranking. In contrast to other known search engines we provide also +a result order by date. If you change the order to 'Date-Quality' the most recently updated page from the search results is listed first. +For pages that have the same date the second order, 'Quality' is applied. + +
Resource Domain +This search engine is constructed to search the web pages that pass the proxy. But the search index is distributed to other peers as well, +so you can also search globally: this function is currently only rudimentary, but can be chosen for test cases. Future releases will +automatically distribute index information before a search happens to form a performant distributed hash table -- a very fast global search. + +
Maximum Search Time +Searching the local index is extremely fast, it happens within milliseconds, even for a large number (millions) of pages. But searching the +global index needs more time to find the correct remote peer that contains the best search results. This is especially the case while the +distributed index is in test mode. Search results get more stable (repeated global searches produce more similar results) the longer +the search time is. + +
+ + +#[footer]# + + diff --git a/htroot/IndexControl_p.html b/htroot/IndexControl_p.html new file mode 100644 index 000000000..a3681d6c5 --- /dev/null +++ b/htroot/IndexControl_p.html @@ -0,0 +1,78 @@ + + + +YACY: Index Control +#[metas]# + + +#[header]# +

+

Index Administration

+ +

+The local index currently consists of (at least) #[wcount]# reverse word indexes and #[ucount]# URL references
+

+ + + + + + + + + + + + + + + +
Word
Word-Hash: + +
+ +

+ +

+ + + + + + + + + + +
URL
URL-Hash: + +

+ + + + + + + + + + + + + + +
Index Distribution: +This enables automated, DHT-ruled Index Transmission to other peers. This is currently only activated for junior peers.
Index Receive: +Accept remote Index Transmissions. This works only if you are a senior peer. The DHT-rules do not work without this function.
+

+

+ +#[result]# + +#[footer]# + + diff --git a/htroot/IndexControl_p.java b/htroot/IndexControl_p.java new file mode 100644 index 000000000..f52a4c135 --- /dev/null +++ b/htroot/IndexControl_p.java @@ -0,0 +1,382 @@ +// IndexControl_p.java +// ----------------------- +// part of the AnomicHTTPD caching proxy +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2004 +// last change: 02.05.2004 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. 
+// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. + +// you must compile this file with +// javac -classpath .:../Classes IndexControl_p.java +// if the shell's current path is HTROOT + +import java.util.*; +import java.net.*; +import java.io.*; +import de.anomic.tools.*; +import de.anomic.server.*; +import de.anomic.plasma.*; +import de.anomic.http.*; +import de.anomic.htmlFilter.*; +import de.anomic.yacy.*; + +public class IndexControl_p { + + public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) { + // return variable that accumulates replacements + plasmaSwitchboard switchboard = (plasmaSwitchboard) env; + serverObjects prop = new serverObjects(); + + if ((post == null) || (env == null)) { + prop.put("keystring", ""); + prop.put("keyhash", ""); + prop.put("urlstring", ""); + prop.put("urlhash", ""); + prop.put("result", ""); + prop.put("wcount", "" + switchboard.wordIndex.sizeMin()); + prop.put("ucount", "" + switchboard.loadedURL.size()); + prop.put("otherHosts", ""); + prop.put("indexDistributeChecked", (switchboard.getConfig("allowDistributeIndex", "true").equals("true")) ? "checked" : ""); + prop.put("indexReceiveChecked", (switchboard.getConfig("allowReceiveIndex", "true").equals("true")) ? 
"checked" : ""); + return prop; // be save + } + + // default values + String keystring = ((String) post.get("keystring")).trim(); + String keyhash = ((String) post.get("keyhash")).trim(); + String urlstring = ((String) post.get("urlstring")).trim(); + String urlhash = ((String) post.get("urlhash")).trim(); + + if (!(urlstring.startsWith("http://"))) urlstring = "http://" + urlstring; + + prop.put("keystring", keystring); + prop.put("keyhash", keyhash); + prop.put("urlstring", urlstring); + prop.put("urlhash", urlhash); + prop.put("result", ""); + + // read values from checkboxes + String[] urlx = post.getAll("urlhx.*"); + boolean delurl = post.containsKey("delurl"); + boolean delurlref = post.containsKey("delurlref"); + //System.out.println("DEBUG CHECK: " + ((delurl) ? "delurl" : "") + " " + ((delurlref) ? "delurlref" : "")); + + if (post.containsKey("setIndexDistribute")) { + boolean allowDistributeIndex = ((String) post.get("indexDistribute", "")).equals("on"); + switchboard.setConfig("allowDistributeIndex", (allowDistributeIndex) ? "true" : "false"); + } + + if (post.containsKey("setIndexReceive")) { + boolean allowReceiveIndex = ((String) post.get("indexReceive", "")).equals("on"); + switchboard.setConfig("allowReceiveIndex", (allowReceiveIndex) ? 
"true" : "false"); + yacyCore.seedDB.mySeed.setFlagAcceptRemoteIndex(allowReceiveIndex); + } + + if (post.containsKey("keyhashdeleteall")) { + try { + if ((delurl) || (delurlref)) { + // generate an urlx array + try { + HashSet keyhashes = new HashSet(); + keyhashes.add(keyhash); + plasmaWordIndexEntity index = switchboard.searchManager.searchHashes(keyhashes, 10000); + Enumeration en = index.elements(true); + int i = 0; + urlx = new String[index.size()]; + while (en.hasMoreElements()) urlx[i++] = ((plasmaWordIndexEntry) en.nextElement()).getUrlHash(); + } catch (IOException e) { + urlx = new String[0]; + } + } + if (delurlref) for (int i = 0; i < urlx.length; i++) switchboard.removeAllUrlReferences(urlx[i], true); + if ((delurl) || (delurlref)) for (int i = 0; i < urlx.length; i++) switchboard.loadedURL.remove(urlx[i]); + switchboard.wordIndex.deleteComplete(keyhash); + } catch (IOException e) {} + post.remove("keyhashdeleteall"); + if ((keystring.length() > 0) && (plasmaWordIndexEntry.word2hash(keystring).equals(keyhash))) + post.put("keystringsearch", "generated"); + else + post.put("keyhashsearch", "generated"); + } + + if (post.containsKey("keyhashdelete")) { + try { + if (delurlref) for (int i = 0; i < urlx.length; i++) switchboard.removeAllUrlReferences(urlx[i], true); + if ((delurl) || (delurlref)) for (int i = 0; i < urlx.length; i++) switchboard.loadedURL.remove(urlx[i]); + switchboard.wordIndex.removeEntries(keyhash, urlx, true); + } catch (IOException e) {} + // this shall lead to a presentation of the list; so handle that the remaining program + // thinks that it was called for a list presentation + post.remove("keyhashdelete"); + if ((keystring.length() > 0) && (plasmaWordIndexEntry.word2hash(keystring).equals(keyhash))) + post.put("keystringsearch", "generated"); + else + post.put("keyhashsearch", "generated"); + //prop.put("result", "Delete of relation of url hashes " + result + " to key hash " + keyhash); + } + + if 
(post.containsKey("urlhashdeleteall")) { + int i = switchboard.removeAllUrlReferences(urlhash, true); + prop.put("result", "Deleted URL and " + i + " references from " + i + " word indexes."); + } + + if (post.containsKey("urlhashdelete")) { + plasmaCrawlLURL.entry entry = switchboard.loadedURL.getEntry(urlhash); + URL url = entry.url(); + if (url == null) { + prop.put("result", "No Entry for url hash " + urlhash + "; nothing deleted."); + } else { + urlstring = htmlFilterContentScraper.urlNormalform(url); + prop.put("urlstring", ""); + switchboard.loadedURL.remove(urlhash); + prop.put("result", "Removed URL " + urlstring); + } + } + + if (post.containsKey("keystringsearch")) { + keyhash = plasmaWordIndexEntry.word2hash(keystring); + prop.put("keyhash", keyhash); + prop.put("urlstring", ""); + prop.put("urlhash", ""); + prop.put("result", genUrlList(switchboard, keyhash, keystring)); + } + + if (post.containsKey("keyhashsearch")) { + if ((keystring.length() == 0) || (!(plasmaWordIndexEntry.word2hash(keystring).equals(keyhash)))) + prop.put("keystring", ""); + prop.put("urlstring", ""); + prop.put("urlhash", ""); + prop.put("result", genUrlList(switchboard, keyhash, "")); + } + + if (post.containsKey("keyhashtransfer")) { + if ((keystring.length() == 0) || (!(plasmaWordIndexEntry.word2hash(keystring).equals(keyhash)))) + prop.put("keystring", ""); + prop.put("urlstring", ""); + prop.put("urlhash", ""); + plasmaWordIndexEntity[] indexes = new plasmaWordIndexEntity[1]; + String result; + long starttime = System.currentTimeMillis(); + try {indexes[0] = switchboard.wordIndex.getEntity(keyhash, true); + result = yacyClient.transferIndex(yacyCore.seedDB.getConnected(post.get("hostHash", "")), indexes, switchboard.loadedURL); + } catch (IOException e) { + result = "IOException: " + e.getMessage(); + } + prop.put("result", (result == null) ? 
("Successfully transferred " + indexes[0].size() + " words in " + ((System.currentTimeMillis() - starttime) / 1000) + " seconds") : result); + } + + if (post.containsKey("keyhashsimilar")) { + Iterator hashIt = switchboard.wordIndex.hashIterator(keyhash, true, true, true); + String result = "Sequential List of Word-Hashes:
"; + String hash; + int i = 0; + while (hashIt.hasNext()) { + hash = (String) hashIt.next(); + result += "" + hash + " " + (((i + 1) % 8 == 0) ? "
" : ""); + i++; + } + prop.put("result", result); + } + + if (post.containsKey("urlstringsearch")) { + try { + URL url = new URL(urlstring); + urlhash = plasmaURL.urlHash(url); + prop.put("urlhash", urlhash); + plasmaCrawlLURL.entry entry = switchboard.loadedURL.getEntry(urlhash); + prop.put("result", genUrlProfile(switchboard, entry, urlhash)); + } catch (MalformedURLException e) { + prop.put("urlstring", "wrong url: " + urlstring); + prop.put("urlhash", ""); + } + } + + if (post.containsKey("urlhashsearch")) { + plasmaCrawlLURL.entry entry = switchboard.loadedURL.getEntry(urlhash); + URL url = entry.url(); + if (url == null) { + prop.put("result", "No Entry for url hash " + urlhash); + } else { + urlstring = url.toString(); + prop.put("urlstring", urlstring); + prop.put("result", genUrlProfile(switchboard, entry, urlhash)); + } + } + + if (post.containsKey("urlhashsimilar")) { + try { + Iterator hashIt = switchboard.loadedURL.urlHashes(urlhash, true); + String result = "Sequential List of URL-Hashes:
"; + String hash; + int i = 0; + while (hashIt.hasNext()) { + hash = (String) hashIt.next(); + result += "" + hash + " " + (((i + 1) % 8 == 0) ? "
" : ""); + i++; + } + prop.put("result", result); + } catch (IOException e) { + prop.put("result", "Error: " + e.getMessage()); + } + } + + //List known hosts + yacySeed seed; + int hc = 0; + if ((yacyCore.seedDB != null) && (yacyCore.seedDB.sizeConnected() > 0)) { + Enumeration e = yacyCore.dhtAgent.getAcceptRemoteIndexSeeds(keyhash); + while (e.hasMoreElements()) { + seed = (yacySeed) e.nextElement(); + if (seed != null) { + prop.put("hosts_" + hc + "_hosthash", seed.hash); + prop.put("hosts_" + hc + "_hostname", /*seed.hash + " " +*/ seed.get("Name", "nameless")); + hc++; + } + } + prop.put("hosts", "" + hc); + } else { + prop.put("hosts", "0"); + } + + // insert constants + prop.put("wcount", "" + switchboard.wordIndex.sizeMin()); + prop.put("ucount", "" + switchboard.loadedURL.size()); + prop.put("indexDistributeChecked", (switchboard.getConfig("allowDistributeIndex", "true").equals("true")) ? "checked" : ""); + prop.put("indexReceiveChecked", (switchboard.getConfig("allowReceiveIndex", "true").equals("true")) ? "checked" : ""); + // return rewrite properties + return prop; + } + + public static String genUrlProfile(plasmaSwitchboard switchboard, plasmaCrawlLURL.entry entry, String urlhash) { + if (entry == null) return "No entry found for url-hash " + urlhash; + URL url = entry.url(); + if (url == null) return "No entry found for url-hash " + urlhash; + String result = "" + + "" + + "" + + "" + + "" + + "" + + "" + + "" + + "" + + "" + + "" + + "" + + "" + + "" + + "
URL String" + htmlFilterContentScraper.urlNormalform(url) + "
Hash" + urlhash + "
Description" + entry.descr() + "
Modified-Date" + entry.moddate() + "
Loaded-Date" + entry.loaddate() + "
Referrer" + switchboard.loadedURL.getEntry(entry.referrerHash()).url() + "
Doctype" + entry.doctype() + "
Copy-Count" + entry.copyCount() + "
Local-Flag" + entry.local() + "
Quality" + entry.quality() + "
Language" + entry.language() + "
Size" + entry.size() + "
Words" + entry.wordCount() + "

"; + result += + "
" + + "" + + "" + + "" + + "" + + "
" + + " this may produce unresolved references at other word indexes but they do not harm

" + + "
" + + " delete the reference to this url at every other word where the reference exists (very extensive, but prevents unresolved references)
" + + "
"; + return result; + } + + public static String genUrlList(plasmaSwitchboard switchboard, String keyhash, String keystring) { + // search for a word hash and generate a list of url links + try { + HashSet keyhashes = new HashSet(); + keyhashes.add(keyhash); + plasmaWordIndexEntity index = switchboard.searchManager.searchHashes(keyhashes, 10000); + String result = ""; + if (index.size() == 0) { + result = "No URL entries related to this word hash " + keyhash + "."; + } else { + Enumeration en = index.elements(true); + plasmaWordIndexEntry ie; + result = "URL entries related to this word hash " + keyhash + ":
"; + result += "
"; + String us, uh; + int i = 0; + while (en.hasMoreElements()) { + ie = (plasmaWordIndexEntry) en.nextElement(); + uh = ie.getUrlHash(); + result += + ""; + if (switchboard.loadedURL.exists(uh)) { + us = switchboard.loadedURL.getEntry(uh).url().toString(); + result += + "" + uh + " " + us + "
"; + } else { + result += + "" + uh + " <unresolved URL Hash>
"; + } + } + result += + "" + + "" + + "" + + "" + + "
Reference Deletion


" + + "

" + + "
  (= delete Word)
" + + "
" + + "

" + + "delete also the referenced URL itself (reasonable and recommended, may produce unresolved references at other word indexes but they do not harm)" + + "
" + + "

" + + "for every resolveable and deleted URL reference, delete the same reference at every other word where the reference exists (very extensive, but prevents further unresolved references)" + + "
"; + } + return result; + } catch (IOException e) { + return ""; + } + } + +} diff --git a/htroot/IndexCreate_p.html b/htroot/IndexCreate_p.html new file mode 100644 index 000000000..c1a191f34 --- /dev/null +++ b/htroot/IndexCreate_p.html @@ -0,0 +1,322 @@ + + + +YACY: Index Creation +#[metas]# + + +#[header]# +

+

Index Creation

+ +

+

+You can define url's as start points for Web page crawling and start that crawling here. +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Crawling Depth: + A minimum of 1 is recommended. + Be careful with the prefetch number. Consider a branching factor of average 20; + A prefetch-depth of 8 would index 25.600.000.000 pages, maybe the whole WWW. +
Crawling Filter: + This is an emacs-like regular expression that must match with the crawled url. + Use this e.g. to crawl a single domain. If you set this filter it would make sense to increase + the crawl depth. +
Accept URL's with '?' / dynamic URL's: + URL's pointing to dynamic content should usually not be crawled. However, there are sometimes web pages with static content that + is accessed with URL's containing question marks. If you are unsure, do not check this to avoid crawl loops. +
Store to Proxy Cache: + This option is used by default for proxy prefetch, but is not needed for explicit crawling. + We recommend to leave this switched off unless you want to control the crawl results with the + Cache Monitor. +
Do Local Indexing: + This should be switched on by default, unless you want to crawl only to fill the + Proxy Cache without indexing. +
Do Remote Indexing + If checked, the crawl will try to assign the leaf nodes of the search tree to remote peers. + If you need your crawling results locally, you must switch this off. + Only senior and principal peers can initiate or receive remote crawls. +
Exclude static Stop-Words + To exclude all words given in the file yacy.stopwords from indexing, + check this box. +
Start Point:Existing start url's are re-crawled. + Other already visited url's are sorted out as 'double'. + A complete re-crawl will be available soon. +
+

+ +

+
Distributed Indexing: +Crawling and indexing can be done by remote peers. +Your peer can search and index for other peers and they can search for you.
+ + + + +
+ + Accept remote crawling requests +
+

+ + +

+#(error)# +:: +Error with profile management. Please stop yacy, delete the File DATA/PLASMADB/crawlProfiles0.db and restart. +:: +Error: #[errmsg]# +:: +Application not yet initialized. Sorry. Please wait some seconds and repeat the request. +:: +ERROR: Crawl filter "#[newcrawlingfilter]#" does not match with crawl root "#[crawlingStart]#". Please try again with different filter


+:: +Crawling of "#[crawlingURL]#" failed. Reason: #[reasonString]#
+:: +Error with url input "#[crawlingStart]#": #[error]# +#(/error)# +
+#(info)# +:: +Set new prefetch depth to "#[newproxyPrefetchDepth]#" +:: +Crawling of "#[crawlingURL]#" started. +You can monitor the crawling progress with this page. +Please wait some seconds before refresh of this page, because the request is enqueued and delayed until the http server is idle for a certain time. +The indexing result is presented on the +Index Monitor-page. +It will take at least 30 seconds until the first result appears there. Please be patient, the crawling will pause each time you use the proxy or web server to ensure maximum availability. +If you crawl any un-wanted pages, you can delete them here.
+:: +Removed #[numEntries]# entries from crawl queue. This queue may fill again if the loading and indexing queue is not empty +#(/info)# +
+#(refreshbutton)# +:: +
+ +
+
+#(/refreshbutton)# +Crawl Profile List:
+ + + + + + + + + + + +#{crawlProfiles}# + + + + + + + + + + +#{/crawlProfiles}# +
Crawl ThreadStart URLDepthFilterAccept '?'Fill Proxy CacheLocal IndexingRemote Indexing
#[name]##[startURL]##[depth]##[filter]##(withQuery)#no::yes#(/withQuery)##(storeCache)#no::yes#(/storeCache)##(localIndexing)#no::yes#(/localIndexing)##(remoteIndexing)#no::yes#(/remoteIndexing)#
+
+#(remoteCrawlPeers)# +No remote crawl peers available.
+:: +#[num]# peers available for remote crawling. + + + + + + + + + +
Idle Peers + #{available}##[name]# (#[due]# seconds due)   #{/available}# +
Busy Peers + #{busy}##[name]# (#[due]# seconds due)  #{/busy}# +
+#(/remoteCrawlPeers)# +
+#(rejected)# +:: +
+There are #[num]# entries in the rejected-urls list. +#(only-latest)# +:: +Showing latest #[num]# entries. + +  +#(/only-latest)# + +
+There are #[num]# entries in the rejected-queue:
+ + + + + + + +#{list}# + + + + + + +#{/list}# +
InitiatorExecutorURLFail-Reason
#[initiator]##[executor]##[url]##[failreason]#
+#(/rejected)# +
+#(indexing-queue)# +The indexing queue is empty
+:: +There are #[num]# entries in the indexing queue:
+ + + + + + + + +#{list}# + + + + + + + + +#{/list}# +
InitiatorDepthModified Date#HREFAnchor Name +URL
#[initiator]##[depth]##[modified]##[href]##[anchor]##[url]#
+#(/indexing-queue)# +
+#(loader-set)# +The loader set is empty
+:: +There are #[num]# entries in the loader set:
+ + + + + +#{list}# + + + + + +#{/list}# +
InitiatorDepth +URL
#[initiator]##[depth]##[url]#
+#(/loader-set)# +
+#(crawler-queue)# +The crawler queue is empty

+:: +There are #[num]# entries in the crawler queue. Showing #[show-num]# most recent entries: + + + + + + + + +#{list}# + + + + + + + +#{/list}# +
InitiatorDepthModified DateAnchor NameURL
#[initiator]##[depth]##[modified]##[anchor]##[url]#
+
+
+ +
+#(/crawler-queue)# +

+#[footer]# + + diff --git a/htroot/IndexCreate_p.java b/htroot/IndexCreate_p.java new file mode 100644 index 000000000..ef147f490 --- /dev/null +++ b/htroot/IndexCreate_p.java @@ -0,0 +1,368 @@ +// IndexCreate_p.java +// ----------------------- +// part of the AnomicHTTPD caching proxy +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2004 +// last major change: 02.12.2004 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. 
+// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. + +// you must compile this file with +// javac -classpath .:../classes IndexCreate_p.java +// if the shell's current path is HTROOT + +import java.util.*; +import de.anomic.server.*; +import de.anomic.http.*; +import de.anomic.plasma.*; +import de.anomic.yacy.*; +import java.text.*; +import java.net.*; +import java.io.*; + +public class IndexCreate_p { + + private static SimpleDateFormat dayFormatter = new SimpleDateFormat("yyyy/MM/dd", Locale.US); + private static String daydate(Date date) { + if (date == null) return ""; else return dayFormatter.format(date); + } + + public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) { + // return variable that accumulates replacements + plasmaSwitchboard switchboard = (plasmaSwitchboard) env; + serverObjects prop = new serverObjects(); + + prop.put("error", 0); + prop.put("info", 0); + prop.put("refreshbutton", 0); + prop.put("rejected", 0); + int showRejectedCount = 10; + int i; + + if (post != null) { + if (post.containsKey("crawlingstart")) { + // init crawl + if (yacyCore.seedDB == null) { + prop.put("error", 3); + } else { + // set new properties + String newcrawlingfilter = post.get("crawlingFilter", ".*"); + env.setConfig("crawlingFilter", newcrawlingfilter); + int newcrawlingdepth = Integer.parseInt((String) post.get("crawlingDepth", "0")); + env.setConfig("crawlingDepth", ("" + newcrawlingdepth)); + boolean crawlingQ = ((String) post.get("crawlingQ", "")).equals("on"); + env.setConfig("crawlingQ", (crawlingQ) ? 
"true" : "false"); + boolean storeHTCache = ((String) post.get("storeHTCache", "")).equals("on"); + env.setConfig("storeHTCache", (storeHTCache) ? "true" : "false"); + boolean localIndexing = ((String) post.get("localIndexing", "")).equals("on"); + env.setConfig("localIndexing", (localIndexing) ? "true" : "false"); + boolean crawlOrder = ((String) post.get("crawlOrder", "")).equals("on"); + env.setConfig("crawlOrder", (crawlOrder) ? "true" : "false"); + boolean xsstopw = ((String) post.get("xsstopw", "")).equals("on"); + env.setConfig("xsstopw", (crawlOrder) ? "true" : "false"); + boolean xdstopw = ((String) post.get("xdstopw", "")).equals("on"); + env.setConfig("xdstopw", (crawlOrder) ? "true" : "false"); + boolean xpstopw = ((String) post.get("xpstopw", "")).equals("on"); + env.setConfig("xpstopw", (crawlOrder) ? "true" : "false"); + + String crawlingStart = (String) post.get("crawlingURL"); + if (!(crawlingStart.startsWith("http"))) crawlingStart = "http://" + crawlingStart; + + // check if url is proper + URL crawlingStartURL = null; + try { + crawlingStartURL = new URL(crawlingStart); + } catch (MalformedURLException e) { + crawlingStartURL = null; + } + + // check if pattern matches + if ((crawlingStartURL == null) || (!(crawlingStart.matches(newcrawlingfilter)))) { + // print error message + prop.put("error", 4); //crawlfilter does not match url + prop.put("error_newcrawlingfilter", newcrawlingfilter); + prop.put("error_crawlingStart", crawlingStart); + } else try { + // stack request + // first delete old entry, if exists + String urlhash = plasmaURL.urlHash(crawlingStart); + switchboard.loadedURL.remove(urlhash); + switchboard.noticeURL.remove(urlhash); + + // stack url + String reasonString = switchboard.stackCrawl(crawlingStart, null, yacyCore.seedDB.mySeed.hash, "CRAWLING-ROOT", new Date(), 0, + switchboard.profiles.newEntry(crawlingStartURL.getHost(), crawlingStart, newcrawlingfilter, newcrawlingfilter, newcrawlingdepth, newcrawlingdepth, crawlingQ, 
storeHTCache, true, localIndexing, crawlOrder, xsstopw, xdstopw, xpstopw)); + + if (reasonString == null) { + // liftoff! + prop.put("info", 2);//start msg + prop.put("info_crawlingURL", ((String) post.get("crawlingURL"))); + } else { + prop.put("error", 5); //Crawling failed + prop.put("error_crawlingURL", ((String) post.get("crawlingURL"))); + prop.put("error_reasonString", reasonString); + } + } catch (Exception e) { + // mist + prop.put("error", 6);//Error with url + prop.put("error_crawlingStart", crawlingStart); + prop.put("error_error", e.getMessage()); + e.printStackTrace(); + } + } + } + if (post.containsKey("clearRejected")) { + switchboard.errorURL.clearStack(); + } + if (post.containsKey("moreRejected")) { + showRejectedCount = Integer.parseInt(post.get("showRejected", "10")); + } + if (post.containsKey("distributedcrawling")) { + boolean crawlResponse = ((String) post.get("crawlResponse", "")).equals("on"); + env.setConfig("crawlResponse", (crawlResponse) ? "true" : "false"); + } + if (post.containsKey("clearcrawlqueue")) { + String urlHash; + int c = 0; + while (switchboard.noticeURL.localStackSize() > 0) { + urlHash = switchboard.noticeURL.localPop().hash(); + if (urlHash != null) { + switchboard.noticeURL.remove(urlHash); + c++; + } + } + prop.put("info", 3);//crawling queue cleared + prop.put("info_numEntries", c); + } + } + + // define visible variables + prop.put("proxyPrefetchDepth", env.getConfig("proxyPrefetchDepth", "0")); + prop.put("crawlingDepth", env.getConfig("crawlingDepth", "0")); + prop.put("crawlingFilter", env.getConfig("crawlingFilter", "0")); + prop.put("crawlingQChecked", env.getConfig("crawlingQ", "").equals("true") ? 1 : 0); + prop.put("storeHTCacheChecked", env.getConfig("storeHTCache", "").equals("true") ? 1 : 0); + prop.put("localIndexingChecked", env.getConfig("localIndexing", "").equals("true") ? 1 : 0); + prop.put("crawlOrderChecked", env.getConfig("crawlOrder", "").equals("true") ? 
1 : 0); + prop.put("crawlResponseChecked", env.getConfig("crawlResponse", "").equals("true") ? 1 : 0); + prop.put("xsstopwChecked", env.getConfig("xsstopw", "").equals("true") ? 1 : 0); + prop.put("xdstopwChecked", env.getConfig("xdstopw", "").equals("true") ? 1 : 0); + prop.put("xpstopwChecked", env.getConfig("xpstopw", "").equals("true") ? 1 : 0); + + int processStackSize = switchboard.processStack.size(); + int loaderThreadsSize = switchboard.cacheLoader.size(); + int crawlerListSize = switchboard.noticeURL.stackSize(); + int completequeue = processStackSize + loaderThreadsSize + crawlerListSize; + + if ((completequeue > 0) || ((post != null) && (post.containsKey("refreshpage")))) { + prop.put("refreshbutton", 1); + } + + // create prefetch table + boolean dark; + + // sed crawl profiles + int count = 0; + //try{ + Iterator it = switchboard.profiles.profiles(true); + plasmaCrawlProfile.entry profile; + dark = true; + while (it.hasNext()) { + profile = (plasmaCrawlProfile.entry) it.next(); + //table += profile.map().toString() + "
"; + prop.put("crawlProfiles_"+count+"_dark", ((dark) ? 1 : 0)); + prop.put("crawlProfiles_"+count+"_name", profile.name()); + prop.put("crawlProfiles_"+count+"_startURL", profile.startURL()); + prop.put("crawlProfiles_"+count+"_depth", profile.generalDepth()); + prop.put("crawlProfiles_"+count+"_filter", profile.generalFilter()); + prop.put("crawlProfiles_"+count+"_withQuery", ((profile.crawlingQ()) ? 1 : 0)); + prop.put("crawlProfiles_"+count+"_storeCache", ((profile.storeHTCache()) ? 1 : 0)); + prop.put("crawlProfiles_"+count+"_localIndexing", ((profile.localIndexing()) ? 1 : 0)); + prop.put("crawlProfiles_"+count+"_remoteIndexing", ((profile.remoteIndexing()) ? 1 : 0)); + + dark = !dark; + count++; + } + //}catch(IOException e){}; + prop.put("crawlProfiles", count); + + // remote crawl peers + if (yacyCore.seedDB == null) { + //table += "Sorry, cannot show any crawl output now because the system is not completely initialised. Please re-try."; + prop.put("error", 3); + } else { + Enumeration crawlavail = yacyCore.dhtAgent.getAcceptRemoteCrawlSeeds(plasmaURL.dummyHash, true); + Enumeration crawlpendi = yacyCore.dhtAgent.getAcceptRemoteCrawlSeeds(plasmaURL.dummyHash, false); + if ((!(crawlavail.hasMoreElements())) && (!(crawlpendi.hasMoreElements()))) { + prop.put("remoteCrawlPeers", 0); //no peers availible + } else { + prop.put("remoteCrawlPeers", 1); + int maxcount = 100; + int availcount = 0; + yacySeed seed; + while ((availcount < maxcount) && (crawlavail.hasMoreElements())) { + seed = (yacySeed) crawlavail.nextElement(); + prop.put("remoteCrawlPeers_available_" + availcount + "_name", seed.getName()); + prop.put("remoteCrawlPeers_available_" + availcount + "_due", (yacyCore.yacyTime() - seed.available)); + availcount++; + } + prop.put("remoteCrawlPeers_available", availcount); + int pendicount = 0; + while ((pendicount < maxcount) && (crawlpendi.hasMoreElements())) { + seed = (yacySeed) crawlpendi.nextElement(); + prop.put("remoteCrawlPeers_busy_" + 
pendicount + "_name", seed.getName()); + prop.put("remoteCrawlPeers_busy_" + pendicount + "_due", (yacyCore.yacyTime() - seed.available)); + pendicount++; + } + prop.put("remoteCrawlPeers_busy", pendicount); + prop.put("remoteCrawlPeers_num", (availcount + pendicount)); + } + + // failure cases + if (switchboard.errorURL.stackSize() != 0) { + if (showRejectedCount > switchboard.errorURL.stackSize()) showRejectedCount = switchboard.errorURL.stackSize(); + prop.put("rejected", 1); + prop.put("rejected_num", switchboard.errorURL.stackSize()); + if (showRejectedCount != switchboard.errorURL.stackSize()) { + prop.put("rejected_only-latest", 1); + prop.put("rejected_only-latest_num", showRejectedCount); + prop.put("rejected_only-latest_newnum", ((int) (showRejectedCount * 1.5))); + }else{ + prop.put("rejected_only-latest", 0); + } + dark = true; + String url, initiatorHash, executorHash; + plasmaCrawlEURL.entry entry; + yacySeed initiatorSeed, executorSeed; + int j=0; + for (i = switchboard.errorURL.stackSize() - 1; i >= (switchboard.errorURL.stackSize() - showRejectedCount); i--) { + entry = (plasmaCrawlEURL.entry) switchboard.errorURL.getStack(i); + initiatorHash = entry.initiator(); + executorHash = entry.executor(); + url = entry.url().toString(); + initiatorSeed = yacyCore.seedDB.getConnected(initiatorHash); + executorSeed = yacyCore.seedDB.getConnected(executorHash); + prop.put("rejected_list_"+j+"_initiator", ((initiatorSeed == null) ? "proxy" : initiatorSeed.getName())); + prop.put("rejected_list_"+j+"_executor", ((executorSeed == null) ? "proxy" : executorSeed.getName())); + prop.put("rejected_list_"+j+"_url", url); + prop.put("rejected_list_"+j+"_failreason", entry.failreason()); + prop.put("rejected_list_"+j+"_dark", ((dark) ? 
1 : 0)); + dark = !dark; + j++; + } + prop.put("rejected_list", j); + } + + // now about the current processes + if (completequeue > 0) { + + yacySeed initiator; + + if (switchboard.processStack.size() == 0) { + prop.put("indexing-queue", 0); //is empty + } else { + prop.put("indexing-queue", 1); + prop.put("indexing-queue_num", switchboard.processStack.size());//num entries in queue + dark = true; + plasmaHTCache.Entry pcentry; + for (i = 0; i < switchboard.processStack.size(); i++) { + pcentry = (plasmaHTCache.Entry) switchboard.processStack.get(i); + if (pcentry != null) { + initiator = yacyCore.seedDB.getConnected(pcentry.initiator()); + prop.put("indexing-queue_list_"+i+"_dark", ((dark) ? 1 : 0)); + prop.put("indexing-queue_list_"+i+"_initiator", ((initiator == null) ? "proxy" : initiator.getName())); + prop.put("indexing-queue_list_"+i+"_depth", pcentry.depth); + prop.put("indexing-queue_list_"+i+"_modified", daydate(pcentry.lastModified)); + prop.put("indexing-queue_list_"+i+"_href",((pcentry.scraper == null) ? "0" : ("" + pcentry.scraper.getHyperlinks().size()))); + prop.put("indexing-queue_list_"+i+"_anchor", ((pcentry.scraper == null) ? "-" : pcentry.scraper.getHeadline()) ); + prop.put("indexing-queue_list_"+i+"_url", pcentry.urlString); + dark = !dark; + } + } + prop.put("indexing-queue_list", i); + } + + if (loaderThreadsSize == 0) { + prop.put("loader-set", 0); + } else { + prop.put("loader-set", 1); + prop.put("loader-set_num", loaderThreadsSize); + dark = true; + plasmaCrawlLoader.Exec[] loaderThreads = switchboard.cacheLoader.threadStatus(); + for (i = 0; i < loaderThreads.length; i++) { + initiator = yacyCore.seedDB.getConnected(loaderThreads[i].initiator); + prop.put("loader-set_list_"+i+"_dark", ((dark) ? 1 : 0) ); + prop.put("loader-set_list_"+i+"_initiator", ((initiator == null) ? 
"proxy" : initiator.getName()) ); + prop.put("loader-set_list_"+i+"_depth", loaderThreads[i].depth ); + prop.put("loader-set_list_"+i+"_url", loaderThreads[i].url ); // null pointer exception here !!! maybe url = null; check reason. + dark = !dark; + } + prop.put("loader-set_list", i ); + } + + if (crawlerListSize == 0) { + prop.put("crawler-queue", 0); + } else { + prop.put("crawler-queue", 1); + plasmaCrawlNURL.entry[] crawlerList = switchboard.noticeURL.localTop(20); + prop.put("crawler-queue_num", crawlerListSize);//num Entries + prop.put("crawler-queue_show-num", crawlerList.length); //showin sjow-num most recent + plasmaCrawlNURL.entry urle; + dark = true; + for (i = 0; i < crawlerList.length; i++) { + urle = crawlerList[i]; + if (urle != null) { + initiator = yacyCore.seedDB.getConnected(urle.initiator()); + prop.put("crawler-queue_list_"+i+"_dark", ((dark) ? 1 : 0) ); + prop.put("crawler-queue_list_"+i+"_initiator", ((initiator == null) ? "proxy" : initiator.getName()) ); + prop.put("crawler-queue_list_"+i+"_depth", urle.depth()); + prop.put("crawler-queue_list_"+i+"_modified", daydate(urle.loaddate()) ); + prop.put("crawler-queue_list_"+i+"_anchor", urle.name()); + prop.put("crawler-queue_list_"+i+"_url", urle.url()); + dark = !dark; + } + } + prop.put("crawler-queue_list", i); + } + } + } + // return rewrite properties + return prop; + } + +} + + + diff --git a/htroot/IndexMonitor.html b/htroot/IndexMonitor.html new file mode 100644 index 000000000..05f0804fb --- /dev/null +++ b/htroot/IndexMonitor.html @@ -0,0 +1,128 @@ + + + +YaCy: Index Monitor +#[metas]# + + +#[header]# + + + + + + + + + + + + + + + + + + + +
+ +#(process)# +

Indexing Queues Monitor Overview

+

These are monitoring pages for the different indexing queues.

+

YaCy knows 5 different ways to acquire web indexes. The details of these processes (1-5) are described within the submenus listed +above which also will show you a table with indexing results so far. The information in these tables is considered as private, +so you need to log-in with your administration password.

+

Case (6) is a monitor of the local receipt-generator, the opposite case of (1). It also contains an indexing result monitor but is not considered private +since it shows crawl requests from other peers. +

+

+

The image above illustrates the data flow initiated by web index acquisition. +Some processes occur twice to document the complex index migration structure. +

+:: +

(1) Index Monitor of Remote Crawl Receipts

+

This is the list of web pages that this peer initiated to crawl, +but had been crawled by other peers. +This is the 'mirror'-case of process (6). +

+

Use Case: You get entries here, if you start a local crawl on the 'Index Creation'-Page and check the +'Do Remote Indexing'-flag. Every page that a remote peer indexes upon this peer's request +is reported back and can be monitored here.

+:: +

(2) Index Monitor for Result of Search Queries

+

This index transfer was initiated by your peer by doing a search query. +The index was crawled and contributed by other peers.

+

Use Case: This list fills up if you do a search query on the 'Search Page'

+:: +

(3) Index Monitor for Index Transfer.

+

The url fetch was initiated and executed by other peers. +These links here have been transmitted to you because your peer is the most appropriate for storage according to +the logic of the Global Distributed Hash Table.

+

Use Case: This list may fill if you check the 'Index Receive'-flag on the 'Index Control' page

+:: +

(4) Index Monitor for Proxy Indexing

+

These web pages had been indexed as result of your proxy usage. +No personal or protected page is indexed; +such pages are detected by Cookie-Use or POST-Parameters (either in URL or as HTTP protocol) +and automatically excluded from indexing.

+

Use Case: You must use YaCy as proxy to fill up this table. +Set your browser's proxy setting to the same port as given +on the 'Settings'-page in the 'Proxy and Administration Port' field.

+:: +

(5) Index Monitor for Local Crawling.

+

These web pages had been crawled by your own crawl task.

+

Use Case: start a crawl by setting a crawl start point on the 'Index Create' page.

+:: +

(6) Index Monitor for Global Crawling

+

These pages had been indexed by your peer, but the crawl was initiated by a remote peer. +This is the 'mirror'-case of process (1).

+

Use Case: This list may fill if you check the 'Accept remote crawling requests'-flag on the 'Index Create' page

+#(/process)# + + +#(table)# +

The stack is empty.

+:: +

+#(size)# +Showing all #[all]# entries in this stack. +:: +Showing latest #[count]# lines from a stack of #[all]# entries. +#(/size)# + + + + +#(showInit)#::#(/showInit)# +#(showExec)#::#(/showExec)# + + + + + +#{indexed}# + + +#(showInit)#::#(/showInit)# +#(showExec)#::#(/showExec)# + + + + + +#{/indexed}# +
+ +
InitiatorExecutorModified Date#WordsTitleURL
+
+ + + +
#[initiatorSeed]##[executorSeed]##[moddate]##[wordcount]##[urldescr]##[url]#

+

+:: +#(/table)# +#[footer]# + + diff --git a/htroot/IndexMonitor.java b/htroot/IndexMonitor.java new file mode 100644 index 000000000..f71d101b7 --- /dev/null +++ b/htroot/IndexMonitor.java @@ -0,0 +1,124 @@ +// IndexMonitor.java +// ----------------------- +// part of the AnomicHTTPD caching proxy +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2004, 2005 +// last change: 09.03.2005 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. 
+// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. + +// you must compile this file with +// javac -classpath .:../Classes Settings_p.java +// if the shell's current path is HTROOT + +import java.util.*; +import de.anomic.server.*; +import de.anomic.http.*; +import de.anomic.plasma.*; +import de.anomic.yacy.*; +import java.text.*; + +public class IndexMonitor { + + + public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) { + // return variable that accumulates replacements + plasmaSwitchboard switchboard = (plasmaSwitchboard) env; + serverObjects prop = new serverObjects(); + + int showIndexedCount = 40; + boolean si = false; + boolean se = false; + + + if (post == null) { + post = new serverObjects(); + post.put("process", "0"); + } + + // find process number + int process; + try { + process = Integer.parseInt(post.get("process", "0")); + } catch (NumberFormatException e) { + process = 0; + } + + // check if authorization is needed and/or given + if (((process > 0) && (process < 6)) || + (post.containsKey("clearlist")) || + (post.containsKey("deleteentry"))) { + String authorization = ((String) header.get("Authorization", "xxxxxx")).trim().substring(6); + if (authorization.length() == 0) { + // force log-in + prop.put("AUTHENTICATE", "admin log-in"); + return prop; + } + String adminAccountBase64MD5 = switchboard.getConfig("adminAccountBase64MD5", ""); + boolean authenticated = (adminAccountBase64MD5.equals(serverCodings.standardCoder.encodeMD5Hex(authorization))); + if (!authenticated) { + // force log-in (again, because wrong password was given) + 
prop.put("AUTHENTICATE", "admin log-in"); + return prop; + } + } + + // do the commands + if (post.containsKey("clearlist")) switchboard.loadedURL.clearStack(process); + if (post.containsKey("deleteentry")) { + String hash = post.get("hash", null); + if (hash != null) { + // delete from database + switchboard.loadedURL.remove(hash); + } + } + if (post.containsKey("moreIndexed")) { + showIndexedCount = Integer.parseInt(post.get("showIndexed", "40")); + } + if (post.get("si") != null) si = true; + if (post.get("se") != null) se = true; + + // create table + if (process == 0) { + prop.put("table", 2); + } else { + prop.putAll(switchboard.loadedURL.genTableProps(process, showIndexedCount, si, se, "unknown", null, "IndexMonitor.html", true)); + } + prop.put("process", process); + // return rewrite properties + return prop; + } + +} diff --git a/htroot/IndexShare_p.html b/htroot/IndexShare_p.html new file mode 100644 index 000000000..a50b6e6a0 --- /dev/null +++ b/htroot/IndexShare_p.html @@ -0,0 +1,43 @@ + + + +YACY: Index Sharing +#[metas]# + + +#[header]# +

+

Index Sharing

+ +

+The local index currently consists of (at least) #[wcount]# reverse word indexes and #[ucount]# URL references
+

+ + + + +
Index:  +distribute 
+receive +
+   receive grant default: 
+   for each remote peer  +
 links/minute 
 words/minute  +
+
+
+
+ + + + + + +
#[dtable]#
#[rtable]#
+

+#[footer]# + + diff --git a/htroot/IndexShare_p.java b/htroot/IndexShare_p.java new file mode 100644 index 000000000..00fb942c6 --- /dev/null +++ b/htroot/IndexShare_p.java @@ -0,0 +1,87 @@ +// IndexShare_p.java +// ----------------------- +// part of the AnomicHTTPD caching proxy +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2004 +// last change: 24.08.2004 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. 
+// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. + +// you must compile this file with +// javac -classpath .:../classes IndexShare_p.java +// if the shell's current path is HTROOT + +//import java.util.*; +//import java.net.*; +//import java.io.*; +//import de.anomic.tools.*; +import de.anomic.server.*; +import de.anomic.plasma.*; +import de.anomic.http.*; +import de.anomic.yacy.*; +//import de.anomic.htmlFilter.*; + +public class IndexShare_p { + + public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) { + // return variable that accumulates replacements + plasmaSwitchboard switchboard = (plasmaSwitchboard) env; + serverObjects prop = new serverObjects(); + + if ((post == null) || (env == null)) { + prop.put("linkfreq", switchboard.getConfig("defaultLinkReceiveFrequency","30")); + prop.put("wordfreq", switchboard.getConfig("defaultWordReceiveFrequency","10")); + prop.put("dtable", ""); + prop.put("rtable", ""); + prop.put("wcount", "" + switchboard.wordIndex.sizeMin()); + prop.put("ucount", "" + switchboard.loadedURL.size()); + return prop; // be save + } + + if (post.containsKey("indexsharesetting")) { + switchboard.setConfig("allowDistributeIndex", (post.containsKey("distribute")) ? "true" : "false"); + switchboard.setConfig("allowReceiveIndex", (post.containsKey("receive")) ? 
"true" : "false"); + switchboard.setConfig("defaultLinkReceiveFrequency", (String) post.get("linkfreq", "30")); + switchboard.setConfig("defaultWordReceiveFrequency", (String) post.get("wordfreq", "10")); + } + + // insert constants + prop.put("wcount", "" + switchboard.wordIndex.sizeMin()); + prop.put("ucount", "" + switchboard.loadedURL.size()); + // return rewrite properties + return prop; + } + +} diff --git a/htroot/Lab.html b/htroot/Lab.html new file mode 100644 index 000000000..6f1fa9348 --- /dev/null +++ b/htroot/Lab.html @@ -0,0 +1,24 @@ + + + +YACY: Lab +#[metas]# + + +#[header]# +

+

The YACY Lab

+ +

+This is the place where we try out new functions and future added-value features of the AnomicHTTPProxy and the YACY search engine. +Everything here should be considered probably unstable and/or experimental. +You may try these things out, but please do not mind the bugs.

+ + + +#[footer]# + + diff --git a/htroot/Log_p.html b/htroot/Log_p.html new file mode 100644 index 000000000..693aa55c9 --- /dev/null +++ b/htroot/Log_p.html @@ -0,0 +1,11 @@ + + + + +
+
+#[log]#
+
+
+ + diff --git a/htroot/Log_p.java b/htroot/Log_p.java new file mode 100644 index 000000000..1e00f362d --- /dev/null +++ b/htroot/Log_p.java @@ -0,0 +1,103 @@ +// Log_p.java +// ----------------------- +// part of the AnomicHTTPD caching proxy +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2004 +// +// This File is contributed by Alexander Schier +// last major change: 14.12.2004 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. 
+// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. + + +// you must compile this file with +// javac -classpath .:../Classes Message.java +// if the shell's current path is HTROOT + +import java.util.*; +import java.text.*; +import java.io.*; +import de.anomic.tools.*; +import de.anomic.server.*; +import de.anomic.yacy.*; +import de.anomic.data.*; +import de.anomic.plasma.*; +import de.anomic.http.*; + +public class Log_p { + + + public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) { + plasmaSwitchboard switchboard = (plasmaSwitchboard) env; + serverObjects prop = new serverObjects(); + String log = ""; + boolean reversed = false; + int lines = 50; + + Object logLines[] = serverLog.getLastLog().toArray(); + + if(post != null){ + if(post.containsKey("mode") && ((String)post.get("mode")).equals("reversed")){ + reversed=true; + } + if(post.containsKey("lines")){ + lines = (int)Integer.parseInt((String)post.get("lines")); + } + } + + if(!reversed){ + //Iterator it = serverLog.getLastLog().iterator(); + //while(it.hasNext()){ + //log += it.next() + "\n"; + //} + + //either all Entries(=(logLines.length-1)-lines;i--){ + if(i>=0){ + log += (String)logLines[i] + "\n"; + } + } + } + prop.put("log", log); + + // return rewrite properties + return prop; + } + +} diff --git a/htroot/MessageSend_p.html b/htroot/MessageSend_p.html new file mode 100644 index 000000000..a9bf2f9db --- /dev/null +++ b/htroot/MessageSend_p.html @@ -0,0 +1,16 @@ + + + +YACY: Send Message +#[metas]# + + +#[header]# +

+

Send Message


+ +#[body]# + +#[footer]# + + diff --git a/htroot/MessageSend_p.java b/htroot/MessageSend_p.java new file mode 100644 index 000000000..7062db03b --- /dev/null +++ b/htroot/MessageSend_p.java @@ -0,0 +1,129 @@ +// MessageSend_p.java +// ----------------------- +// part of the AnomicHTTPD caching proxy +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2004 +// last major change: 28.06.2003 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. 
+// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. + + +// you must compile this file with +// javac -classpath .:../Classes MessageSend_p.java +// if the shell's current path is HTROOT + +import java.util.*; +import java.text.*; +import de.anomic.tools.*; +import de.anomic.server.*; +import de.anomic.yacy.*; +import de.anomic.plasma.*; +import de.anomic.http.*; + +public class MessageSend_p { + + private static SimpleDateFormat SimpleFormatter = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss"); + public static String dateString(Date date) { + return SimpleFormatter.format(date); + } + + + public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) { + plasmaSwitchboard switchboard = (plasmaSwitchboard) env; + serverObjects prop = new serverObjects(); + + String body = ""; + if ((post == null) || (post.get("hash","").length() == 0)) { + prop.put("body", "

You cannot call this page directly. Instead, use a link on the Network page.

"); + return prop; + } + + String hash = post.get("hash", ""); + String subject = post.get("subject", ""); + String message = post.get("message", ""); + + if (message.length() == 0) { + // open a editor page for the message + // first ask if the other peer is online, and also what kind of dokument it accepts + HashMap result = yacyClient.permissionMessage(hash); + //System.out.println("DEBUG: permission request result = " + result.toString()); + String peerName; + if (hash.equals(yacyCore.seedDB.mySeed.hash)) { + peerName = yacyCore.seedDB.mySeed.get("Name","nameless"); + } else { + yacySeed targetPeer = yacyCore.seedDB.getConnected(hash); + if (targetPeer == null) + peerName = "nameless"; + else + peerName = targetPeer.get("Name","nameless"); + } + String response = (result == null) ? "-1" : (String) result.get("response"); + if ((response == null) || (response.equals("-1"))) { + // we don't have permission or other peer does not exist + body += "

You cannot send a message to '" + peerName + "'. The peer does not respond.

"; + } else { + // write input form + int messagesize = Integer.parseInt((String) result.get("messagesize")); + int attachmentsize = Integer.parseInt((String) result.get("attachmentsize")); + body += "

The peer '" + peerName + "' is alive and responded:
"; + body += "'" + response + " You are allowed to send me a message ≤ " + messagesize + " kb and an attachment ≤ " + attachmentsize + ".'

"; + body += "


"; + body += "

Your Message

"; + body += "

Subject:

"; + body += "

Text:

"; + body += ""; + body += ""; + body += ""; + body += "
"; + } + } else { + // send written message to peer + int messagesize = Integer.parseInt(post.get("messagesize", "0")); + int attachmentsize = Integer.parseInt(post.get("attachmentsize", "0")); + + if (messagesize < 1000) messagesize = 1000; // debug + if (subject.length() > 100) subject = subject.substring(0, 100); + if (message.length() > messagesize) message = message.substring(0, messagesize); + HashMap result = yacyClient.postMessage(hash, subject, message.getBytes()); + body += "

Your message has been send. The target peer respondet:

"; + body += "

" + result.get("response") + "

"; + } + + // return rewrite properties + prop.put("body", body); + return prop; + } + +} diff --git a/htroot/Messages_p.html b/htroot/Messages_p.html new file mode 100644 index 000000000..81cf3aa1f --- /dev/null +++ b/htroot/Messages_p.html @@ -0,0 +1,18 @@ + + + +YACY: Messages +#[metas]# + + +#[header]# +

+

Messages


+ +

+#[messages]# +

+ +#[footer]# + + diff --git a/htroot/Messages_p.java b/htroot/Messages_p.java new file mode 100644 index 000000000..a6899028a --- /dev/null +++ b/htroot/Messages_p.java @@ -0,0 +1,132 @@ +// Messages_p.java +// ----------------------- +// part of the AnomicHTTPD caching proxy +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2004 +// last major change: 28.06.2003 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. 
+// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. + + +// you must compile this file with +// javac -classpath .:../Classes Message.java +// if the shell's current path is HTROOT + +import java.util.*; +import java.text.*; +import java.io.*; +import de.anomic.tools.*; +import de.anomic.server.*; +import de.anomic.yacy.*; +import de.anomic.data.*; +import de.anomic.plasma.*; +import de.anomic.http.*; + +public class Messages_p { + + private static SimpleDateFormat SimpleFormatter = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss"); + public static String dateString(Date date) { + return SimpleFormatter.format(date); + } + + + public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) { + plasmaSwitchboard switchboard = (plasmaSwitchboard) env; + serverObjects prop = new serverObjects(); + + String action = ((post == null) ? 
"list" : post.get("action", "list")); + String messages = ""; + messageBoard.entry message; + + // first reset notification + File notifierSource = new File(switchboard.getRootPath(), switchboard.getConfig("htRootPath","htroot") + "/env/grafics/notifierInactive.gif"); + File notifierDest = new File(switchboard.getRootPath(), switchboard.getConfig("htRootPath","htroot") + "/env/grafics/notifier.gif"); + try {serverFileUtils.copy(notifierSource, notifierDest);} catch (IOException e) {}; + + if (action.equals("delete")) { + String key = post.get("object",""); + switchboard.messageDB.remove(key); + action = "list"; + } + + if (action.equals("list")) { + messages += + "" + + ""; + try { + Iterator i = switchboard.messageDB.keys("remote", true); + String key; + + boolean dark = true; + while (i.hasNext()) { + key = (String) i.next(); + message = switchboard.messageDB.read(key); + messages += ""; dark = !dark; + messages += ""; + messages += ""; + messages += ""; + messages += ""; + messages += ""; + messages += ""; + } + messages += "
DateFromToSubjectAction
" + dateString(message.date()) + "" + message.author() + "" + message.recipient() + "" + message.subject() + "" + + "view / " + + "reply / " + + "delete" + + "
"; + } catch (IOException e) { + messages += "IO Error reading message Table: " + e.getMessage(); + } + } + + if (action.equals("view")) { + String key = post.get("object",""); + message = switchboard.messageDB.read(key); + messages += ""; + messages += ""; + messages += ""; + messages += ""; + messages += ""; + messages += ""; + messages += "
From:" + message.author() + "
To:" + message.recipient() + "
Send Date:" + dateString(message.date()) + "
Subject:" + message.subject() + "
" + new String(message.message()) + "
"; + } + + prop.put("messages", messages); + + // return rewrite properties + return prop; + } + +} diff --git a/htroot/Network.html b/htroot/Network.html new file mode 100644 index 000000000..8a29eb802 --- /dev/null +++ b/htroot/Network.html @@ -0,0 +1,179 @@ + + + +YaCy: Network Overview +#[metas]# + + +#[header]# + + + + + + + + + + + + + +
+#(page)# +

Network Overview

+:: +

Active Peers

+:: +

Passive Peers

+:: +

Potential Peers

+#(/page)# +

+#(table)# +

no remote #[peertype]# peer for this list known

+:: +

Showing #[num]# entries from a total of #[total]# peers.
+ + + + + +#(complete)#:: + + +#(/complete)# + + + + + + + + + + + + + + + +#{list}# + + + + +#(complete)# +:: + + +#(/complete)# + + + + + + + + + + + + + + + + +#{/list}# +
Profile
 
Message
 
Name*
 
Address
 
Hash
 
Type
 
Version
 
Contact
 
Last Seen
#min ago
 
  < >
Uptime
#minutes
  < >
#Links
  < >
#RWI's
  < >
Accept
Crawl
 
Accept
Index
 
Sent
Words
 
Sent
URL's
 
Received
Words
 
Received
URL's
 
#Seeds
 
#Connects
per hour
 
viewsend#[shortname]#http://#[ip]#:#[port]##[hash]##(type)#junior::senior::principal#(/type)##[version]##(contact)#passive::direct#(/contact)##[lastSeen]##[uptime]##[links]##[words]##(acceptcrawl)#no::yes#(/acceptcrawl)##(acceptindex)#no::yes#(/acceptindex)##[sI]##[sU]##[rI]##[rU]##[seeds]##[connects]#
+

+:: +

All Peers:
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Branch#Peers#All Links#All Words
Active (connected Senior and Principal)#[active-count]##[active-links]##[active-words]#
Passive (disconnected Senior and Principal)#[passive-count]##[passive-links]##[passive-words]#
Potential (Junior)#[potential-count]##[potential-links]##[potential-words]#
Network Total#[all-count]##[all-links]##[all-words]#
+

+

Your Peer:
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeVersionUptime
#minutes
#Links#RWI'sAccept
Crawl
Accept
Index
Sent
Words
Sent
URL's
Received
Words
Received
URL's
#Seeds#Connects
per hour
#[my-name]##(my-type)#virgin::junior::senior::principal#(/my-type)##[my-version]##[my-uptime]##[my-links]##[my-words]##(my-acceptcrawl)#no::yes#(/my-acceptcrawl)##(my-acceptindex)#no::yes#(/my-acceptindex)##[my-sI]##[my-sU]##[my-rI]##[my-rU]##[my-seeds]##[my-connects]#
+

+

+#(comment)# +:: +You are in online mode, but probably no internet resource is available. Please check your internet connection. +:: +You are either not in online mode or you do not use the proxy option. +To connect to the YACY network, you must use the proxy by changing your browser's settings +('on-demand - mode', see here +for an installation guide) or you can go online by activating the permanent online mode. +To do this, press this button: +

+ +
+#(/comment)# +

+#(/table)# +#[footer]# + + diff --git a/htroot/Network.java b/htroot/Network.java new file mode 100644 index 000000000..4a9a34844 --- /dev/null +++ b/htroot/Network.java @@ -0,0 +1,264 @@ +// Network.java +// ----------------------- +// part of YaCy +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2004, 2005 +// last major change: 16.02.2005 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. 
+// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. + +// you must compile this file with +// javac -classpath .:../classes Network.java +// if the shell's current path is HTROOT + +import java.util.*; +import de.anomic.tools.*; +import de.anomic.server.*; +import de.anomic.http.*; +import de.anomic.yacy.*; + +public class Network { + + public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch sb) { + // return variable that accumulates replacements + serverObjects prop = new serverObjects(); + boolean overview = (post == null) || (((String) post.get("page", "0")).equals("0")); + + String mySeedType = yacyCore.seedDB.mySeed.get("PeerType", "virgin"); + boolean iAmActive = (mySeedType.equals("senior")) || (mySeedType.equals("principal")); + + if (overview) { + long accActLinks = yacyCore.seedDB.countActiveURL(); + long accActWords = yacyCore.seedDB.countActiveRWI(); + long accPassLinks = yacyCore.seedDB.countPassiveURL(); + long accPassWords = yacyCore.seedDB.countPassiveRWI(); + long accPotLinks = yacyCore.seedDB.countPotentialURL(); + long accPotWords = yacyCore.seedDB.countPotentialRWI(); + + int conCount = yacyCore.seedDB.sizeConnected(); + int disconCount = yacyCore.seedDB.sizeDisconnected(); + int potCount = yacyCore.seedDB.sizePotential(); + + boolean complete = ((post == null) ? 
false : post.get("links", "false").equals("true")); + + // create own peer info + yacySeed seed = yacyCore.seedDB.mySeed; + if (yacyCore.seedDB.mySeed != null){ //our Peer + long links, words; + try { + links = Long.parseLong(seed.get("LCount", "0")); + words = Long.parseLong(seed.get("ICount", "0")); + } catch (Exception e) {links = 0; words = 0;} + + prop.put("table_my-name", seed.get("Name", "-") ); + if (yacyCore.seedDB.mySeed.isVirgin()) { + prop.put("table_my-type", 0); + } else if(yacyCore.seedDB.mySeed.isJunior()) { + prop.put("table_my-type", 1); + accPotLinks += links; + accPotWords += words; + } else if(yacyCore.seedDB.mySeed.isSenior()) { + prop.put("table_my-type", 2); + accActLinks += links; + accActWords += words; + } else if(yacyCore.seedDB.mySeed.isPrincipal()) { + prop.put("table_my-type", 3); + accActLinks += links; + accActWords += words; + } + prop.put("table_my-version", seed.get("Version", "-")); + prop.put("table_my-uptime", seed.get("Uptime", "-")); + prop.put("table_my-links", groupDigits(links)); + prop.put("table_my-words", groupDigits(words)); + prop.put("table_my-acceptcrawl", "" + (seed.getFlagAcceptRemoteCrawl() ? 1 : 0) ); + prop.put("table_my-acceptindex", "" + (seed.getFlagAcceptRemoteIndex() ? 
1 : 0) ); + prop.put("table_my-sI", seed.get("sI", "-")); + prop.put("table_my-sU", seed.get("sU", "-")); + prop.put("table_my-rI", seed.get("rI", "-")); + prop.put("table_my-rU", seed.get("rU", "-")); + prop.put("table_my-seeds", seed.get("SCount", "-")); + prop.put("table_my-connects", seed.get("CCount", "-")); + } + + // overall results: Network statistics + if (iAmActive) conCount++; else if (mySeedType.equals("junior")) potCount++; + prop.put("table_active-count", conCount); + prop.put("table_active-links", groupDigits(accActLinks)); + prop.put("table_active-words", groupDigits(accActWords)); + prop.put("table_passive-count", disconCount); + prop.put("table_passive-links", groupDigits(accPassLinks)); + prop.put("table_passive-words", groupDigits(accPassWords)); + prop.put("table_potential-count", potCount); + prop.put("table_potential-links", groupDigits(accPotLinks)); + prop.put("table_potential-words", groupDigits(accPotWords)); + prop.put("table_all-count", (conCount + disconCount + potCount)); + prop.put("table_all-links", groupDigits(accActLinks + accPassLinks + accPotLinks)); + prop.put("table_all-words", groupDigits(accActWords + accPassWords + accPotWords)); + + String comment = ""; + prop.put("table_comment", 0); + if (conCount == 0) { + if (Integer.parseInt(sb.getConfig("onlineMode", "1")) == 2) { + prop.put("table_comment", 1);//in onlinemode, but not online + } else { + prop.put("table_comment", 2);//not in online mode, and not online + } + } + prop.put("table", 2); // triggers overview + prop.put("page", 0); + } else { + // generate table + int page = Integer.parseInt(post.get("page", "1")); + int conCount = 0; + int maxCount = 100; + if (yacyCore.seedDB == null) { + prop.put("table", 0);//no remote senior/principal proxies known" + } else { + int size = 0; + switch (page) { + case 1 : size = yacyCore.seedDB.sizeConnected(); break; + case 2 : size = yacyCore.seedDB.sizeDisconnected(); break; + case 3 : size = yacyCore.seedDB.sizePotential(); 
break; + } + if (size == 0) { + prop.put("table", 0);//no remote senior/principal proxies known" + } else { + // add temporary the own seed to the database + if (iAmActive) { + yacyCore.peerActions.updateMySeed(); + yacyCore.seedDB.addConnected(yacyCore.seedDB.mySeed); + } + boolean dark = true; + yacySeed seed; + boolean complete = post.containsKey("ip"); + Enumeration e = null; + switch (page) { + case 1 : e = yacyCore.seedDB.seedsSortedConnected(post.get("order", "down").equals("up"), post.get("sort", "ICount")); break; + case 2 : e = yacyCore.seedDB.seedsSortedDisconnected(post.get("order", "up").equals("up"), post.get("sort", "LastSeen")); break; + case 3 : e = yacyCore.seedDB.seedsSortedPotential(post.get("order", "up").equals("up"), post.get("sort", "LastSeen")); break; + } + while ((e.hasMoreElements()) && (conCount < maxCount)) { + seed = (yacySeed) e.nextElement(); + if (seed != null) { + if (conCount >= maxCount) break; + if (seed.hash.equals(yacyCore.seedDB.mySeed.hash)) { + prop.put("table_list_"+conCount+"_dark", 2); + } else { + prop.put("table_list_"+conCount+"_dark", ((dark) ? 1 : 0) ); dark=!dark; + } + long links, words; + try { + links = Long.parseLong(seed.get("LCount", "0")); + words = Long.parseLong(seed.get("ICount", "0")); + } catch (Exception exc) {links = 0; words = 0;} + prop.put("table_list_"+conCount+"_complete", ((complete)? 
1 : 0) ); + prop.put("table_list_"+conCount+"_hash", seed.hash); + String shortname = seed.get("Name", "deadlink"); + if (shortname.length() > 20) shortname = shortname.substring(0, 20) + "..."; + prop.put("table_list_"+conCount+"_shortname", shortname); + prop.put("table_list_"+conCount+"_fullname", seed.get("Name", "deadlink")); + if (complete) { + prop.put("table_list_"+conCount+"_complete", 1); + prop.put("table_list_"+conCount+"_complete_ip", seed.get("IP", "-") ); + prop.put("table_list_"+conCount+"_complete_port", seed.get("Port", "-") ); + prop.put("table_list_"+conCount+"_complete_hash", seed.hash); + }else{ + prop.put("table_list_"+conCount+"_complete", 0); + } + if (seed.isJunior()) { + prop.put("table_list_"+conCount+"_type", 0); + } else if(seed.isSenior()){ + prop.put("table_list_"+conCount+"_type", 1); + } else if(seed.isPrincipal()) { + prop.put("table_list_"+conCount+"_type", 2); + prop.put("table_list_"+conCount+"_type_url", seed.get("seedURL", "http://nowhere/") ); + } + prop.put("table_list_"+conCount+"_version", seed.get("Version", "-")); + prop.put("table_list_"+conCount+"_contact", (seed.getFlagDirectConnect() ? 1 : 0) ); + prop.put("table_list_"+conCount+"_lastSeen", lastSeen(seed.get("LastSeen", "-")) ); + prop.put("table_list_"+conCount+"_uptime", seed.get("Uptime", "-") ); + prop.put("table_list_"+conCount+"_links", groupDigits(links)); + prop.put("table_list_"+conCount+"_words", groupDigits(words)); + prop.put("table_list_"+conCount+"_acceptcrawl", (seed.getFlagAcceptRemoteCrawl() ? 1 : 0) ); + prop.put("table_list_"+conCount+"_acceptindex", (seed.getFlagAcceptRemoteIndex() ? 
1 : 0) ); + prop.put("table_list_"+conCount+"_sI", seed.get("sI", "-")); + prop.put("table_list_"+conCount+"_sU", seed.get("sU", "-")); + prop.put("table_list_"+conCount+"_rI", seed.get("rI", "-")); + prop.put("table_list_"+conCount+"_rU", seed.get("rU", "-")); + prop.put("table_list_"+conCount+"_seeds", seed.get("SCount", "-")); + prop.put("table_list_"+conCount+"_connects", seed.get("CCount", "-")); + conCount++; + }//seed != null + }//while + if (iAmActive) yacyCore.seedDB.removeMySeed(); + prop.put("table_list", conCount); + prop.put("table", 1); + prop.put("table_num", conCount); + prop.put("table_total", (maxCount > conCount) ? conCount : maxCount); + prop.put("table_complete", ((complete)? 1 : 0) ); + } + } + prop.put("page", page); + prop.put("table_page", page); + switch (page) { + case 1 : prop.put("table_peertype", "senior/principal"); break; + case 2 : prop.put("table_peertype", "senior/principal"); break; + case 3 : prop.put("table_peertype", "junior"); break; + } + } + // return rewrite properties + return prop; + } + + private static String lastSeen(String date) { + long l = 0; + if (date.length() == 0) + l = 999; + else + try { + l = (yacyCore.universalTime() - yacyCore.shortFormatter.parse(date).getTime()) / 1000 / 60; + } catch (java.text.ParseException e) { + l = 999; + } + if (l == 999) return "-"; else return "" + l; + } + + private static String groupDigits(long Number) { + String s = "" + Number; + String t = ""; + for (int i = 0; i < s.length(); i++) t = s.charAt(s.length() - i - 1) + (((i % 3) == 0) ? 
"," : "") + t; + return t.substring(0, t.length() - 1); + } +} diff --git a/htroot/Network.xml b/htroot/Network.xml new file mode 100644 index 000000000..f0ba26308 --- /dev/null +++ b/htroot/Network.xml @@ -0,0 +1,48 @@ + +#(table)# +:: +:: + + + #[active-count]# + #[active-links]# + #[active-words]# + + + #[passive-count]# + #[passive-links]# + #[passive-words]# + + + #[potential-count]# + #[potential-links]# + #[potential-words]# + + + #[all-count]# + #[all-links]# + #[all-words]# + + + #[my-name]# + #(my-type)#virgin::junior::senior::principal#(/my-type)# + #[my-version]# + #[my-uptime]# + #[my-links]# + #[my-acceptcrawl]# + #[my-acceptindex]# + #[my-sI]# + #[my-sU]# + #[my-rI]# + #[my-rU]# + #[my-seeds]# + #[my-connects]# + + #[comment]# + + +#(/table)# \ No newline at end of file diff --git a/htroot/Performance_p.html b/htroot/Performance_p.html new file mode 100644 index 000000000..b63860fe8 --- /dev/null +++ b/htroot/Performance_p.html @@ -0,0 +1,56 @@ + + + +YaCy: Performace +#[metas]# + + +#[header]# +

+

Performance

+

+You can manipulate some performance attributes here.
+

+ + + + + + + + + + + + + + + + +#{table}# + + + + + + + + + + + + + + + + + + +#{/table}# +
ThreadQueue SizeTotal
Block Time
Total
Sleep Time
Total
Exec Time
Total
Cycles
Idle
Cycles
Busy
Cycles
Sleep Time
per Cycle
(milliseconds)
Exec Time
per Busy-Cycle
(milliseconds)
Delay between
idle loops
Delay between
busy loops
Full Description
#[shortdescr]##[queuesize]##[blocktime]# sec#[blockpercent]#%#[sleeptime]# sec#[sleeppercent]#%#[exectime]# sec#[execpercent]#%#[totalcycles]##[idlecycles]##[busycycles]##[sleeppercycle]##[execpercycle]# sec sec#[longdescr]#

+ +Changes take effect immediately +

+#[footer]# + + diff --git a/htroot/Performance_p.java b/htroot/Performance_p.java new file mode 100644 index 000000000..9ca8d1552 --- /dev/null +++ b/htroot/Performance_p.java @@ -0,0 +1,137 @@ +// Performace_p.java +// ----------------------- +// part of YaCy +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2004, 2005 +// last major change: 16.02.2005 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. 
+// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. + +// you must compile this file with +// javac -classpath .:../classes Performance_p.java +// if the shell's current path is HTROOT + +import java.util.*; +import java.net.*; +import de.anomic.tools.*; +import de.anomic.server.*; +import de.anomic.http.*; +import de.anomic.plasma.*; +// Back-end of Performance_p.html: fills one table row of timing/cycle statistics per switchboard thread and, when a form was posted, applies the submitted idle/busy sleep values. +public class Performance_p { + + public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch sb) { + // return variable that accumulates replacements + plasmaSwitchboard switchboard = (plasmaSwitchboard) sb; + serverObjects prop = new serverObjects(); + + Iterator threads = switchboard.threadNames(); + String threadName; + serverThread thread; + + // calculate totals over all threads; they are the denominators of the per-thread percentages below + long blocktime_total = 0, sleeptime_total = 0, exectime_total = 0; + while (threads.hasNext()) { + threadName = (String) threads.next(); + thread = switchboard.getThread(threadName); + blocktime_total += thread.getBlockTime(); + sleeptime_total += thread.getSleepTime(); + exectime_total += thread.getExecTime(); + } + if (blocktime_total == 0) blocktime_total = 1; + if (sleeptime_total == 0) sleeptime_total = 1; + if (exectime_total == 0) exectime_total = 1; // a zero total is replaced by 1 so the percentage divisions below cannot divide by zero + + // set templates for latest news from the threads (second pass over the thread names, one table row each) + long blocktime, sleeptime, exectime; + long idlesleep, busysleep; + int queuesize; + threads = switchboard.threadNames(); + int c = 0; + long idleCycles, busyCycles; + while (threads.hasNext()) { + threadName = (String) threads.next(); + thread = switchboard.getThread(threadName); + + // set values to templates; c is the row index used in the "table_<c>_..." keys + prop.put("table_" + c + "_threadname", threadName); +
prop.put("table_" + c + "_shortdescr", thread.getShortDescription()); + prop.put("table_" + c + "_longdescr", thread.getLongDescription()); + queuesize = thread.getJobCount(); + prop.put("table_" + c + "_queuesize", (queuesize == Integer.MAX_VALUE) ? "unlimited" : ("" + queuesize)); + + blocktime = thread.getBlockTime(); + sleeptime = thread.getSleepTime(); + exectime = thread.getExecTime(); + idleCycles = thread.getIdleCycles(); + busyCycles = thread.getBusyCycles(); + prop.put("table_" + c + "_blocktime", blocktime / 1000); + prop.put("table_" + c + "_blockpercent", "" + (100 * blocktime / blocktime_total)); + prop.put("table_" + c + "_sleeptime", sleeptime / 1000); + prop.put("table_" + c + "_sleeppercent", "" + (100 * sleeptime / sleeptime_total)); + prop.put("table_" + c + "_exectime", exectime / 1000); + prop.put("table_" + c + "_execpercent", "" + (100 * exectime / exectime_total)); + prop.put("table_" + c + "_totalcycles", "" + (idleCycles + busyCycles)); + prop.put("table_" + c + "_idlecycles", "" + idleCycles); + prop.put("table_" + c + "_busycycles", "" + busyCycles); + prop.put("table_" + c + "_sleeppercycle", ((idleCycles + busyCycles) == 0) ? "-" : ("" + (sleeptime / (idleCycles + busyCycles)))); + prop.put("table_" + c + "_execpercycle", (busyCycles == 0) ?
"-" : ("" + (exectime / busyCycles))); + + if (post == null) { + // no form posted: load the old values from the configuration (default "1000") + idlesleep = Long.parseLong(switchboard.getConfig(threadName + "_idlesleep" , "1000")); + busysleep = Long.parseLong(switchboard.getConfig(threadName + "_busysleep", "1000")); + } else { + // form posted: load the new values; they are multiplied by 1000 here and divided by 1000 again for display below + idlesleep = Long.parseLong((String) post.get(threadName + "_idlesleep", "1")) * 1000; + busysleep = Long.parseLong((String) post.get(threadName + "_busysleep", "1")) * 1000; + + // on-the-fly re-configuration: applied to the thread and written back to the configuration + switchboard.setThreadSleep(threadName, idlesleep, busysleep); + switchboard.setConfig(threadName + "_idlesleep", idlesleep); + switchboard.setConfig(threadName + "_busysleep", busysleep); + } + prop.put("table_" + c + "_idlesleep", idlesleep / 1000); + prop.put("table_" + c + "_busysleep", busysleep / 1000); + + c++; + } + prop.put("table", c); + + // return rewrite values for templates + return prop; + } + +} diff --git a/htroot/ProxyIndexingMonitor_p.html b/htroot/ProxyIndexingMonitor_p.html new file mode 100644 index 000000000..49e747817 --- /dev/null +++ b/htroot/ProxyIndexingMonitor_p.html @@ -0,0 +1,101 @@ + + + +YaCy: Proxy Index Monitor +#[metas]# + + +#[header]# +

+

Index Monitor for Proxy Indexing

+

+This is the control page for web pages that your peer has indexed during the current application run-time +as result of proxy fetch/prefetch. +No personal or protected page is indexed; +those pages are detected by Cookie-Use or POST-Parameters (either in URL or as HTTP protocol) +and automatically excluded from indexing. +

+ +

+
Proxy pre-fetch setting: +this is an automated html page loading procedure that takes actual proxy-requested +url's as crawling start points for crawling.
+ + + + + + + + + + + + + + + + +
Prefetch Depth: + A prefetch of 0 means no prefetch; a prefetch of 1 means to prefetch all + embedded url's, but since embedded image links are loaded by the browser + this means that only embedded href-anchors are prefetched additionally.
Store to Cache:It is almost always recommended to set this on. The only exception is when you have another caching proxy running as secondary proxy and YaCy is configured to use that proxy in proxy-proxy - mode.
+

+ +#(info)# +:: +
The file DATA/PLASMADB/crawlProfiles0.db is missing or corrupted. +Please delete that file and restart.
+:: +
+Proxy pre-fetch is now set to depth-#[message]#.
+Proxy caching is now set #(caching)#off::on#(/caching)#.
+:: +
An error has occurred: #[error]#.
+#(/info)# + +

Snapshot of recently indexed web pages that passed the proxy:
+#(table4)# +The stack is empty. +:: + +#(size)# +Showing all #[all]# entries in this stack. +:: +Showing latest #[count]# lines from a stack of #[all]# entries. +#(/size)# + + + + +#(showInit)#::#(/showInit)# +#(showExec)#::#(/showExec)# + + + + + +#{indexed}# + + +#(showInit)#::#(/showInit)# +#(showExec)#::#(/showExec)# + + + + + +#{/indexed}# +
+
InitiatorExecutorModified Date#WordsTitleURL
+
+ + + +
#[initiatorSeed]##[executorSeed]##[moddate]##[wordcount]##[urldescr]##[url]#

+#(/table4)# +

+ +#[footer]# + + diff --git a/htroot/ProxyIndexingMonitor_p.java b/htroot/ProxyIndexingMonitor_p.java new file mode 100644 index 000000000..c1923848e --- /dev/null +++ b/htroot/ProxyIndexingMonitor_p.java @@ -0,0 +1,130 @@ +// ProxyIndexingMonitor_p.java +// ----------------------- +// part of the AnomicHTTPD caching proxy +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2004 +// last change: 02.05.2004 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. 
+// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. + +// you must compile this file with +// javac -classpath .:../Classes Settings_p.java +// if the shell's current path is HTROOT + +import java.util.*; +import java.io.*; +import de.anomic.server.*; +import de.anomic.http.*; +import de.anomic.plasma.*; +import de.anomic.yacy.*; +import java.text.*; + +public class ProxyIndexingMonitor_p { + + private static SimpleDateFormat dayFormatter = new SimpleDateFormat("yyyy/MM/dd", Locale.US); + private static String daydate(Date date) { + if (date == null) return ""; else return dayFormatter.format(date); + } + + public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) { + // return variable that accumulates replacements + plasmaSwitchboard switchboard = (plasmaSwitchboard) env; + serverObjects prop = new serverObjects(); + + int showIndexedCount = 20; + boolean se = false; + + prop.put("info", 0); + prop.put("info_message", ""); + + if (post != null) { + if (post.containsKey("clearlist4")) switchboard.loadedURL.clearStack(4); // local: by proxy crawl + if (post.containsKey("deleteentry")) { + String hash = post.get("hash", null); + if (hash != null) { + // delete from database + switchboard.loadedURL.remove(hash); + } + } + + if (post.containsKey("moreIndexed")) { + showIndexedCount = Integer.parseInt(post.get("showIndexed", "40")); + } + + if (post.get("se") != null) se = true; + + if (post.containsKey("proxyprofileset")) try { + // read values and put them in global settings + int newProxyPrefetchDepth = Integer.parseInt((String) post.get("proxyPrefetchDepth", 
"0")); + env.setConfig("proxyPrefetchDepth", "" + newProxyPrefetchDepth); + boolean proxyStoreHTCache = ((String) post.get("proxyStoreHTCache", "")).equals("on"); + env.setConfig("proxyStoreHTCache", (proxyStoreHTCache) ? "true" : "false"); + + // implant these settings also into the crawling profile for the proxy + plasmaCrawlProfile.entry profile = switchboard.profiles.getEntry(switchboard.getConfig("defaultProxyProfile", "")); + if (profile == null) { + prop.put("info", 1);//delete DATA/PLASMADB/crawlProfiles0.db + } else { + try { + profile.changeEntry("generalDepth", "" + newProxyPrefetchDepth); + profile.changeEntry("storeHTCache", (proxyStoreHTCache) ? "true": "false"); + prop.put("info", 2);//new proxyPrefetchdepth + prop.put("info_message", newProxyPrefetchDepth); + prop.put("info_caching", (proxyStoreHTCache) ? 1 : 0); + } catch (IOException e) { + prop.put("info", 3); //Error: errmsg + prop.put("info_error", e.getMessage()); + } + } + + } catch (Exception e) { + prop.put("info", 2); //Error: errmsg + prop.put("info_error", e.getMessage()); + System.out.println("Case3"); + e.printStackTrace(); + } + } + + // create tables + String myname = yacyCore.seedDB.mySeed.getName(); + prop.putAll(switchboard.loadedURL.genTableProps(4, showIndexedCount, false, false, "proxy", null, "ProxyIndexingMonitor_p.html", true)); + + prop.put("proxyPrefetchDepth", env.getConfig("proxyPrefetchDepth", "0")); + prop.put("proxyStoreHTCacheChecked", env.getConfig("proxyStoreHTCache", "").equals("true") ? 1 : 0); + // return rewrite properties + return prop; + } + +} diff --git a/htroot/SettingsAck_p.html b/htroot/SettingsAck_p.html new file mode 100644 index 000000000..319090ad2 --- /dev/null +++ b/htroot/SettingsAck_p.html @@ -0,0 +1,67 @@ + + + +YACY: Settings Acknowledge +#[metas]# + + +#[header]# +

+

Settings Receipt:

+ +

+#(info)# +No information has been submitted
+Nothing changed +:: +Error with submitted information.
+Nothing changed. +:: +The user name must be given.
+Your request cannot be processed.
Nothing changed. +:: +The password redundancy check failed. You have probably mistyped your password.
+Your request cannot be processed.
Nothing changed. +:: +Shutting down.
Application will terminate after working off all crawling tasks. +:: +Your administration account setting has been made.
+Your new administration account name is #[user]#. The password has been accepted.
If you go back to the Settings page, you must log-in again. +:: +Your proxy account setting has been changed.
+Your proxy account check has been disabled, since you did not supply a password.

+The new proxy IP filter is set to #[filter]# +:: +Your proxy account setting has been changed.
+Your new proxy account name is #[user]#. The password has been accepted.
+If you open any public web page through the proxy, you must log-in then.
+The new proxy IP filter is set to #[filter]#. +:: +Your server access filter is now set to #[filter]#
+ +:: +Auto pop-up of the Status page is now disabled
+:: +Auto pop-up of the Status page is now enabled
+:: +You are now permanently online. After a short while you should see the effect on the status page.
+:: +Port is: #[port]#
+PeerName is: #[peerName]#
+Shutdown time is: #[shutdownWaiting]# seconds
+if you changed the Port, you need to restart the Proxy. +:: +SeedFTP Server Settings changed. You are now a principal peer.
+:: +SeedFTP Server Settings changed, but something is wrong. Please return to the settings page and modify the data.
+:: +The remote-proxy setting has been changed
+The new setting is effective immediately, you don't need to re-start. +#(/info)# +

+

You can now go back to the Settings page if you want to make more changes.

+ +#[footer]# + + diff --git a/htroot/SettingsAck_p.java b/htroot/SettingsAck_p.java new file mode 100644 index 000000000..76013d090 --- /dev/null +++ b/htroot/SettingsAck_p.java @@ -0,0 +1,242 @@ +// SettingsAck_p.java +// ----------------------- +// part of YaCy +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2004, 2005 +// last major change: 16.02.2005 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. 
+// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. + +// you must compile this file with +// javac -classpath .:../Classes SettingsAck_p.java +// if the shell's current path is HTROOT + +import java.util.*; +import java.io.*; +import de.anomic.tools.*; +import de.anomic.server.*; +import de.anomic.yacy.*; +import de.anomic.http.*; + +public class SettingsAck_p { + + public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) { + // return variable that accumulates replacements + serverObjects prop = new serverObjects(); + + //if (post == null) System.out.println("POST: NULL"); else System.out.println("POST: " + post.toString()); + + // set values + String s; + int pos; + + if (post == null) { + prop.put("info", 1);//no information submitted + return prop; + } + + // admin password + if (post.containsKey("adminaccount")) { + // read and process data + String user = (String) post.get("adminuser"); + String pw1 = (String) post.get("adminpw1"); + String pw2 = (String) post.get("adminpw2"); + // do checks + if ((user == null) || (pw1 == null) || (pw2 == null)) { + prop.put("info", 1);//error with submitted information + return prop; + } + if (user.length() == 0) { + prop.put("info", 2);//username must be given + return prop; + } + if (!(pw1.equals(pw2))) { + prop.put("info", 3);//pw check failed + return prop; + } + // check passed. 
set account: + env.setConfig("adminAccountBase64MD5", serverCodings.standardCoder.encodeMD5Hex(serverCodings.standardCoder.encodeBase64String(user + ":" + pw1))); + env.setConfig("adminAccount", ""); + prop.put("info", 5);//admin account changed + prop.put("info_user", user); + return prop; + } + + + // proxy password + if (post.containsKey("proxyaccount")) { + // read and process data + String filter = (String) post.get("proxyfilter"); + String user = (String) post.get("proxyuser"); + String pw1 = (String) post.get("proxypw1"); + String pw2 = (String) post.get("proxypw2"); + // do checks + if ((filter == null) || (user == null) || (pw1 == null) || (pw2 == null)) { + prop.put("info", 1);//error with submitted information + return prop; + } + if (user.length() == 0) { + prop.put("info", 2);//username must be given + return prop; + } + if (!(pw1.equals(pw2))) { + prop.put("info", 3);//pw check failed + return prop; + } + if (filter.length() == 0) filter = "*"; + // check passed. set account: + env.setConfig("proxyClient", filter); + if (pw1.length() == 0) { + // only ip filter setting without account + env.setConfig("proxyAccountBase64MD5", ""); + env.setConfig("proxyAccount", ""); + prop.put("info", 6);//proxy account has changed(no pw) + prop.put("info_filter", filter); + } else { + // also paccount setting + env.setConfig("proxyAccountBase64MD5", serverCodings.standardCoder.encodeMD5Hex(serverCodings.standardCoder.encodeBase64String(user + ":" + pw1))); + env.setConfig("proxyAccount", ""); + prop.put("info", 7);//proxy account has changed + prop.put("info_user", user); + prop.put("info_filter", filter); + } + return prop; + } + + // server password + if (post.containsKey("serveraccount")) { + // read and process data + String filter = (String) post.get("serverfilter"); + String user = (String) post.get("serveruser"); + String pw1 = (String) post.get("serverpw1"); + String pw2 = (String) post.get("serverpw2"); + // do checks + if (filter == null) { + //if ((filter 
== null) || (user == null) || (pw1 == null) || (pw2 == null)) { + prop.put("info", 1);//error with submitted information + return prop; + } + if (user.length() == 0) { + prop.put("info", 2);//username must be given + return prop; + } + if (!(pw1.equals(pw2))) { + prop.put("info", 3);//pw check failed + return prop; + } + if (filter.length() == 0) filter = "*"; + // check passed. set account: + env.setConfig("serverClient", filter); + env.setConfig("serverAccountBase64MD5", serverCodings.standardCoder.encodeMD5Hex(serverCodings.standardCoder.encodeBase64String(user + ":" + pw1))); + env.setConfig("serverAccount", ""); + + prop.put("info", 8);//server access filter updated + prop.put("info_user", user); + prop.put("info_filter", filter); + return prop; + } + + if (post.containsKey("dispop")) { + env.setConfig("browserPopUpTrigger", "false"); + prop.put("info", 9);//popup disabled + return prop; + } + + if (post.containsKey("enpop")) { + env.setConfig("browserPopUpTrigger", "true"); + prop.put("info", 10);//popup enabled + return prop; + } + + if (post.containsKey("pmode")) { + env.setConfig("onlineMode", "2"); + prop.put("info", 11);//permanent online mode + yacyCore.triggerOnlineAction(); + return prop; + } + + if (post.containsKey("generalsettings")) { + String port = (String) post.get("port"); + String peerName = (String) post.get("peername"); + String shutdownWaiting = (String) post.get("shutdownWaiting"); + String info = ""; + env.setConfig("port", port); + env.setConfig("peerName", peerName); + env.setConfig("shutdownWaiting", shutdownWaiting); + + prop.put("info", 12);//port or peername changed + prop.put("info_port", port); + prop.put("info_peerName", peerName); + prop.put("info_shutdownWaiting", shutdownWaiting); + return prop; + } + + if (post.containsKey("proxysettings")) { + httpdProxyHandler.remoteProxyUse = ((String) post.get("remoteProxyUse", "")).equals("on"); + httpdProxyHandler.remoteProxyHost = (String) post.get("remoteProxyHost", ""); + try { + 
httpdProxyHandler.remoteProxyPort = Integer.parseInt((String) post.get("remoteProxyPort", "")); + } catch (NumberFormatException e) { + httpdProxyHandler.remoteProxyPort = 3128; + } + httpdProxyHandler.remoteProxyNoProxy = (String) post.get("remoteProxyNoProxy", ""); + httpdProxyHandler.remoteProxyNoProxyPatterns = httpdProxyHandler.remoteProxyNoProxy.split(","); + env.setConfig("remoteProxyHost", httpdProxyHandler.remoteProxyHost); + env.setConfig("remoteProxyPort", "" + httpdProxyHandler.remoteProxyPort); + env.setConfig("remoteProxyNoProxy", httpdProxyHandler.remoteProxyNoProxy); + env.setConfig("remoteProxyUse", (httpdProxyHandler.remoteProxyUse) ? "true" : "false"); + prop.put("info", 15); // The remote-proxy setting has been changed + return prop; + } + + if (post.containsKey("seedFTPsettings")) { + env.setConfig("seedFTPServer", (String)post.get("seedFTPServer")); + env.setConfig("seedFTPPath", (String)post.get("seedFTPPath")); + env.setConfig("seedFTPAccount", (String)post.get("seedFTPAccount")); + env.setConfig("seedFTPPassword", (String)post.get("seedFTPPassword")); + env.setConfig("seedURL", (String)post.get("seedURL")); + if (yacyCore.saveSeedList(env)) + prop.put("info", 13);//SeedServer changed + else + prop.put("info", 14);//Seedserver changed, but something is wrong + return prop; + } + + // nothing made + prop.put("info", 1);//no information submitted + return prop; + } + +} diff --git a/htroot/Settings_p.html b/htroot/Settings_p.html new file mode 100644 index 000000000..c686b9abc --- /dev/null +++ b/htroot/Settings_p.html @@ -0,0 +1,185 @@ + + + +YACY: Settings +#[metas]# + + +#[header]# +

+

Settings

+ +

This is the configuration page for the AnomicHTTPProxy. Access to this page should be limited to an administration person only. +To restrict the access to this page, please set an administrator account and password below.

+

If you want to restore all settings to the default values, +but forgot your administration password, you must stop the proxy, +delete the file 'DATA/SETTINGS/httpProxy.conf' in the YaCy application root folder and start YaCy again. + +

+
Administration Account Settings +

This is the account that restricts access to this 'Settings' page. If you have not customized it yet, you should do so now: +

+ + + + + +
Account Name:     
Password:
Password (repeat same as above):
+

+

+ +

+
General Settings +

+ + + + + + + + + + + + + + + + + + + + + +
Proxy and Administration Port:Changes will take effect after restart only.
Shutdown Time Limit:This is the time that a peer takes at most for shutdown. + The shutdown-procedure is difficult since there are many caches that must be flushed + first (seconds) +
Peer Name:Your peer name defines also a new '.yacy' - domain, which can be accessed from every peer running this proxy. + Using your 'Home Page' and 'File Share' - zones you also have a platform to provide content to your new domain.
+ (hint: choose a name that appears on a web page that tells something about you, visit the page, get the 'senior' status, and you can be found..)
+

+

+ + +

+
Remote Proxy (optional) +

YaCy can use another proxy to connect to the internet. You can enter the address for a remote proxy here:

+

+ + + + + + + + + + + + + + + + + + + + +
remote Proxy Host:
remote Proxy Port:
no-Proxy Addresses:
use remote Proxy:
+

+

+ +

You can restrict the access to this proxy using a two-stage security barrier: +

    +
  • define an access domain with a list of granted client IP-Numbers or with wild cards
  • +
  • define an user account with a user:password - pair
  • +

+ +

+
Proxy Access Settings +

This is the account that restricts access to the proxy function. You probably don't want to share the proxy to the internet, so you should set the IP-Number Access Domain to a pattern that corresponds to your local intranet. The default setting should be right in most cases. If you want, you can also set a proxy account so that every proxy user must authenticate first, but this is rather unusual.

+

+ + + + + +
IP-Number filter:
Account Name:     
Password:
Password (repeat same as above):
+

+

+ +

+
Server Access Settings +

Here you can restrict access to the server. By default, the access is not limited, because this function is needed to spawn the p2p index-sharing function. If you block access to your server (setting anything other than '*'), then you will also be blocked from using other peers' indexes for search service. However, blocking access may be correct in enterprise environments where you only want to index your company's own web pages.

+

ATTENTION: your current IP is recognized as "#[clientIP]#". If the value you enter here does not match with this IP, you will not be able to access the server pages any more

+

+ + + + +
IP-Number filter:
     
+

+

+ +

+
System Behaviour Settings + + + +
Auto pop-up of status page on start-up: /
+
+

+ +

+
+Seed Upload Settings +

This is the account for an ftp server where you can host a seed-list file. +If you set this, you will become a principal peer. +Your peer will then upload the seed-bootstrap information periodically, +but only if there had been changes to the seed-list. +

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Server:The host where you have an FTP account, like 'ftp.<my-host>.net'
Path:The remote path on the ftp server, like 'yacy/seed.txt'. + Missing sub-directories are NOT created automatically.
Username:Your log-in at the ftp server
Password:The password
URL:The URL that can be used to retrieve the uploaded seed file, like + 'http://www.<my-host>.net/yacy/seed.txt'
+

+

+ + +#[footer]# + + diff --git a/htroot/Settings_p.java b/htroot/Settings_p.java new file mode 100644 index 000000000..8966f24cc --- /dev/null +++ b/htroot/Settings_p.java @@ -0,0 +1,134 @@ +// Settings.p.java +// ----------------------- +// part of the AnomicHTTPD caching proxy +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2004 +// last change: 02.05.2004 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. 
+// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. + +// you must compile this file with +// javac -classpath .:../Classes Settings_p.java +// if the shell's current path is HTROOT + +import java.util.*; +import de.anomic.tools.*; +import de.anomic.server.*; +import de.anomic.yacy.*; +import de.anomic.http.*; + +public class Settings_p { + + public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) { + // return variable that accumulates replacements + serverObjects prop = new serverObjects(); + + //if (post == null) System.out.println("POST: NULL"); else System.out.println("POST: " + post.toString()); + + prop.put("port", env.getConfig("port", "8080")); + prop.put("shutdownWaiting", env.getConfig("shutdownWaiting", "120")); + prop.put("peerName", env.getConfig("peerName", "nameless")); + // set values + String s; + int pos; + + // admin password + if (env.getConfig("adminAccountBase64", "").length() == 0) { + // no password has been specified + prop.put("adminuser","admin"); + } else { + s = env.getConfig("adminAccount", "admin:void"); + pos = s.indexOf(":"); + if (pos < 0) { + prop.put("adminuser","admin"); + } else { + prop.put("adminuser",s.substring(0, pos)); + } + } + + // remote proxy + prop.put("remoteProxyHost", env.getConfig("remoteProxyHost", "")); + prop.put("remoteProxyPort", env.getConfig("remoteProxyPort", "")); + prop.put("remoteProxyNoProxy", env.getConfig("remoteProxyNoProxy", "")); + prop.put("remoteProxyUseChecked", ((String) env.getConfig("remoteProxyUse", "false")).equals("true") ? 
1 : 0); + + // proxy access filter + prop.put("proxyfilter", env.getConfig("proxyClient", "*")); + + // proxy password + if (env.getConfig("proxyAccountBase64", "").length() == 0) { + // no password has been specified + prop.put("proxyuser","proxy"); + } else { + s = env.getConfig("proxyAccount", "proxy:void"); + pos = s.indexOf(":"); + if (pos < 0) { + prop.put("proxyuser","proxy"); + } else { + prop.put("proxyuser",s.substring(0, pos)); + } + } + + // server access filter + prop.put("serverfilter", env.getConfig("serverClient", "*")); + + // server password + if (env.getConfig("serverAccountBase64", "").length() == 0) { + // no password has been specified + prop.put("serveruser","server"); + } else { + s = env.getConfig("serverAccount", "server:void"); + pos = s.indexOf(":"); + if (pos < 0) { + prop.put("serveruser","server"); + } else { + prop.put("serveruser",s.substring(0, pos)); + } + } + + // clientIP + prop.put("clientIP", (String) header.get("CLIENTIP", "")); // read an artificial header addendum + //seedFTPSettings + prop.put("seedFTPServer", env.getConfig("seedFTPServer", "")); + prop.put("seedFTPPath", env.getConfig("seedFTPPath", "")); + prop.put("seedFTPAccount", env.getConfig("seedFTPAccount", "")); + prop.put("seedFTPPassword", env.getConfig("seedFTPPassword", "")); + prop.put("seedURL", env.getConfig("seedURL", "")); + + // return rewrite properties + return prop; + } + +} diff --git a/htroot/Skins_p.html b/htroot/Skins_p.html new file mode 100644 index 000000000..34017d214 --- /dev/null +++ b/htroot/Skins_p.html @@ -0,0 +1,44 @@ + + + +YACY: Skins +#[metas]# + + +#[header]# +

+

Skin selection

+

+You can change the appearance of YACY with skins. Select one of the default skins, download new skins, or create your own skin.

+ +Current Skin: #[currentskin]# +

+ +

+Skins:
+ +
+ +
+
+ +
+Install new Skin from URL:
+Use this Skin
+ +
+ +#(status)# +:: +Unable to get URL: #[url]# +:: +Error saving the Skin. +#(status)# + +#[footer]# + + diff --git a/htroot/Skins_p.java b/htroot/Skins_p.java new file mode 100644 index 000000000..9c7e5788c --- /dev/null +++ b/htroot/Skins_p.java @@ -0,0 +1,159 @@ +// Skins_p.java +// ----------------------- +// part of YACY +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2004 +// +// This File is contributed by Alexander Schier +// last change: 29.12.2004 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. 
The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. +// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. + +// you must compile this file with +// javac -classpath .:../Classes Blacklist_p.java +// if the shell's current path is HTROOT + +import java.util.*; +import java.io.*; +import java.net.*; +import de.anomic.server.*; +import de.anomic.http.*; +import de.anomic.plasma.*; +import de.anomic.data.*; + +public class Skins_p { + + private static boolean copyFile(File from, File to){ + if(from == null || to == null){ + return false; + } + try{ + serverFileUtils.copy(from, to); + return true; + }catch(IOException e){ + return false; + } + } + + private static boolean changeSkin(serverSwitch env, String skinPath, String skin){ + File styleFile = new File(env.getRootPath(), "htroot/env/style.css"); + File skinFile = new File(skinPath, skin); + + if(copyFile(skinFile, styleFile)){ + env.setConfig("currentSkin", skin.substring(0,skin.length()-4)); + return true; + } + return false; + } + + public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) { + //listManager.switchboard = (plasmaSwitchboard) env; + serverObjects prop = new serverObjects(); + plasmaSwitchboard switchboard = 
(plasmaSwitchboard) env; + String skinPath = new File(env.getRootPath(), env.getConfig("skinPath", "DATA/SKINS")).toString(); + + //Fallback + prop.put("currentskin", ""); + prop.put("status", 0);//nothing + + String[] skinFiles = listManager.getDirListing(skinPath); + if(skinFiles == null){ + return prop; + } + + //if there are no skins, use the current style as default + //normally only invoked at first start of YACY + if(skinFiles.length == 0){ + copyFile(new File(env.getRootPath(), "htroot/env/style.css"), new File(skinPath, "default.css")); + env.setConfig("currentSkin", "default"); + } + + if (post != null){ + //change skin + if(post.containsKey("use")){ + changeSkin(env, skinPath, (String)post.get("skin")); + + //delete skin + }else if(post.containsKey("delete")){ + File skinfile= new File(skinPath, (String)post.get("skin")); + skinfile.delete(); + + //load Skin from URL + } else if (post.containsKey("url")){ + String url = (String)post.get("url"); + Vector skinVector; + try{ + skinVector = httpc.wget(new URL(url), 6000, null, null, switchboard.remoteProxyHost, switchboard.remoteProxyPort); + }catch(IOException e){ + prop.put("status", 1);//unable to get url + prop.put("status_url", url); + return prop; + } + try{ + Iterator it = skinVector.iterator(); + File skinFile = new File(skinPath, url.substring(url.lastIndexOf("/"), url.length())); + BufferedWriter bw = new BufferedWriter(new PrintWriter(new FileWriter(skinFile))); + + while(it.hasNext()){ + bw.write(it.next() + "\n"); + } + bw.close(); + }catch(IOException e){ + prop.put("status", 2);//error saving the skin + return prop; + } + if(post.containsKey("use") && ((String)post.get("use")).equals("on")){ + changeSkin(env, skinPath, url.substring(url.lastIndexOf("/"), url.length())); + } + } + } + + + //reread Skins + skinFiles = listManager.getDirListing(skinPath); + int i; + for(i=0;i<= skinFiles.length-1;i++){ + if(skinFiles[i].endsWith(".css")){ + prop.put("skinlist_"+i+"_file", skinFiles[i]); + 
prop.put("skinlist_"+i+"_name", skinFiles[i].substring(0, skinFiles[i].length() -4)); + } + } + prop.put("skinlist", i); + + prop.put("currentskin", env.getConfig("currentSkin", "default")); + return prop; + } + +} diff --git a/htroot/Statistics.html b/htroot/Statistics.html new file mode 100644 index 000000000..df7fb830b --- /dev/null +++ b/htroot/Statistics.html @@ -0,0 +1,49 @@ + + + +YaCy: Statistics +#[metas]# + + +#[header]# + + + + + + + + + +
+#(page)# +

Backlinks

+#(backlinks)# +

no backlinks so far (another web page must link to your peer's search page +and someone must have clicked on that link; then the referrer page appears here).

+:: +

Showing #[num]# entries from a total of #[total]# backlinks:
+ + + + + + + +#{list}# + + + + + + +#{/list}# +
Url
 
Time
 
Client IP
 
Client User Agent
 
#[url]##[date]##[clientip]##[useragent]#
+

+#(/backlinks)# +:: +

Zeitgeist

+#(/page)# +#[footer]# + + diff --git a/htroot/Statistics.java b/htroot/Statistics.java new file mode 100644 index 000000000..b0f1b4f66 --- /dev/null +++ b/htroot/Statistics.java @@ -0,0 +1,99 @@ +// Statistics.java +// ----------------------- +// part of YaCy +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2004, 2005 +// last major change: 16.02.2005 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. 
+// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. + +// you must compile this file with +// javac -classpath .:../classes Network.java +// if the shell's current path is HTROOT + +import java.util.*; +import java.net.*; +import de.anomic.tools.*; +import de.anomic.server.*; +import de.anomic.http.*; +import de.anomic.plasma.*; + +public class Statistics { + + public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch sb) { + // return variable that accumulates replacements + plasmaSwitchboard switchboard = (plasmaSwitchboard) sb; + serverObjects prop = new serverObjects(); + int page = (post == null) ? 0 : Integer.parseInt((String) post.get("page", "0")); + + prop.put("page", page); + switch (page) { + case 0: + if (switchboard.facilityDB.size("backlinks") == 0) { + prop.put("page_backlinks", 0); + } else { + prop.put("page_backlinks", 1); + Iterator it = switchboard.facilityDB.maps("backlinks", false, "date"); + int count = 0; + int maxCount = 100; + boolean dark = true; + Map map; + String urlString; + URL url; + while ((it.hasNext()) && (count < maxCount)) { + map = (Map) it.next(); + if (count >= maxCount) break; + urlString = (String) map.get("key"); + try { url = new URL(urlString); } catch (MalformedURLException e) { url = null; } + if ((url != null) && (serverCore.isNotLocal(url))) { + prop.put("page_backlinks_list_" + count + "_dark", ((dark) ? 1 : 0)); dark =! 
dark; + prop.put("page_backlinks_list_" + count + "_url", urlString); + prop.put("page_backlinks_list_" + count + "_date", map.get("date")); + prop.put("page_backlinks_list_" + count + "_clientip", map.get("clientip")); + prop.put("page_backlinks_list_" + count + "_useragent", map.get("useragent")); + count++; + } + }//while + prop.put("page_backlinks_list", count); + prop.put("page_backlinks_num", count); + prop.put("page_backlinks_total", switchboard.facilityDB.size("backlinks")); + } + break; + } + // return rewrite properties + return prop; + } + +} diff --git a/htroot/Status.html b/htroot/Status.html new file mode 100644 index 000000000..29e2b29da --- /dev/null +++ b/htroot/Status.html @@ -0,0 +1,114 @@ + + + +YACY: Status +#[metas]# + + + +#[header]# +

+

System, Index and Peer Status

+ +

Welcome to YACY!

+ +
+ + + + +
+ +
+
+ +

+ + + + + + + + + + + + + + + +
System Properties
protection +#(protection)# +your settings are not protected! please go to the settings page immediately and set an administration password. +:: +your settings are protected by a password +#(/protection)# +
system version +#[version]##(versioncomment)#:: - the latest public version is #[latestVersion]#. Click here to download it.#(/versioncomment)# +
proxy host#[host]#:#[port]#
remote proxy#(remoteProxy)#not used::#[host]#:#[port]##(/remoteProxy)#
this peer address +#(peerAddress)# +not assigned +:: +not assigned. The peer must go online to get an address. +The peer does not go online until you use the proxy to surf the internet, +thus proving that you want to go online. +If you don't know how to configure your system to use a proxy, +see the installation instructions. +:: +#[address]#; your '.yacy' home at http://www.#[peername]#.yacy +#(/peerAddress)# +
this peer name#[peerName]#/#[hash]#
this peer statistics +#(peerStatistics)# +unknown +:: +uptime = #[uptime]#, href# = #[links]#, rwi# = #[words]#, +connects(#[juniorConnects]#|#[seniorConnects]#|#[principalConnects]#|#[disconnects]#) #[connects]# peers/hour +#(/peerStatistics)# +
this peer status +#(peerStatus)# +virgin - you have not yet published your peer, because you have not yet used the proxy. If you configured your proxy setting (see above: configure your browser's proxy settings and go online by browsing the internet) you must first load any page through the proxy to prove that this works. With this status you are not allowed to search other peers. +:: +junior - you cannot be reached. A possible reason is that you are behind a firewall, NAT or Router. But you can search the internet using the other peers global index on your own search page. We encourage you to open your firewall for port 8080, or set a 'virtual server' in your router settings. Please be fair, contribute your own index to the global index. +:: +senior - you are running a server and you support the global internet index, which you can also search yourself. Thank you! +:: +principal - you are senior and you publish your seed-list to a ftp account which can be retrieved at #[seedURL]#. You can of course search the internet using the other peers global index on your own search page. +#(/peerStatus)# +
other peers#(otherPeers)#not online.::#[num]# other peers online.#(/otherPeers)#
seed server +#(seedServer)# +disabled. To enable this you need a ftp account where you can upload files to a web space. If you do that, you become a YACY root server. You can configure your account details on the Settings page. +:: +enabled: updating periodically to server #[seedFTPServer]# +#(/seedServer)# +
auto-popup on start-up +#(popup)# +disabled. To enable this again please use the Settings page +:: +
+enabled +
+#(/popup)# +
online-mode +#(omode)# + +:: +you are in event-based online mode. +The yacy p2p network will boot when you start using the proxy or you switch to permanent mode. +Attention: using the proxy in permanent mode will keep your internet connection online forever. +Use this only if you have a flatrate or you have an always-on connection. +To start permanent mode, press this button: +
+ +
+:: +You are in permanent mode. Attention: if you don't have a flatrate or are always-on, +you must switch off the proxy to go off-line. +#(/omode)# +
+ +

+ +#[footer]# + + diff --git a/htroot/Status.java b/htroot/Status.java new file mode 100644 index 000000000..d8fc3c207 --- /dev/null +++ b/htroot/Status.java @@ -0,0 +1,171 @@ +// Status.java +// ----------------------- +// part of YaCy +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2004 +// last major change: 12.07.2004 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. 
+// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. + +// you must compile this file with +// javac -classpath .:../Classes Status.java +// if the shell's current path is HTROOT + +import java.util.*; +import de.anomic.tools.*; +import de.anomic.server.*; +import de.anomic.yacy.*; +import de.anomic.http.*; + +public class Status { + + public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) { + // return variable that accumulates replacements + serverObjects prop = new serverObjects(); + + // set values + String s; + int pos; + + /* + versionProbe=http://www.anomic.de/AnomicHTTPProxy/release.txt + superseedFile=superseed.txt + */ + // update seed info + yacyCore.peerActions.updateMySeed(); + + // password protection + if (env.getConfig("adminAccountBase64MD5", "").length() == 0) + prop.put("protection", 0);//not protected + else + prop.put("protection", 1);//protected + + // version information + if ((yacyCore.latestVersion == null) || (yacyCore.latestVersion.length() < 3) || (yacyCore.latestVersion.equals(env.getConfig("version","")))) + prop.put("versioncomment", 0);//no commet + else + prop.put("versioncomment", 1);//new version + prop.put("versioncomment_latestVersion", yacyCore.latestVersion); + + prop.put("host", serverCore.publicIP()); + prop.put("port", env.getConfig("port", "")); + if (env.getConfig("remoteProxyUse", "false").equals("true")) { + prop.put("remoteProxy", 1); + prop.put("remoteProxy_host", env.getConfig("remoteProxyHost", "")); + prop.put("remoteProxy_port", env.getConfig("remoteProxyPort", "")); + } else { + prop.put("remoteProxy", 
0);//not used + } + + // peer information + String thisHash = ""; + String thisName = env.getConfig("peerName", ""); + if (yacyCore.seedDB.mySeed == null) { + thisHash = "not assigned"; + prop.put("peerAddress", 0);//not assigned + prop.put("peerStatistics", 0);//unknown + } else { + prop.put("peerStatistics", 1); + prop.put("peerStatistics_uptime", yacyCore.seedDB.mySeed.get("Uptime", "unknown")); + prop.put("peerStatistics_links", yacyCore.seedDB.mySeed.get("LCount", "unknown")); + prop.put("peerStatistics_words", yacyCore.seedDB.mySeed.get("ICount", "unknown")); + prop.put("peerStatistics_juniorConnects", yacyCore.peerActions.juniorConnects); + prop.put("peerStatistics_seniorConnects", yacyCore.peerActions.seniorConnects); + prop.put("peerStatistics_principalConnects", yacyCore.peerActions.principalConnects); + prop.put("peerStatistics_disconnects", yacyCore.peerActions.disconnects); + prop.put("peerStatistics_connects", yacyCore.seedDB.mySeed.get("CCount", "0")); + if (yacyCore.seedDB.mySeed.getAddress() == null) { + thisHash = yacyCore.seedDB.mySeed.hash; + prop.put("peerAddress", 1);//not assigned + instructions + } else { + thisHash = yacyCore.seedDB.mySeed.hash; + prop.put("peerAddress", 2);//Address + prop.put("peerAddress_address", yacyCore.seedDB.mySeed.getAddress()); + prop.put("peerAddress_peername", env.getConfig("peerName", "").toLowerCase()); + } + } + String peerStatus = ((yacyCore.seedDB.mySeed == null) ? 
"virgin" : yacyCore.seedDB.mySeed.get("PeerType", "virgin")); + if (peerStatus.equals("virgin")) { + prop.put("peerStatus", 0);//virgin + } else if (peerStatus.equals("junior")) { + prop.put("peerStatus", 1);//junior + } else if (peerStatus.equals("senior")) { + prop.put("peerStatus", 2);//senior + } else if (peerStatus.equals("principal")) { + prop.put("peerStatus", 3);//principal + prop.put("peerStatus_seedURL", yacyCore.seedDB.mySeed.get("seedURL", "?")); + } + prop.put("peerName", thisName); + prop.put("hash", thisHash); + if ((env.getConfig("seedFTPServer","").length() != 0) && + (env.getConfig("seedFTPAccount","").length() != 0) && + (env.getConfig("seedFTPPassword","").length() != 0) && + (env.getConfig("seedFTPPath","").length() != 0)) { + prop.put("seedServer", 1);//enabled + prop.put("seedServer_seedFTPServer", env.getConfig("seedFTPServer","")); + } else { + prop.put("seedServer", 0);//disabled + } + + if ((yacyCore.seedDB != null) && (yacyCore.seedDB.sizeConnected() > 0)){ + prop.put("otherPeers", 1); + prop.put("otherPeers_num", yacyCore.seedDB.sizeConnected()); + }else{ + prop.put("otherPeers", 0);//not online + } + + // pop-up trigger management + if (post != null) { + if (post.containsKey("dispop")) env.setConfig("browserPopUpTrigger", "false"); + if (post.containsKey("enpop")) env.setConfig("browserPopUpTrigger", "true"); + } + + if (env.getConfig("browserPopUpTrigger", "false").equals("false")) { + prop.put("popup", 0); + } else { + prop.put("popup", 1); + } + + if (env.getConfig("onlineMode", "1").equals("1")) { + prop.put("omode", 1); + } else { + prop.put("omode", 2); + } + + // return rewrite properties + return prop; + } + +} diff --git a/htroot/Steering.html b/htroot/Steering.html new file mode 100644 index 000000000..5ba285dda --- /dev/null +++ b/htroot/Steering.html @@ -0,0 +1,32 @@ + + + +YACY: Settings Acknowledge +#[metas]# + + +#[header]# +

+

Steering Receipt:

+ +

+#(info)# +No information has been submitted
+Nothing changed +:: +Your system is not protected by a password
+Please go to the Settings page and set an administration password +:: +You don't have the correct access right to perform this task.
+Please log in. +:: +Shutting down. +
Application will terminate after working off all scheduled tasks. +:: +#(/info)# +

+

You can now go back to the Settings page if you want to make more changes.

+ +#[footer]# + + diff --git a/htroot/Steering.java b/htroot/Steering.java new file mode 100644 index 000000000..aeda81828 --- /dev/null +++ b/htroot/Steering.java @@ -0,0 +1,84 @@ +// Steering.java +// ----------------------- +// part of YaCy +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2004, 2005 +// last major change: 18.02.2005 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. 
+// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. + +// you must compile this file with +// javac -classpath .:../Classes SettingsAck_p.java +// if the shell's current path is HTROOT + +import java.util.*; +import java.io.*; +import de.anomic.plasma.*; +import de.anomic.server.*; +import de.anomic.yacy.*; +import de.anomic.http.*; + +public class Steering { + + public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) { + plasmaSwitchboard switchboard = (plasmaSwitchboard) env; + serverObjects prop = new serverObjects(); + prop.put("info", 0);//no information submitted + if (post == null) return prop; + + // handle access rights + switch (switchboard.adminAuthenticated(header)) { + case 0: // wrong password given + try {Thread.currentThread().sleep(3000);} catch (InterruptedException e) {} + case 1: // no password given + prop.put("AUTHENTICATE", "admin log-in"); // force log-in + return prop; + case 2: // no password stored + prop.put("info", 1); // actions only with password + return prop; + case 3: // soft-authenticated for localhost only + case 4: // hard-authenticated, all ok + } + + if (post.containsKey("shutdown")) { + switchboard.terminate = true; + prop.put("info", 3);//shutting down + return prop; + } + + return prop; + } + +} diff --git a/htroot/ViewLog_p.html b/htroot/ViewLog_p.html new file mode 100644 index 000000000..66fdfe7e3 --- /dev/null +++ b/htroot/ViewLog_p.html @@ -0,0 +1,25 @@ + + + +YACY: Log +#[metas]# + + +#[header]# +

+

Log


+ + +
+ + + + +
Linesreversed order
+
+ +#[footer]# + + diff --git a/htroot/ViewProfile.html b/htroot/ViewProfile.html new file mode 100644 index 000000000..870701e9a --- /dev/null +++ b/htroot/ViewProfile.html @@ -0,0 +1,77 @@ + + + +YACY: Remote Peer Profile +#[metas]# + + +#[header]# +

+

Remote Peer Profile:

+

+#(success)# +Wrong access of this page +:: +The requested peer is not known +:: +The peer #[peername]# is not online. +:: +

This is #[peername]#'s Profile:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +#{other}# + + + + +#{/other}# +
Name#(name)# ::#[value]##(/name)#
Nick Name#(nickname)# ::#[value]##(/nickname)#
Homepage#(homepage)# ::#[value]##(/homepage)#
EMail#(email)# ::#[value]##(/email)#
 
ICQ#(icq)# ::#[value]##(/icq)#
Jabber#(jabber)# ::#[value]##(/jabber)#
Yahoo!#(yahoo)# ::#[value]##(/yahoo)#
MSN#(msn)# ::#[value]##(/msn)#
 
Comment
#[key]##[value]#
+#(/success)# +#[footer]# + + diff --git a/htroot/ViewProfile.java b/htroot/ViewProfile.java new file mode 100644 index 000000000..2d3e130f0 --- /dev/null +++ b/htroot/ViewProfile.java @@ -0,0 +1,132 @@ +// ViewProfile_p.java +// ----------------------- +// part of YaCy +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2004, 2005 +// +// This File is contributed by Alexander Schier +// last change: 27.02.2005 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. 
The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. +// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. + +// you must compile this file with +// javac -classpath .:../Classes Blacklist_p.java +// if the shell's current path is HTROOT + +import java.util.*; +import java.io.*; +import java.net.*; +import de.anomic.server.*; +import de.anomic.http.*; +import de.anomic.plasma.*; +import de.anomic.yacy.*; + +public class ViewProfile { + + public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) { + //listManager.switchboard = (plasmaSwitchboard) env; + serverObjects prop = new serverObjects(); + plasmaSwitchboard switchboard = (plasmaSwitchboard) env; + + if ((post != null) && (post.containsKey("hash")) && (yacyCore.seedDB != null)) { //no nullpointer error.. 
+ yacySeed seed = yacyCore.seedDB.getConnected((String)post.get("hash")); + if (seed == null) { + seed = yacyCore.seedDB.getDisconnected((String)post.get("hash")); + if (seed == null) { + prop.put("success","1"); // peer unknown + } else { + prop.put("success","2"); // peer known, but disconnected + prop.put("success_peername", seed.getName()); + } + } else { + prop.put("success","3"); // all ok + HashMap profile = yacyClient.getProfile(seed); + System.out.println("fetched profile:" + profile); + Iterator i = profile.entrySet().iterator(); + Map.Entry entry; + //all known Keys which should be set as they are + Vector knownKeys = new Vector(); + knownKeys.add("name"); + knownKeys.add("nickname"); + //knownKeys.add("homepage");//+http + knownKeys.add("email"); + knownKeys.add("icq"); + knownKeys.add("jabber"); + knownKeys.add("yahoo"); + knownKeys.add("msn"); + knownKeys.add("comment"); + + //empty values + Iterator it=knownKeys.iterator(); + while(it.hasNext()){ + prop.put("success_"+(String)it.next(), 0); + } + + //number of not explicitly recopgnized but displayed items + int numUnknown=0; + while (i.hasNext()) { + entry = (Map.Entry) i.next(); + String key=(String)entry.getKey(); + String value=(String)entry.getValue(); + //all known Keys which should be set as they are + if(knownKeys.contains(key)){ + prop.put("success_"+key, 1); + prop.put("success_"+key+"_value", value); + //special handling, hide flower if no icq uin is set + }else if(key.equals("homepage")){ + if(! 
(value.startsWith("http")) ){ + value="http://"+value; + } + prop.put("success_"+key, 1); + prop.put("success_"+key+"_value", value); + //This will display Unknown Items(of newer versions) as plaintext + }else{//unknown + prop.put("success_other_"+numUnknown+"_key", key); + prop.put("success_other_"+numUnknown+"_value", value); + numUnknown++; + } + } + prop.put("success_other", numUnknown); + //prop.putAll(profile); + prop.put("success_peername", seed.getName()); + } + } else { + prop.put("success","0"); // wrong access + } + + return prop; + } + +} diff --git a/htroot/Wiki.html b/htroot/Wiki.html new file mode 100644 index 000000000..437f50f53 --- /dev/null +++ b/htroot/Wiki.html @@ -0,0 +1,13 @@ + + + +yacyWiki +#[metas]# + + +#[header]# +

#[pagecontent]#

+

#[pageedit]#

+#[footer]# + + diff --git a/htroot/Wiki.java b/htroot/Wiki.java new file mode 100644 index 000000000..29c7676a8 --- /dev/null +++ b/htroot/Wiki.java @@ -0,0 +1,393 @@ +// Wiki.java +// ----------------------- +// part of the AnomicHTTPD caching proxy +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2004 +// last major change: 01.07.2003 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. 
+// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. + +// Contains contributions from Alexandier Schier [AS] + +// you must compile this file with +// javac -classpath .:../classes Wiki.java +// if the shell's current path is HTROOT + +import java.util.*; +import java.text.*; +import java.io.*; +import de.anomic.tools.*; +import de.anomic.server.*; +//import de.anomic.yacy.*; +import de.anomic.data.*; +import de.anomic.plasma.*; +import de.anomic.http.*; + +public class Wiki { + + private static String ListLevel = ""; + private static String numListLevel = ""; + + private static SimpleDateFormat SimpleFormatter = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss"); + public static String dateString(Date date) { + return SimpleFormatter.format(date); + } + + + public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) { + plasmaSwitchboard switchboard = (plasmaSwitchboard) env; + serverObjects prop = new serverObjects(); + if (post == null) { + post = new serverObjects(); + post.put("page", "start"); + } + + String pagename = post.get("page", "start"); + String ip = post.get("CLIENTIP", "127.0.0.1"); + String author = post.get("author", "anonymous"); + if (author.equals("anonymous")) { + author = switchboard.wikiDB.guessAuthor(ip); + if (author == null) { + if (de.anomic.yacy.yacyCore.seedDB.mySeed == null) + author = "anonymous"; + else + author = de.anomic.yacy.yacyCore.seedDB.mySeed.get("Name", "anonymous"); + } + } + + if (post.containsKey("submit")) { + // store a new page + switchboard.wikiDB.write(switchboard.wikiDB.newEntry(pagename, author, ip, + 
post.get("reason", "edit"), + ((String) post.get("content", "")).getBytes())); + } + + wikiBoard.entry page = switchboard.wikiDB.read(pagename); + + if (post.containsKey("edit")) { + // edit the page + try { + prop.put("pagecontent", ""); + prop.put("pageedit", + "
" + + //"" + + "

Author:

" + + "

Text:

" + + "" + + "" + + "" + + "" + + "
"); + } catch (UnsupportedEncodingException e) {} + } else if (post.containsKey("index")) { + // view an index + String index = "" + + ""; + + String subject; + try { + Iterator i = switchboard.wikiDB.keys(true); + wikiBoard.entry entry; + while (i.hasNext()) { + subject = (String) i.next(); + entry = switchboard.wikiDB.read(subject); + index += ""; + index += ""; + index += ""; + index += ""; + index += ""; + } + } catch (IOException e) { + index += "IO Error reading wiki database: " + e.getMessage(); + } + index += "
SubjectChange DateAuthor
" + subject + "" + dateString(entry.date()) + "" + entry.author() + "
"; + prop.put("pagecontent", index); + prop.put("pageedit", + "
" + + "" + + "" + + "
"); + } else { + // show page + prop.put("pagecontent", + "" + + "" + + "
 " + + "yacyWiki page: " + pagename + ",   last edited by " + page.author() + ",   change date " + dateString(page.date()) + + "
" + + "
" + + transform(page.page(), switchboard) + + "
" + + "
"); + + prop.put("pageedit", + "
" + + "" + + "" + + "" + + "" + + "
"); + } + + // return rewrite properties + return prop; + } + + + public static String transform(byte[] content, plasmaSwitchboard switchboard) { + ByteArrayInputStream bais = new ByteArrayInputStream(content); + BufferedReader br = new BufferedReader(new InputStreamReader(bais)); + String line; + String out = ""; + try { + while ((line = br.readLine()) != null) { + out += transformLine(new String(line), switchboard) + serverCore.crlfString; + } + return out; + } catch (IOException e) { + return "internal error: " + e.getMessage(); + } + } + + public static String transformLine(String result, plasmaSwitchboard switchboard) { + // transform page + int p0, p1; + + // avoide html inside + //p0 = 0; while ((p0 = result.indexOf("&", p0+1)) >= 0) result = result.substring(0, p0) + "&" + result.substring(p0 + 1); + p0 = 0; while ((p0 = result.indexOf('"', p0+1)) >= 0) result = result.substring(0, p0) + """ + result.substring(p0 + 1); + p0 = 0; while ((p0 = result.indexOf("<", p0+1)) >= 0) result = result.substring(0, p0) + "<" + result.substring(p0 + 1); + p0 = 0; while ((p0 = result.indexOf(">", p0+1)) >= 0) result = result.substring(0, p0) + ">" + result.substring(p0 + 1); + //p0 = 0; while ((p0 = result.indexOf("*", p0+1)) >= 0) result = result.substring(0, p0) + "•" + result.substring(p0 + 1); + p0 = 0; while ((p0 = result.indexOf("(C)", p0+1)) >= 0) result = result.substring(0, p0) + "©" + result.substring(p0 + 3); + + // format lines + if (result.startsWith(" ")) result = "" + result + ""; + if (result.startsWith("----")) result = "
"; + + // format headers + if ((p0 = result.indexOf("====")) >= 0) { + p1 = result.indexOf("====", p0 + 4); + if (p1 >= 0) result = result.substring(0, p0) + "

" + + result.substring(p0 + 4, p1) + "

" + + result.substring(p1 + 4); + } + if ((p0 = result.indexOf("===")) >= 0) { + p1 = result.indexOf("===", p0 + 3); + if (p1 >= 0) result = result.substring(0, p0) + "

" + + result.substring(p0 + 3, p1) + "

" + + result.substring(p1 + 3); + } + if ((p0 = result.indexOf("==")) >= 0) { + p1 = result.indexOf("==", p0 + 2); + if (p1 >= 0) result = result.substring(0, p0) + "

" + + result.substring(p0 + 2, p1) + "

" + + result.substring(p1 + 2); + } + + if ((p0 = result.indexOf("''''")) >= 0) { + p1 = result.indexOf("''''", p0 + 4); + if (p1 >= 0) result = result.substring(0, p0) + "" + + result.substring(p0 + 4, p1) + "" + + result.substring(p1 + 4); + } + if ((p0 = result.indexOf("'''")) >= 0) { + p1 = result.indexOf("'''", p0 + 3); + if (p1 >= 0) result = result.substring(0, p0) + "" + + result.substring(p0 + 3, p1) + "" + + result.substring(p1 + 3); + } + if ((p0 = result.indexOf("''")) >= 0) { + p1 = result.indexOf("''", p0 + 2); + if (p1 >= 0) result = result.substring(0, p0) + "" + + result.substring(p0 + 2, p1) + "" + + result.substring(p1 + 2); + } + + //* unorderd Lists contributed by [AS] + //** Sublist + if(result.startsWith(ListLevel + "*")){ //more stars + p0 = result.indexOf(ListLevel); + p1 = result.length(); + result = "
    " + serverCore.crlfString + + "
  • " + + result.substring(ListLevel.length() + 1, p1) + + "
  • "; + ListLevel += "*"; + }else if(ListLevel.length() > 0 && result.startsWith(ListLevel)){ //equal number of stars + p0 = result.indexOf(ListLevel); + p1 = result.length(); + result = "
  • " + + result.substring(ListLevel.length(), p1) + + "
  • "; + }else if(ListLevel.length() > 0){ //less stars + int i = ListLevel.length(); + String tmp = ""; + + while(! result.startsWith(ListLevel.substring(0,i)) ){ + tmp += "
"; + i--; + } + ListLevel = ListLevel.substring(0,i); + p0 = ListLevel.length(); + p1 = result.length(); + + if(ListLevel.length() > 0){ + result = tmp + + "
  • " + + result.substring(p0, p1) + + "
  • "; + }else{ + result = tmp + result.substring(p0, p1); + } + } + + + //# sorted Lists contributed by [AS] + //## Sublist + if(result.startsWith(numListLevel + "#")){ //more # + p0 = result.indexOf(numListLevel); + p1 = result.length(); + result = "
      " + serverCore.crlfString + + "
    1. " + + result.substring(numListLevel.length() + 1, p1) + + "
    2. "; + numListLevel += "#"; + }else if(numListLevel.length() > 0 && result.startsWith(numListLevel)){ //equal number of # + p0 = result.indexOf(numListLevel); + p1 = result.length(); + result = "
    3. " + + result.substring(numListLevel.length(), p1) + + "
    4. "; + }else if(numListLevel.length() > 0){ //less # + int i = numListLevel.length(); + String tmp = ""; + + while(! result.startsWith(numListLevel.substring(0,i)) ){ + tmp += "
    "; + i--; + } + numListLevel = numListLevel.substring(0,i); + p0 = numListLevel.length(); + p1 = result.length(); + + if(numListLevel.length() > 0){ + result = tmp + + "
  • " + + result.substring(p0, p1) + + "
  • "; + }else{ + result = tmp + result.substring(p0, p1); + } + } + // end contrib [AS] + + + // create links + String kl, kv; + int p; + // internal links + while ((p0 = result.indexOf("[[")) >= 0) { + p1 = result.indexOf("]]", p0 + 2); + if (p1 <= p0) break; else; { + kl = result.substring(p0 + 2, p1); + if ((p = kl.indexOf("|")) > 0) { + kv = kl.substring(p + 1); + kl = kl.substring(0, p); + } else { + kv = kl; + } + if (switchboard.wikiDB.read(kl) != null) + result = result.substring(0, p0) + + "" + kv + "" + + result.substring(p1 + 2); + else + result = result.substring(0, p0) + + "" + kv + "" + + result.substring(p1 + 2); + } + } + + // external links + while ((p0 = result.indexOf("[")) >= 0) { + p1 = result.indexOf("]", p0 + 1); + if (p1 <= p0) break; else { + kl = result.substring(p0 + 1, p1); + if ((p = kl.indexOf(" ")) > 0) { + kv = kl.substring(p + 1); + kl = kl.substring(0, p); + } else { + kv = kl; + } + if (!(kl.startsWith("http://"))) kl = "http://" + kl; + result = result.substring(0, p0) + + "" + kv + "" + + result.substring(p1 + 1); + } + } + + if (result.endsWith("")) return result; else return result + "
    "; + } + + /* + what we need: + + == New section == + === Subsection === + ==== Sub-subsection ==== + link colours: existent=green, non-existent=red + ---- + [[wikipedia FAQ|answers]] (first element is wiki page name, second is link print name) + [http://www.nupedia.com Nupedia] (external link) + [http://www.nupedia.com] (un-named external link) + ''Emphasize'', '''strongly''', '''''very strongly''''' (italics, bold, bold-italics) + + * Lists are easy to do: + ** start every line with a star + *** more stars means deeper levels + # Numbered lists are also good + ## very organized + ## easy to follow + ; Definition list : list of definitions + ; item : the item's definition + : A colon indents a line or paragraph. + A manual newline starts a new paragraph. + + A picture: [[Image:Wiki.png]] + [[Image:Wiki.png|right|jigsaw globe]] (floating right-side with caption) + + */ + +} diff --git a/htroot/env/grafics/buttonImage.gif b/htroot/env/grafics/buttonImage.gif new file mode 100644 index 000000000..426a4b5c7 Binary files /dev/null and b/htroot/env/grafics/buttonImage.gif differ diff --git a/htroot/env/grafics/dir.gif b/htroot/env/grafics/dir.gif new file mode 100644 index 000000000..f23c3acf9 Binary files /dev/null and b/htroot/env/grafics/dir.gif differ diff --git a/htroot/env/grafics/doc.gif b/htroot/env/grafics/doc.gif new file mode 100644 index 000000000..326b81bbb Binary files /dev/null and b/htroot/env/grafics/doc.gif differ diff --git a/htroot/env/grafics/fileIcon.gif b/htroot/env/grafics/fileIcon.gif new file mode 100644 index 000000000..ef8a995e2 Binary files /dev/null and b/htroot/env/grafics/fileIcon.gif differ diff --git a/htroot/env/grafics/fileIconSmall.gif b/htroot/env/grafics/fileIconSmall.gif new file mode 100644 index 000000000..5b84d3fd8 Binary files /dev/null and b/htroot/env/grafics/fileIconSmall.gif differ diff --git a/htroot/env/grafics/folderIcon.gif b/htroot/env/grafics/folderIcon.gif new file mode 100644 index 000000000..d13ad1863 Binary 
files /dev/null and b/htroot/env/grafics/folderIcon.gif differ diff --git a/htroot/env/grafics/folderIconSmall.gif b/htroot/env/grafics/folderIconSmall.gif new file mode 100644 index 000000000..3b6175c3f Binary files /dev/null and b/htroot/env/grafics/folderIconSmall.gif differ diff --git a/htroot/env/grafics/indexmonitor.gif b/htroot/env/grafics/indexmonitor.gif new file mode 100644 index 000000000..48cb01ba5 Binary files /dev/null and b/htroot/env/grafics/indexmonitor.gif differ diff --git a/htroot/env/grafics/kaskelix.gif b/htroot/env/grafics/kaskelix.gif new file mode 100644 index 000000000..ed4b7fbb5 Binary files /dev/null and b/htroot/env/grafics/kaskelix.gif differ diff --git a/htroot/env/grafics/kaskelix.jpg b/htroot/env/grafics/kaskelix.jpg new file mode 100644 index 000000000..6ddde6f21 Binary files /dev/null and b/htroot/env/grafics/kaskelix.jpg differ diff --git a/htroot/env/grafics/lock.gif b/htroot/env/grafics/lock.gif new file mode 100644 index 000000000..dd6929a7f Binary files /dev/null and b/htroot/env/grafics/lock.gif differ diff --git a/htroot/env/grafics/mcemailh.gif b/htroot/env/grafics/mcemailh.gif new file mode 100644 index 000000000..7f63a4c41 Binary files /dev/null and b/htroot/env/grafics/mcemailh.gif differ diff --git a/htroot/env/grafics/notifier.gif b/htroot/env/grafics/notifier.gif new file mode 100644 index 000000000..330c7e881 Binary files /dev/null and b/htroot/env/grafics/notifier.gif differ diff --git a/htroot/env/grafics/notifierActive.gif b/htroot/env/grafics/notifierActive.gif new file mode 100644 index 000000000..761a15a99 Binary files /dev/null and b/htroot/env/grafics/notifierActive.gif differ diff --git a/htroot/env/grafics/notifierInactive.gif b/htroot/env/grafics/notifierInactive.gif new file mode 100644 index 000000000..330c7e881 Binary files /dev/null and b/htroot/env/grafics/notifierInactive.gif differ diff --git a/htroot/env/grafics/yacy.gif b/htroot/env/grafics/yacy.gif new file mode 100644 index 000000000..b8dbdbc6c 
Binary files /dev/null and b/htroot/env/grafics/yacy.gif differ diff --git a/htroot/env/style.css b/htroot/env/style.css new file mode 100644 index 000000000..a66fe4773 --- /dev/null +++ b/htroot/env/style.css @@ -0,0 +1,162 @@ +body { + background-color:#F8F8FF; +} +h1 { +font-family:Helvetica, sans-serif; +font-size:16pt; +font-style:normal; +line-height:14pt; +margin-top:0pt; + margin-bottom:0pt; +} +h2 { +font-family:Helvetica, sans-serif; +font-size:14pt; +font-style:normal; +line-height:14pt; +margin-top:0pt; + margin-bottom:0pt; +} +h3 { +font-family:Helvetica, sans-serif; +font-size:12pt; +font-style:normal; +line-height:14pt; +margin-top:0pt; + margin-bottom:0pt; +} +h4 { +font-family:Helvetica, sans-serif; +font-size:10pt; +font-style:normal; +line-height:14pt; +margin-top:0pt; + margin-bottom:0pt; +} +a, body, div, li, ol, span, table, td, tr, ul { +color:#000000; +font-family:Helvetica, sans-serif; +font-size:10pt; +font-style:normal; +line-height:14px; +margin-top:0px; +margin-bottom:0px; +} +*.small { +font-family:Helvetica, sans-serif; +font-size:8pt; +font-style:normal; +line-height:9px; +margin-top:0px; +margin-bottom:0px; +} +body, div, li, ol, span, table, td, tr, ul { + +text-decoration:none; +} +a:hover { +color:#0000FF; +} +a.unknown { +color:#990000; +} +a.known { +color:#009900; +} +a.extern { +color:#000099; +} +*.xxxyellow { +color:#0000FF; +font-weight:bold; +text-decoration:none; +font-size:11px; +} +*.xxxblue { +color:#556699; +font-weight:bold; +text-decoration:none; +font-size:11px; +} +*.tt { +font-family:Courier,Terminal,sans-serif; +font-size:8pt; +line-height:9px; +} +.Menu { +width: 140; +} +.SubMenu { + +} +.MenuHeader { +background-color: #4070a0; +color:white; +font-size: 9px; +font-weight:bold; +} +.MenuItem { +background-color: #bdcdd4; +font-weight:bold; +text-decoration:none; +font-size:11px; +padding-top: 2; +padding-bottom: 2; +} +.MenuSubItem { +background-color: #bdcdd4; +font-weight:bold; +text-decoration:none; 
+font-size:11px; +padding-top: 3; +padding-bottom: 3; +} +.MenuItemLink { +color: black; +font-weight:bold; +text-decoration:none; +font-size:11px; +} +a.MenuItemLink:hover { +color:#0088BB; +font-weight:bold; +text-decoration:none; +} +.MenuSpacer { +height: 4; +background-color: #ffffff; +} +.MenuSubSpacer { +padding-left: 2; +padding-right: 2; +background-color: #ffffff; +} +.Headline { +color: white; +font-weight: bold; +font-size: 160%; +font-family: Helvetica, Arial; +text-align: center; +} +.Heading { +background-color: #4070a0; +height: 36px; +} +.TableHeader { +background-color: #88aaaa; +} +.TableCellDark { +background-color: #bbcccc; +} +.TableCellLight { +background-color: #ddeeee; +} +.TableCellSummary { +background-color: #ffbbaa; +} +.WikiBackground { +background-color: #eeeeee; +} +.MessageBackground { +background-color: #eeeeee; +} diff --git a/htroot/env/templates/footer.template b/htroot/env/templates/footer.template new file mode 100644 index 000000000..27e828c29 --- /dev/null +++ b/htroot/env/templates/footer.template @@ -0,0 +1,6 @@ +

    + + + + + diff --git a/htroot/env/templates/header.template b/htroot/env/templates/header.template new file mode 100644 index 000000000..b435b3405 --- /dev/null +++ b/htroot/env/templates/header.template @@ -0,0 +1,84 @@ + + + + + +
    + + + + + + + +
    + + + + + + + +
    +
    + YACY - Distributed Web Indexing - Administration +
    + +
    +
    +   +
    +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \n"; + } + } + //write the list + try{ + BufferedWriter bw = new BufferedWriter(new PrintWriter(new FileWriter(new File(listsPath, filename)))); + bw.write(out); + bw.close(); + }catch(IOException e){} + + }else if( post != null && post.containsKey("url") ){ + //load from URL + address = (String)post.get("url"); + status = "URL \"" + address + "\" not found or empty List"; //will later be resetted + //Name = " "; //No Name + Name = address; + + try { + otherBlacklist = httpc.wget(new URL(address), 6000, null, null, switchboard.remoteProxyHost, switchboard.remoteProxyPort); //get List + } catch (Exception e) {} + status = ""; //TODO: check if the wget failed... + + //Make HTML-Optionlist with retrieved items + for(i = 0; i <= (otherBlacklist.size() -1); i++){ + String tmp = (String) otherBlacklist.get(i); + if( !Blacklist.contains(tmp) && (!tmp.equals("")) && (!tmp.startsWith("#")) ){ //This List may contain comments. + //newBlacklist.add(tmp); + count++; + HTMLout += "\n"; + } + } + + }else if( post != null && post.containsKey("file") ){ + + try{ + //Read the List + BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream( (String)post.get("file") ))); + while((line = br.readLine()) != null){ + if(! (line.startsWith("#") || line.equals("")) ){ + otherBlacklist.add(line); + } + } + br.close(); + }catch(IOException e){ + status = "File Error! Wrong Path?"; + } + status = ""; + Name = (String)post.get("file"); + + //Make HTML-Optionlist with retrieved items + for(i = 0; i <= (otherBlacklist.size() -1); i++){ + String tmp = (String) otherBlacklist.get(i); + if( !Blacklist.contains(tmp) && (!tmp.equals("")) && (!tmp.startsWith("#")) ){ //This List may contain comments. 
+ //newBlacklist.add(tmp); + count++; + HTMLout += "\n"; + } + } + + }else if( post != null && post.containsKey("add") ){ //Step 2: Add the Items + num = Integer.parseInt( (String)post.get("num") ); + for(i=1;i <= num; i++){ //count/num starts with 1! + if( post.containsKey( String.valueOf(i) ) ){ + String newItem = (String)post.get( String.valueOf(i) ); + //This should not be needed... + if ( newItem.startsWith("http://") ){ + newItem = newItem.substring(7); + } + // separate the newItem into host and path + int pos = newItem.indexOf("/"); + if (pos < 0) { + // add default empty path pattern + pos = newItem.length(); + newItem = newItem + "/.*"; + } + out += newItem+"\n"; + status += ""+newItem+" was added to the Blacklist
    \n"; + if (httpdProxyHandler.blackListURLs != null) + httpdProxyHandler.blackListURLs.put(newItem.substring(0, pos), newItem.substring(pos + 1)); + + //write the list + try{ + BufferedWriter bw = new BufferedWriter(new PrintWriter(new FileWriter(new File(listsPath, filename)))); + bw.write(out); + bw.close(); + }catch(IOException e){} + + }else{ + } + } + }else{ + status = "Wrong Invocation! Please invoke with sharedBlacklist.html?name=PeerName"; + } + + prop.put("filename", filename); + prop.put("status", status); + prop.put("table",HTMLout); + prop.put("name", Name); + prop.put("num", String.valueOf(count)); + return prop; + } + +} diff --git a/htroot/yacy/crawlOrder.html b/htroot/yacy/crawlOrder.html new file mode 100644 index 000000000..daf73e7de --- /dev/null +++ b/htroot/yacy/crawlOrder.html @@ -0,0 +1,9 @@ +version=#[version]# +uptime=#[uptime]# +response=#[response]# +reason=#[reason]# +delay=#[delay]# +depth=#[depth]# +lurl=#[lurl]# +forward=#[forward]# +key=#[key]# \ No newline at end of file diff --git a/htroot/yacy/crawlOrder.java b/htroot/yacy/crawlOrder.java new file mode 100644 index 000000000..6f13493cb --- /dev/null +++ b/htroot/yacy/crawlOrder.java @@ -0,0 +1,184 @@ +// crawlOrder.java +// ----------------------- +// part of the AnomicHTTPD caching proxy +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2004 +// last change: 02.05.2004 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. 
+// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. +// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. 
+ +// you must compile this file with +// javac -classpath .:../classes crawlOrder.java + + +import de.anomic.server.*; +import de.anomic.http.*; +import de.anomic.plasma.*; +import de.anomic.yacy.*; +import de.anomic.tools.*; +import java.util.*; +import java.net.*; + +public class crawlOrder { + + + public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) { + // return variable that accumulates replacements + plasmaSwitchboard switchboard = (plasmaSwitchboard) env; + serverObjects prop = new serverObjects(); + + if ((post == null) || (env == null)) return prop; + + int proxyPrefetchDepth = Integer.parseInt(env.getConfig("proxyPrefetchDepth", "0")); + int crawlingdepth = Integer.parseInt(env.getConfig("crawlingDepth", "0")); + + // request values + String iam = (String) post.get("iam", ""); // seed hash of requester + String youare = (String) post.get("youare", ""); // seed hash of the target peer, needed for network stability + String process = (String) post.get("process", ""); // process type + String key = (String) post.get("key", ""); // transmission key + String url = crypt.simpleDecode((String) post.get("url", ""), key); // the url string to crawl + String referrer = crypt.simpleDecode((String) post.get("referrer", ""), key); // the referrer url + int orderDepth = Integer.parseInt((String) post.get("depth", "0")); // crawl depth + + // response values + /* + the result can have one of the following values: + negative cases, no retry + denied - the peer does not want to crawl that + exception - an exception occurred + + negative case, retry possible + rejected - the peer has rejected to process, but a re-try should be possible + + positive case with crawling + stacked - the resource is processed asap + + positive case without crawling + double - the resource is already in database, believed to be fresh and not reloaded + the resource is also returned in lurl + */ + String response = "denied"; + String reason = 
"false-input"; + String delay = "5"; + String lurl = ""; + boolean granted = switchboard.getConfig("crawlResponse", "false").equals("true"); + int acceptDepth = Integer.parseInt(switchboard.getConfig("crawlResponseDepth", "0")); + int acceptDelay = Integer.parseInt(switchboard.getConfig("crawlResponseDelay", "0")); + + if (orderDepth > acceptDepth) orderDepth = acceptDepth; + + // check if requester is authorized + if ((yacyCore.seedDB.mySeed == null) || (!(yacyCore.seedDB.mySeed.hash.equals(youare)))) { + // this request has a wrong target + response = "denied"; + reason = "authentify-problem"; + delay = "3600"; // may request one hour later again + } else if (orderDepth > 0) { + response = "denied"; + reason = "order must be 0"; + delay = "3600"; // may request one hour later again + } else if (!(granted)) { + response = "denied"; + reason = "not granted to remote crawl"; + delay = "3600"; // may request one hour later again + } else try { + yacySeed requester = yacyCore.seedDB.getConnected(iam); + int queuesize = switchboard.queueSize(); + String urlhash = plasmaURL.urlHash(new URL(url)); + if (requester == null) { + response = "denied"; + reason = "unknown-client"; + delay = "240"; + } else if (!((requester.isSenior()) || (requester.isPrincipal()))) { + response = "denied"; + reason = "not-qualified"; + delay = "240"; + } else if (queuesize > 1) { + response = "rejected"; + reason = "busy"; + delay = "" + (queuesize * acceptDelay); + } else if (!(process.equals("crawl"))) { + response = "denied"; + reason = "unknown-order"; + delay = "9999"; + } else { + // stack url + String reasonString = switchboard.stackCrawl(url, referrer, iam, "REMOTE-CRAWLING", new Date(), 0, switchboard.defaultRemoteProfile); + if (reasonString == null) { + // liftoff! 
+ response = "stacked"; + reason = "ok"; + delay = "" + acceptDelay; // this value needs to be calculated individually + } else if (reasonString.equals("double_(already_loaded)")) { + // case where we have already the url loaded; + reason = reasonString; + delay = "" + (acceptDelay / 4); + // send lurl-Entry as response + plasmaCrawlLURL.entry entry = switchboard.loadedURL.getEntry(plasmaCrawlLURL.urlHash(url)); + if (entry != null) { + response = "double"; + switchboard.loadedURL.notifyGCrawl(entry.hash(), iam, youare); + lurl = crypt.simpleEncode(entry.toString()); + delay = "1"; + } else { + response = "rejected"; + } + } else { + response = "rejected"; + reason = reasonString; + delay = "" + (acceptDelay / 4); + } + } + } catch (Exception e) { + // mist + e.printStackTrace(); + reason = "ERROR: " + e.getMessage(); + delay = "600"; + } + + prop.put("response", response); + prop.put("reason", reason); + prop.put("delay", delay); + prop.put("depth", acceptDepth); + prop.put("lurl", lurl); + prop.put("forward", ""); + prop.put("key", key); + + // return rewrite properties + return prop; + } + +} diff --git a/htroot/yacy/crawlReceipt.html b/htroot/yacy/crawlReceipt.html new file mode 100644 index 000000000..a1a398b88 --- /dev/null +++ b/htroot/yacy/crawlReceipt.html @@ -0,0 +1,3 @@ +version=#[version]# +uptime=#[uptime]# +delay=#[delay]# diff --git a/htroot/yacy/crawlReceipt.java b/htroot/yacy/crawlReceipt.java new file mode 100644 index 000000000..0d216be6e --- /dev/null +++ b/htroot/yacy/crawlReceipt.java @@ -0,0 +1,129 @@ +// crawlReceipt.java +// ----------------------- +// part of the AnomicHTTPD caching proxy +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2004 +// last change: 02.05.2004 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the 
License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. +// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. 
+ +// you must compile this file with +// javac -classpath .:../classes crawlOrder.java + + +import de.anomic.server.*; +import de.anomic.http.*; +import de.anomic.plasma.*; +import de.anomic.yacy.*; +import de.anomic.tools.*; +import java.util.*; +import java.net.*; + +public class crawlReceipt { + + + /* + * this is used to respond on a remote crawling request + */ + + public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) { + // return variable that accumulates replacements + plasmaSwitchboard switchboard = (plasmaSwitchboard) env; + serverObjects prop = new serverObjects(); + + if ((post == null) || (env == null)) return prop; + + int proxyPrefetchDepth = Integer.parseInt(env.getConfig("proxyPrefetchDepth", "0")); + int crawlingDepth = Integer.parseInt(env.getConfig("crawlingDepth", "0")); + + // request values + String iam = (String) post.get("iam", ""); // seed hash of requester + String youare = (String) post.get("youare", ""); // seed hash of the target peer, needed for network stability + String process = (String) post.get("process", ""); // process type + String key = (String) post.get("key", ""); // transmission key + String urlhash = (String) post.get("urlhash", ""); // the url hash that has been crawled + String result = (String) post.get("result", ""); // the result; either "ok" or "fail" + String reason = (String) post.get("reason", ""); // the reason for that result + String words = (String) post.get("wordh", ""); // priority word hashes + String propStr = crypt.simpleDecode((String) post.get("lurlEntry", ""), key); + + /* + the result can have one of the following values: + negative cases, no retry + unavailable - the resource is not available (a broken link); not found or interrupted + exception - an exception occurred + robot - a robot-file has denied to crawl that resource + + negative cases, retry possible + rejected - the peer has rejected to load the resource + dequeue - peer too busy - rejected to crawl 
+ + positive cases with crawling + fill - the resource was loaded and processed + update - the resource was already in database but re-loaded and processed + + positive cases without crawling + known - the resource is already in database, believed to be fresh and not reloaded + stale - the resource was reloaded but not processed because source had no changes + + */ + + if ((yacyCore.seedDB.mySeed == null) || (!(yacyCore.seedDB.mySeed.hash.equals(youare)))) { + // no yacy connection / unknown peers + prop.put("delay", "3600"); + } else if (propStr == null) { + // error with url / wrong key + prop.put("delay", "3600"); + } else if (result.equals("fill")) { + // put new data into database + switchboard.loadedURL.newEntry(propStr, true, youare, iam, 1); + switchboard.noticeURL.remove(urlhash); + + // ready for more + prop.put("delay", "10"); + } else { + plasmaCrawlNURL.entry en = switchboard.noticeURL.getEntry(urlhash); + if (en != null) { + switchboard.errorURL.newEntry(en.url(), en.referrerHash(), en.initiator(), iam, en.name(), result + ":" + reason, new bitfield(plasmaURL.urlFlagLength), false); + switchboard.noticeURL.remove(urlhash); + } + prop.put("delay", "100"); // what shall we do with that??? 
+ } + + // return rewrite properties + return prop; + } + +} diff --git a/htroot/yacy/hello.html b/htroot/yacy/hello.html new file mode 100644 index 000000000..7e1dfbef0 --- /dev/null +++ b/htroot/yacy/hello.html @@ -0,0 +1,6 @@ +version=#[version]# +uptime=#[uptime]# +yourip=#[yourip]# +yourtype=#[yourtype]# +mytime=#[mytime]# +#[seedlist]# diff --git a/htroot/yacy/hello.java b/htroot/yacy/hello.java new file mode 100644 index 000000000..284404e47 --- /dev/null +++ b/htroot/yacy/hello.java @@ -0,0 +1,132 @@ +// hello.java +// ----------------------- +// part of the AnomicHTTPD caching proxy +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2004 +// last major change: 30.06.2004 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. 
The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. +// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. + +// you must compile this file with +// javac -classpath .:../../Classes hello.java +// if the shell's current path is HTROOT + +import java.util.*; +import de.anomic.tools.*; +import de.anomic.server.*; +import de.anomic.yacy.*; +import de.anomic.http.*; + +public class hello { + + public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) { + serverObjects prop = new serverObjects(); // return variable that accumulates replacements + + if ((post == null) || + (env == null) || + (yacyCore.seedDB == null) || + (yacyCore.seedDB.mySeed == null)) return new serverObjects(); + + String iam = (String) post.get("iam",""); // complete seed of the requesting peer + String key = (String) post.get("key",""); // transmission key for response + String seed = (String) post.get("seed",""); // + String pattern = (String) post.get("pattern",""); // + String countStr= (String) post.get("count","0"); // + String mytime = (String) post.get("mytime",""); // + int count = 0; + try {count = (countStr == null) ? 
0 : Integer.parseInt(countStr);} catch (NumberFormatException e) {count = 0;} + Date remoteTime = yacyCore.parseUniversalDate((String) post.get("mytime")); // read remote time + yacySeed remoteSeed = yacySeed.genRemoteSeed(seed, key, remoteTime); + + //System.out.println("YACYHELLO: REMOTESEED=" + ((remoteSeed == null) ? "NULL" : remoteSeed.toString())); + if (remoteSeed == null) return new serverObjects(); + + // we easily know the caller's IP: + String yourip = (String) header.get("CLIENTIP", ""); // read an artificial header addendum + //System.out.println("YACYHELLO: YOUR IP=" + yourip); + prop.put("yourip", yourip); + remoteSeed.put("IP", yourip); + + // now let's check if the calling peer can be reached and answers + int port = Integer.parseInt((String) remoteSeed.get("Port", "8080")); + int urls = yacyClient.queryUrlCount(remoteSeed); + if (urls >= 0) { + if (remoteSeed.get("PeerType", "senior") == null) { + prop.put("yourtype", "senior"); + remoteSeed.put("PeerType", "senior"); + } else if (remoteSeed.get("PeerType", "principal").equals("principal")) { + prop.put("yourtype", "principal"); + } else { + prop.put("yourtype", "senior"); + remoteSeed.put("PeerType", "senior"); + } + // connect the seed + yacyCore.peerActions.peerArrival(remoteSeed, true); + } else { + prop.put("yourtype", "junior"); + remoteSeed.put("LastSeen", yacyCore.universalDateShortString()); + yacyCore.peerActions.juniorConnects++; // update statistics + remoteSeed.put("PeerType", "junior"); + yacyCore.log.logInfo("hello: responded remote junior peer '" + remoteSeed.getName() + "' from " + yourip + ":" + port); + // no connection here, instead store junior in connection cache + if ((remoteSeed.hash != null) && (remoteSeed.isProper())) yacyCore.peerActions.peerPing(remoteSeed); + } + + String seeds = ""; + + // attach also my own seed + seeds += "seed0=" + yacyCore.seedDB.mySeed.genSeedStr(key) + serverCore.crlfString; + + // attach some more seeds, as requested + if (yacyCore.seedDB != 
null) { + if (count > yacyCore.seedDB.sizeConnected()) count = yacyCore.seedDB.sizeConnected(); + if (count > 100) count = 100; + yacySeed[] ys = yacyCore.seedDB.seedsByAge(true, count); // latest seeds + int c = 1; + for (int i = 1; i < ys.length; i++) { + if ((ys[i] != null) && (ys[i].isProper())) { + seeds += "seed" + c + "=" + ys[i].genSeedStr(key) + serverCore.crlfString; + c++; + } + } + } + + prop.put("mytime", yacyCore.universalDateShortString()); + prop.put("seedlist", seeds); + // return rewrite properties + return prop; + } + +} diff --git a/htroot/yacy/list.html b/htroot/yacy/list.html new file mode 100644 index 000000000..285c7277d --- /dev/null +++ b/htroot/yacy/list.html @@ -0,0 +1 @@ +#[list]# diff --git a/htroot/yacy/list.java b/htroot/yacy/list.java new file mode 100644 index 000000000..6d8c57c3b --- /dev/null +++ b/htroot/yacy/list.java @@ -0,0 +1,88 @@ +// list.java +// ----------------------- +// part of the AnomicHTTPProxy +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2004 +// +// This File is contributed by Alexander Schier +// last change: 18.06.2004 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. 
+// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. +// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. 
+ +// you must compile this file with +// javac -classpath .:../../Classes list.java +// if the shell's current path is HTROOT + +import java.util.*; +import java.io.*; +import de.anomic.tools.*; +import de.anomic.server.*; +import de.anomic.yacy.*; +import de.anomic.http.*; +import de.anomic.data.*; + +public class list { + + public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) { + // return variable that accumulates replacements + serverObjects prop = new serverObjects(); + String col = (String) post.get("col", ""); + + File listsPath = new File(env.getRootPath(),env.getConfig("listsPath", "DATA/LISTS")); + + if (col.equals("black")) { + String filename = ""; + String line; + String out = ""; + + String filenames=env.getConfig("proxyBlackListsShared", ""); + String filenamesarray[] = filenames.split(","); + + if(filenamesarray.length >0){ + for(int i = 0;i <= filenamesarray.length -1; i++){ + filename = filenamesarray[i]; + out += listManager.getListString(new File(listsPath,filename).toString(), false) + serverCore.crlfString; + } + }//if filenamesarray.length >0 + + prop.put("list",out); + } else { + prop.put("list",""); + } + + return prop; + } + +} diff --git a/htroot/yacy/message.html b/htroot/yacy/message.html new file mode 100644 index 000000000..7df9d7b47 --- /dev/null +++ b/htroot/yacy/message.html @@ -0,0 +1,5 @@ +version=#[version]# +uptime=#[uptime]# +messagesize=#[messagesize]# +attachmentsize=#[attachmentsize]# +response=#[response]# diff --git a/htroot/yacy/message.java b/htroot/yacy/message.java new file mode 100644 index 000000000..989bd0e55 --- /dev/null +++ b/htroot/yacy/message.java @@ -0,0 +1,166 @@ +// message.java +// ----------------------- +// part of the AnomicHTTPD caching proxy +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2004 +// last major change: 29.06.2003 +// +// This program is free software; you can redistribute it 
and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. +// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. 
+ + +// you must compile this file with +// javac -classpath .:../../Classes message.java +// if the shell's current path is HTROOT/yacy + +import java.util.*; +import java.text.*; +import java.io.*; +import de.anomic.tools.*; +import de.anomic.server.*; +import de.anomic.yacy.*; +import de.anomic.plasma.*; +import de.anomic.http.*; + +public class message { + + private static SimpleDateFormat SimpleFormatter = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss"); + public static String dateString(Date date) { + return SimpleFormatter.format(date); + } + + + public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) { + // return variable that accumulates replacements + plasmaSwitchboard switchboard = (plasmaSwitchboard) env; + serverObjects prop = new serverObjects(); + + //System.out.println("yacy/message:post=" + post.toString()); + + if ((post == null) || (env == null)) return new serverObjects(); + + String process = (String) post.get("process", "permission"); + String key = (String) post.get("key", ""); + + int messagesize = 10240; + int attachmentsize = 0; + + prop.put("messagesize", "0"); + prop.put("attachmentsize", "0"); + + //System.out.println("DEBUG yacy/message: message post values = " + post.toString()); + + String youare = (String) post.get("youare", ""); // seed hash of the target peer, needed for network stability + // check if we are the right target and requester has correct information about this peer + if ((yacyCore.seedDB.mySeed == null) || (!(yacyCore.seedDB.mySeed.hash.equals(youare)))) { + // this request has a wrong target + //System.out.println("DEBUG yacy/message: authenticate failed"); + prop.put("response", "-1"); // request rejected + return prop; + } + + prop.put("messagesize", "" + messagesize); + prop.put("attachmentsize", "" + attachmentsize); + + if (process.equals("permission")) { + // permission: respond with accceptabeale message and attachment size + String iam = (String) post.get("iam", ""); // seed 
hash of requester + prop.put("response", "Welcome to my peer!"); + // that's it! + } + + if (process.equals("post")) { + // post: post message to message board + String otherSeedString = (String) post.get("myseed", ""); + if (otherSeedString.length() == 0) { + prop.put("response", "-1"); // request rejected + return prop; + } + Date remoteTime = yacyCore.parseUniversalDate((String) post.get("mytime")); // read remote time + yacySeed otherSeed = yacySeed.genRemoteSeed(otherSeedString, key, remoteTime); + + String subject = crypt.simpleDecode((String) post.get("subject", ""), key); // message's subject + String message = crypt.simpleDecode((String) post.get("message", ""), key); // message body + + prop.put("response", "Thank you!"); + + // save message + switchboard.messageDB.write(switchboard.messageDB.newEntry( + "remote", + otherSeed.get("Name", "anonymous"), otherSeed.hash, + yacyCore.seedDB.mySeed.getName(), yacyCore.seedDB.mySeed.hash, + subject, message.getBytes())); + + // finally write notification + File notifierSource = new File(switchboard.getRootPath(), switchboard.getConfig("htRootPath","htroot") + "/env/grafics/notifierActive.gif"); + File notifierDest = new File(switchboard.getRootPath(), switchboard.getConfig("htRootPath","htroot") + "/env/grafics/notifier.gif"); + try { + serverFileUtils.copy(notifierSource, notifierDest); + } catch (IOException e) { + System.out.println("NEW MESSAGE ARRIVED! 
(error: " + e.getMessage() + ")"); + }; + } + //System.out.println("respond = " + prop.toString()); + + // return rewrite properties + return prop; + } +/* +on 83 +DEBUG: message post values = {youare=Ty2F86ekSWM5, key=pPQSZaXD, iam=WSjicAx1hRio, process=permission} +von 93 wurde gesendet: +DEBUG: PUT BODY=------------1090394265522 +Content-Disposition: form-data; name="youare" + +Ty2F86ekSWM5 +------------1090394265522 +Content-Disposition: form-data; name="key" + +pPQSZaXD +------------1090394265522 +Content-Disposition: form-data; name="iam" + +WSjicAx1hRio +------------1090394265522 +Content-Disposition: form-data; name="process" + +permission +------------1090394265522 + + +on 93 +DEBUG: message post values = {youare=WSjicAx1hRio, key=YJZLwaNS, iam=Ty2F86ekSWM5, process=permission} + + */ +} diff --git a/htroot/yacy/profile.html b/htroot/yacy/profile.html new file mode 100644 index 000000000..2dbf84277 --- /dev/null +++ b/htroot/yacy/profile.html @@ -0,0 +1,2 @@ +#{list}##[key]#=#[value]# +#{/list}# diff --git a/htroot/yacy/profile.java b/htroot/yacy/profile.java new file mode 100644 index 000000000..1ae179611 --- /dev/null +++ b/htroot/yacy/profile.java @@ -0,0 +1,83 @@ +// profile.java +// ----------------------- +// part of YaCy +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2004, 2005 +// +// This file ist contributed by Alexander Schier +// last major change: 27.02.2005 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. 
+// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. +// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. 
+ +// you must compile this file with +// javac -classpath .:../../Classes hello.java +// if the shell's current path is HTROOT + +import java.util.*; +import java.io.*; +import de.anomic.server.*; +import de.anomic.http.*; + +public class profile { + + public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) { + serverObjects prop = new serverObjects(); // return variable that accumulates replacements + Properties profile = new Properties(); + int count=0; + String key=""; + String value=""; + try{ + profile.load(new FileInputStream(new File("DATA/SETTINGS/profile.txt"))); + + }catch(IOException e){} + + Iterator it = ((Map)profile).keySet().iterator(); + while(it.hasNext()){ + key=(String)it.next(); + value=profile.getProperty(key, "").replaceAll("\r","").replaceAll("\n","\\\\n"); + if( !(key.equals("")) && !(value.equals("")) ){ + prop.put("list_"+count+"_key", key); + prop.put("list_"+count+"_value", value); + count++; + } + } + prop.put("list", count); + + + // return rewrite properties + return prop; + } + +} diff --git a/htroot/yacy/query.html b/htroot/yacy/query.html new file mode 100644 index 000000000..462743956 --- /dev/null +++ b/htroot/yacy/query.html @@ -0,0 +1,4 @@ +version=#[version]# +uptime=#[uptime]# +response=#[response]# +mytime=#[mytime]# \ No newline at end of file diff --git a/htroot/yacy/query.java b/htroot/yacy/query.java new file mode 100644 index 000000000..fb924d023 --- /dev/null +++ b/htroot/yacy/query.java @@ -0,0 +1,148 @@ +// query.java +// ----------------------- +// part of the AnomicHTTPD caching proxy +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2004 +// last change: 15.05.2004 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later 
version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. +// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. 
+
+// you must compile this file with
+// javac -classpath .:../../Classes query.java
+// if the shell's current path is HTROOT
+
+import java.util.*;
+import de.anomic.tools.*;
+import de.anomic.server.*;
+import de.anomic.yacy.*;
+import de.anomic.plasma.*;
+import de.anomic.http.*;
+
+/**
+ * Page handler that answers status queries of other peers.
+ */
+public class query {
+
+    /**
+     * Answers one query. The request argument "object" selects the subject;
+     * the answer is stored in the property "response". Only "lurlcount" is
+     * answered from the switchboard, all other known subjects return the
+     * dummy value "0". A request whose "youare" hash does not match the own
+     * seed hash is rejected with "-1". For an unknown subject only "mytime"
+     * is set.
+     *
+     * @param header request header (not read by this method)
+     * @param post   request arguments; an empty result is returned if null
+     * @param sb     the server switch, cast to plasmaSwitchboard
+     * @return the rewrite properties
+     */
+    public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch sb) {
+        // return variable that accumulates replacements
+        serverObjects prop = new serverObjects();
+
+        plasmaSwitchboard switchboard = (plasmaSwitchboard) sb;
+        // System.out.println("YACYQUERY: RECEIVED POST = " + ((post == null) ? "NULL" : post.toString()));
+
+        if ((post == null) || (switchboard == null)) return new serverObjects();
+
+        String iam = (String) post.get("iam", ""); // complete seed of the requesting peer
+        String youare = (String) post.get("youare", ""); // seed hash of the target peer, used for testing network stability
+        String key = (String) post.get("key", ""); // transmission key for response
+        String obj = (String) post.get("object", ""); // keyword for query subject
+        String env = (String) post.get("env", ""); // argument to query
+
+        // check if we are the right target and requester has correct information about this peer
+        // changed: a seed database that is not initialized yet now rejects the
+        // request instead of raising a NullPointerException
+        if ((yacyCore.seedDB == null) ||
+            (yacyCore.seedDB.mySeed == null) ||
+            (!(yacyCore.seedDB.mySeed.hash.equals(youare)))) {
+            // this request has a wrong target
+            prop.put("response", "-1"); // request rejected
+            return prop;
+        }
+
+        // requests about environment
+
+        if (obj.equals("wordcount")) {
+            // the total number of different words in the rwi is returned
+            prop.put("response", "0"); // dummy response
+            return prop;
+        }
+
+        if (obj.equals("rwicount")) {
+            // return the number of available word indexes
+            // shall contain a word hash, the number of assigned lurls to this hash is returned
+            prop.put("response", "0"); // dummy response
+            return prop;
+        }
+
+        if (obj.equals("lurlcount")) {
+            // return the number of all available l-url's
+            Hashtable result = switchboard.action("urlcount", null);
+            //System.out.println("URLCOUNT result = " + ((result == null) ? "NULL" : result.toString()));
+            prop.put("response", ((result == null) ? "-1" : (String) result.get("urls")));
+            return prop;
+        }
+
+        if (obj.equals("purlcount")) {
+            // return number of stacked prefetch urls
+            prop.put("response", "0"); // dummy response
+            return prop;
+        }
+
+        if (obj.equals("seedcount")) {
+            // changed: the old comment was a copy of the purlcount comment
+            // NOTE(review): presumably the number of known seeds - confirm; not implemented yet
+            prop.put("response", "0"); // dummy response
+            return prop;
+        }
+
+
+        // requests about requirements
+
+        if (obj.equals("wantedlurls")) {
+            prop.put("response", "0"); // dummy response
+            return prop;
+        }
+
+        if (obj.equals("wantedpurls")) {
+            prop.put("response", "0"); // dummy response
+            return prop;
+        }
+
+        if (obj.equals("wantedword")) {
+            // response returns a list of wanted word hashes
+            prop.put("response", "0"); // dummy response
+            return prop;
+        }
+
+        if (obj.equals("wantedrwi")) {
+            // shall contain a word hash, the number of wanted lurls for this hash is returned
+            prop.put("response", "0"); // dummy response
+            return prop;
+        }
+
+        if (obj.equals("wantedseeds")) {
+            // return a number of wanted seed
+            prop.put("response", "0"); // dummy response
+            return prop;
+        }
+
+        // unknown query subject: only the local time is reported
+        prop.put("mytime", yacyCore.universalDateShortString());
+        // return rewrite properties
+        return prop;
+    }
+
+}
diff --git a/htroot/yacy/search.html b/htroot/yacy/search.html
new file mode 100644
index 000000000..ddb735807
--- /dev/null
+++ b/htroot/yacy/search.html
@@ -0,0 +1,10 @@
+version=#[version]#
+uptime=#[uptime]#
+count=#[linkcount]#
+total=#[totalcount]#
+fwhop=#[fwhop]#
+fwsrc=#[fwsrc]#
+fwrec=#[fwrec]#
+searchtime=#[searchtime]#
+references=#[references]#
+#[links]#
\ No newline at end of file
diff --git a/htroot/yacy/search.java b/htroot/yacy/search.java
new file mode 100644
index 000000000..de56fecb6
--- /dev/null
+++ b/htroot/yacy/search.java
@@ -0,0 +1,95 @@
+// search.java
+// -----------------------
+// part of the
AnomicHTTPD caching proxy +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2004 +// last major change: 02.06.2003 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. +// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. 
A re-distribution must contain
+// the intact and unchanged copyright notice.
+// Contributions and changes to the program code must be marked as such.
+
+
+// you must compile this file with
+// javac -classpath .:../../Classes search.java
+// if the shell's current path is htroot/yacy
+
+import java.util.*;
+import de.anomic.tools.*;
+import de.anomic.server.*;
+import de.anomic.plasma.*;
+import de.anomic.yacy.*;
+import de.anomic.http.*;
+
+/**
+ * Page handler that executes a search request of another peer.
+ */
+public class search {
+
+    /**
+     * Splits the request argument "query" into word hashes of
+     * plasmaWordIndexEntry.wordHashLength characters, passes them to
+     * switchboard.searchFromRemote() and returns its result extended by the
+     * property "searchtime" (elapsed milliseconds). If the seed database is
+     * available, the requesting peer is registered and the own seed counters
+     * are increased by the reported "linkcount".
+     *
+     * @param header request header (not read by this method)
+     * @param post   request arguments; an empty result is returned if null
+     * @param env    the server switch, cast to plasmaSwitchboard
+     * @return the rewrite properties
+     */
+    public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) {
+        plasmaSwitchboard switchboard = (plasmaSwitchboard) env;
+        serverObjects prop = new serverObjects();
+
+        // be safe
+        if ((post == null) || (env == null)) return prop;
+
+        //System.out.println("yacy: search received request = " + post.toString());
+
+        String oseed = (String) post.get("myseed", ""); // complete seed of the requesting peer
+        String youare = (String) post.get("youare", ""); // seed hash of the target peer, used for testing network stability
+        String key = (String) post.get("key", ""); // transmission key for response
+        String query = (String) post.get("query", ""); // a string of word hashes
+        String fwdep = (String) post.get("fwdep", ""); // forward depth. if "0" then peer may NOT ask another peer for more results
+        String fwden = (String) post.get("fwden", ""); // forward deny, a list of seed hashes. They may NOT be target of forward hopping
+        long duetime= Long.parseLong((String) post.get("duetime", "3000"));
+        int count = Integer.parseInt((String) post.get("count", "10")); // maximum number of wanted results
+        boolean global = ((String) post.get("resource", "global")).equals("global"); // if true, then result may consist of answers from other peers
+        Date remoteTime = yacyCore.parseUniversalDate((String) post.get("mytime")); // read remote time
+        if (yacyCore.seedDB == null) {
+            yacyCore.log.logError("yacy.search: seed cache not initialized");
+        } else {
+            yacyCore.peerActions.peerArrival(yacySeed.genRemoteSeed(oseed, key, remoteTime), true);
+        }
+
+        // a trailing fragment shorter than one hash is ignored by the integer division
+        HashSet keyhashes = new HashSet();
+        for (int i = 0; i < (query.length() / plasmaWordIndexEntry.wordHashLength); i++) {
+            keyhashes.add(query.substring(i * plasmaWordIndexEntry.wordHashLength, (i + 1) * plasmaWordIndexEntry.wordHashLength));
+        }
+        long timestamp = System.currentTimeMillis();
+        prop = switchboard.searchFromRemote(keyhashes, count, global, duetime);
+        prop.put("searchtime", "" + (System.currentTimeMillis() - timestamp));
+
+        // changed: the counters are only updated if the seed database exists; the
+        // old code logged the missing seed cache above and then dereferenced it here
+        if ((yacyCore.seedDB != null) && (yacyCore.seedDB.mySeed != null)) {
+            int links = Integer.parseInt(prop.get("linkcount","0"));
+            yacyCore.seedDB.mySeed.incSI(links);
+            yacyCore.seedDB.mySeed.incSU(links);
+        }
+        return prop;
+    }
+
+}
diff --git a/htroot/yacy/transferRWI.html b/htroot/yacy/transferRWI.html
new file mode 100644
index 000000000..13a4c88a6
--- /dev/null
+++ b/htroot/yacy/transferRWI.html
@@ -0,0 +1,4 @@
+version=#[version]#
+uptime=#[uptime]#
+unknownURL=#[unknownURL]#
+result=#[result]#
diff --git a/htroot/yacy/transferRWI.java b/htroot/yacy/transferRWI.java
new file mode 100644
index 000000000..70069b9ed
--- /dev/null
+++ b/htroot/yacy/transferRWI.java
@@ -0,0 +1,138 @@
+// transferRWI.java
+// -----------------------
+// part of the AnomicHTTPD caching proxy
+// (C) by Michael Peter Christen; mc@anomic.de
+// first published on http://www.anomic.de
+// Frankfurt, Germany, 2004, 2005
+// last change: 24.01.2005
+//
+// This program is
free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. +// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. 
+
+// you must compile this file with
+// javac -classpath .:../classes transferRWI.java
+
+
+import de.anomic.server.*;
+import de.anomic.http.*;
+import de.anomic.plasma.*;
+import de.anomic.yacy.*;
+import de.anomic.tools.*;
+import java.util.*;
+import java.net.*;
+import java.io.*;
+
+/**
+ * Page handler that receives word index entries from another peer.
+ */
+public class transferRWI {
+
+    /**
+     * Stores the index entries sent in the request argument "indexes" if the
+     * configuration value "allowReceiveIndex" is "true". The property
+     * "result" is "ok" or "error_not_granted". The property "unknownURL" is a
+     * comma-separated list of the url hashes of received entries for which
+     * switchboard.loadedURL has no entry.
+     *
+     * @param header request header (not read by this method)
+     * @param post   request arguments; an empty result is returned if null
+     * @param env    the server switch, cast to plasmaSwitchboard
+     * @return the rewrite properties
+     */
+    public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) {
+        // return variable that accumulates replacements
+        plasmaSwitchboard switchboard = (plasmaSwitchboard) env;
+        serverObjects prop = new serverObjects();
+
+        if ((post == null) || (env == null)) return prop;
+
+        // request values
+        String iam = (String) post.get("iam", ""); // seed hash of requester
+        String youare = (String) post.get("youare", ""); // seed hash of the target peer, needed for network stability
+        String key = (String) post.get("key", ""); // transmission key
+        // changed: the default of both counters is "0"; the former default ""
+        // made parseInt throw a NumberFormatException for a missing argument
+        int wordc = Integer.parseInt((String) post.get("wordc", "0")); // number of different words
+        int entryc = Integer.parseInt((String) post.get("entryc", "0")); // number of entries in indexes
+        byte[] indexes = ((String) post.get("indexes", "")).getBytes(); // the indexes, as list of word entries
+        boolean granted = switchboard.getConfig("allowReceiveIndex", "false").equals("true");
+
+        // response values
+        String result = "";
+        String unknownURLs = "";
+
+        if (granted) {
+            // decode request: cut the byte array into the runs of bytes >= 32;
+            // every run of bytes < 32 is a separator
+            Vector v = new Vector();
+            int s = 0;
+            int e;
+            while (s < indexes.length) {
+                e = s; while (e < indexes.length) if (indexes[e++] < 32) {e--; break;}
+                if ((e - s) > 0) v.add(new String(indexes, s, e - s));
+                s = e; while (s < indexes.length) if (indexes[s++] >= 32) {s--; break;}
+            }
+            // the value-vector should now have the same length as entryc
+            if (v.size() != entryc) System.out.println("ERROR WITH ENTRY COUNTER: v=" + v.size() + ", entryc=" + entryc);
+
+            // now parse the Strings in the value-vector and write index entries
+            String estring;
+            int p;
+            String wordHash;
+            String urlHash;
+            plasmaWordIndexEntry entry;
+            HashSet unknownURL = new HashSet();
+            String[] wordhashes = new String[v.size()];
+            int received = 0;
+            for (int i = 0; i < v.size(); i++) {
+                estring = (String) v.elementAt(i);
+                // the word hash is the text in front of the first "{"
+                p = estring.indexOf("{");
+                if (p > 0) {
+                    wordHash = estring.substring(0, p);
+                    wordhashes[i] = wordHash;
+                    entry = new plasmaWordIndexEntry(estring.substring(p));
+                    try {
+                        switchboard.wordIndex.addEntry(wordHash, entry);
+                        urlHash = entry.getUrlHash();
+                        if ((!(unknownURL.contains(urlHash))) &&
+                            (!(switchboard.loadedURL.exists(urlHash)))) {
+                            unknownURL.add(urlHash);
+                        }
+                        received++;
+                    } catch (IOException ee) {
+                        ee.printStackTrace();
+                    }
+                }
+            }
+            yacyCore.seedDB.mySeed.incRI(received);
+
+            // finally compose the unknownURL hash list
+            Iterator it = unknownURL.iterator();
+            while (it.hasNext()) unknownURLs += "," + (String) it.next();
+            if (unknownURLs.length() > 0) unknownURLs = unknownURLs.substring(1);
+            // changed: an empty transfer no longer raises an
+            // ArrayIndexOutOfBoundsException when the hash range is logged
+            String firstHash = (wordhashes.length == 0) ? "" : wordhashes[0];
+            String lastHash = (wordhashes.length == 0) ? "" : wordhashes[wordhashes.length - 1];
+            switchboard.log.logInfo("Received " + received + " Words [" + firstHash + " .. " + lastHash + "] from peer " + iam + ", requested " + unknownURL.size() + " URL's");
+            result = "ok";
+        } else {
+            result = "error_not_granted";
+        }
+
+        prop.put("unknownURL", unknownURLs);
+        prop.put("result", result);
+
+        // return rewrite properties
+        return prop;
+    }
+
+}
diff --git a/htroot/yacy/transferURL.html b/htroot/yacy/transferURL.html
new file mode 100644
index 000000000..a7fcebd67
--- /dev/null
+++ b/htroot/yacy/transferURL.html
@@ -0,0 +1,4 @@
+version=#[version]#
+uptime=#[uptime]#
+double=#[double]#
+result=#[result]#
diff --git a/htroot/yacy/transferURL.java b/htroot/yacy/transferURL.java
new file mode 100644
index 000000000..ddb0403bc
--- /dev/null
+++ b/htroot/yacy/transferURL.java
@@ -0,0 +1,105 @@
+// transferURL.java
+// -----------------------
+// part of the AnomicHTTPD caching proxy
+// (C) by Michael Peter Christen; mc@anomic.de
+// first published on http://www.anomic.de
+// Frankfurt, Germany, 2004, 2005
+// last change: 24.01.2005
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. +// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. 
+
+// you must compile this file with
+// javac -classpath .:../classes transferURL.java
+
+
+import de.anomic.server.*;
+import de.anomic.http.*;
+import de.anomic.plasma.*;
+import de.anomic.yacy.*;
+import de.anomic.tools.*;
+import java.util.*;
+import java.net.*;
+import java.io.*;
+
+/**
+ * Page handler that receives url entries from another peer.
+ */
+public class transferURL {
+
+    /**
+     * Stores the request arguments "url0" .. "url<urlc-1>" in
+     * switchboard.loadedURL if the configuration value "allowReceiveIndex"
+     * is "true". The property "result" is "ok" or "error_not_granted". The
+     * property "double" is the number of received urls that did not increase
+     * the size of loadedURL.
+     *
+     * @param header request header (not read by this method)
+     * @param post   request arguments; an empty result is returned if null
+     * @param env    the server switch, cast to plasmaSwitchboard
+     * @return the rewrite properties
+     */
+    public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) {
+        // return variable that accumulates replacements
+        plasmaSwitchboard switchboard = (plasmaSwitchboard) env;
+        serverObjects prop = new serverObjects();
+
+        if ((post == null) || (env == null)) return prop;
+
+        // request values
+        String iam = (String) post.get("iam", ""); // seed hash of requester
+        String youare = (String) post.get("youare", ""); // seed hash of the target peer, needed for network stability
+        String key = (String) post.get("key", ""); // transmission key
+        // changed: the default is "0"; the former default "" made parseInt
+        // throw a NumberFormatException for a missing argument
+        int urlc = Integer.parseInt((String) post.get("urlc", "0")); // number of transported urls
+        boolean granted = switchboard.getConfig("allowReceiveIndex", "false").equals("true");
+
+        // response values
+        String result = "";
+        String doublevalues = "0";
+
+        if (granted) {
+            int received = 0;
+            int sizeBefore = switchboard.loadedURL.size();
+            // read the urls from the other properties and store
+            String urls;
+            for (int i = 0; i < urlc; i++) {
+                urls = (String) post.get("url" + i);
+                if (urls != null) {
+                    switchboard.loadedURL.newEntry(urls, true, iam, iam, 3);
+                    received++;
+                }
+            }
+
+            yacyCore.seedDB.mySeed.incRU(received);
+
+            // return rewrite properties
+            // urls that did not make loadedURL grow are counted as doubles
+            int more = switchboard.loadedURL.size() - sizeBefore;
+            doublevalues = "" + (received - more);
+            switchboard.log.logInfo("Received " + received + " URL's from peer " + iam);
+            if ((received - more) > 0) switchboard.log.logError("Received " + doublevalues + " double URL's from peer " + iam);
+            result = "ok";
+        } else {
+            result = "error_not_granted";
+        }
+
+        prop.put("double", doublevalues);
+        prop.put("result", result);
+        return prop;
+    }
+
+}
diff --git a/httpd.mime b/httpd.mime
new file mode 100644
index 000000000..50b825a87
--- /dev/null
+++ b/httpd.mime
@@ -0,0 +1,74 @@
+###
+### This is the MIME file type matching configuration for the YACY Server
+###
+
+# Extension = MIME type
+ai = application/postscript
+aiff = audio/x-aiff
+au = audio/basic
+avi = video/x-msvideo
+bat = text/plain
+bin = application/octet-stream
+bz2 = application/x-bzip2
+class = application/octet-stream
+c = text/plain
+com = application/octet-stream
+css = text/css
+db = application/octet-stream
+dll = application/octet-stream
+doc = application/msword
+dot = application/msword
+dvi = application/x-dvi
+eps = application/postscript
+exe = application/octet-stream
+gif = image/gif
+gz = application/gzip
+hqx = application/mac-binhex40
+htm = text/html
+html = text/html
+java = text/plain
+jpe = image/jpeg
+jpeg = image/jpeg
+jpg = image/jpeg
+js = text/javascript
+lha = application/x-lzh
+lzh = application/x-lzh
+mov = video/quicktime
+mpe = video/mpeg
+mpeg = video/mpeg
+mpg = video/mpeg
+pdf = application/pdf
+php = application/x-httpd-php
+phtml = application/x-httpd-php
+pl = text/plain
+png = image/png
+pot = application/mspowerpoint
+pps = application/mspowerpoint
+ppt = application/mspowerpoint
+ppz = application/mspowerpoint
+ps = application/postscript
+qt = video/quicktime
+ra = audio/x-pn-realaudio
+ram = audio/x-pn-realaudio
+rpm = audio/x-pn-realaudio-plugin
+rss = application/xml
+rtf = application/rtf
+sh = text/plain
+shtml = text/html
+stream = audio/x-qt-stream
+swf = application/x-shockwave-flash
+tar = application/tar
+tex = application/x-tex
+tgz = application/tar
+tif = image/tiff
+tiff = image/tiff
+torrent = application/x-bittorrent
+txt = text/plain
+wav = audio/x-wav
+xhtml = application/xhtml+xml
+xla = application/msexcel
+xls = application/msexcel
+xml = application/xml
+Z = application/x-compress
+zip = application/zip
+default = application/octet-stream
diff --git a/killYACY.sh
b/killYACY.sh
new file mode 100755
index 000000000..149513c84
--- /dev/null
+++ b/killYACY.sh
@@ -0,0 +1,13 @@
+#!/bin/sh
+
+# THIS IS ONLY FOR EMERGENCY CASES
+# To stop YaCy, use stopYACY.sh
+#
+# Looks up the process that has yacy.log open (in the directory of this
+# script), sends it signal 3, waits one second and then sends signal 9.
+
+# changed: stop if the script directory cannot be entered, otherwise fuser
+# would look at a yacy.log in whatever directory the caller is in
+cd "$(dirname "$0")" || exit 1
+PID=$(fuser yacy.log | awk '{print $2}')
+# changed: without a process id there is nothing to kill; the old code called
+# kill without a target and still reported success
+if [ -z "$PID" ]; then
+  echo "no process found that uses yacy.log, nothing killed" >&2
+  exit 1
+fi
+echo "process-id is " "$PID"
+kill -3 "$PID"
+sleep 1
+kill -9 "$PID"
+echo "killed pid " "$PID" ", YaCy terminated"
+
diff --git a/makerelease.sh b/makerelease.sh
new file mode 100755
index 000000000..eff07edb6
--- /dev/null
+++ b/makerelease.sh
@@ -0,0 +1,268 @@
+#!/bin/sh
+#
+# THIS IS THE YACY MAKE-RELEASE SCRIPT
+# YOU CAN USE IT TO COMPILE YOUR OWN RELEASE
+# THE TARGET OF THE COMPILATION CAN BE FOUND
+# IN THE 'RELEASE' DIRECTORY AFTERWARDS
+# -----------------------------------------
+# This Software is Copyrighted
+# (C) by Michael Peter Christen; mc@anomic.de
+# first published on http://www.anomic.de
+# Frankfurt, Germany, 2004
+# last major change: 28.12.2004
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+# Using this software in any meaning (reading, learning, copying, compiling,
+# running) means that you agree that the Author(s) is (are) not responsible
+# for cost, loss of data or any harm that may be caused directly or indirectly
+# by usage of this softare or this documentation. The usage of this software
+# is on your own risk.
The installation and usage (starting/running) of this
+# software may allow other people or application to access your computer and
+# any attached devices and is highly dependent on the configuration of the
+# software which must be done by the user of the software; the author(s) is
+# (are) also not responsible for proper configuration and usage of the
+# software, even if provoked by documentation provided together with
+# the software.
+#
+# Any changes to this file according to the GPL as documented in the file
+# gpl.txt aside this file in the shipment you received can be done to the
+# lines that follows this copyright notice here, but changes must not be
+# done inside the copyright notive above. A re-distribution must contain
+# the intact and unchanged copyright notice.
+# Contributions and changes to the program code must be marked as such.
+
+# Changes (review):
+# - '&> /dev/null' replaced by '> /dev/null 2>&1'. '&>' is a bash extension;
+#   a POSIX /bin/sh reads 'cmd &> file' as 'cmd &' followed by '> file', i.e.
+#   it runs the command in the background and does not silence it.
+# - the repeated per-package lines are written as loops over $packages
+
+# define variables
+version='0.36'
+datestr=$(date +%Y%m%d)
+release='yacy_v'$version'_'$datestr
+#release='yacy_dev_v'$version'_'$datestr
+target='RELEASE'
+classes='classes'
+source='source'
+doc='doc'
+data='DATA'
+mainclass='yacy.java'
+# the packages below de/anomic, in the order in which they are compiled
+packages='tools net htmlFilter server http kelondro data plasma yacy'
+mkdir $release
+
+# clean up
+rm -Rf $target > /dev/null 2>&1
+rm -Rf $classes/*.class > /dev/null 2>&1
+rm -f $doc/release.txt > /dev/null 2>&1
+
+# remove compiled classes and editor backup files (*~)
+rm -f $classes/*.class > /dev/null 2>&1
+rm -f $source/*.class > /dev/null 2>&1
+rm -f $source/*~ > /dev/null 2>&1
+for pkg in $packages; do
+  rm -f $classes/de/anomic/$pkg/*.class > /dev/null 2>&1
+  rm -f $source/de/anomic/$pkg/*.class > /dev/null 2>&1
+  rm -f $source/de/anomic/$pkg/*~ > /dev/null 2>&1
+done
+for dir in doc addon htroot htroot/yacy htroot/htdocsdefault htroot/env htroot/env/grafics htroot/env/templates; do
+  rm -f $dir/*~ > /dev/null 2>&1
+done
+
+
+# make release directory
+mkdir $target
+
+# compile core
+# the main class is compiled from a copy in which the placeholders are
+# replaced by the release date and version; the original is restored afterwards
+# NOTE(review): both sed patterns read '<>' here, which makes the second sed a
+# no-op - the placeholder names look garbled; confirm against yacy.java
+mv -f $source/$mainclass $source/$mainclass.orig
+sed "s/<>/$datestr/" $source/$mainclass.orig > $source/$mainclass.sed1
+sed "s/<>/$version/" $source/$mainclass.sed1 > $source/$mainclass
+rm $source/$mainclass.sed1
+#javac -classpath $classes -sourcepath $source -d $classes -g:none $source/httpd.java
+#javac -classpath $classes -sourcepath $source -d $classes -g:none $source/$mainclass
+for pkg in $packages; do
+  javac -classpath $classes -sourcepath $source -d $classes -g $source/de/anomic/$pkg/*.java
+done
+javac -classpath $classes -sourcepath $source -d $classes -g $source/$mainclass
+mv -f $source/$mainclass.orig $source/$mainclass
+
+# compile server pages
+#javac -classpath $classes -sourcepath htroot -d $classes -g htroot/*.java
+javac -classpath $classes -sourcepath htroot -d htroot -g htroot/*.java
+javac -classpath $classes -sourcepath htroot/yacy -d htroot/yacy -g htroot/yacy/*.java
+javac -classpath $classes -sourcepath htroot/htdocsdefault -d htroot/htdocsdefault -g htroot/htdocsdefault/*.java
+
+# copy classes
+mkdir $release/$classes
+cp -R $classes/* $release/$classes/
+
+# copy configuration files
+cp yacy.init $release
+cp yacy.yellow $release
+#cp yacy.black $release
+#cp yacy.blue $release
+cp yacy.stopwords $release
+cp httpd.mime $release
+cp superseed.txt $release
+
+# copy wrappers
+cp startYACY.command $release
+cp startYACY.bat $release
+cp startYACY_noconsole.bat $release
+cp startYACY.sh $release
+cp stopYACY.command $release
+cp stopYACY.bat $release
+cp stopYACY.sh $release
+cp killYACY.sh $release
+cp makerelease.sh $release
+
+# copy documentation
+cp readme.txt $release
+cp gpl.txt $release
+mkdir $release/$doc
+mkdir $release/$doc/grafics
+cp $doc/*.css $release/$doc/
+cp $doc/*.js $release/$doc/
+cp $doc/*.html $release/$doc/
+cp $doc/*.txt $release/$doc/
+cp $doc/grafics/*.gif $release/$doc/grafics/
+cp $doc/grafics/*.ico $release/$doc/grafics/
+cp $doc/grafics/*.jpg $release/$doc/grafics/
+
+# copy source code
+mkdir $release/$source
+cp -R $source/* $release/$source/
+
+# copy server pages
+mkdir $release/htroot
+mkdir $release/htroot/yacy
+mkdir $release/htroot/htdocsdefault
+mkdir $release/htroot/env
+mkdir $release/htroot/env/grafics
+mkdir $release/htroot/env/templates
+mkdir $release/htroot/proxymsg
+cp htroot/*.rss $release/htroot/
+cp htroot/*.xml $release/htroot/
+cp htroot/*.html $release/htroot/
+cp htroot/*.java $release/htroot/
+cp htroot/*.class $release/htroot/
+cp htroot/yacy/*.html $release/htroot/yacy/
+cp htroot/yacy/*.java $release/htroot/yacy/
+cp htroot/yacy/*.class $release/htroot/yacy/
+cp htroot/htdocsdefault/*.html $release/htroot/htdocsdefault/
+cp htroot/htdocsdefault/*.java $release/htroot/htdocsdefault/
+cp htroot/htdocsdefault/*.class $release/htroot/htdocsdefault/
+cp htroot/env/*.css $release/htroot/env/
+cp htroot/env/grafics/* $release/htroot/env/grafics/
+cp htroot/env/templates/*.template $release/htroot/env/templates/
+cp htroot/proxymsg/*.html $release/htroot/proxymsg/
+
+# copy add-on's
+mkdir $release/addon
+cp addon/* $release/addon/
+
+# set access rights
+# the order matters: 'chmod 644 dir/*' also hits the sub-directories of dir,
+# which get their 755 back by the lines that follow
+chmod 644 $release/*
+chmod 755 $release/htroot
+chmod 644 $release/htroot/*
+chmod 755 $release/htroot/env
+chmod 644 $release/htroot/env/*
+chmod 755 $release/htroot/env/grafics
+chmod 644 $release/htroot/env/grafics/*
+chmod 755 $release/htroot/env/templates
+chmod 644 $release/htroot/env/templates/*
+chmod 755 $release/htroot/yacy
+chmod 644 $release/htroot/yacy/*
+chmod 755 $release/htroot/htdocsdefault
+chmod 644 $release/htroot/htdocsdefault/*
+chmod 755 $release/htroot/proxymsg
+chmod 644 $release/htroot/proxymsg/*
+chmod 755 $release/$source
+chmod 644 $release/$source/*.java
+chmod 755 $release/$source/de
+chmod 755 $release/$source/de/anomic
+chmod 755 $release/$source/de/anomic/*
+for pkg in $packages; do
+  chmod 644 $release/$source/de/anomic/$pkg/*.java
+done
+chmod 755 $release/$classes
+chmod 644 $release/$classes/*
+chmod 755 $release/$classes/de
+chmod 755 $release/$classes/de/anomic
+chmod 755 $release/$classes/de/anomic/*
+for pkg in $packages; do
+  chmod 644 $release/$classes/de/anomic/$pkg/*.class
+done
+chmod 755 $release/$doc
+chmod 644 $release/$doc/*
+chmod 755 $release/$doc/grafics
+chmod 644 $release/$doc/grafics/*
+chmod 755 $release/*.command
+chmod 755 $release/*.sh
+chmod 755 $release/addon
+
+# compress files
+tar -cf $release.tar $release
+rm -Rf $release
+gzip -9 $release.tar
+mv $release.tar.gz $target
+
+# make release test file:
+# this file must be copied later on to
+# www.anomic.de/AnomicHTTPProxy/
+echo $version > $doc/release.txt
+
+# finished
+echo finished. created $target/$release.tar.gz
diff --git a/readme.txt b/readme.txt
new file mode 100644
index 000000000..d3771557a
--- /dev/null
+++ b/readme.txt
@@ -0,0 +1,47 @@
+README for yacy (C) by Michael Peter Christen; mc@anomic.de
+-------------------------------------------------------------------------------
+Please visit www.anomic.de for latest changes or new documentation.
+YACY comes with ABSOLUTELY NO WARRANTY!
+This is free software, and you are welcome to redistribute it
+under certain conditions; see file gpl.txt for details.
+-------------------------------------------------------------------------------
+
+This is a caching http proxy with integrated search engine.
+The complete documentation can be found inside the 'doc' subdirectory
+in this release.
+Start browsing the manual by opening the index.html file with our web browser.
+ +YOU NEED JAVA 1.4.2 TO RUN THIS APPLICATION! PLEASE DOWNLOAD FROM java.sun.com + +Startup of YACY: + +- on Linux : start startYACY.sh ("nohup ./startYACY.sh > yacy.log &") +- on Windows : double-click startYACY.bat +- on Mac OS X : double-click startYACY.command (alias possible!) +- on any other OS : set your classpath to the 'Classes' folder + and execute yacy.class, while your current system + path must target the release directory to access the + configuration files. + +Then start to administrate yacy by using the application's on-line +interface: browse to http://localhost:8080, where you will see your personal +proxy configuration and administration interface. + +Configure your internet connection to use yacy at port 8080. +You can also change the default proxy port. + +If you would like to use yacy not as a proxy but only as a distributed crawling/search +engine, you can do so. Start crawling at the 'Index Creation' menu point. + +If you have any questions, please do not hesitate to contact the author: +Send a mail to Michael Christen (mc@anomic.de) with a meaningful subject +including the word 'yacy' to prevent your email from getting stuck +in my anti-spam filter. + +If you would like to have a customized version for special needs, feel free to ask +the author for a business proposal to customize yacy according to +your needs. We also provide integration solutions if the software is about +to be integrated into your enterprise application.
+ +Germany, Frankfurt a.M., 05.01.2005 +Michael Peter Christen diff --git a/source/de/anomic/data/listManager.java b/source/de/anomic/data/listManager.java new file mode 100644 index 000000000..0cc0760bf --- /dev/null +++ b/source/de/anomic/data/listManager.java @@ -0,0 +1,200 @@ +// listManager.java +// ------------------------------------- +// part of YACY +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2004 +// +// This file ist contributed by Alexander Schier +// last major change: 09.08.2004 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. 
The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. +// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. + +package de.anomic.data; + +import java.util.*; +import java.io.*; +import de.anomic.plasma.*; +import de.anomic.http.*; +import de.anomic.server.*; + +//The Naming of the functions is a bit strange... + +public class listManager { + public static plasmaSwitchboard switchboard; + public static File listsPath; + +//===============Listslists===================== + //get an array of all Lists from a Config Property + public static String[] getListslistArray(String Listname){ + return switchboard.getConfig(Listname, "").split(","); + } + + //removes a List from a Lists-List + public static void removeListFromListslist(String ListName, String BlackList){ + String Lists[] = getListslistArray(ListName); + String temp = ""; + + for(int i=0;i <= Lists.length -1;i++){ + if( !Lists[i].equals(BlackList) && !Lists[i].equals("") ){ + temp += Lists[i] + ","; + } + } + if( temp.endsWith(",") ){ //remove "," at end... + temp = temp.substring(0, temp.length() -1); + } + if( temp.startsWith(",") ){ //remove "," at end... 
+ temp = temp.substring(1, temp.length() ); + } + + switchboard.setConfig(ListName, temp); + } + + //add a new List to a List-List (empty entries of the old List-List are dropped, so no leading "," is stored) + public static void addListToListslist(String ListName, String newList){ + String Lists[] = getListslistArray(ListName); + String temp = ""; + + for(int i = 0;i <= (Lists.length -1); i++){ + if( !Lists[i].equals("") ) temp += Lists[i] + ","; + } + temp += newList; + switchboard.setConfig(ListName, temp); + } + + //returns true, if the Lists-List contains the Listname + public static boolean ListInListslist(String Listname, String BlackList){ + String Lists[] = getListslistArray(Listname); + + for(int u=0;u <= Lists.length -1;u++){ + if( BlackList.equals(Lists[u]) ){ + return true; + } + } + return false; + } + +//================general Lists================== + + //Gets a Vector of all lines (items) of a (list)file; on an IOException the lines read so far are returned + public static Vector getListArray(File listFile){ + String line; + Vector list = new Vector(); + int count = 0; + try{ + BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(listFile))); + + while( (line = br.readLine()) != null){ + list.add(line); + count++; + } + br.close(); + }catch(IOException e){ + //unreadable file: fall through and return what was read so far + } + return list; + } + + //Writes the Liststring to a file; returns false if an IOException occurs + public static boolean writeList(File listFile, String out){ + try{ + BufferedWriter bw = new BufferedWriter(new PrintWriter(new FileWriter(listFile))); + bw.write(out); + bw.close(); + return true; + }catch(IOException e){ + return false; + } + } + + //overloaded function: writes every array element, each followed by serverCore.crlfString + public static boolean writeList(File listFile, String[] list){ + String out = ""; + for(int i=0;i < list.length; i++){ //list.length itself is not a valid index + out += list[i] + serverCore.crlfString; + } + return writeList(listFile, out); //(File, String) + } + + public static String getListString(String filename, boolean withcomments){ + String temp = ""; + String line = ""; + try{ + BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(new File(listsPath
,filename)))); + //Read the List; comment lines ("#...") are kept only if withcomments is set, empty lines are always dropped + while((line = br.readLine()) != null){ + if( (!line.startsWith("#") || withcomments) && (!line.equals("")) ){ + temp += line + serverCore.crlfString; + } + } + br.close(); + }catch(IOException e){} //unreadable list: whatever was read so far is returned + return temp; + } + + //get a Directory Listing as a String Array (tries to create the directory if missing; empty array if it cannot be listed) + public static String[] getDirListing(String dirname){ + String[] fileListString; + File[] fileList; + File dir = new File(dirname); + + if(dir != null){ + if(!dir.exists()){ + dir.mkdir(); + } + fileList = dir.listFiles(); if(fileList == null){ return new String[0]; } //listFiles() yields null for non-directories and on I/O errors + fileListString = new String[fileList.length]; + for(int i=0;i<= fileList.length-1;i++){ + fileListString[i]=fileList[i].getName(); + } + return fileListString; + } + return null; + } + + +//=============Blacklist specific================ + + //load all active Blacklists in the Proxy + public static void reloadBlacklists(){ + String f = switchboard.getConfig("proxyBlackListsActive", ""); + if (!"".equals(f)){ //compare content, not object identity + httpdProxyHandler.blackListURLs = httpdProxyHandler.loadBlacklist("black", f, "/"); + }else{ + httpdProxyHandler.blackListURLs = new TreeMap(); + } + } + + +} diff --git a/source/de/anomic/data/messageBoard.java b/source/de/anomic/data/messageBoard.java new file mode 100644 index 000000000..d68864030 --- /dev/null +++ b/source/de/anomic/data/messageBoard.java @@ -0,0 +1,262 @@ +// messageBoard.java +// ------------------------------------- +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2004 +// last major change: 28.06.2004 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. +// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. 
+ +package de.anomic.data; + +import java.io.*; +import java.util.*; +import java.text.*; +import de.anomic.kelondro.*; +import de.anomic.server.*; + +public class messageBoard { + + private static final int categoryLength = 12; // key prefix: the category is cut or padded with "_" to this length + private static final String dateFormat = "yyyyMMddHHmmss"; + private static final int recordSize = 512; + + private static TimeZone GMTTimeZone = TimeZone.getTimeZone("PST"); // NOTE(review): named GMT but initialised with "PST" - confirm which zone is intended + private static SimpleDateFormat SimpleFormatter = new SimpleDateFormat(dateFormat); + + private kelondroMap database = null; + private int sn = 0; // serial number appended to keys as two digits, wraps to 0 after 99 + + public messageBoard(File path, int bufferkb) throws IOException { + new File(path.getParent()).mkdir(); + if (database == null) { + if (path.exists()) + database = new kelondroMap(new kelondroDyn(path, bufferkb * 0x400)); + else + database = new kelondroMap(new kelondroDyn(path, bufferkb * 0x400, categoryLength + dateFormat.length() + 2, recordSize)); // key length = category + date + 2-digit serial + } + sn = 0; + } + + public void close() throws IOException { + database.close(); + } + + private static String dateString() { + return SimpleFormatter.format(new GregorianCalendar(GMTTimeZone).getTime()); + } + + private String snString() { + String s = "" + sn; + if (s.length() == 1) s = "0" + s; + sn++; + if (sn > 99) sn = 0; + return s; + } + + public entry newEntry(String category, + String authorName, String authorHash, + String recName, String recHash, + String subject, byte[] message) { + return new entry(category, authorName, authorHash, recName, recHash, subject, message); + } + + public class entry { + + String key; // composed of category, date and serial number + Map record; // contains author, target hash, subject and message + + public entry(String category, + String authorName, String authorHash, + String recName, String recHash, + String subject, byte[] message) { + record = new HashMap(); + key = category; + if (key.length() > categoryLength) key = key.substring(0, categoryLength); + while (key.length() < categoryLength) key += "_"; + key += dateString() +
snString(); + if ((authorName == null) || (authorName.length() == 0)) authorName = "anonymous"; + record.put("author", authorName); + if ((recName == null) || (recName.length() == 0)) recName = "anonymous"; + record.put("recipient", recName); + if (authorHash == null) authorHash = ""; + record.put("ahash", authorHash); + if (recHash == null) recHash = ""; + record.put("rhash", recHash); + if (subject == null) subject = ""; + record.put("subject", subject); + if (message == null) + record.put("message", ""); + else + record.put("message", serverCodings.enhancedCoder.encodeBase64(message)); + record.put("read", "false"); + } + + private entry(String key, Map record) { + this.key = key; + this.record = record; + } + + public Date date() { + try { + String c = key.substring(categoryLength); + c = c.substring(0, c.length() - 2); + return SimpleFormatter.parse(c); + } catch (ParseException e) { + return new Date(); + } + } + + public String category() { + String c = key.substring(0, categoryLength); + while (c.endsWith("_")) c = c.substring(0, c.length() - 1); + return c; + } + + public String author() { + String a = (String) record.get("author"); + if (a == null) return "anonymous"; else return a; + } + + public String recipient() { + String a = (String) record.get("recipient"); + if (a == null) return "anonymous"; else return a; + } + + public String authorHash() { + String a = (String) record.get("ahash"); + if (a == null) return null; else return a; + } + + public String recipientHash() { + String a = (String) record.get("rhash"); + if (a == null) return null; else return a; + } + + public String subject() { + String s = (String) record.get("subject"); + if (s == null) return ""; else return s; + } + + public byte[] message() { + String m = (String) record.get("message"); + if (m == null) return new byte[0]; + record.put("read", "true"); + return serverCodings.enhancedCoder.decodeBase64(m); + } + + public boolean read() { + String r = (String) record.get("read"); + 
if (r == null) return false; + if (r.equals("false")) return false; + return true; + } + } + + public String write(entry message) { + // writes a message and returns key + try { + database.set(message.key, message.record); + return message.key; + } catch (IOException e) { + return null; + } + } + + public entry read(String key) { + try { + Map record = database.get(key); + return new entry(key, record); + } catch (IOException e) { + return null; + } + } + + /* + public boolean has(String key) { + try { + return database.has(key); + } catch (IOException e) { + return false; + } + } + */ + + public void remove(String key) { + try { + database.remove(key); + } catch (IOException e) { + } + } + + public Iterator keys(String category, boolean up) throws IOException { + //return database.keys(); + return new catIter(category, up); + } + + public class catIter implements Iterator { + + Iterator allIter = null; + String nextKey = null; + String category = ""; + + public catIter(String category, boolean up) throws IOException { + this.allIter = database.keys(up, false); + this.category = category; + findNext(); + } + + public void findNext() { + while (allIter.hasNext()) { + nextKey = (String) allIter.next(); + if (nextKey.startsWith(this.category)) return; + } + nextKey = null; + } + + public boolean hasNext() { + return nextKey != null; + } + + public Object next() { + String next = nextKey; + findNext(); + return next; + } + + public void remove() { + } + + } +} diff --git a/source/de/anomic/data/wikiBoard.java b/source/de/anomic/data/wikiBoard.java new file mode 100644 index 000000000..ce8b65a06 --- /dev/null +++ b/source/de/anomic/data/wikiBoard.java @@ -0,0 +1,295 @@ +// wikiBoard.java +// ------------------------------------- +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2004 +// last major change: 20.07.2004 +// +// This program is free software; you can redistribute it and/or modify +// it under 
the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. +// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. 
+ +package de.anomic.data; + +import java.io.*; +import java.util.*; +import java.text.*; +import de.anomic.kelondro.*; +import de.anomic.server.*; + +public class wikiBoard { + + private static final int keyLength = 64; + private static final String dateFormat = "yyyyMMddHHmmss"; + private static final int recordSize = 512; + + private static TimeZone GMTTimeZone = TimeZone.getTimeZone("PST"); // NOTE(review): named GMT but initialised with "PST" - confirm which zone is intended + private static SimpleDateFormat SimpleFormatter = new SimpleDateFormat(dateFormat); + + private kelondroMap datbase = null; // current pages + private kelondroMap bkpbase = null; // backup versions; keys are keyLength + dateFormat.length() long + private Hashtable authors = new Hashtable(); + private int sn = 0; + + public wikiBoard(File actpath, File bkppath, int bufferkb) throws IOException { + new File(actpath.getParent()).mkdir(); + if (datbase == null) { + if (actpath.exists()) + datbase = new kelondroMap(new kelondroDyn(actpath, bufferkb * 0x400)); // 0x400 like the three other opens (presumably KB -> bytes) + else + datbase = new kelondroMap(new kelondroDyn(actpath, bufferkb * 0x400, keyLength, recordSize)); + } + new File(bkppath.getParent()).mkdir(); + if (bkpbase == null) { + if (bkppath.exists()) + bkpbase = new kelondroMap(new kelondroDyn(bkppath, bufferkb * 0x400)); + else + bkpbase = new kelondroMap(new kelondroDyn(bkppath, bufferkb * 0x400, keyLength + dateFormat.length(), recordSize)); + } + sn = 0; + } + + public void close() throws IOException { + datbase.close(); + bkpbase.close(); + } + + private static String dateString() { + return dateString(new GregorianCalendar(GMTTimeZone).getTime()); + } + + private static String dateString(Date date) { + return SimpleFormatter.format(date); + } + + private static String normalize(String key) { + if (key == null) return "null"; + return key.trim().toLowerCase(); + } + + public static String webalize(String key) { + if (key == null) return "null"; + key = key.trim().toLowerCase(); + int p; + while ((p = key.indexOf(" ")) >= 0) + key = key.substring(0, p) + "%20" + key.substring(p +1); + return key; + } + + public String guessAuthor(String ip) { + String
author = (String) authors.get(ip); + //System.out.println("DEBUG: guessing author for ip = " + ip + " is '" + author + "', authors = " + authors.toString()); + return author; + } + + public entry newEntry(String subject, String author, String ip, String reason, byte[] page) { + return new entry(normalize(subject), author, ip, reason, page); + } + + public class entry { + + String key; + Map record; + + public entry(String subject, String author, String ip, String reason, byte[] page) { + record = new HashMap(); + key = subject; + if (key.length() > keyLength) key = key.substring(0, keyLength); + record.put("date", dateString()); + if ((author == null) || (author.length() == 0)) author = "anonymous"; + record.put("author", serverCodings.enhancedCoder.encodeBase64(author.getBytes())); + if ((ip == null) || (ip.length() == 0)) ip = ""; + record.put("ip", ip); + if ((reason == null) || (reason.length() == 0)) reason = ""; + record.put("reason", serverCodings.enhancedCoder.encodeBase64(reason.getBytes())); + if (page == null) + record.put("page", ""); + else + record.put("page", serverCodings.enhancedCoder.encodeBase64(page)); + authors.put(ip, author); + //System.out.println("DEBUG: setting author " + author + " for ip = " + ip + ", authors = " + authors.toString()); + } + + private entry(String key, Map record) { + this.key = key; + this.record = record; + } + + public String subject() { + return key; + } + + public Date date() { + try { + String c = (String) record.get("date"); + return SimpleFormatter.parse(c); + } catch (ParseException e) { + return new Date(); + } + } + + public String author() { + String a = (String) record.get("author"); + if (a == null) return "anonymous"; + byte[] b = serverCodings.enhancedCoder.decodeBase64(a); + if (b == null) return "anonymous"; + return new String(b); + } + + public String reason() { + String r = (String) record.get("reason"); + if (r == null) return ""; + byte[] b = serverCodings.enhancedCoder.decodeBase64(r); + if (b == 
null) return "unknown"; + return new String(b); + } + + public byte[] page() { + String m = (String) record.get("page"); + if (m == null) return new byte[0]; + byte[] b = serverCodings.enhancedCoder.decodeBase64(m); + if (b == null) return "".getBytes(); + return b; + } + + private void setAncestorDate(Date date) { + record.put("bkp", dateString(date)); + } + + private Date getAncestorDate() { + try { + String c = (String) record.get("bkp"); // the field setAncestorDate() writes; null if no ancestor was recorded + if (c == null) return null; + return SimpleFormatter.parse(c); + } catch (ParseException e) { + return null; + } + } + + /* + public boolean hasAncestor() { + Date ancDate = getAncestorDate(); + if (ancDate == null) return false; + try { + return bkpbase.has(key + dateString(ancDate)); + } catch (IOException e) { + return false; + } + } + */ + + public entry getAncestor() { + Date ancDate = getAncestorDate(); + if (ancDate == null) return null; + return read(key + dateString(ancDate), bkpbase); // backup key = page key + ancestor date + } + + private void setChild(String subject) { + record.put("child", serverCodings.enhancedCoder.encodeBase64(subject.getBytes())); + } + + private String getChildName() { + String c = (String) record.get("child"); + if (c == null) return null; + byte[] subject = serverCodings.enhancedCoder.decodeBase64(c); + if (subject == null) return null; + return new String(subject); + } + + public boolean hasChild() { + String c = (String) record.get("child"); + if (c == null) return false; + byte[] subject = serverCodings.enhancedCoder.decodeBase64(c); + return (subject != null); + } + + public entry getChild() { + String childName = getChildName(); + if (childName == null) return null; + return read(childName, datbase); + } + } + + public String write(entry page) { + // writes a new page and returns key + try { + // first load the old page + entry oldEntry = read(page.key); + // set the bkp date of the new page to the date of the old page + Date oldDate = oldEntry.date(); + page.setAncestorDate(oldDate); + oldEntry.setChild(page.subject()); +
// write the backup + //System.out.println("key = " + page.key); + //System.out.println("oldDate = " + oldDate); + //System.out.println("record = " + oldEntry.record.toString()); + bkpbase.set(page.key + dateString(oldDate), oldEntry.record); + // write the new page + datbase.set(page.key, page.record); + return page.key; + } catch (IOException e) { + return null; + } + } + + public entry read(String key) { + return read(key, datbase); + } + + private entry read(String key, kelondroMap base) { + try { + key = normalize(key); + Map record = base.get(key); + if (record == null) + return newEntry(key, "anonymous", "127.0.0.1", "New Page", "".getBytes()); + else + return new entry(key, record); + } catch (IOException e) { + return null; + } + } + + /* + public boolean has(String key) { + try { + return datbase.has(normalize(key)); + } catch (IOException e) { + return false; + } + } + */ + + public Iterator keys(boolean up) throws IOException { + return datbase.keys(up, false); + } + +} diff --git a/source/de/anomic/htmlFilter/htmlFilterAbstractScraper.java b/source/de/anomic/htmlFilter/htmlFilterAbstractScraper.java new file mode 100644 index 000000000..92f00476d --- /dev/null +++ b/source/de/anomic/htmlFilter/htmlFilterAbstractScraper.java @@ -0,0 +1,406 @@ +// htmlFilterAbstractScraper.java +// --------------------------- +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2004 +// last major change: 18.02.2004 +// +// You agree that the Author(s) is (are) not responsible for cost, +// loss of data or any harm that may be caused by usage of this softare or +// this documentation. The usage of this software is on your own risk. 
The +// installation and usage (starting/running) of this software may allow other +// people or application to access your computer and any attached devices and +// is highly dependent on the configuration of the software which must be +// done by the user of the software;the author(s) is (are) also +// not responsible for proper configuration and usage of the software, even +// if provoked by documentation provided together with the software. +// +// THE SOFTWARE THAT FOLLOWS AS ART OF PROGRAMMING BELOW THIS SECTION +// IS PUBLISHED UNDER THE GPL AS DOCUMENTED IN THE FILE gpl.txt ASIDE THIS +// FILE AND AS IN http://www.gnu.org/licenses/gpl.txt +// ANY CHANGES TO THIS FILE ACCORDING TO THE GPL CAN BE DONE TO THE +// LINES THAT FOLLOWS THIS COPYRIGHT NOTICE HERE, BUT CHANGES MUST NOT +// BE DONE ABOVE OR INSIDE THE COPYRIGHT NOTICE. A RE-DISTRIBUTION +// MUST CONTAIN THE INTACT AND UNCHANGED COPYRIGHT NOTICE. +// CONTRIBUTIONS AND CHANGES TO THE PROGRAM CODE SHOULD BE MARKED AS SUCH. + +package de.anomic.htmlFilter; + +import java.util.*; +import de.anomic.server.*; + +public abstract class htmlFilterAbstractScraper implements htmlFilterScraper { + + public static final byte lb = (byte) '<'; + public static final byte rb = (byte) '>'; + public static final byte sl = (byte) '/'; + + private HashSet tags0; + private HashSet tags1; + + public htmlFilterAbstractScraper(HashSet tags0, HashSet tags1) { + this.tags0 = tags0; + this.tags1 = tags1; + } + + public boolean isTag0(String tag) { + return tags0.contains(tag); + } + + public boolean isTag1(String tag) { + return tags1.contains(tag); + } + + //the 'missing' method that shall be implemented: + public abstract void scrapeText(byte[] text); + /* could be easily implemented as: + { } + */ + + // the other methods must take into account to construct the return value correctly + public void scrapeTag0(String tagname, Properties tagopts) { + } + + public void scrapeTag1(String tagname, Properties tagopts, byte[] text) { 
+ } + + protected static serverByteBuffer stripAllTags(serverByteBuffer bb) { + int p0, p1; + while ((p0 = bb.indexOf(lb)) >= 0) { + p1 = bb.indexOf(rb, p0); + if (p1 >= 0) { + bb = new serverByteBuffer(bb.getBytes(0, p0)).trim().append((byte) 32).append(new serverByteBuffer(bb.getBytes(p1 + 1)).trim()); + } else { + bb = new serverByteBuffer(bb.getBytes(0, p0)).trim().append(new serverByteBuffer(bb.getBytes(p0 + 1)).trim()); + } + } + return bb.trim(); + } + + // string conversions + private static serverByteBuffer code_iso8859(byte c) { + String s = code_iso8859s(c); + if (s == null) return null; else return new serverByteBuffer(s.getBytes()); + } + + private static String code_iso8859s(byte c) { + switch ((int) c & 0xff) { + + // german umlaute and ligaturen + case 0xc4: return "AE"; case 0xd6: return "OE"; case 0xdc: return "UE"; + case 0xe4: return "ae"; case 0xf6: return "oe"; case 0xfc: return "ue"; + case 0xdf: return "ss"; + + // accent on letters; i.e. french characters + case 0xc0: case 0xc1: case 0xc2: case 0xc3: case 0xc5: return "A"; + case 0xc6: return "AE"; + case 0xc7: return "C"; + case 0xc8: case 0xc9: case 0xca: return "E"; + case 0xcc: case 0xcd: case 0xce: case 0xcf: return "I"; + case 0xd0: return "D"; + case 0xd1: return "N"; + case 0xd2: case 0xd3: case 0xd4: case 0xd5: case 0xd8: return "O"; + case 0xd7: return "x"; + case 0xd9: case 0xda: case 0xdb: return "U"; + case 0xdd: return "Y"; + case 0xde: return "p"; + + case 0xe0: case 0xe1: case 0xe2: case 0xe3: case 0xe5: return "a"; + case 0xe6: return "ae"; + case 0xe7: return "c"; + case 0xe8: case 0xe9: case 0xea: return "e"; + case 0xec: case 0xed: case 0xee: case 0xef: return "i"; + case 0xf0: return "d"; + case 0xf1: return "n"; + case 0xf2: case 0xf3: case 0xf4: case 0xf5: case 0xf8: return "o"; + case 0xf7: return "%"; + case 0xf9: case 0xfa: case 0xfb: return "u"; + case 0xfd: case 0xff: return "y"; + case 0xfe: return "p"; + + // special characters + case 0xa4: return " euro "; + 
default: return null; + } + } + + public static serverByteBuffer convertUmlaute(serverByteBuffer bb) { + serverByteBuffer t = new serverByteBuffer(); + serverByteBuffer z; + for (int i = 0; i < bb.length(); i++) { + z = code_iso8859(bb.byteAt(i)); + t.append((z == null) ? (new serverByteBuffer().append(bb.byteAt(i))) : z); + } + return t; + } + + private static String transscripts(String code) { + if (code.equals(""")) return "\""; //Anführungszeichen oben + if (code.equals("&")) return "&"; //Ampersand-Zeichen, kaufmännisches Und + if (code.equals("<")) return "<"; //öffnende spitze Klammer + if (code.equals(">")) return ">"; //schließende spitze Klammer + if (code.equals(" ")) return " "; //Erzwungenes Leerzeichen + if (code.equals("¡")) return "!"; //umgekehrtes Ausrufezeichen + if (code.equals("¢")) return " cent "; //Cent-Zeichen + if (code.equals("£")) return " pound "; //Pfund-Zeichen + if (code.equals("¤")) return " currency "; //Währungs-Zeichen + if (code.equals("¥")) return " yen "; //Yen-Zeichen + if (code.equals("¦")) return " "; //durchbrochener Strich + if (code.equals("§")) return " paragraph "; //Paragraph-Zeichen + if (code.equals("¨")) return " "; //Pünktchen oben + if (code.equals("©")) return " copyright "; //Copyright-Zeichen + if (code.equals("ª")) return " "; //Ordinal-Zeichen weiblich + if (code.equals("«")) return " "; //angewinkelte Anführungszeichen links + if (code.equals("¬")) return " not "; //Verneinungs-Zeichen + if (code.equals("­")) return "-"; //kurzer Trennstrich + if (code.equals("®")) return " trademark "; //Registriermarke-Zeichen + if (code.equals("¯")) return " "; //Überstrich + if (code.equals("°")) return " degree "; //Grad-Zeichen + if (code.equals("±")) return " +/- "; //Plusminus-Zeichen + if (code.equals("²")) return " square "; //Hoch-2-Zeichen + if (code.equals("³")) return " 3 "; //Hoch-3-Zeichen + if (code.equals("´")) return " "; //Acute-Zeichen + if (code.equals("µ")) return " micro "; //Mikro-Zeichen + if 
(code.equals("¶")) return " paragraph "; //Absatz-Zeichen + if (code.equals("·")) return " "; //Mittelpunkt + if (code.equals("¸")) return " "; //Häkchen unten + if (code.equals("¹")) return " "; //Hoch-1-Zeichen + if (code.equals("º")) return " degree "; //Ordinal-Zeichen männlich + if (code.equals("»")) return " "; //angewinkelte Anführungszeichen rechts + if (code.equals("¼")) return " quarter "; //ein Viertel + if (code.equals("½")) return " half "; //ein Halb + if (code.equals("¾")) return " 3/4 "; //drei Viertel + if (code.equals("¿")) return "?"; //umgekehrtes Fragezeichen + if (code.equals("À")) return "A"; //A mit Accent grave + if (code.equals("Á")) return "A"; //A mit Accent acute + if (code.equals("Â")) return "A"; //A mit Circumflex + if (code.equals("Ã")) return "A"; //A mit Tilde + if (code.equals("Ä")) return "Ae"; //A Umlaut + if (code.equals("Å")) return "A"; //A mit Ring + if (code.equals("Æ")) return "A"; //A mit legiertem E + if (code.equals("Ç")) return "C"; //C mit Häkchen + if (code.equals("È")) return "E"; //E mit Accent grave + if (code.equals("É")) return "E"; //E mit Accent acute + if (code.equals("Ê")) return "E"; //E mit Circumflex + if (code.equals("Ë")) return "E"; //E Umlaut + if (code.equals("Ì")) return "I"; //I mit Accent grave + if (code.equals("Í")) return "I"; //I mit Accent acute + if (code.equals("Î")) return "I"; //I mit Circumflex + if (code.equals("Ï")) return "I"; //I Umlaut + if (code.equals("Ð")) return "D"; //Eth (isländisch) + if (code.equals("Ñ")) return "N"; //N mit Tilde + if (code.equals("Ò")) return "O"; //O mit Accent grave + if (code.equals("Ó")) return "O"; //O mit Accent acute + if (code.equals("Ô")) return "O"; //O mit Circumflex + if (code.equals("Õ")) return "O"; //O mit Tilde + if (code.equals("Ö")) return "Oe"; //O Umlaut + if (code.equals("×")) return " times "; //Mal-Zeichen + if (code.equals("Ø")) return "O"; //O mit Schrägstrich + if (code.equals("Ù")) return "U"; //U mit Accent grave + if 
(code.equals("Ú")) return "U"; //U mit Accent acute + if (code.equals("Û")) return "U"; //U mit Circumflex + if (code.equals("Ü")) return "Ue"; //U Umlaut + if (code.equals("Ý")) return "Y"; //Y mit Accent acute + if (code.equals("Þ")) return "P"; //THORN (isländisch) + if (code.equals("ß")) return "ss"; //scharfes S + if (code.equals("à")) return "a"; //a mit Accent grave + if (code.equals("á")) return "a"; //a mit Accent acute + if (code.equals("â")) return "a"; //a mit Circumflex + if (code.equals("ã")) return "a"; //a mit Tilde + if (code.equals("ä")) return "ae"; //a Umlaut + if (code.equals("å")) return "a"; //a mit Ring + if (code.equals("æ")) return "a"; //a mit legiertem e + if (code.equals("ç")) return "c"; //c mit Häkchen + if (code.equals("è")) return "e"; //e mit Accent grave + if (code.equals("é")) return "e"; //e mit Accent acute + if (code.equals("ê")) return "e"; //e mit Circumflex + if (code.equals("ë")) return "e"; //e Umlaut + if (code.equals("ì")) return "i"; //i mit Accent grave + if (code.equals("í")) return "i"; //i mit Accent acute + if (code.equals("î")) return "i"; //i mit Circumflex + if (code.equals("ï")) return "i"; //i Umlaut + if (code.equals("ð")) return "d"; //eth (isländisch) + if (code.equals("ñ")) return "n"; //n mit Tilde + if (code.equals("ò")) return "o"; //o mit Accent grave + if (code.equals("ó")) return "o"; //o mit Accent acute + if (code.equals("ô")) return "o"; //o mit Circumflex + if (code.equals("õ")) return "o"; //o mit Tilde + if (code.equals("ö")) return "oe"; //o Umlaut + if (code.equals("÷")) return "%"; //Divisions-Zeichen + if (code.equals("ø")) return "o"; //o mit Schrägstrich + if (code.equals("ù")) return "u"; //u mit Accent grave + if (code.equals("ú")) return "u"; //u mit Accent acute + if (code.equals("û")) return "u"; //u mit Circumflex + if (code.equals("ü")) return "ue"; //u Umlaut + if (code.equals("ý")) return "y"; //y mit Accent acute + if (code.equals("þ")) return "p"; //thorn (isländisch) + if 
(code.equals("ÿ")) return "y"; //y Umlaut + if (code.equals("Α")) return " Alpha "; //Alpha groß + if (code.equals("α")) return " alpha "; //alpha klein + if (code.equals("Β")) return " Beta "; //Beta groß + if (code.equals("β")) return " beta "; //beta klein + if (code.equals("Γ")) return " Gamma "; //Gamma groß + if (code.equals("γ")) return " gamma "; //gamma klein + if (code.equals("Δ")) return " Delta "; //Delta groß + if (code.equals("δ")) return " delta "; //delta klein + if (code.equals("Ε")) return " Epsilon "; //Epsilon groß + if (code.equals("ε")) return " epsilon "; //epsilon klein + if (code.equals("Ζ")) return " Zeta "; //Zeta groß + if (code.equals("ζ")) return " zeta "; //zeta klein + if (code.equals("Η")) return " Eta "; //Eta groß + if (code.equals("η")) return " eta "; //eta klein + if (code.equals("Θ")) return " Theta "; //Theta groß + if (code.equals("θ")) return " theta "; //theta klein + if (code.equals("Ι")) return " Iota "; //Iota groß + if (code.equals("ι")) return " iota "; //iota klein + if (code.equals("Κ")) return " Kappa "; //Kappa groß + if (code.equals("κ")) return " kappa "; //kappa klein + if (code.equals("Λ")) return " Lambda "; //Lambda groß + if (code.equals("λ")) return " lambda "; //lambda klein + if (code.equals("Μ")) return " Mu "; //Mu groß + if (code.equals("μ")) return " mu "; //mu klein + if (code.equals("Ν")) return " Nu "; //Nu groß + if (code.equals("ν")) return " nu "; //nu klein + if (code.equals("Ξ")) return " Xi "; //Xi groß + if (code.equals("ξ")) return " xi "; //xi klein + if (code.equals("Ο")) return " Omicron "; //Omicron groß + if (code.equals("ο")) return " omicron "; //omicron klein + if (code.equals("Π")) return " Pi "; //Pi groß + if (code.equals("π")) return " pi "; //pi klein + if (code.equals("Ρ")) return " Rho "; //Rho groß + if (code.equals("ρ")) return " rho "; //rho klein + if (code.equals("Σ")) return " Sigma "; //Sigma groß + if (code.equals("ς")) return " sigma "; //sigmaf klein + if 
(code.equals("σ")) return " sigma "; //sigma klein + if (code.equals("Τ")) return " Tau "; //Tau groß + if (code.equals("τ")) return " tau "; //tau klein + if (code.equals("Υ")) return " Ypsilon "; //Upsilon groß + if (code.equals("υ")) return " ypsilon "; //upsilon klein + if (code.equals("Φ")) return " Phi "; //Phi groß + if (code.equals("φ")) return " phi "; //phi klein + if (code.equals("Χ")) return " Chi "; //Chi groß + if (code.equals("χ")) return " chi "; //chi klein + if (code.equals("Ψ")) return " Psi "; //Psi groß + if (code.equals("ψ")) return " psi "; //psi klein + if (code.equals("Ω")) return " Omega "; //Omega groß + if (code.equals("ω")) return " omega "; //omega klein + if (code.equals("ϑ")) return " theta "; //theta Symbol + if (code.equals("ϒ")) return " ypsilon "; //upsilon mit Haken + if (code.equals("ϖ")) return " pi "; //pi Symbol + if (code.equals("∀")) return " for all "; //für alle + if (code.equals("∂")) return " part of "; //teilweise + if (code.equals("∃")) return " exists "; //existiert + if (code.equals("∅")) return " null "; //leer + if (code.equals("∇")) return " nabla "; //nabla + if (code.equals("∈")) return " element of "; //Element von + if (code.equals("∉")) return " not element of "; //kein Element von + if (code.equals("∋")) return " contains "; //enthält als Element + if (code.equals("∏")) return " product "; //Produkt + if (code.equals("∑")) return " sum "; //Summe + if (code.equals("−")) return " minus "; //minus + if (code.equals("∗")) return " times "; //Asterisk + if (code.equals("√")) return " sqare root "; //Quadratwurzel + if (code.equals("∝")) return " proportional to "; //proportional zu + if (code.equals("∞")) return " unlimited "; //unendlich + if (code.equals("∠")) return " angle "; //Winkel + if (code.equals("∧")) return " and "; //und + if (code.equals("∨")) return " or "; //oder + if (code.equals("∩")) return " "; //Schnittpunkt + if (code.equals("∪")) return " unity "; //Einheit + if (code.equals("∫")) return 
" integral "; //Integral + if (code.equals("∴")) return " cause "; //deshalb + if (code.equals("∼")) return " similar to "; //ähnlich wie + if (code.equals("≅")) return " equal "; //annähernd gleich + if (code.equals("≈")) return " equal "; //beinahe gleich + if (code.equals("≠")) return " not equal "; //ungleich + if (code.equals("≡")) return " identical "; //identisch mit + if (code.equals("≤")) return " smaller or equal than "; //kleiner gleich + if (code.equals("≥")) return " greater or equal than "; //größer gleich + if (code.equals("⊂")) return " subset of "; //Untermenge von + if (code.equals("⊃")) return " superset of "; //Obermenge von + if (code.equals("⊄")) return " not subset of "; //keine Untermenge von + if (code.equals("⊆")) return ""; //Untermenge von oder gleich mit + if (code.equals("⊇")) return ""; //Obermenge von oder gleich mit + if (code.equals("⊕")) return ""; //Direktsumme + if (code.equals("⊗")) return ""; //Vektorprodukt + if (code.equals("⊥")) return ""; //senkrecht zu + if (code.equals("⋅")) return ""; //Punkt-Operator + if (code.equals("◊")) return ""; //Raute + if (code.equals("⌈")) return ""; //links oben + if (code.equals("⌉")) return ""; //rechts oben + if (code.equals("⌊")) return ""; //links unten + if (code.equals("⌋")) return ""; //rechts unten + if (code.equals("⟨")) return ""; //spitze Klammer links + if (code.equals("⟩")) return ""; //spitze Klammer rechts + if (code.equals("←")) return ""; //Pfeil links + if (code.equals("↑")) return ""; //Pfeil oben + if (code.equals("→")) return ""; //Pfeil rechts + if (code.equals("↓")) return ""; //Pfeil unten + if (code.equals("↔")) return ""; //Pfeil links/rechts + if (code.equals("↵")) return ""; //Pfeil unten-Knick-links + if (code.equals("⇐")) return ""; //Doppelpfeil links + if (code.equals("⇑")) return ""; //Doppelpfeil oben + if (code.equals("⇒")) return ""; //Doppelpfeil rechts + if (code.equals("⇓")) return ""; //Doppelpfeil unten + if (code.equals("⇔")) return ""; 
//Doppelpfeil links/rechts + if (code.equals("•")) return ""; //Bullet-Zeichen + if (code.equals("…")) return ""; //Horizontale Ellipse + if (code.equals("′")) return ""; //Minutenzeichen + if (code.equals("‾")) return ""; //Überstrich + if (code.equals("⁄")) return ""; //Bruchstrich + if (code.equals("℘")) return ""; //Weierstrass p + if (code.equals("ℑ")) return ""; //Zeichen für "imaginär" + if (code.equals("ℜ")) return ""; //Zeichen für "real" + if (code.equals("™")) return ""; //Trademark-Zeichen + if (code.equals("€")) return ""; //Euro-Zeichen + if (code.equals("ℵ")) return ""; //Alef-Symbol + if (code.equals("♠")) return ""; //Pik-Zeichen + if (code.equals("♣")) return ""; //Kreuz-Zeichen + if (code.equals("♥")) return ""; //Herz-Zeichen + if (code.equals("♦")) return ""; //Karo-Zeichen + if (code.equals(" ")) return ""; //Leerzeichen Breite n + if (code.equals(" ")) return ""; //Leerzeichen Breite m + if (code.equals(" ")) return ""; //Schmales Leerzeichen + if (code.equals("‌")) return ""; //null breiter Nichtverbinder + if (code.equals("‍")) return ""; //null breiter Verbinder + if (code.equals("‎")) return ""; //links-nach-rechts-Zeichen + if (code.equals("‏")) return ""; //rechts-nach-links-Zeichen + if (code.equals("–")) return ""; //Gedankenstrich Breite n + if (code.equals("—")) return ""; //Gedankenstrich Breite m + if (code.equals("‘")) return ""; //einfaches Anführungszeichen links + if (code.equals("’")) return ""; //einfaches Anführungszeichen rechts + if (code.equals("‚")) return ""; //einfaches low-9-Zeichen + if (code.equals("“")) return ""; //doppeltes Anführungszeichen links + if (code.equals("”")) return ""; //doppeltes Anführungszeichen rechts + if (code.equals("„")) return ""; //doppeltes low-9-Zeichen rechts + if (code.equals("†")) return ""; //Kreuz + if (code.equals("‡")) return ""; //Doppelkreuz + if (code.equals("‰")) return ""; //zu tausend + if (code.equals("‹")) return ""; //angewinkeltes einzelnes Anf.zeichen links + if 
(code.equals("›")) return ""; //single right-pointing angle quotation mark
+
+        return "";
+    }
+
+    // Byte-array variant of transscripts: the bytes are decoded with the
+    // platform default charset, translated by transscripts, and the result is
+    // encoded again with the platform default charset.
+    private static byte[] transscript(byte[] code) {
+        return transscripts(new String(code)).getBytes();
+    }
+
+    // Rewrites the buffer until it contains no '&' byte any more.
+    // For the first '&' in the buffer, the next ';' at or after it is looked up:
+    //  - found: the span from '&' through ';' is replaced by transscript(span)
+    //  - not found: only the '&' byte itself is removed
+    // Every round builds a new serverByteBuffer from the pieces.
+    // NOTE(review): assumes getBytes(from, to) takes an exclusive end index and
+    // getBytes(from) returns the rest of the buffer - confirm in serverByteBuffer.
+    // NOTE(review): the ';' may be arbitrarily far behind the '&', in which case
+    // all text in between is handed to transscript - confirm this is intended.
+    protected static serverByteBuffer transscriptAll(serverByteBuffer bb) {
+        int p0, p1;
+        while ((p0 = bb.indexOf((byte) '&')) >= 0) {
+            p1 = bb.indexOf((byte) ';', p0);
+            if (p1 >= 0)
+                bb = new serverByteBuffer(bb.getBytes(0, p0)).append(transscript(bb.getBytes(p0, p1 + 1))).append(bb.getBytes(p1 + 1));
+            else
+                bb = new serverByteBuffer(bb.getBytes(0, p0)).append(bb.getBytes(p0 + 1));
+        }
+        return bb;
+    }
+
+    // Applies stripAllTags, then transscriptAll, then convertUmlaute to the
+    // given buffer and returns the result of the last step.
+    protected static serverByteBuffer stripAll(serverByteBuffer bb) {
+        //return stripAllTags(s);
+        return convertUmlaute(transscriptAll(stripAllTags(bb)));
+    }
+
+}
diff --git a/source/de/anomic/htmlFilter/htmlFilterAbstractTransformer.java b/source/de/anomic/htmlFilter/htmlFilterAbstractTransformer.java
new file mode 100644
index 000000000..3808ede63
--- /dev/null
+++ b/source/de/anomic/htmlFilter/htmlFilterAbstractTransformer.java
@@ -0,0 +1,80 @@
+// htmlFilterAbstractTransformer.java
+// ----------------------------------
+// (C) by Michael Peter Christen; mc@anomic.de
+// first published on http://www.anomic.de
+// Frankfurt, Germany, 2004
+// last major change: 18.02.2004
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. +// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. 
+
+package de.anomic.htmlFilter;
+
+import java.util.*;
+
+/**
+ * Skeleton for transformers. It stores the two tag-name sets handed to the
+ * constructor, answers the isTag0/isTag1 membership queries from them and
+ * re-generates tags through htmlFilterOutputStream by default. Only the text
+ * transformation is left to the subclass.
+ */
+public abstract class htmlFilterAbstractTransformer implements htmlFilterTransformer {
+
+    // set consulted by isTag0
+    private HashSet tagSet0;
+    // set consulted by isTag1
+    private HashSet tagSet1;
+
+    /**
+     * @param tags0 tag names for which isTag0 shall answer true
+     * @param tags1 tag names for which isTag1 shall answer true
+     */
+    public htmlFilterAbstractTransformer(HashSet tags0, HashSet tags1) {
+        tagSet0 = tags0;
+        tagSet1 = tags1;
+    }
+
+    /** @return true if the given tag name is a member of the first set */
+    public boolean isTag0(String tag) {
+        boolean member = tagSet0.contains(tag);
+        return member;
+    }
+
+    /** @return true if the given tag name is a member of the second set */
+    public boolean isTag1(String tag) {
+        boolean member = tagSet1.contains(tag);
+        return member;
+    }
+
+    // The one method a subclass has to supply. A pass-through transformer
+    // would simply hand the text back unchanged.
+    public abstract byte[] transformText(byte[] text);
+
+    // Default tag handling: the tag is rebuilt from its name and options.
+    // Subclasses overriding these must produce the complete replacement bytes.
+    public byte[] transformTag0(String tagname, Properties tagopts, byte quotechar) {
+        byte[] rebuilt = htmlFilterOutputStream.genTag0(tagname, tagopts, quotechar);
+        return rebuilt;
+    }
+
+    public byte[] transformTag1(String tagname, Properties tagopts, byte[] text, byte quotechar) {
+        byte[] rebuilt = htmlFilterOutputStream.genTag1(tagname, tagopts, text, quotechar);
+        return rebuilt;
+    }
+
+}
diff --git a/source/de/anomic/htmlFilter/htmlFilterContentScraper.java b/source/de/anomic/htmlFilter/htmlFilterContentScraper.java
new file mode 100644
index 000000000..e5e4f49f8
--- /dev/null
+++ b/source/de/anomic/htmlFilter/htmlFilterContentScraper.java
@@ -0,0 +1,287 @@
+// htmlFilterContentScraper.java
+// -----------------------------
+// (C) by Michael Peter Christen; mc@anomic.de
+// first published on http://www.anomic.de
+// Frankfurt, Germany, 2004
+// last major change: 18.02.2004
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. +// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. 
+
+package de.anomic.htmlFilter;
+
+import java.net.*;
+import java.util.*;
+import de.anomic.server.*;
+
+
+/**
+ * Scraper that collects, while a document is filtered:
+ *  - the href/text pairs of 'a' tags (anchor)
+ *  - the src/alt pairs of 'img' tags (image)
+ *  - the content of 'title' and 'h1' tags
+ *  - all plain text, stripped by stripAll
+ * Relative links are resolved against the root URL given to the constructor.
+ */
+public class htmlFilterContentScraper extends htmlFilterAbstractScraper implements htmlFilterScraper {
+
+
+    // statics: for initialisation of the HTMLFilterAbstractTransformer
+    private static HashSet linkTags0;
+    private static HashSet linkTags1;
+
+    // comma-separated list of file extensions that mark a link as media link
+    public static String mediaExt =
+        "swf,wmv,jpg,jpeg,jpe,rm,mov,mpg,mpeg,mp3,asf,gif,png,avi,zip,rar," +
+        "sit,hqx,img,dmg,tar,gz,ps,pdf,doc,xls,ppt,ram,bz2,arj";
+
+    static {
+        linkTags0 = new HashSet();
+        linkTags0.add("img");
+
+        linkTags1 = new HashSet();
+        linkTags1.add("a");
+        linkTags1.add("h1");
+        linkTags1.add("title");
+    }
+
+    // class variables: collectors for links
+    private Properties anchor;
+    private Properties image;
+    private String title;
+    private String headline;
+    private serverByteBuffer text;
+    private URL root;
+
+    // derived link sets; computed lazily by resortLinks()
+    Properties hyperlinks = null;
+    Properties medialinks = null;
+    Properties emaillinks = null;
+
+    public htmlFilterContentScraper(URL root) {
+        // the root value here will not be used to load the resource.
+        // it is only the reference for relative links
+        super(linkTags0, linkTags1);
+        this.root = root;
+        this.anchor = new Properties();
+        this.image = new Properties();
+        this.title = "";
+        this.headline = "";
+        this.text = new serverByteBuffer();
+    }
+
+
+    // Appends the stripped and trimmed text to the collected text; pieces are
+    // separated by a single space (byte 32).
+    public void scrapeText(byte[] newtext) {
+        //System.out.println("SCRAPE: " + new String(newtext));
+        if ((text.length() != 0) && (text.byteAt(text.length() - 1) != 32)) text.append((byte) 32);
+        text.append(new serverByteBuffer(super.stripAll(new serverByteBuffer(newtext))).trim()).append((byte) ' ');
+    }
+
+    public static String urlNormalform(URL url) {
+        if (url == null) return null;
+        return urlNormalform(url.toString());
+    }
+
+    // Normalizes a url string: cuts off everything from the first '#', removes
+    // a trailing ":80" and removes a trailing '/' if the preceding '/' lies
+    // before index 8. Returns null for null or empty input.
+    public static String urlNormalform(String us) {
+        if (us == null) return null;
+        if (us.length() == 0) return null;
+        int p;
+        if ((p = us.indexOf("#")) >= 0) us = us.substring(0, p);
+        if (us.endsWith(":80")) us = us.substring(0, us.length() - 3);
+        if (((us.endsWith("/")) && (us.lastIndexOf('/', us.length() - 2) < 8))) us = us.substring(0, us.length() - 1);
+        return us;
+    }
+
+    // Resolves the path against the root url; yields "" if that fails.
+    private String absolutePath(String relativePath) {
+        try {
+            return urlNormalform(new URL(root, relativePath));
+        } catch (Exception e) {
+            return "";
+        }
+    }
+
+    public void scrapeTag0(String tagname, Properties tagopts) {
+        if (tagname.equals("img")) image.setProperty(absolutePath(tagopts.getProperty("src", "")), tagopts.getProperty("alt",""));
+    }
+
+    public void scrapeTag1(String tagname, Properties tagopts, byte[] text) {
+        //System.out.println("ScrapeTag1: tagname=" + tagname + ", opts=" + tagopts.toString() + ", text=" + new String(text));
+        if (tagname.equals("a")) anchor.setProperty(absolutePath(tagopts.getProperty("href", "")),
+                                                    new serverByteBuffer(super.stripAll(new serverByteBuffer(text)).getBytes()).trim().toString());
+        if (tagname.equals("h1")) headline = new String(super.stripAll(new serverByteBuffer(text)).getBytes());
+        if (tagname.equals("title")) title = new String(super.stripAll(new serverByteBuffer(text)).getBytes());
+    }
+
+
+    // Returns the title, else the h1 headline, else the first 80 bytes of the
+    // text; control characters are replaced by spaces and runs of spaces are
+    // reduced to one space.
+    public String getHeadline() {
+        String hl = "";
+
+        // extract headline from content
+        if (title.length() > 0) hl = title.trim();
+        else if (headline.length() > 0) hl = headline.trim();
+        else if (text.length() > 80) hl = new String(text.getBytes(), 0, 80).trim();
+        else hl = text.toString().trim();
+
+        // clean the line: may contain too many funny symbols
+        for (int i = 0; i < hl.length(); i++)
+            if (hl.charAt(i) < ' ') hl = hl.substring(0, i) + " " + hl.substring(i + 1);
+        // clean the line: remove double-spaces
+        // changed: search for two spaces; searching for a single space removed
+        // every space from the headline
+        int p;
+        while ((p = hl.indexOf("  ")) >= 0) hl = hl.substring(0, p) + hl.substring(p + 1);
+
+        // return result
+        return hl.trim();
+    }
+
+    public byte[] getText() {
+        return text.getBytes();
+    }
+
+    public Properties getAnchor() {
+        return anchor;
+    }
+
+    public Properties getImage() {
+        return image;
+    }
+
+    public Properties getHyperlinks() {
+        if (hyperlinks == null) resortLinks();
+        return hyperlinks;
+    }
+
+    public Properties getMedialinks() {
+        if (medialinks == null) resortLinks();
+        return medialinks;
+    }
+
+    public Properties getEmaillinks() {
+        if (emaillinks == null) resortLinks();
+        return emaillinks;
+    }
+
+    // changed: exact comparison against the entries of mediaExt. The former
+    // substring search also accepted fragments such as "do" or "" as media
+    // extensions.
+    private static boolean isMediaExtension(String ext) {
+        if ((ext.length() == 0) || (ext.indexOf(',') >= 0)) return false;
+        return ("," + mediaExt + ",").indexOf("," + ext + ",") >= 0;
+    }
+
+    // Distributes the collected anchors over emaillinks ("mailto:" prefix),
+    // medialinks (extension listed in mediaExt) and hyperlinks (all others),
+    // then adds all images to the medialinks.
+    private synchronized void resortLinks() {
+        Enumeration e;
+        String url;
+        String normal;
+        int extpos;
+        e = anchor.propertyNames();
+        hyperlinks = new Properties();
+        medialinks = new Properties();
+        emaillinks = new Properties();
+        while (e.hasMoreElements()) {
+            url = (String) e.nextElement();
+            if (url.startsWith("mailto:")) {
+                emaillinks.setProperty(url.substring(7), anchor.getProperty(url));
+                continue;
+            }
+            normal = urlNormalform(url);
+            if (normal == null) continue; // empty key, e.g. an unresolvable href
+            extpos = url.lastIndexOf(".");
+            if ((extpos > 0) && (isMediaExtension(url.substring(extpos + 1).toLowerCase()))) {
+                // this is not an normal anchor, its a media link
+                medialinks.setProperty(normal, anchor.getProperty(url));
+            } else {
+                // changed: links without any '.' are kept as hyperlinks
+                // instead of being dropped
+                hyperlinks.setProperty(normal, anchor.getProperty(url));
+            }
+        }
+        // finally add the images to the medialinks
+        e = image.propertyNames();
+        while (e.hasMoreElements()) {
+            url = (String) e.nextElement();
+            normal = urlNormalform(url);
+            if (normal != null) medialinks.setProperty(normal, image.getProperty(url)); // avoid NullPointerException
+        }
+    }
+
+
+    public synchronized void expandHyperlinks() {
+        // we add artificial hyperlinks to the hyperlink set that can be calculated from
+        // given hyperlinks and imagelinks
+        // changed: make sure the link sets exist; calling this method before
+        // any of the getters ended in a NullPointerException
+        if ((hyperlinks == null) || (medialinks == null)) resortLinks();
+        hyperlinks.putAll(allReflinks(hyperlinks));
+        hyperlinks.putAll(allReflinks(medialinks));
+        hyperlinks.putAll(allSubpaths(hyperlinks));
+        hyperlinks.putAll(allSubpaths(medialinks));
+    }
+
+    // Finds urls embedded in other urls ("http://" or "/www." behind index 7).
+    // Such entries are REMOVED from the given map; the innermost embedded url
+    // is returned as key with the value "ref".
+    private static Map allReflinks(Map links) {
+        // we find all links that are part of a reference inside a url
+        HashMap v = new HashMap();
+        Iterator i = links.keySet().iterator();
+        String s;
+        int pos;
+        loop: while (i.hasNext()) {
+            s = (String) i.next();
+            if ((pos = s.toLowerCase().indexOf("http://",7)) > 0) {
+                i.remove();
+                s = s.substring(pos);
+                while ((pos = s.toLowerCase().indexOf("http://",7)) > 0) s = s.substring(pos);
+                if (!(v.containsKey(s))) v.put(s, "ref");
+                continue loop;
+            }
+            if ((pos = s.toLowerCase().indexOf("/www.",7)) > 0) {
+                i.remove();
+                s = "http:/" + s.substring(pos);
+                while ((pos = s.toLowerCase().indexOf("/www.",7)) > 0) s = "http:/" + s.substring(pos);
+                if (!(v.containsKey(s))) v.put(s, "ref");
+                continue loop;
+            }
+        }
+        return v;
+    }
+
+    // Returns every parent path (ending with '/') of the given urls as key
+    // with the value "sub"; paths are cut while the last '/' lies behind
+    // index 8.
+    private static Map allSubpaths(Map links) {
+        HashMap v = new HashMap();
+        Iterator i = links.keySet().iterator();
+        String s;
+        int pos;
+        while (i.hasNext()) {
+            s = (String) i.next();
+            if (s.endsWith("/")) s = s.substring(0, s.length() - 1);
+            pos = s.lastIndexOf("/");
+            while (pos > 8) {
+                s = s.substring(0, pos + 1);
+                if (!(v.containsKey(s))) v.put(s, "sub");
+                s = s.substring(0, pos);
+                pos = s.lastIndexOf("/");
+            }
+        }
+        return v;
+    }
+
+
+    public void print() {
+        System.out.println("TITLE   :" + title);
+        System.out.println("HEADLINE:" + headline);
+        System.out.println("ANCHORS :" + anchor.toString());
+        System.out.println("IMAGES  :" + image.toString());
+        System.out.println("TEXT    :" + new String(text.getBytes()));
+    }
+
+}
diff --git a/source/de/anomic/htmlFilter/htmlFilterContentTransformer.java b/source/de/anomic/htmlFilter/htmlFilterContentTransformer.java
new file mode 100644
index 000000000..6d187d877
--- /dev/null
+++ b/source/de/anomic/htmlFilter/htmlFilterContentTransformer.java
@@ -0,0 +1,120 @@
+// htmlFilterContentTransformer.java
+// ---------------------------------
+// (C) by Michael Peter Christen; mc@anomic.de
+// first published on http://www.anomic.de
+// Frankfurt, Germany, 2004
+// last major change: 18.02.2004
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//
+// Using this software in any meaning (reading, learning, copying, compiling,
+// running) means that you agree that the Author(s) is (are) not responsible
+// for cost, loss of data or any harm that may be caused directly or indirectly
+// by usage of this softare or this documentation. The usage of this software
+// is on your own risk.
The installation and usage (starting/running) of this
+// software may allow other people or application to access your computer and
+// any attached devices and is highly dependent on the configuration of the
+// software which must be done by the user of the software; the author(s) is
+// (are) also not responsible for proper configuration and usage of the
+// software, even if provoked by documentation provided together with
+// the software.
+//
+// Any changes to this file according to the GPL as documented in the file
+// gpl.txt aside this file in the shipment you received can be done to the
+// lines that follows this copyright notice here, but changes must not be
+// done inside the copyright notive above. A re-distribution must contain
+// the intact and unchanged copyright notice.
+// Contributions and changes to the program code must be marked as such.
+
+package de.anomic.htmlFilter;
+
+import java.io.*;
+import java.util.*;
+import de.anomic.server.*;
+
+/**
+ * Transformer that replaces text, 'img' tags and 'a' tags by a run of 'X'
+ * letters whenever they contain one of the words of a word list (the
+ * "bluelist"). The list is loaded once by init() and shared by all instances.
+ */
+public class htmlFilterContentTransformer extends htmlFilterAbstractTransformer implements htmlFilterTransformer {
+
+    // statics: for initialisation of the HTMLFilterAbstractTransformer
+    private static HashSet linkTags0;
+    private static HashSet linkTags1;
+
+    static {
+        linkTags0 = new HashSet();
+        linkTags0.add("img");
+
+        linkTags1 = new HashSet();
+        linkTags1.add("a");
+    }
+
+    // lower-case words; null until init() was called for the first time
+    private static Vector bluelist = null;
+
+    public htmlFilterContentTransformer() {
+        super(linkTags0, linkTags1);
+    }
+
+    /**
+     * Loads the word list on the first call; later calls do nothing.
+     * Empty lines and lines starting with '#' are skipped, all other lines
+     * are stored in lower case.
+     *
+     * @param initarg path of the word list file; a missing or unreadable file
+     *                leaves the list empty
+     */
+    public void init(String initarg) {
+        if (bluelist == null) {
+            // here, the initarg is used to load a list of bluelisted words
+            bluelist = new Vector();
+            File f = new File(initarg);
+            if ((f.exists()) && (f.canRead())) {
+                BufferedReader r = null;
+                try {
+                    r = new BufferedReader(new FileReader(f));
+                    String s;
+                    while ((s = r.readLine()) != null) {
+                        if ((!(s.startsWith("#"))) && (s.length() > 0)) bluelist.add(s.toLowerCase());
+                    }
+                } catch (Exception e) {
+                    // deliberately best effort: the words read so far are kept
+                } finally {
+                    // changed: the reader is closed on every path; it used to
+                    // stay open when reading failed
+                    if (r != null) try { r.close(); } catch (Exception e) {}
+                }
+            }
+        }
+    }
+
+    // Builds the replacement for filtered content: " ", then length / 2 times
+    // the letter 'X' (7 times if length / 2 is greater than 10), then " ".
+    // NOTE(review): a halved length of exactly 10 yields more letters than any
+    // larger one - confirm that the cap of 7 is intended.
+    private static byte[] genBlueLetters(int length) {
+        serverByteBuffer bb = new serverByteBuffer(" ".getBytes());
+        length = length / 2;
+        if (length > 10) length = 7;
+        while (length-- > 0) bb.append((byte) 'X');
+        bb.append(" ".getBytes());
+        return bb.getBytes();
+    }
+
+    // true if the lower-cased text contains at least one word of the list
+    private boolean hit(byte[] text) {
+        if ((text == null) || (bluelist == null)) return false;
+        String lc = new String(text).toLowerCase();
+        for (int i = 0; i < bluelist.size(); i++) if (lc.indexOf((String) bluelist.elementAt(i)) >= 0) return true;
+        return false;
+    }
+
+    public byte[] transformText(byte[] text) {
+        if (hit(text)) {
+            // changed: print the text itself; concatenating the byte[] printed
+            // only the array's identity string
+            System.out.println("FILTERHIT: " + new String(text));
+            return genBlueLetters(text.length);
+        } else
+            return text;
+    }
+
+    // an 'img' tag is replaced if its src or alt attribute contains a listed word
+    public byte[] transformTag0(String tagname, Properties tagopts, byte quotechar) {
+        if (hit(tagopts.getProperty("src","").getBytes())) return genBlueLetters(5);
+        if (hit(tagopts.getProperty("alt","").getBytes())) return genBlueLetters(5);
+        return htmlFilterOutputStream.genTag0(tagname, tagopts, quotechar);
+    }
+
+    // an 'a' tag is replaced if its href attribute or its content contains a listed word
+    public byte[] transformTag1(String tagname, Properties tagopts, byte[] text, byte quotechar) {
+        if (hit(tagopts.getProperty("href","").getBytes())) return genBlueLetters(text.length);
+        if (hit(text)) return genBlueLetters(text.length);
+        return htmlFilterOutputStream.genTag1(tagname, tagopts, text, quotechar);
+    }
+
+}
diff --git a/source/de/anomic/htmlFilter/htmlFilterOutputStream.java b/source/de/anomic/htmlFilter/htmlFilterOutputStream.java
new file mode 100644
index 000000000..4550db46e
--- /dev/null
+++ b/source/de/anomic/htmlFilter/htmlFilterOutputStream.java
@@ -0,0 +1,483 @@
+// htmlFilterOutputStream.java
+// ---------------------------
+// (C) by Michael Peter Christen; mc@anomic.de
+// first published on http://www.anomic.de
+// Frankfurt, Germany, 2004, 2005
+// last major change: 16.02.2005
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free
Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software;the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. +// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. + +/* + This class implements an output stream. Any data written to that output + is automatically parsed. + After finishing with writing, the htmlFilter can be read out. 
+
+ */
+
+
+package de.anomic.htmlFilter;
+
+import java.io.*;
+import java.net.*;
+import java.util.*;
+import de.anomic.server.*;
+
+public class htmlFilterOutputStream extends OutputStream {
+
+    public static final byte lb = (byte) '<';
+    public static final byte rb = (byte) '>';
+    public static final byte dash = (byte) '-';
+    public static final byte excl = (byte) '!';
+    public static final byte singlequote = (byte) 39;
+    public static final byte doublequote = (byte) 34;
+
+    private OutputStream out;                  // target stream; may be null
+    private serverByteBuffer buffer;           // the tag or text piece read so far
+    private String filterTag;                  // tag whose content is being collected, or null
+    private Properties filterOpts;             // options of filterTag
+    private serverByteBuffer filterCont;       // content collected for filterTag
+    private htmlFilterScraper scraper;         // may be null
+    private htmlFilterTransformer transformer; // may be null
+    private boolean inSingleQuote;
+    private boolean inDoubleQuote;
+    private boolean inComment;
+    private boolean inScript;
+    private boolean binaryUnsuspect;
+    private boolean passbyIfBinarySuspect;
+
+    public htmlFilterOutputStream(OutputStream out,
+                                  htmlFilterScraper scraper, htmlFilterTransformer transformer,
+                                  boolean passbyIfBinarySuspect) {
+        this.out           = out;
+        this.scraper       = scraper;
+        this.transformer   = transformer;
+        this.buffer        = new serverByteBuffer();
+        this.filterTag     = null;
+        this.filterOpts    = null;
+        this.filterCont    = null;
+        this.inSingleQuote = false;
+        this.inDoubleQuote = false;
+        this.inComment     = false;
+        this.inScript      = false;
+        this.binaryUnsuspect = true;
+        this.passbyIfBinarySuspect = passbyIfBinarySuspect;
+    }
+
+
+    // Rebuilds a single opening or closing tag from its name and the raw,
+    // unparsed option bytes, which are appended exactly as given.
+    public static byte[] genTag0raw(String tagname, boolean opening, byte[] tagopts) {
+        serverByteBuffer bb = new serverByteBuffer();
+        bb.append((byte) '<');
+        if (!(opening)) bb.append((byte) '/');
+        bb.append(tagname.getBytes());
+        if (tagopts.length > 0) {
+            //if (tagopts[0] == (byte) 32)
+            bb.append(tagopts);
+            //else bb.append((byte) 32).append(tagopts);
+        }
+        bb.append((byte) '>');
+        return bb.getBytes();
+    }
+
+    // Rebuilds opening tag, content and closing tag from raw option bytes.
+    public static byte[] genTag1raw(String tagname, byte[] tagopts, byte[] text) {
+        serverByteBuffer bb = new serverByteBuffer();
+        bb.append((byte) '<').append(tagname.getBytes());
+        if (tagopts.length > 0) {
+            //if (tagopts[0] == (byte) 32)
+            bb.append(tagopts);
+            //else bb.append((byte) 32).append(tagopts);
+        }
+        bb.append((byte) '>');
+        bb.append(text);
+        bb.append((byte) '<').append((byte) '/').append(tagname.getBytes()).append((byte) '>');
+        return bb.getBytes();
+    }
+
+
+    // Builds an opening tag from its name and parsed options.
+    public static byte[] genTag0(String tagname, Properties tagopts, byte quotechar) {
+        serverByteBuffer bb = new serverByteBuffer().append((byte) '<').append(tagname.getBytes());
+        if (tagopts.size() != 0) bb = bb.append((byte) 32).append(genOpts(tagopts, quotechar));
+        bb = bb.append((byte) '>');
+        return bb.getBytes();
+    }
+
+    // Builds opening tag, content and closing tag from parsed options.
+    // changed: the closing tag is appended, as genTag1raw does; before, an
+    // empty string was appended and the element was never closed
+    public static byte[] genTag1(String tagname, Properties tagopts, byte[] text, byte quotechar) {
+        return new serverByteBuffer(genTag0(tagname, tagopts, quotechar)).append(text).append(("</" + tagname + ">").getBytes()).getBytes();
+    }
+
+    // a helper method for pretty-printing of properties for html tags:
+    // every property becomes ' key=<quotechar>value<quotechar>'
+    // NOTE(review): getBytes(1) presumably skips the leading blank - confirm
+    // in serverByteBuffer
+    public static byte[] genOpts(Properties prop, byte quotechar) {
+        Enumeration e = prop.propertyNames();
+        serverByteBuffer bb = new serverByteBuffer();
+        String key;
+        while (e.hasMoreElements()) {
+            key = (String) e.nextElement();
+            bb = bb.append((byte) 32).append(key.getBytes()).append((byte) '=');
+            bb = bb.append(quotechar).append(prop.getProperty(key).getBytes()).append(quotechar);
+        }
+        if (bb.length() > 0) return bb.getBytes(1); else return bb.getBytes();
+    }
+
+    // Processes one parsed piece of the input.
+    //   tag       tag name, or null if the piece is plain text
+    //   opening   false for a closing tag
+    //   content   the text, or the raw option bytes of the tag
+    //   quotechar quote character used when a tag is generated again
+    // Returns the bytes to be written for this piece. While the content of a
+    // tag is collected, nothing is returned until that tag is closed.
+    private byte[] filterTag(String tag, boolean opening, byte[] content, byte quotechar) {
+        //System.out.println("FILTER1: filterTag=" + ((filterTag == null) ? "null" : filterTag) + ", tag=" + tag + ", opening=" + ((opening) ? "true" : "false") + ", content=" + new String(content)); // debug
+        if (filterTag == null) {
+            // we are not collection tag text
+            if (tag == null) {
+                // and this is not a tag opener/closer
+                if (scraper != null) scraper.scrapeText(content);
+                if (transformer != null) return transformer.transformText(content); else return content;
+            } else {
+                // we have a new tag
+                if (opening) {
+                    if ((scraper != null) && (scraper.isTag0(tag))) {
+                        // this single tag is collected at once here
+                        scraper.scrapeTag0(tag, new serverByteBuffer(content).propParser());
+                    }
+                    if ((transformer != null) && (transformer.isTag0(tag))) {
+                        // this single tag is collected at once here
+                        return transformer.transformTag0(tag, new serverByteBuffer(content).propParser(), quotechar);
+                    } else if (((scraper != null) && (scraper.isTag1(tag))) ||
+                               ((transformer != null) && (transformer.isTag1(tag)))) {
+                        // ok, start collecting
+                        filterTag = tag;
+                        filterOpts = new serverByteBuffer(content).propParser();
+                        filterCont = new serverByteBuffer();
+                        return new byte[0];
+                    } else {
+                        // we ignore that thing and return it again
+                        return genTag0raw(tag, true, content);
+                    }
+                } else {
+                    // we ignore that thing and return it again
+                    return genTag0raw(tag, false, content);
+                }
+            }
+        } else {
+            // we are collection tag text for the tag 'filterTag'
+            if (tag == null) {
+                // go on collecting content
+                if (scraper != null) scraper.scrapeText(content);
+                if (transformer != null)
+                    filterCont.append(transformer.transformText(content));
+                else
+                    filterCont.append(content);
+                return new byte[0];
+            } else {
+                // it's a tag! which one?
+                if ((opening) || (!(tag.equals(filterTag)))) {
+                    // this tag is not our concern. just add it
+                    filterCont.append(genTag0raw(tag, opening, content));
+                    return new byte[0];
+                } else {
+                    // it's our closing tag! return complete result.
+                    // changed: uses filterFinalize instead of a copy of its code
+                    return filterFinalize(quotechar);
+                }
+            }
+        }
+    }
+
+    // Finishes the tag that is currently collected, if there is one: the
+    // scraper receives it, the transformer (or genTag1) produces the result
+    // and the collecting state is reset. Returns an empty array if no tag is
+    // being collected.
+    private byte[] filterFinalize(byte quotechar) {
+        if (filterTag == null) {
+            return new byte[0];
+        } else {
+            // it's our closing tag! return complete result.
+            byte[] ret;
+            if (scraper != null) scraper.scrapeTag1(filterTag, filterOpts, filterCont.getBytes());
+            if (transformer != null)
+                ret = transformer.transformTag1(filterTag, filterOpts, filterCont.getBytes(), quotechar);
+            else
+                ret = genTag1(filterTag, filterOpts, filterCont.getBytes(), quotechar);
+            filterTag = null;
+            filterOpts = null;
+            filterCont = null;
+            return ret;
+        }
+    }
+
+    // Splits one buffered piece into tag name and remaining bytes and passes
+    // it to filterTag. Pieces longer than two bytes that start with '<' are
+    // tags, everything else is text.
+    private byte[] filterSentence(byte[] in, byte quotechar) {
+        if (in.length == 0) return in;
+        //System.out.println("FILTER0: " + new String(in)); // debug
+        // scan the string and parse structure
+        if ((in.length > 2) && (in[0] == lb)) {
+            // a tag
+            String tag;
+            int tagend;
+            if (in[1] == '/') {
+                // a closing tag
+                tagend = tagEnd(in, 2);
+                tag = new String(in, 2, tagend - 2).toLowerCase();
+                byte[] text = new byte[in.length - tagend - 1];
+                System.arraycopy(in, tagend, text, 0, in.length - tagend - 1);
+                return filterTag(tag, false, text, quotechar);
+            } else {
+                // an opening tag
+                tagend = tagEnd(in, 1);
+                tag = new String(in, 1, tagend - 1).toLowerCase();
+                byte[] text = new byte[in.length - tagend - 1];
+                System.arraycopy(in, tagend, text, 0, in.length - tagend - 1);
+                return filterTag(tag, true, text, quotechar);
+            }
+        } else {
+            // a text
+            return filterTag(null, true, in, quotechar);
+        }
+    }
+
+    // Index of the first byte at or behind start that is not '!', '-', a
+    // digit or an ASCII letter; tag.length - 1 if there is no such byte.
+    private static int tagEnd(byte[] tag, int start) {
+        char c;
+        for (int i = start; i < tag.length; i++) {
+            c = (char) tag[i];
+            if ((c != '!') && (c != '-') &&
+                ((c < '0') || (c > '9')) &&
+                ((c < 'a') || (c > 'z')) &&
+                ((c < 'A') || (c > 'Z'))
+                ) return i;
+        }
+        return tag.length - 1;
+    }
+
+    public void write(int b) throws IOException {
+        write((byte) (b & 0xff));
+    }
+
+    // Feeds one byte into the parser. Complete tags and text pieces are sent
+    // through filterSentence; comments and scripts are written unfiltered.
+    // Once binary content is suspected and passbyIfBinarySuspect is set, the
+    // bytes are handed to the target stream directly.
+    private void write(byte b) throws IOException {
+        //System.out.print((char) b);
+        if ((binaryUnsuspect) && (binaryHint(b))) {
+            binaryUnsuspect = false;
+            if (passbyIfBinarySuspect) finalize();
+        }
+
+        if ((binaryUnsuspect) || (!(passbyIfBinarySuspect))) {
+            byte[] filtered;
+            if (inSingleQuote) {
+                buffer.append(b);
+                if (b == singlequote) inSingleQuote = false;
+                // check error cases
+                if ((b == rb) && (buffer.byteAt(0) == lb)) {
+                    inSingleQuote = false;
+                    // the tag ends here. after filtering: pass on
+                    filtered = filterSentence(buffer.getBytes(), singlequote);
+                    if (out != null) out.write(filtered);
+                    buffer = new serverByteBuffer();
+                }
+            } else if (inDoubleQuote) {
+                buffer.append(b);
+                if (b == doublequote) inDoubleQuote = false;
+                // check error cases
+                if ((b == rb) && (buffer.byteAt(0) == lb)) {
+                    inDoubleQuote = false;
+                    // the tag ends here. after filtering: pass on
+                    filtered = filterSentence(buffer.getBytes(), doublequote);
+                    if (out != null) out.write(filtered);
+                    buffer = new serverByteBuffer();
+                }
+            } else if (inComment) {
+                buffer.append(b);
+                // changed: both dashes in front of the '>' are checked now;
+                // before, any "-?>" sequence ended the comment
+                if ((b == rb) && (buffer.length() > 6) &&
+                    (buffer.byteAt(buffer.length() - 3) == dash) &&
+                    (buffer.byteAt(buffer.length() - 2) == dash)) {
+                    // comment is at end
+                    inComment = false;
+                    if (out != null) out.write(buffer.getBytes());
+                    buffer = new serverByteBuffer();
+                }
+            } else if (inScript) {
+                buffer.append(b);
+                if ((b == rb) && (buffer.length() > 14) &&
+                    (buffer.byteAt(buffer.length() - 8) == (byte) '/') &&
+                    (buffer.byteAt(buffer.length() - 7) == (byte) 's') &&
+                    (buffer.byteAt(buffer.length() - 6) == (byte) 'c') &&
+                    (buffer.byteAt(buffer.length() - 5) == (byte) 'r') &&
+                    (buffer.byteAt(buffer.length() - 4) == (byte) 'i') &&
+                    (buffer.byteAt(buffer.length() - 3) == (byte) 'p') &&
+                    (buffer.byteAt(buffer.length() - 2) == (byte) 't')) {
+                    // script is at end
+                    inScript = false;
+                    if (out != null) out.write(buffer.getBytes());
+                    buffer = new serverByteBuffer();
+                }
+            } else {
+                if (buffer.length() == 0) {
+                    if (b == rb) {
+                        // very strange error case; we just let it pass
+                        if (out != null) out.write(b);
+                    } else {
+                        buffer.append(b);
+                    }
+                } else if (buffer.byteAt(0) == lb) {
+                    if (b == singlequote) inSingleQuote = true;
+                    if (b == doublequote) inDoubleQuote = true;
+                    // fill in tag text
+                    if ((buffer.length() == 3) && (buffer.byteAt(1) == excl) &&
+                        (buffer.byteAt(2) == dash) && (b == dash)) {
+                        // this is the start of a comment
+                        inComment = true;
+                        buffer.append(b);
+                    } else if ((buffer.length() == 6) &&
+                               (buffer.byteAt(1) == (byte) 's') &&
+                               (buffer.byteAt(2) == (byte) 'c') &&
+                               (buffer.byteAt(3) == (byte) 'r') &&
+                               (buffer.byteAt(4) == (byte) 'i') &&
+                               (buffer.byteAt(5) == (byte) 'p') &&
+                               (b                == (byte) 't')) {
+                        // this is the start of a script
+                        inScript = true;
+                        buffer.append(b);
+                    } else if (b == rb) {
+                        buffer.append(b);
+                        // the tag ends here. after filtering: pass on
+                        filtered = filterSentence(buffer.getBytes(), doublequote);
+                        if (out != null) out.write(filtered);
+                        buffer = new serverByteBuffer();
+                    } else if (b == lb) {
+                        // this is an error case
+                        // we consider that there is one rb missing
+                        if (buffer.length() > 0) {
+                            filtered = filterSentence(buffer.getBytes(), doublequote);
+                            if (out != null) out.write(filtered);
+                        }
+                        buffer = new serverByteBuffer();
+                        buffer.append(b);
+                    } else {
+                        buffer.append(b);
+                    }
+                } else {
+                    // fill in plain text
+                    if (b == lb) {
+                        // the text ends here
+                        if (buffer.length() > 0) {
+                            filtered = filterSentence(buffer.getBytes(), doublequote);
+                            if (out != null) out.write(filtered);
+                        }
+                        buffer = new serverByteBuffer();
+                        buffer.append(b);
+                    } else {
+                        // simply append
+                        buffer.append(b);
+                    }
+                }
+            }
+        } else {
+            // changed: same null check as in all other branches
+            if (out != null) out.write(b);
+        }
+    }
+
+    public void write(byte b[]) throws IOException {
+        this.write(b, 0, b.length);
+    }
+
+    // Writes len bytes of b, starting at index off.
+    public void write(byte b[], int off, int len) throws IOException {
+        //System.out.print(new String(b, off, len));
+        if ((off | len | (b.length - (len + off)) | (off + len)) < 0) throw new IndexOutOfBoundsException();
+        // changed: the loop ends at off + len; the former bound (len - off)
+        // wrote too few bytes, or none at all, whenever off was not 0
+        for (int i = off ; i < (off + len) ; i++) this.write(b[i]);
+    }
+
+    public void flush() throws IOException {
+        // we cannot flush the current string buffer to prevent that
+        // the filter process is messed up
+        // instead, we simply flush the underlying output stream
+        if (out != null) out.flush();
+        // if you want to flush all, call close() at end of writing;
+    }
+
+    private byte[] finalized = null;
+
+    public void finalize() throws IOException {
+        // if we are forced to close, we of course flush the buffer first,
+        // then close the connection
+        byte quotechar = (inSingleQuote) ? 
singlequote : doublequote; + if (buffer != null) { + if (buffer.length() > 0) { + byte[] filtered = filterSentence(buffer.getBytes(), quotechar); + if (out != null) out.write(filtered); + } + buffer = null; + } + finalized = filterFinalize(quotechar); + } + + public void close() throws IOException { + finalize(); + if (out != null) { + if (finalized != null) out.write(finalized); + out.flush(); + out.close(); + } + } + + private static boolean binaryHint(byte b) { + if (b < 0) return false; + if (b > 31) return false; + if ((b == 8) || (b == 9) || (b == 10) || (b == 13)) return false; + //return false; + System.out.println("BINARY HINT: " + (int) b); + return true; + } + + public boolean binarySuspect() { + return !binaryUnsuspect; + } + + public static void main(String[] args) { + // test app + // takes one argument: a file name + if (args.length != 1) return; + byte[] buffer = new byte[512]; + try { + htmlFilterContentScraper lc = new htmlFilterContentScraper(new URL("http://www.anomic.de/")); + Vector v = new Vector(); + v.add("proxy"); + htmlFilterTransformer lt = new htmlFilterContentTransformer(); + InputStream is = new FileInputStream(args[0]); + FileOutputStream fos = new FileOutputStream(new File(args[0] + ".out")); + OutputStream os = new htmlFilterOutputStream(fos, lc, lt, false); + int i; + while ((i = is.read(buffer)) > 0) os.write(buffer, 0, i); + os.close(); + fos.close(); + is.close(); + lc.print(); + } + catch (MalformedURLException e) {} + catch (IOException e) {} + } + +} diff --git a/source/de/anomic/htmlFilter/htmlFilterScraper.java b/source/de/anomic/htmlFilter/htmlFilterScraper.java new file mode 100644 index 000000000..ea709051f --- /dev/null +++ b/source/de/anomic/htmlFilter/htmlFilterScraper.java @@ -0,0 +1,57 @@ +// htmlFilterScraper.java +// --------------------------- +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2004 +// last major change: 18.02.2004 +// +// This 
program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. +// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. 
+
+package de.anomic.htmlFilter;
+
+import java.util.*;
+
+// Callback interface for objects that collect information from an HTML
+// stream while it is being filtered. The scraper only receives data; none
+// of its methods returns content.
+public interface htmlFilterScraper {
+
+    // presumably tells whether the given body-less tag is of interest to
+    // this scraper and shall be reported through scrapeTag0 - TODO confirm
+    // against the implementations
+    public boolean isTag0(String tag);
+
+    // presumably tells whether the given body-carrying tag is of interest to
+    // this scraper and shall be reported through scrapeTag1 - TODO confirm
+    // against the implementations
+    public boolean isTag1(String tag);
+
+    // receives a piece of text (as bytes) for scraping
+    public void scrapeText(byte[] text);
+
+    // receives a tag by name together with its options; no body text is
+    // passed
+    public void scrapeTag0(String tagname, Properties tagopts);
+
+    // receives a tag by name together with its options and the bytes that
+    // were collected as the content of the tag
+    public void scrapeTag1(String tagname, Properties tagopts, byte[] text);
+
+}
diff --git a/source/de/anomic/htmlFilter/htmlFilterTransformer.java b/source/de/anomic/htmlFilter/htmlFilterTransformer.java
new file mode 100644
index 000000000..816cf8138
--- /dev/null
+++ b/source/de/anomic/htmlFilter/htmlFilterTransformer.java
@@ -0,0 +1,71 @@
+// htmlFilterTransformer.java
+// ---------------------------
+// (C) by Michael Peter Christen; mc@anomic.de
+// first published on http://www.anomic.de
+// Frankfurt, Germany, 2004
+// last major change: 18.02.2004
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//
+// Using this software in any meaning (reading, learning, copying, compiling,
+// running) means that you agree that the Author(s) is (are) not responsible
+// for cost, loss of data or any harm that may be caused directly or indirectly
+// by usage of this softare or this documentation. The usage of this software
+// is on your own risk.
The installation and usage (starting/running) of this
+// software may allow other people or application to access your computer and
+// any attached devices and is highly dependent on the configuration of the
+// software which must be done by the user of the software; the author(s) is
+// (are) also not responsible for proper configuration and usage of the
+// software, even if provoked by documentation provided together with
+// the software.
+//
+// Any changes to this file according to the GPL as documented in the file
+// gpl.txt aside this file in the shipment you received can be done to the
+// lines that follows this copyright notice here, but changes must not be
+// done inside the copyright notive above. A re-distribution must contain
+// the intact and unchanged copyright notice.
+// Contributions and changes to the program code must be marked as such.
+
+package de.anomic.htmlFilter;
+
+import java.util.*;
+
+// Callback interface for objects that rewrite an HTML stream while it is
+// being filtered. Unlike a scraper, a transformer returns the bytes that
+// replace the text or tag it was given.
+public interface htmlFilterTransformer {
+
+    // the init method is used to initialize the transformer with some values,
+    // e.g. the initarg string can be the name of a file which may contain
+    // more specific transformation rules
+    public void init(String initarg);
+
+    // tests whether a given body-less tag (a tag that has no closing
+    // counterpart) shall be supervised;
+    // only tags that are defined here will be cached and not streamed
+    public boolean isTag0(String tag);
+
+    // tests whether a given tag that may have a body (opening tag, body,
+    // closing tag) shall be supervised
+    public boolean isTag1(String tag);
+
+    // method that is called with any text between tags;
+    // the returned text replaces the given text.
+    // if the text shall not be changed, it must be returned as passed in
+    public byte[] transformText(byte[] text);
+
+    // method that is called when a body-less tag occurs;
+    // quotechar is the quote character to use for the tag options
+    // (presumably when the tag is written out again - TODO confirm)
+    public byte[] transformTag0(String tagname, Properties tagopts, byte quotechar);
+
+    // method that is called when a body-containing tag occurs;
+    // text holds the bytes collected as the body of the tag
+    public byte[] transformTag1(String tagname, Properties tagopts, byte[] text, byte quotechar);
+
+}
diff --git a/source/de/anomic/http/httpHeader.java b/source/de/anomic/http/httpHeader.java
new file mode 100644
index 000000000..5adc8ad87
--- /dev/null
+++ b/source/de/anomic/http/httpHeader.java
@@ -0,0 +1,257 @@
+// httpHeader.java
+// -----------------------
+// (C) by Michael Peter Christen; mc@anomic.de
+// first published on http://www.anomic.de
+// Frankfurt, Germany, 2004
+// last major change: 29.04.2004
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. +// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. + +/* + Documentation: + this class implements a key-value mapping, as a hashtable + The difference to ordinary hashtable implementations is that the + keys are not compared by the equal() method, but are always + treated as string and compared as + key.uppercase().equal(.uppercase(comparator)) + You use this class by first creation of a static HashMap + that then is used a the reverse mapping cache for every new + instance of this class. 
+*/ + +package de.anomic.http; + +import java.io.*; +import java.util.*; +import java.text.*; +import de.anomic.server.*; + +public class httpHeader extends TreeMap implements Map { + + private HashMap reverseMappingCache; + + private static Collator insensitiveCollator = Collator.getInstance(Locale.US); + static { + insensitiveCollator.setStrength(Collator.SECONDARY); + insensitiveCollator.setDecomposition(Collator.NO_DECOMPOSITION); + } + + public httpHeader() { + this(null); + } + + public httpHeader(HashMap reverseMappingCache) { + // this creates a new TreeMap with a case insesitive mapping + // to provide a put-method that translates given keys into their + // 'proper' appearance, a translation cache is needed. + // upon instantiation, such a mapping cache can be handed over + // If the reverseMappingCache is null, none is used + super(insensitiveCollator); + this.reverseMappingCache = reverseMappingCache; + } + + public httpHeader(HashMap reverseMappingCache, File f) throws IOException { + // creates also a case insensitive map and loads it initially + // with some values + super(insensitiveCollator); + this.reverseMappingCache = reverseMappingCache; + + // load with data + BufferedReader br = new BufferedReader(new FileReader(f)); + String line; + int pos; + while ((line = br.readLine()) != null) { + pos = line.indexOf("="); + if (pos >= 0) put(line.substring(0, pos), line.substring(pos + 1)); + } + br.close(); + } + + public httpHeader(HashMap reverseMappingCache, Map othermap) { + // creates a case insensitive map from another map + super(insensitiveCollator); + this.reverseMappingCache = reverseMappingCache; + + // load with data + if (othermap != null) this.putAll(othermap); + } + + + // we override the put method to make use of the reverseMappingCache + public Object put(Object key, Object value) { + String k = (String) key; + if (reverseMappingCache == null) { + return super.put(k, value); + } else { + if 
(reverseMappingCache.containsKey(k.toUpperCase())) { + // we put in the value using the reverse mapping + return super.put(reverseMappingCache.get(k.toUpperCase()), value); + } else { + // we put in without a cached key and store the key afterwards + Object r = super.put(k, value); + reverseMappingCache.put(k.toUpperCase(), k); + return r; + } + } + } + + // a convenience method to access the map with fail-over deafults + public Object get(Object key, Object dflt) { + Object result = get(key); + if (result == null) return dflt; else return result; + } + + // convenience methods for storing and loading to a file system + public void store(File f) throws IOException { + FileOutputStream fos = new FileOutputStream(f); + Iterator i = keySet().iterator(); + String key, value; + while (i.hasNext()) { + key = (String) i.next(); + value = (String) get(key); + fos.write((key + "=" + value + "\r\n").getBytes()); + } + fos.flush(); + fos.close(); + } + + public String toString() { + return super.toString(); + } + /* + Connection=close + Content-Encoding=gzip + Content-Length=7281 + Content-Type=text/html + Date=Mon, 05 Jan 2004 11:55:10 GMT + Server=Apache/1.3.26 + */ + + private static TimeZone GMTTimeZone = TimeZone.getTimeZone("PST"); + private static SimpleDateFormat HTTPGMTFormatter = new SimpleDateFormat("EEE, dd MMM yyyy HH:mm:ss 'GMT'"); + private static SimpleDateFormat EMLFormatter = new SimpleDateFormat("dd MMM yyyy HH:mm:ss", Locale.US); + + public static Date parseHTTPDate(String s) { + if ((s == null) || (s.length() < 9)) return new Date(); + s = s.trim(); + if (s.charAt(3) == ',') s = s.substring(5).trim(); // we skip the name of the day + if (s.charAt(9) == ' ') s = s.substring(0, 7) + "20" + s.substring(7); // short year version + if (s.charAt(2) == ',') s = s.substring(0, 2) + s.substring(3); // ommit comma after day of week + if ((s.charAt(0) > '9') && (s.length() > 20) && (s.charAt(2) == ' ')) s = s.substring(3); + if (s.length() > 20) s = s.substring(0, 
20).trim(); // truncate remaining, since that must be wrong + if (s.indexOf("Mrz") > 0) s.replaceAll("Mrz", "March"); + try { + return EMLFormatter.parse(s); + } catch (java.text.ParseException e) { + //System.out.println("ERROR long version parse: " + e.getMessage() + " at position " + e.getErrorOffset()); + serverLog.logError("HTTPC-header", "DATE ERROR (Parse): " + s); + return new Date(); + } catch (java.lang.NumberFormatException e) { + //System.out.println("ERROR long version parse: " + e.getMessage() + " at position " + e.getErrorOffset()); + serverLog.logError("HTTPC-header", "DATE ERROR (NumberFormat): " + s); + new Date(); + } + return new Date(); + } + + private Date headerDate(String kind) { + if (containsKey(kind)) return parseHTTPDate((String) get(kind)); + else return null; + } + + private static boolean isTextType(String type) { + return ((type != null) && + ((type.startsWith("text/html")) || (type.startsWith("text/plain"))) + ); + } + + public boolean isTextType() { + return isTextType(mime()); + } + + public String mime() { + return (String) get("CONTENT-TYPE", "application/octet-stream"); + } + + public Date date() { + return headerDate("Date"); + } + + public Date expires() { + return headerDate("Expires"); + } + + public Date lastModified() { + return headerDate("Last-modified"); + } + + public Date ifModifiedSince() { + return headerDate("IF-MODIFIED-SINCE"); + } + + public long age() { + Date lm = lastModified(); + if (lm == null) return Long.MAX_VALUE; else return (new Date()).getTime() - lm.getTime(); + } + + public long contentLength() { + if (containsKey("CONTENT-LENGTH")) { + try { + return Long.parseLong((String) get("CONTENT-LENGTH")); + } catch (NumberFormatException e) { + return -1; + } + } else { + return -1; + } + } + + public boolean gzip() { + return ((containsKey("CONTENT-ENCODING")) && + (((String) get("CONTENT-ENCODING")).toUpperCase().startsWith("GZIP"))); + } + /* + public static void main(String[] args) { + Collator c; + c 
= Collator.getInstance(Locale.US); c.setStrength(Collator.PRIMARY); + System.out.println("PRIMARY: compare(abc, ABC) = " + c.compare("abc", "ABC")); + c = Collator.getInstance(Locale.US); c.setStrength(Collator.SECONDARY); + System.out.println("SECONDARY: compare(abc, ABC) = " + c.compare("abc", "ABC")); + c = Collator.getInstance(Locale.US); c.setStrength(Collator.TERTIARY); + System.out.println("TERTIARY: compare(abc, ABC) = " + c.compare("abc", "ABC")); + c = Collator.getInstance(Locale.US); c.setStrength(Collator.IDENTICAL); + System.out.println("IDENTICAL: compare(abc, ABC) = " + c.compare("abc", "ABC")); + } + */ +} \ No newline at end of file diff --git a/source/de/anomic/http/httpTemplate.java b/source/de/anomic/http/httpTemplate.java new file mode 100644 index 000000000..360a21cf9 --- /dev/null +++ b/source/de/anomic/http/httpTemplate.java @@ -0,0 +1,302 @@ +// httpTemplate.java +// ------------------------------------- +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2004 +// last major change: 16.01.2005 +// +// extended for multi- and alternatives-templates by Alexander Schier +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. 
+// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. +// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notice above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. 
+ +package de.anomic.http; + +import de.anomic.server.*; +import java.util.*; +import java.io.*; + +public class httpTemplate { + + private static final byte hash = (byte)'#'; + private static final byte[] hasha = {hash}; + + private static final byte lbr = (byte)'['; + private static final byte rbr = (byte)']'; + private static final byte[] pOpen = {hash, lbr}; + private static final byte[] pClose = {rbr, hash}; + + private static final byte lcbr = (byte)'{'; + private static final byte rcbr = (byte)'}'; + private static final byte[] mOpen = {hash, lcbr}; + private static final byte[] mClose = {rcbr, hash}; + + private static final byte lrbr = (byte)'('; + private static final byte rrbr = (byte)')'; + private static final byte[] aOpen = {hash, lrbr}; + private static final byte[] aClose = {rrbr, hash}; + + private static boolean transferUntil(PushbackInputStream i, OutputStream o, byte[] pattern) throws IOException { + // returns true if pattern was found; everything but the pattern has then be transfered so far + int ppos = 0; + int b, bb; + boolean equal; + while ((b = i.read()) > 0) { + if ((b & 0xFF) == pattern[0]) { + // read the whole pattern + equal = true; + for (int n = 1; n < pattern.length; n++) { + if (((bb = i.read()) & 0xFF) != pattern[n]) { + // push back all + i.unread(bb); + equal = false; + for (int nn = n - 1; nn > 0; nn--) i.unread(pattern[nn]); + break; + } + } + if (equal) return true; + } + o.write(b); + } + return false; + } + + public static void writeTemplate(InputStream in, OutputStream out, Hashtable pattern, byte[] dflt) throws IOException { + writeTemplate(in, out, pattern, dflt, ""); + } + + public static void writeTemplate(InputStream in, OutputStream out, Hashtable pattern, byte[] dflt, String prefix) throws IOException { + PushbackInputStream pis = new PushbackInputStream(in, 100); + ByteArrayOutputStream keyStream; + String key; + String multi_key; + boolean consistent; + byte[] replacement; + int bb; + + while 
(transferUntil(pis, out, hasha)) { + bb = pis.read(); + keyStream = new ByteArrayOutputStream(); + + if( (bb & 0xFF) == lcbr ){ //multi + if( transferUntil(pis, keyStream, mClose) ){ //close tag + //multi_key = "_" + keyStream.toString(); //for _Key + bb = pis.read(); + if( (bb & 0xFF) != 10){ //kill newline + pis.unread(bb); + } + multi_key = keyStream.toString(); //IMPORTANT: no prefix here + keyStream = new ByteArrayOutputStream(); //reset stream + + /* DEBUG - print key + value + try{ + System.out.println("Key: "+prefix+multi_key+ "; Value: "+pattern.get(prefix+multi_key)); + }catch(NullPointerException e){ + System.out.println("Key: "+prefix+multi_key); + } + */ + + //this needs multi_key without prefix + if( transferUntil(pis, keyStream, (new String(mOpen) + "/" + multi_key + new String(mClose)).getBytes()) ){ + bb = pis.read(); + if((bb & 0xFF) != 10){ //kill newline + pis.unread(bb); + } + multi_key = prefix + multi_key; //OK, now add the prefix + String text=keyStream.toString(); //text between #{key}# an #{/key}# + int num=0; + if(pattern.containsKey(multi_key) && pattern.get(multi_key) != null){ + try{ + num=(int)Integer.parseInt((String)pattern.get(multi_key)); // Key contains the iteration number as string + }catch(NumberFormatException e){ + num=0; + } + //System.out.println(multi_key + ": " + num); //DEBUG + }else{ + //0 interations - no display + //System.out.println("_"+new String(multi_key)+" is null or does not exist"); //DEBUG + } + + //Enumeration enx = pattern.keys(); while (enx.hasMoreElements()) System.out.println("KEY=" + enx.nextElement()); // DEBUG + for(int i=0;i < num;i++ ){ + PushbackInputStream pis2 = new PushbackInputStream(new ByteArrayInputStream(text.getBytes())); + //System.out.println("recursing with text(prefix="+ multi_key + "_" + i + "_" +"):"); //DEBUG + //System.out.println(text); + writeTemplate(pis2, out, pattern, dflt, multi_key + "_" + i + "_"); + }//for + }else{//transferUntil + System.out.println("TEMPLATE ERROR: No 
Close Key found for #{"+multi_key+"}#"); + } + } + }else if( (bb & 0xFF) == lrbr ){ //alternatives + int others=0; + String text=""; + PushbackInputStream pis2; + + transferUntil(pis, keyStream, aClose); + key = keyStream.toString(); //Caution: Key does not contain prefix + + /* DEBUG - print key + value + try{ + System.out.println("Key: "+prefix+key+ "; Value: "+pattern.get(prefix+key)); + }catch(NullPointerException e){ + System.out.println("Key: "+prefix+key); + } + */ + + keyStream=new ByteArrayOutputStream(); //clear + + int whichPattern=0; + if(pattern.containsKey(prefix + key) && pattern.get(prefix + key) != null){ + try{ + whichPattern=(int)Integer.parseInt((String)pattern.get(prefix + key)); //which alternative(index) + }catch(NumberFormatException e){ + whichPattern=0; + } + }else{ + //System.out.println("Pattern \""+new String(prefix + key)+"\" is not set"); //DEBUG + } + + int currentPattern=0; + boolean found=false; + keyStream = new ByteArrayOutputStream(); //reset stream + while(!found){ + bb=pis.read(); + if( (bb & 0xFF) == hash){ + bb=pis.read(); + if( (bb & 0xFF) == lrbr){ + transferUntil(pis, keyStream, aClose); + + //reached the end. output last string. 
+ if(keyStream.toString().equals("/" + key)){ + pis2 = new PushbackInputStream(new ByteArrayInputStream(text.getBytes())); + //this maybe the wrong, but its the last + writeTemplate(pis2, out, pattern, dflt, prefix + key + "_"); + found=true; + }else if(others >0 && keyStream.toString().startsWith("/")){ //close nested + others--; + text += "#("+keyStream.toString()+")#"; + }else{ //nested + others++; + text += "#("+keyStream.toString()+")#"; + } + keyStream = new ByteArrayOutputStream(); //reset stream + continue; + }else{ //is not #( + pis.unread(bb);//is processed in next loop + bb = (hash);//will be added to text this loop + //text += "#"; + } + }else if( (bb & 0xFF) == ':' && others==0){//ignore :: in nested Expressions + bb=pis.read(); + if( (bb & 0xFF) == ':'){ + if(currentPattern == whichPattern){ //found the pattern + pis2 = new PushbackInputStream(new ByteArrayInputStream(text.getBytes())); + writeTemplate(pis2, out, pattern, dflt, prefix + key + "_"); + + transferUntil(pis, keyStream, (new String("#(/"+key+")#")).getBytes());//to #(/key)#. 
+ + found=true; + } + currentPattern++; + text=""; + continue; + }else{ + text += ":"; + } + } + if(!found){ + text += (char)bb; + if(pis.available()==0){ + System.out.println("TEMPLATE ERROR: No Close Key found for #("+key+")#"); + found=true; + } + } + }//while + }else if( (bb & 0xFF) == lbr ){ //normal + if (transferUntil(pis, keyStream, pClose)) { + // pattern detected, write replacement + key = prefix + keyStream.toString(); + replacement = replacePattern(key, pattern, dflt); //replace + + /* DEBUG + try{ + System.out.println("Key: "+key+ "; Value: "+pattern.get(key)); + }catch(NullPointerException e){ + System.out.println("Key: "+key); + } + */ + + serverFileUtils.write(replacement, out); + } else { + // inconsistency, simply finalize this + serverFileUtils.copy(pis, out); + return; + } + }else{ //no match, but a single hash (output # + bb) + byte[] tmp=new byte[2]; + tmp[0]=hash; + tmp[1]=(byte)bb; + serverFileUtils.write(tmp, out); + } + } + } + + public static byte[] replacePattern(String key, Hashtable pattern, byte dflt[]){ + byte[] replacement; + Object value; + if (pattern.containsKey(key)) { + value = pattern.get(key); + if (value instanceof byte[]) replacement = (byte[]) value; + else if (value instanceof String) replacement = ((String) value).getBytes(); + else replacement = value.toString().getBytes(); + } else { + replacement = dflt; + } + return replacement; + } + + public static void main(String[] args) { + // arg1 = test input; arg2 = replacement for pattern 'test'; arg3 = default replacement + try { + InputStream i = new ByteArrayInputStream(args[0].getBytes()); + Hashtable h = new Hashtable(); + h.put("test", args[1].getBytes()); + writeTemplate(new PushbackInputStream(i, 100), System.out, h, args[2].getBytes()); + System.out.flush(); + } catch (Exception e) { + e.printStackTrace(); + } + } + +} diff --git a/source/de/anomic/http/httpc.java b/source/de/anomic/http/httpc.java new file mode 100644 index 000000000..c9b1f9f1f --- /dev/null +++ 
b/source/de/anomic/http/httpc.java @@ -0,0 +1,1024 @@ +// httpc.java +// ------------------------------------- +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2004 +// last major change: 26.02.2004 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. 
+// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. + +/* + Class documentation: + this class forms an http client + while http access is built-in in java libraries, it is still + necessary to implement the network interface since otherwise + there is no access to the HTTP/1.0 / HTTP/1.1 header information + that comes along each connection. +*/ + +package de.anomic.http; + +import java.io.*; +import java.net.*; +import java.text.*; +import java.lang.*; +import java.util.*; +import java.util.zip.*; +import de.anomic.server.*; +import javax.net.ssl.SSLSocketFactory; + +public class httpc { + + // statics + private static final String vDATE = "20040602"; + private static String userAgent; + public static String systemOST; + private static final int terminalMaxLength = 30000; + private static TimeZone GMTTimeZone = TimeZone.getTimeZone("PST"); + + // --- The GMT standard date format used in the HTTP protocol + private static SimpleDateFormat HTTPGMTFormatter = new SimpleDateFormat("EEE, dd MMM yyyy HH:mm:ss 'GMT'", Locale.US); + private static SimpleDateFormat EMLFormatter = new SimpleDateFormat("dd MMM yyyy HH:mm:ss", Locale.US); + private static SimpleDateFormat ShortFormatter = new SimpleDateFormat("yyyyMMddHHmmss"); + //Mo 06 Sep 2004 23:32 + private static HashMap reverseMappingCache = new HashMap(); + + // class variables + private Socket socket = null; // client socket for commands + private String host = null; + private long timeout; + private long handle; + + // output and input streams for client control connection + public PushbackInputStream clientInput = null; + public OutputStream 
clientOutput = null; + + private boolean remoteProxyUse = false; + private String savedRemoteHost = null; + private String requestPath = null; + + // the dns cache + private static HashMap nameCacheHit = new HashMap(); + //private static HashSet nameCacheMiss = new HashSet(); + + public static String dnsResolve(String host) { + // looks for the ip of host and returns ip number as string + String ip = (String) nameCacheHit.get(host); + if (ip != null) return ip; + // if (nameCacheMiss.contains(host)) return null; + try { + ip = InetAddress.getByName(host).getHostAddress(); + if ((ip != null) && (!(ip.equals("127.0.0.1"))) && (!(ip.equals("localhost")))) { + nameCacheHit.put(host, ip); + return ip; + } else { + return null; + } + } catch (UnknownHostException e) { + //nameCacheMiss.add(host); + } + return null; + } + + public static boolean dnsFetch(String host) { + // looks for the ip of host and returns false if the host was in the cache + // if it is not in the cache the ip is fetched and this resturns true + if ((nameCacheHit.get(host) != null) /*|| (nameCacheMiss.contains(host)) */) return false; + try { + String ip = InetAddress.getByName(host).getHostAddress(); + if ((ip != null) && (!(ip.equals("127.0.0.1"))) && (!(ip.equals("localhost")))) { + nameCacheHit.put(host, ip); + return true; + } else { + return false; + } + } catch (UnknownHostException e) { + //nameCacheMiss.add(host); + return false; + } + } + + + // http client + + public httpc(String server, int port, int timeout, boolean ssl, + String remoteProxyHost, int remoteProxyPort) throws IOException { + this(remoteProxyHost, remoteProxyPort, timeout, ssl); + this.remoteProxyUse = true; + this.savedRemoteHost = server + ((port == 80) ? 
"" : (":" + port)); + } + + public httpc(String server, int port, int timeout, boolean ssl) throws IOException { + handle = System.currentTimeMillis(); + //serverLog.logDebug("HTTPC", handle + " initialized"); + this.remoteProxyUse = false; + this.timeout = timeout; + this.savedRemoteHost = server; + try { + this.host = server + ((port == 80) ? "" : (":" + port)); + String hostip; + if ((server.equals("localhost")) || (server.equals("127.0.0.1")) || (server.startsWith("192.168.")) || (server.startsWith("10."))) { + hostip = server; + } else { + hostip = dnsResolve(server); + if (hostip == null) throw new UnknownHostException(server); + } + if (ssl) + socket = SSLSocketFactory.getDefault().createSocket(hostip, port); + else + socket = new Socket(hostip, port); + socket.setSoTimeout(timeout); // waiting time for write + //socket.setSoLinger(true, timeout); // waiting time for read + socket.setKeepAlive(true); // + clientInput = new PushbackInputStream(socket.getInputStream()); + clientOutput = socket.getOutputStream(); + // if we reached this point, we should have a connection + } catch (UnknownHostException e) { + throw new IOException("unknown host: " + server); + } + } + + // provide HTTP date handling static methods + public static String dateString(Date date) { + if (date == null) return ""; else return HTTPGMTFormatter.format(date); + } + + public static Date nowDate() { + return new GregorianCalendar(GMTTimeZone).getTime(); + } + + static { + // provide system information for client identification + String loc = System.getProperty("user.timezone", "nowhere"); + int p = loc.indexOf("/"); + if (p > 0) loc = loc.substring(0,p); + loc = loc + "/" + System.getProperty("user.language", "dumb"); + systemOST = + System.getProperty("os.arch", "no-os-arch") + " " + System.getProperty("os.name", "no-os-arch") + " " + + System.getProperty("os.version", "no-os-version") + "; " + + "java " + System.getProperty("java.version", "no-java-version") + "; " + loc; + userAgent = 
"yacy (www.yacy.net; v" + vDATE + "; " + systemOST + ")"; + } + + public class response { + // Response-Header = Date | Pragma | Allow | Content-Encoding | Content-Length | Content-Type | + // Expires | Last-Modified | HTTP-header + /* + Status-Line = HTTP-Version SP Status-Code SP Reason-Phrase CRLF + 1xx: Informational - Not used, but reserved for future use + 2xx: Success - The action was successfully received, understood, and accepted. + 3xx: Redirection - Further action must be taken in order to complete the request + 4xx: Client Error - The request contains bad syntax or cannot be fulfilled + 5xx: Server Error - The server failed to fulfill an apparently valid request + */ + + // header information + public httpHeader responseHeader = null; + public String status; // the success/failure response string starting with status-code + private boolean gzip; // for gunzipping on-the-fly + private long gzipLength; // zipped-length of the response + + public response(boolean zipped) throws IOException { + + // lets start with worst-case attributes as set-up + responseHeader = new httpHeader(reverseMappingCache); + status = "503 internal error"; + gzip = false; + + // check connection status + if (clientInput == null) { + // the server has meanwhile disconnected + status = "503 lost connection to server"; + return; // in bad mood + } + + // reads in the http header, right now, right here + byte[] b = serverCore.receive(clientInput, timeout, terminalMaxLength, false); + if (b == null) { + // the server has meanwhile disconnected + status = "503 server has closed connection"; + return; // in bad mood + } + String buffer = new String(b); // this is the status response line + //System.out.println("#S#" + buffer); + int p = buffer.indexOf(" "); + if (p < 0) { + status = "500 status line parse error"; + // flush in anything that comes without parsing + while ((b = serverCore.receive(clientInput, timeout, terminalMaxLength, false)).length != 0) {} + return; // in bad mood + } 
+ // we have a status + status = buffer.substring(p + 1).trim(); // the status code plus reason-phrase + + // check validity + if (status.startsWith("400")) { + // bad request + // flush in anything that comes without parsing + while ((b = serverCore.receive(clientInput, timeout, terminalMaxLength, false)).length != 0) {} + return; // in bad mood + } + + // at this point we should have a valid response. read in the header properties + String key = ""; + String value = ""; + while ((b = serverCore.receive(clientInput, timeout, terminalMaxLength, false)) != null) { + if (b.length == 0) break; + buffer = new String(b); + //System.out.println("#H#" + buffer); // debug + if (buffer.charAt(0) <= 32) { + // use old entry + if (key.length() == 0) throw new IOException("header corrupted - input error"); + // attach new line + if (!(responseHeader.containsKey(key))) throw new IOException("header corrupted - internal error"); + responseHeader.put(key, (String) responseHeader.get(key) + " " + buffer.trim()); + } else { + // create new entry + p = buffer.indexOf(":"); + if (p > 0) { + key = buffer.substring(0, p).trim(); + value = (String) responseHeader.get(key); + // check if the header occurred already + if (value == null) { + // create new entry + responseHeader.put(key, buffer.substring(p + 1).trim()); + } else { + // attach to old entry + responseHeader.put(key, value + "#" + buffer.substring(p + 1).trim()); + } + } else { + serverLog.logError("HTTPC", "RESPONSE PARSE ERROR: HOST='" + host + "', PATH='" + requestPath + "', STATUS='" + status + "'"); + serverLog.logError("HTTPC", "..............BUFFER: " + buffer); + } + } + } + // finished with reading header + + // we will now manipulate the header if the content is gzip encoded, because + // reading the content with "writeContent" will gunzip on-the-fly + gzip = ((zipped) && (responseHeader.gzip())); + + if (gzip) { + // change attributes in case of gzip decoding + gzipLength = responseHeader.contentLength(); + 
responseHeader.remove("CONTENT-ENCODING"); // we fake that we don't have encoding, since what comes out does not have gzip and we also don't know what was encoded + responseHeader.remove("CONTENT-LENGTH"); // we cannot use the length during gunzippig yet; still we can hope that it works + } else { + gzipLength = -1; + } + + //System.out.println("###incoming header: " + responseHeader.toString()); + + // the body must be read separately by the get/writeContent methods + //System.out.println("## connection is " + ((socket.isClosed()) ? "closed" : "open") + "."); + } + + public boolean success() { + return ((status.charAt(0) == '2') || (status.charAt(0) == '3')); + } + + public byte[] writeContent(OutputStream procOS) throws IOException { + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + writeContentX(procOS, bos); + bos.flush(); + return bos.toByteArray(); + } + + public void writeContent(OutputStream procOS, File file) throws IOException { + // this writes the input stream to either another output stream or + // a file or both. 
+ FileOutputStream bufferOS = null; + if (file != null) bufferOS = new FileOutputStream(file); + writeContentX(procOS, bufferOS); + if (bufferOS != null) { + bufferOS.close(); + if (file.length() == 0) file.delete(); + } + } + + public void writeContentX(OutputStream procOS, OutputStream bufferOS) throws IOException { + // we write length bytes, but if length == -1 (or < 0) then we + // write until the input stream closes + // procOS == null -> no write to procOS + // file == null -> no write to file + // If the Content-Encoding is gzip, we gunzip on-the-fly + // and change the Content-Encoding and Content-Length attributes in the header + byte[] buffer = new byte[2048]; + int l; + long len = 0; + + // find out length + long length = responseHeader.contentLength(); + + // we have three methods of reading: length-based, length-based gzip and connection-close-based + if (length > 0) { + // we read exactly 'length' bytes + try { + while ((len < length) && ((l = clientInput.read(buffer)) >= 0)) { + if (procOS != null) procOS.write(buffer, 0, l); + if (bufferOS != null) bufferOS.write(buffer, 0, l); + len += l; + } + } catch (java.net.SocketException e) { + // this is an error: + throw new IOException("Socket exception: " + e.getMessage()); + } catch (java.net.SocketTimeoutException e) { + // this is an error: + throw new IOException("Socket time-out: " + e.getMessage()); + } + } else if ((gzip) && (gzipLength > 0) && (gzipLength < 100000)) { + //System.out.println("PERFORMING NEW GZIP-LENGTH-BASED HTTPC: gzipLength=" + gzipLength); // DEBUG + // we read exactly 'gzipLength' bytes; first copy into buffer: + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + while ((len < gzipLength) && ((l = clientInput.read(buffer)) >= 0)) { + baos.write(buffer, 0, l); + len += l; + } + baos.flush(); + // now uncompress + InputStream dis = (InputStream) new GZIPInputStream(new ByteArrayInputStream(baos.toByteArray())); + try { + while ((l = dis.read(buffer)) > 0) { + if 
(procOS != null) procOS.write(buffer, 0, l); + if (bufferOS != null) bufferOS.write(buffer, 0, l); + len += l; + } + } catch (java.net.SocketException e) { + // this is an error: + throw new IOException("Socket exception: " + e.getMessage()); + } catch (java.net.SocketTimeoutException e) { + // this is an error: + throw new IOException("Socket time-out: " + e.getMessage()); + } + baos.close(); baos = null; + } else { + // no content-length was given, thus we read until the connection closes + InputStream dis = (gzip) ? (InputStream) new GZIPInputStream(clientInput) : (InputStream) clientInput; + try { + while ((l = dis.read(buffer, 0, buffer.length)) >= 0) { + if (procOS != null) procOS.write(buffer, 0, l); + if (bufferOS != null) bufferOS.write(buffer, 0, l); + } + } catch (java.net.SocketException e) { + // this is not an error: it's ok, we waited for that + } catch (java.net.SocketTimeoutException e) { + // the same here; should be ok. + } + } + + // close the streams + if (procOS != null) procOS.flush(); + if (bufferOS != null) bufferOS.flush(); + buffer = null; + } + + public void print() { + serverLog.logInfo("HTTPC", "RESPONSE: status=" + status + ", header=" + responseHeader.toString()); + } + + } + + public void close() { + // closes the connection + try { + clientInput.close(); + clientOutput.close(); + socket.close(); + } catch (IOException e) {} + } + + // method is either GET, HEAD or POST + private void send(String method, String path, httpHeader header, boolean zipped) throws IOException { + // scheduled request through request-response objects/threads + + // check and correct path + if ((path == null) || (path.length() == 0)) path = "/"; + + // for debuggug: + requestPath = path; + + // prepare header + if (header == null) header = new httpHeader(); + + // set some standard values + if (!(header.containsKey("Accept"))) + header.put("Accept", "text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5"); + if 
(!(header.containsKey("Accept-Charset"))) + header.put("Accept-Charset", "ISO-8859-1,utf-8;q=0.7,*;q=0.7"); + if (!(header.containsKey("Accept-Language"))) + header.put("Accept-Language", "en-us,en;q=0.5"); + if (!(header.containsKey("Keep-Alive"))) + header.put("Keep-Alive", "300"); + + // set user agent. The user agent is only set if the value does not yet exists. + // this gives callers the opportunity, to change the user agent themselves, and + // it will not be changed. + if (!(header.containsKey("User-Agent"))) header.put("User-Agent", userAgent); + + // set the host attribute. This is in particular necessary, if we contact another proxy + // the host is mandatory, if we use HTTP/1.1 + if (!(header.containsKey("Host"))) { + if (this.remoteProxyUse) + header.put("Host", savedRemoteHost); + else + header.put("Host", this.host); + } + + if (!(header.containsKey("Connection"))) { + header.put("Connection", "close"); + } + + // advertise a little bit... + if ((!(header.containsKey("Referer"))) || (((String) header.get("Referer")).trim().length() == 0)) { + header.put("Referer", + (((System.currentTimeMillis() >> 10) & 1) == 0) ? + "http://www.anomic.de" : + "http://www.yacy.net/yacy"); + } + + // stimulate zipping or not + // we can unzip, and we will return it always as unzipped, unless not wanted + if (header.containsKey("Accept-Encoding")) { + String encoding = (String) header.get("Accept-Encoding"); + if (zipped) { + if (encoding.indexOf("gzip") < 0) { + // add the gzip encoding + //System.out.println("!!! adding gzip encoding"); + header.put("Accept-Encoding", "gzip,deflate" + ((encoding.length() == 0) ? "" : (";" + encoding))); + } + } else { + int pos = encoding.indexOf("gzip"); + if (pos >= 0) { + // remove the gzip encoding + //System.out.println("!!! 
removing gzip encoding"); + header.put("Accept-Encoding", encoding.substring(0, pos) + encoding.substring(pos + 4)); + } + } + } else { + if (zipped) header.put("Accept-Encoding", "gzip,deflate"); + } + + //header = new httpHeader(); header.put("Host", this.host); // debug + + // send request + if ((this.remoteProxyUse) && (!(method.equals("CONNECT")))) + path = "http://" + this.savedRemoteHost + path; + serverCore.send(clientOutput, method + " " + path + " HTTP/1.0"); // if set to HTTP/1.1, servers give time-outs? + + // send header + //System.out.println("***HEADER for path " + path + ": PROXY TO SERVER = " + header.toString()); // DEBUG + Iterator i = header.keySet().iterator(); + String key; + String value; + int pos; + while (i.hasNext()) { + key = (String) i.next(); + value = (String) header.get(key); + while ((pos = value.lastIndexOf("#")) >= 0) { + // special handling is needed if a key appeared several times, which is valid. + // all lines with same key are combined in one value, separated by a "#" + serverCore.send(clientOutput, key + ": " + value.substring(pos + 1).trim()); + //System.out.println("**+" + key + ": " + value.substring(pos + 1).trim()); // debug + value = value.substring(0, pos).trim(); + } + serverCore.send(clientOutput, key + ": " + value); + //System.out.println("***" + key + ": " + value); // debug + } + + // send terminating line + serverCore.send(clientOutput, ""); + clientOutput.flush(); + + // this is the place where www.stern.de refuses to answer ..??? 
+ } + + + private boolean shallTransportZipped(String path) { + return (!((path.endsWith(".gz")) || (path.endsWith(".tgz")) || + (path.endsWith(".jpg")) || (path.endsWith(".jpeg")) || + (path.endsWith(".gif")) | (path.endsWith(".zip")))); + } + + public response GET(String path, httpHeader requestHeader) throws IOException { + //serverLog.logDebug("HTTPC", handle + " requested GET '" + path + "', time = " + (System.currentTimeMillis() - handle)); + try { + boolean zipped = shallTransportZipped(path); + send("GET", path, requestHeader, zipped); + response r = new response(zipped); + //serverLog.logDebug("HTTPC", handle + " returned GET '" + path + "', time = " + (System.currentTimeMillis() - handle)); + return r; + } catch (SocketException e) { + throw new IOException(e.getMessage()); + } + } + + public response HEAD(String path, httpHeader requestHeader) throws IOException { + try { + send("HEAD", path, requestHeader, false); + return new response(false); + // in this case the caller should not read the response body, + // since there is none... 
+ } catch (SocketException e) { + throw new IOException(e.getMessage()); + } + } + + public response POST(String path, httpHeader requestHeader, InputStream ins) throws IOException { + try { + send("POST", path, requestHeader, false); + // if there is a body to the call, we would have a CONTENT-LENGTH tag in the requestHeader + String cl = (String) requestHeader.get("CONTENT-LENGTH"); + int len, c; + byte[] buffer = new byte[512]; + if (cl != null) { + len = Integer.parseInt(cl); + // transfer len bytes from ins to the server + while ((len > 0) && ((c = ins.read(buffer)) >= 0)) { + clientOutput.write(buffer, 0, c); + len -= c; + } + } else { + len = 0; + while ((c = ins.read(buffer)) >= 0) { + clientOutput.write(buffer, 0, c); + len += c; + } + requestHeader.put("CONTENT-LENGTH", "" + len); + } + clientOutput.flush(); + return new response(false); + } catch (SocketException e) { + throw new IOException(e.getMessage()); + } + } + + public response CONNECT(String host, int port, httpHeader requestHeader) throws IOException { + try { + send("CONNECT", host + ":" + port, requestHeader, false); + return new response(false); + } catch (SocketException e) { + throw new IOException(e.getMessage()); + } + } + + + public response POST(String path, httpHeader requestHeader, serverObjects args, Hashtable files) throws IOException { + // make shure, the header has a boundary information like + // CONTENT-TYPE=multipart/form-data; boundary=----------0xKhTmLbOuNdArY + if (requestHeader == null) requestHeader = new httpHeader(); + String boundary = (String) requestHeader.get("CONTENT-TYPE"); + if (boundary == null) { + // create a boundary + boundary = "multipart/form-data; boundary=----------" + java.lang.System.currentTimeMillis(); + requestHeader.put("CONTENT-TYPE", boundary); + } + // extract the boundary string + int pos = boundary.toUpperCase().indexOf("BOUNDARY="); + if (pos < 0) { + // again, create a boundary + boundary = "multipart/form-data; boundary=----------" + 
java.lang.System.currentTimeMillis(); + requestHeader.put("CONTENT-TYPE", boundary); + pos = boundary.indexOf("boundary="); + } + boundary = "--" + boundary.substring(pos + "boundary=".length()); + + ByteArrayOutputStream buf = new ByteArrayOutputStream(); + // in contrast to GET and HEAD, this method also transports a message body + // the body consists of repeated boundaries and values in between + if (args.size() != 0) { + // we have values for the POST, start with one boundary + String key, value; + Enumeration e = args.keys(); + while (e.hasMoreElements()) { + // start with a boundary + buf.write(boundary.getBytes()); + buf.write(serverCore.crlf); + // write value + key = (String) e.nextElement(); + value = (String) args.get(key, ""); + if ((files != null) && (files.containsKey(key))) { + // we are about to write a file + buf.write(("Content-Disposition: form-data; name=" + '"' + key + '"' + "; filename=" + '"' + value + '"').getBytes()); + buf.write(serverCore.crlf); + buf.write(serverCore.crlf); + buf.write((byte[]) files.get(key)); + buf.write(serverCore.crlf); + } else { + // write a single value + buf.write(("Content-Disposition: form-data; name=" + '"' + key + '"').getBytes()); + buf.write(serverCore.crlf); + buf.write(serverCore.crlf); + buf.write(value.getBytes()); + buf.write(serverCore.crlf); + } + } + // finish with a boundary + buf.write(boundary.getBytes()); + buf.write(serverCore.crlf); + //buf.write("" + serverCore.crlfString); + } + // create body array + buf.close(); + byte[] body = buf.toByteArray(); + //System.out.println("DEBUG: PUT BODY=" + new String(body)); + // size of that body + requestHeader.put("CONTENT-LENGTH", "" + body.length); + // send the header + //System.out.println("header=" + requestHeader); + send("POST", path, requestHeader, false); + // send the body + //System.out.println("body=" + buf.toString()); + serverCore.send(clientOutput, body); + + return new response(false); + } + + /* +DEBUG: PUT 
BODY=------------1090358578442 +Content-Disposition: form-data; name="youare" + +Ty2F86ekSWM5 +------------1090358578442 +Content-Disposition: form-data; name="key" + +6EkPPOl7 +------------1090358578442 +Content-Disposition: form-data; name="iam" + +HnTvzwV7SCJR +------------1090358578442 +Content-Disposition: form-data; name="process" + +permission +------------1090358578442 + + */ + + /* +------------0xKhTmLbOuNdArY +Content-Disposition: form-data; name="file1"; filename="dir.gif" +Content-Type: image/gif + +GIF89 +------------0xKhTmLbOuNdArY +Content-Disposition: form-data; name="file2"; filename="" + + +------------0xKhTmLbOuNdArY +Content-Disposition: form-data; name="upload" + +do upload +------------0xKhTmLbOuNdArY-- + +###### Listing Properties ###### +# METHOD=POST +### Header Values: +# EXT=html +# HTTP=HTTP/1.1 +# ACCEPT-ENCODING=gzip, deflate;q=1.0, identity;q=0.5, *;q=0 +# HOST=localhost:8080 +# PATH=/testcgi/doit.html +# CONTENT-LENGTH=474 +# CONTENT-TYPE=multipart/form-data; boundary=----------0xKhTmLbOuNdArY +# ARGC=0 +# CONNECTION=close +# USER-AGENT=Mozilla/5.0 (Macintosh; U; PPC Mac OS X; de-de) AppleWebKit/103u (KHTML, like Gecko) Safari/100.1 +### Call Properties: +###### End OfList ###### + */ + + public static byte[] singleGET(String host, int port, String path, int timeout, + String user, String password, boolean ssl, + String proxyHost, int proxyPort, + httpHeader requestHeader) throws IOException { + if (requestHeader == null) requestHeader = new httpHeader(); + if ((user != null) && (password != null) && (user.length() != 0)) { + requestHeader.put("Authorization", serverCodings.standardCoder.encodeBase64String(user + ":" + password)); + } + httpc con; + if ((proxyHost == null) || (proxyPort == 0)) + con = new httpc(host, port, timeout, ssl); + else + con = new httpc(host, port, timeout, ssl, proxyHost, proxyPort); + httpc.response res = con.GET(path, null); + if (res.status.startsWith("2")) { + ByteArrayOutputStream bos = new 
ByteArrayOutputStream(); + res.writeContent(bos, null); + con.close(); + return bos.toByteArray(); + } else { + return res.status.getBytes(); + } + } + + public static byte[] singleGET(URL u, int timeout, + String user, String password, + String proxyHost, int proxyPort) throws IOException { + int port = u.getPort(); + boolean ssl = u.getProtocol().equals("https"); + if (port < 0) port = (ssl) ? 443: 80; + String path = u.getPath(); + String query = u.getQuery(); + if ((query != null) && (query.length() > 0)) path = path + "?" + query; + return singleGET(u.getHost(), port, path, timeout, user, password, ssl, proxyHost, proxyPort, null); + } + + /* + public static byte[] singleGET(String url, int timeout) throws IOException { + try { + return singleGET(new URL(url), timeout, null, null, null, 0); + } catch (MalformedURLException e) { + throw new IOException("Malformed URL: " + e.getMessage()); + } + } + */ + + public static byte[] singlePOST(String host, int port, String path, int timeout, + String user, String password, boolean ssl, + String proxyHost, int proxyPort, + httpHeader requestHeader, serverObjects props) throws IOException { + if (requestHeader == null) requestHeader = new httpHeader(); + if ((user != null) && (password != null) && (user.length() != 0)) { + requestHeader.put("Authorization", serverCodings.standardCoder.encodeBase64String(user + ":" + password)); + } + httpc con; + if ((proxyHost == null) || (proxyPort == 0)) + con = new httpc(host, port, timeout, ssl); + else + con = new httpc(host, port, timeout, ssl, proxyHost, proxyPort); + httpc.response res = con.POST(path, null, props, null); + //System.out.println("response=" + res.toString()); + if (res.status.startsWith("2")) { + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + res.writeContent(bos, null); + con.close(); + return bos.toByteArray(); + } else { + return res.status.getBytes(); + } + } + + public static byte[] singlePOST(URL u, int timeout, + String user, String password, 
+ String proxyHost, int proxyPort, + serverObjects props) throws IOException { + int port = u.getPort(); + boolean ssl = u.getProtocol().equals("https"); + if (port < 0) port = (ssl) ? 443 : 80; + String path = u.getPath(); + String query = u.getQuery(); + if ((query != null) && (query.length() > 0)) path = path + "?" + query; + return singlePOST(u.getHost(), port, path, timeout, user, password, ssl, proxyHost, proxyPort, null, props); + } + + public static byte[] singlePOST(String url, int timeout, serverObjects props) throws IOException { + try { + return singlePOST(new URL(url), timeout, null, null, null, 0, props); + } catch (MalformedURLException e) { + throw new IOException("Malformed URL: " + e.getMessage()); + } + } + + public static Vector wget(URL url, int timeout, String user, String password, String proxyHost, int proxyPort) throws IOException { + // splitting of the byte array into lines + byte[] a = singleGET(url, timeout, user, password, proxyHost, proxyPort); + if (a == null) return null; + int s = 0; + int e; + Vector v = new Vector(); + while (s < a.length) { + e = s; while (e < a.length) if (a[e++] < 32) {e--; break;} + v.add(new String(a, s, e - s)); + s = e; while (s < a.length) if (a[s++] >= 32) {s--; break;} + } + return v; + } + + public static httpHeader whead(URL url, int timeout, String user, String password, String proxyHost, int proxyPort) throws IOException { + // generate request header + httpHeader requestHeader = new httpHeader(); + if ((user != null) && (password != null) && (user.length() != 0)) { + requestHeader.put("Authorization", serverCodings.standardCoder.encodeBase64String(user + ":" + password)); + } + // parse query + int port = url.getPort(); + boolean ssl = url.getProtocol().equals("https"); + if (port < 0) port = (ssl) ? 443 : 80; + String path = url.getPath(); + String query = url.getQuery(); + if ((query != null) && (query.length() > 0)) path = path + "?" 
+ query; + String host = url.getHost(); + // start connection + httpc con; + if ((proxyHost == null) || (proxyPort == 0)) + con = new httpc(host, port, timeout, ssl); + else + con = new httpc(host, port, timeout, ssl, proxyHost, proxyPort); + httpc.response res = con.HEAD(path, requestHeader); + if (res.status.startsWith("2")) { + // success + return res.responseHeader; + } else { + // fail + return res.responseHeader; + } + } + + /* + public static Vector wget(String url) { + try { + return wget(new URL(url), 5000, null, null, null, 0); + } catch (IOException e) { + Vector ll = new Vector(); + ll.add("503 " + e.getMessage()); + return ll; + } + } + */ + + public static Vector wput(URL url, int timeout, String user, String password, String proxyHost, int proxyPort, serverObjects props) throws IOException { + // splitting of the byte array into lines + byte[] a = singlePOST(url, timeout, user, password, proxyHost, proxyPort, props); + //System.out.println("wput-out=" + new String(a)); + int s = 0; + int e; + Vector v = new Vector(); + while (s < a.length) { + e = s; while (e < a.length) if (a[e++] < 32) {e--; break;} + v.add(new String(a, s, e - s)); + s = e; while (s < a.length) if (a[s++] >= 32) {s--; break;} + } + return v; + } + + /* + public static Vector wput(String url, serverObjects props) { + try { + return wput(url, 5000, null, null, null, 0, props); + } catch (IOException e) { + serverLog.logError("HTTPC", "wput exception for url " + url + ": " + e.getMessage()); + e.printStackTrace(); + Vector ll = new Vector(); + ll.add("503 " + e.getMessage()); + return ll; + } + } + */ + + public static void main(String[] args) { + System.out.println("ANOMIC.DE HTTP CLIENT v" + vDATE); + String url = args[0]; + if (!(url.toUpperCase().startsWith("HTTP://"))) url = "http://" + url; + Vector text = new Vector(); + if (args.length == 4) { + int timeout = Integer.parseInt(args[1]); + String proxyHost = args[2]; + int proxyPort = Integer.parseInt(args[3]); + try { + text = 
wget(new URL(url), timeout, null, null, proxyHost, proxyPort); + } catch (MalformedURLException e) { + System.out.println("The url '" + url + "' is wrong."); + } catch (IOException e) { + System.out.println("Error loading url '" + url + "': " + e.getMessage()); + } + } /*else { + serverObjects post = new serverObjects(); + int p; + for (int i = 1; i < args.length; i++) { + p = args[i].indexOf("="); + if (p > 0) post.put(args[i].substring(0, p), args[i].substring(p + 1)); + } + text = wput(url, post); + }*/ + Enumeration i = text.elements(); + while (i.hasMoreElements()) System.out.println((String) i.nextElement()); + } + +} + +/* +import java.net.*; +import java.io.*; +import javax.net.ssl.*; +import javax.security.cert.X509Certificate; +import java.security.KeyStore; + + + //The application can be modified to connect to a server outside + //the firewall by following SSLSocketClientWithTunneling.java. + +public class SSLSocketClientWithClientAuth { + + public static void main(String[] args) throws Exception { + String host = null; + int port = -1; + String path = null; + for (int i = 0; i < args.length; i++) + System.out.println(args[i]); + + if (args.length < 3) { + System.out.println( + "USAGE: java SSLSocketClientWithClientAuth " + + "host port requestedfilepath"); + System.exit(-1); + } + + try { + host = args[0]; + port = Integer.parseInt(args[1]); + path = args[2]; + } catch (IllegalArgumentException e) { + System.out.println("USAGE: java SSLSocketClientWithClientAuth " + + "host port requestedfilepath"); + System.exit(-1); + } + + try { + + SSLSocketFactory factory = null; + try { + SSLContext ctx; + KeyManagerFactory kmf; + KeyStore ks; + char[] passphrase = "passphrase".toCharArray(); + + ctx = SSLContext.getInstance("TLS"); + kmf = KeyManagerFactory.getInstance("SunX509"); + ks = KeyStore.getInstance("JKS"); + + ks.load(new FileInputStream("testkeys"), passphrase); + + kmf.init(ks, passphrase); + ctx.init(kmf.getKeyManagers(), null, null); + + factory = 
ctx.getSocketFactory(); + } catch (Exception e) { + throw new IOException(e.getMessage()); + } + + SSLSocket socket = (SSLSocket)factory.createSocket(host, port); + + socket.startHandshake(); + + PrintWriter out = new PrintWriter( + new BufferedWriter( + new OutputStreamWriter( + socket.getOutputStream()))); + out.println("GET " + path + " HTTP/1.1"); + out.println(); + out.flush(); + + if (out.checkError()) + System.out.println( + "SSLSocketClient: java.io.PrintWriter error"); + + BufferedReader in = new BufferedReader( + new InputStreamReader( + socket.getInputStream())); + + String inputLine; + + while ((inputLine = in.readLine()) != null) + System.out.println(inputLine); + + in.close(); + out.close(); + socket.close(); + + } catch (Exception e) { + e.printStackTrace(); + } + } +} +*/ diff --git a/source/de/anomic/http/httpd.java b/source/de/anomic/http/httpd.java new file mode 100644 index 000000000..b092e7152 --- /dev/null +++ b/source/de/anomic/http/httpd.java @@ -0,0 +1,853 @@ +// httpd.java +// ----------------------- +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2004 +// last change: 03.01.2004 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. 
+// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. +// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. + +/* + Class documentation: + Instances of this class can be passed as argument to the serverCore. 
+ The generic server dispatches HTTP commands and calls the + method GET, HEAD or POST in this class + these methods parse the command line and decide wether to call + a proxy servlet or a file server servlet +*/ + +package de.anomic.http; + +import java.io.*; +import java.net.*; +import java.util.*; +import java.text.*; +import de.anomic.server.*; + +public class httpd implements serverHandler { + + // static objects + public static final String vDATE = "<>"; + public static final String copyright = "[ HTTP SERVER: AnomicHTTPD v" + vDATE + " by Michael Christen / www.anomic.de ]"; + public static final String hline = "-------------------------------------------------------------------------------"; + private static HashMap reverseMappingCache = new HashMap(); + private static httpdHandler proxyHandler = null; // a servlet that holds the proxy functions + private static httpdHandler fileHandler = null; // a servlet that holds the file serving functions + private static serverSwitch switchboard = null; + private static String virtualHost = null; + + // class objects + private serverCore.Session session; // holds the session object of the calling class + private InetAddress userAddress; // the address of the client + private boolean allowProxy; + private boolean allowServer; + + // for authentication + private String proxyAccountBase64MD5; + private String serverAccountBase64MD5; + private String clientIP; + + // class methods + + public httpd(serverSwitch s, httpdHandler fileHandler, httpdHandler proxyHandler) { + // handler info + this.switchboard = s; + this.fileHandler = fileHandler; + this.proxyHandler = proxyHandler; + this.virtualHost = switchboard.getConfig("fileHost","localhost"); + + // authentication: by default none + this.proxyAccountBase64MD5 = null; + this.serverAccountBase64MD5 = null; + this.clientIP = null; + } + + // must be called at least once, but can be called again to re-use the object. 
+ public void initSession(serverCore.Session session) throws IOException { + this.session = session; + this.userAddress = session.userAddress; // client InetAddress + this.clientIP = userAddress.getHostAddress(); + if (this.userAddress.isAnyLocalAddress()) this.clientIP = "localhost"; + if (this.clientIP.equals("0:0:0:0:0:0:0:1")) this.clientIP = "localhost"; + if (this.clientIP.equals("127.0.0.1")) this.clientIP = "localhost"; + String proxyClient = switchboard.getConfig("proxyClient", "*"); + String serverClient = switchboard.getConfig("serverClient", "*"); + this.allowProxy = (proxyClient.equals("*")) ? true : match(clientIP, proxyClient); + this.allowServer = (serverClient.equals("*")) ? true : match(clientIP, serverClient); + + // check if we want to allow this socket to connect us + if (!((allowProxy) || (allowServer))) { + throw new IOException("CONNECTION FROM " + clientIP + " FORBIDDEN"); + } + + proxyAccountBase64MD5 = null; + serverAccountBase64MD5 = null; + } + + private static boolean match(String key, String latch) { + // the latch is a comma-separated list of patterns + // each pattern may contain one wildcard-character '*' which matches anything + StringTokenizer st = new StringTokenizer(latch,","); + String pattern; + int pos; + while (st.hasMoreTokens()) { + pattern = st.nextToken(); + pos = pattern.indexOf("*"); + if (pos < 0) { + // no wild card: exact match + if (key.equals(pattern)) return true; + } else { + // wild card: match left and right side of pattern + if ((key.startsWith(pattern.substring(0, pos))) && + (key.endsWith(pattern.substring(pos + 1)))) return true; + } + } + return false; + } + + public String greeting() { // OBLIGATORIC FUNCTION + // a response line upon connection is send to client + // if no response line is wanted, return "" or null + return null; + } + + public String error(Throwable e) { // OBLIGATORIC FUNCTION + // return string in case of any error that occurs during communication + // is always (but not only) 
called if an IO-dependent exception occurrs. + e.printStackTrace(); + return "501 Exception occurred: " + e.getMessage(); + } + + private String readLine() { + // reads a line from the input socket + // this function is provided by the server through a passed method on initialization + byte[] l = this.session.readLine(); + if (l == null) return null; else return new String(l); + } + + private httpHeader readHeader() { + httpHeader header = new httpHeader(reverseMappingCache); + int p; + String line; + String key; + String value; + while ((line = readLine()) != null) { + if (line.length() == 0) break; // this seperates the header of the HTTP request from the body + //System.out.println("***" + line); // debug + // parse the header line: a property seperated with the ':' sign + p = line.indexOf(":"); + if (p >= 0) { + // store a property + key = line.substring(0, p).trim(); + value = (String) header.get(key); + // check if the header occurred already + if (value == null) { + // create new entry + header.put(key, line.substring(p + 1).trim()); + } else { + // value can occur double times, attach with '#' - separator + header.put(key, value + "#" + line.substring(p + 1).trim()); + } + } + } + return header; + } + + public Boolean GET(String arg) throws IOException { + Properties prop = parseQuery(arg); + prop.setProperty("METHOD", "GET"); + prop.setProperty("CLIENTIP", clientIP); + + // we now know the HTTP version. depending on that, we read the header + httpHeader header; + String httpVersion = prop.getProperty("HTTP", "HTTP/0.9"); + if (httpVersion.equals("HTTP/0.9")) + header = new httpHeader(reverseMappingCache); + else + header = readHeader(); + + // managing keep-alive: in HTTP/0.9 and HTTP/1.0 every connection is closed + // afterwards. In HTTP/1.1 (and above, in the future?) connections are + // persistent by default, but closed with the "Connection: close" + // property. 
+ boolean persistent = (!((httpVersion.equals("HTTP/0.9")) || (httpVersion.equals("HTTP/1.0")))); + String connection = prop.getProperty("Connection", "close").toLowerCase(); + if (connection.equals("close")) persistent = false; + if (connection.equals("keep-alive")) persistent = true; + + //System.out.println("HEADER: " + header.toString()); + + // return multi-line message + if (prop.getProperty("HOST").equals(virtualHost)) { + // pass to server + if (allowServer) { + if (serverAccountBase64MD5 == null) serverAccountBase64MD5 = switchboard.getConfig("serverAccountBase64MD5", ""); + if (serverAccountBase64MD5.length() == 0) { + // no authenticate requested + if (fileHandler == null) fileHandler = new httpdFileHandler(this.switchboard); + fileHandler.doGet(prop, header, this.session.out); + } else { + String auth = (String) header.get("Authorization"); + if (auth == null) { + // authorization requested, but no authorizeation given in header. Ask for authenticate: + session.out.write((httpVersion + " 401 log-in required" + serverCore.crlfString + + "WWW-Authenticate: Basic realm=\"log-in\"" + serverCore.crlfString + + serverCore.crlfString).getBytes()); + return serverCore.TERMINATE_CONNECTION; + } else if (serverAccountBase64MD5.equals(serverCodings.standardCoder.encodeMD5Hex(auth.trim().substring(6)))) { + // we are authorized + if (fileHandler == null) fileHandler = new httpdFileHandler(this.switchboard); + fileHandler.doGet(prop, header, this.session.out); + } else { + // wrong password given: ask for authenticate again + serverLog.logInfo("HTTPD", "Wrong log-in for account 'server' in HTTPD.GET " + prop.getProperty("PATH") + " from IP " + clientIP); + session.out.write((httpVersion + " 401 log-in required" + serverCore.crlfString + + "WWW-Authenticate: Basic realm=\"log-in\"" + serverCore.crlfString + + serverCore.crlfString).getBytes()); + return serverCore.TERMINATE_CONNECTION; + } + } + } else { + // not authorized through firewall blocking (ip does not 
match filter) + session.out.write((httpVersion + " 403 refused (IP not granted)" + serverCore.crlfString + serverCore.crlfString + "you are not allowed to connect to this server, because you are using the non-granted IP " + clientIP + ". allowed are only connections that match with the following filter: " + switchboard.getConfig("serverClient", "*") + serverCore.crlfString).getBytes()); + return serverCore.TERMINATE_CONNECTION; + } + } else { + // pass to proxy + if (allowProxy) { + if (proxyAccountBase64MD5 == null) proxyAccountBase64MD5 = switchboard.getConfig("proxyAccountBase64MD5", ""); + if ((proxyAccountBase64MD5.length() == 0) || + (proxyAccountBase64MD5.equals(serverCodings.standardCoder.encodeMD5Hex(((String) header.get("Authorization", "xxxxxx")).trim().substring(6))))) { + // we are authorized or no authenticate requested + if (proxyHandler != null) proxyHandler.doGet(prop, header, this.session.out); + } else { + // ask for authenticate + session.out.write((httpVersion + " 407 Proxy Authentication Required" + serverCore.crlfString + + "WWW-Authenticate: Basic realm=\"log-in\"" + serverCore.crlfString + + serverCore.crlfString).getBytes()); + return serverCore.TERMINATE_CONNECTION; + } + } else { + // not authorized through firewall blocking (ip does not match filter) + session.out.write((httpVersion + " 403 refused (IP not granted)" + serverCore.crlfString + serverCore.crlfString + "you are not allowed to connect to this proxy, because you are using the non-granted IP " + clientIP + ". allowed are only connections that match with the following filter: " + switchboard.getConfig("proxyClient", "*") + serverCore.crlfString).getBytes()); + return serverCore.TERMINATE_CONNECTION; + } + } + return (persistent) ? 
serverCore.RESUME_CONNECTION : serverCore.TERMINATE_CONNECTION; + } + + public Boolean HEAD(String arg) throws IOException { + Properties prop = parseQuery(arg); + prop.setProperty("METHOD", "HEAD"); + prop.setProperty("CLIENTIP", clientIP); + + // we now know the HTTP version. depending on that, we read the header + httpHeader header; + String httpVersion = prop.getProperty("HTTP", "HTTP/0.9"); + if (httpVersion.equals("HTTP/0.9")) + header = new httpHeader(reverseMappingCache); + else + header = readHeader(); + + // return multi-line message + if (prop.getProperty("HOST").equals(virtualHost)) { + // pass to server + if (allowServer) { + if (serverAccountBase64MD5 == null) serverAccountBase64MD5 = switchboard.getConfig("serverAccountBase64MD5", ""); + if (serverAccountBase64MD5.length() == 0) { + // no authenticate requested + if (fileHandler == null) fileHandler = new httpdFileHandler(this.switchboard); + fileHandler.doHead(prop, header, this.session.out); + } else { + String auth = (String) header.get("Authorization"); + if (auth == null) { + // authorization requested, but no authorizeation given in header. 
Ask for authenticate: + session.out.write((httpVersion + " 401 log-in required" + serverCore.crlfString + + "WWW-Authenticate: Basic realm=\"log-in\"" + serverCore.crlfString + + serverCore.crlfString).getBytes()); + return serverCore.TERMINATE_CONNECTION; + } else if (serverAccountBase64MD5.equals(serverCodings.standardCoder.encodeMD5Hex(auth.trim().substring(6)))) { + // we are authorized + if (fileHandler == null) fileHandler = new httpdFileHandler(this.switchboard); + fileHandler.doHead(prop, header, this.session.out); + } else { + // wrong password given: ask for authenticate again + serverLog.logInfo("HTTPD", "Wrong log-in for account 'server' in HTTPD.HEAD " + prop.getProperty("PATH") + " from IP " + clientIP); + session.out.write((httpVersion + " 401 log-in required" + serverCore.crlfString + + "WWW-Authenticate: Basic realm=\"log-in\"" + serverCore.crlfString + + serverCore.crlfString).getBytes()); + return serverCore.TERMINATE_CONNECTION; + } + } + } else { + // not authorized through firewall blocking (ip does not match filter) + session.out.write((httpVersion + " 403 refused (IP not granted)" + + serverCore.crlfString).getBytes()); + return serverCore.TERMINATE_CONNECTION; + } + } else { + // pass to proxy + if (allowProxy) { + if (proxyAccountBase64MD5 == null) proxyAccountBase64MD5 = switchboard.getConfig("proxyAccountBase64MD5", ""); + if ((proxyAccountBase64MD5.length() == 0) || + (proxyAccountBase64MD5.equals(serverCodings.standardCoder.encodeMD5Hex(((String) header.get("Authorization", "xxxxxx")).trim().substring(6))))) { + // we are authorized or no authenticate requested + if (proxyHandler != null) proxyHandler.doHead(prop, header, this.session.out); + } else { + // ask for authenticate + session.out.write((httpVersion + " 407 Proxy Authentication Required" + serverCore.crlfString + + "WWW-Authenticate: Basic realm=\"log-in\"" + serverCore.crlfString + + serverCore.crlfString).getBytes()); + return serverCore.TERMINATE_CONNECTION; + } + } else { 
+ // not authorized through firewall blocking (ip does not match filter) + session.out.write((httpVersion + " 403 refused (IP not granted)" + + serverCore.crlfString).getBytes()); + return serverCore.TERMINATE_CONNECTION; + } + } + return serverCore.TERMINATE_CONNECTION; + } + + public Boolean POST(String arg) throws IOException { + Properties prop = parseQuery(arg); + prop.setProperty("METHOD", "POST"); + prop.setProperty("CLIENTIP", clientIP); + + // we now know the HTTP version. depending on that, we read the header + httpHeader header; + String httpVersion = prop.getProperty("HTTP", "HTTP/0.9"); + if (httpVersion.equals("HTTP/0.9")) + header = new httpHeader(reverseMappingCache); + else + header = readHeader(); + + boolean persistent = (!((httpVersion.equals("HTTP/0.9")) || (httpVersion.equals("HTTP/1.0")))); + String connection = prop.getProperty("Connection", "close").toLowerCase(); + if (connection.equals("close")) persistent = false; + if (connection.equals("keep-alive")) persistent = true; + + // return multi-line message + if (prop.getProperty("HOST").equals(virtualHost)) { + // pass to server + if (allowServer) { + if (serverAccountBase64MD5 == null) serverAccountBase64MD5 = switchboard.getConfig("serverAccountBase64MD5", ""); + if (serverAccountBase64MD5.length() == 0) { + // no authenticate requested + if (fileHandler == null) fileHandler = new httpdFileHandler(this.switchboard); + fileHandler.doPost(prop, header, this.session.out, this.session.in); + } else { + String auth = (String) header.get("Authorization"); + if (auth == null) { + // ask for authenticate + session.out.write((httpVersion + " 401 log-in required" + serverCore.crlfString + + "WWW-Authenticate: Basic realm=\"log-in\"" + serverCore.crlfString + + serverCore.crlfString).getBytes()); + return serverCore.TERMINATE_CONNECTION; + } else if (serverAccountBase64MD5.equals(serverCodings.standardCoder.encodeMD5Hex(auth.trim().substring(6)))) { + // we are authorized + if (fileHandler == null) 
fileHandler = new httpdFileHandler(this.switchboard); + fileHandler.doPost(prop, header, this.session.out, this.session.in); + } else { + // wrong password given: ask for authenticate again + serverLog.logInfo("HTTPD", "Wrong log-in for account 'server' in HTTPD.POST " + prop.getProperty("PATH") + " from IP " + clientIP); + session.out.write((httpVersion + " 401 log-in required" + serverCore.crlfString + + "WWW-Authenticate: Basic realm=\"log-in\"" + serverCore.crlfString + + serverCore.crlfString).getBytes()); + return serverCore.TERMINATE_CONNECTION; + } + } + } else { + // not authorized through firewall blocking (ip does not match filter) + session.out.write((httpVersion + " 403 refused (IP not granted)" + serverCore.crlfString + serverCore.crlfString + "you are not allowed to connect to this server, because you are using the non-granted IP " + clientIP + ". allowed are only connections that match with the following filter: " + switchboard.getConfig("serverClient", "*") + serverCore.crlfString).getBytes()); + return serverCore.TERMINATE_CONNECTION; + } + } else { + // pass to proxy + if (allowProxy) { + if (proxyAccountBase64MD5 == null) proxyAccountBase64MD5 = switchboard.getConfig("proxyAccountBase64MD5", ""); + if ((proxyAccountBase64MD5.length() == 0) || + (proxyAccountBase64MD5.equals(serverCodings.standardCoder.encodeMD5Hex(((String) header.get("Authorization", "xxxxxx")).trim().substring(6))))) { + // we are authorized or no authenticate requested + if (proxyHandler != null) proxyHandler.doPost(prop, header, this.session.out, this.session.in); + } else { + // ask for authenticate + session.out.write((httpVersion + " 407 Proxy Authentication Required" + serverCore.crlfString + + "WWW-Authenticate: Basic realm=\"log-in\"" + serverCore.crlfString + + serverCore.crlfString).getBytes()); + return serverCore.TERMINATE_CONNECTION; + } + } else { + // not authorized through firewall blocking (ip does not match filter) + session.out.write((httpVersion + " 403 
refused (IP not granted)" + serverCore.crlfString + serverCore.crlfString + "you are not allowed to connect to this proxy, because you are using the non-granted IP " + clientIP + ". allowed are only connections that match with the following filter: " + switchboard.getConfig("proxyClient", "*") + serverCore.crlfString).getBytes()); + return serverCore.TERMINATE_CONNECTION; + } + } + //return serverCore.RESUME_CONNECTION; + return (persistent) ? serverCore.RESUME_CONNECTION : serverCore.TERMINATE_CONNECTION; + } + + + public Boolean CONNECT(String arg) throws IOException { + // establish a ssh-tunneled http connection + // this is to support https + + // parse HTTP version + int pos = arg.indexOf(" "); + String httpVersion = "HTTP/1.0"; + if (pos >= 0) { + httpVersion = arg.substring(pos + 1); + arg = arg.substring(0, pos); + } + + if (!(allowProxy)) { + // not authorized through firewall blocking (ip does not match filter) + session.out.write((httpVersion + " 403 refused (IP not granted)" + serverCore.crlfString + serverCore.crlfString + "you are not allowed to connect to this proxy, because you are using the non-granted IP " + clientIP + ". 
allowed are only connections that match with the following filter: " + switchboard.getConfig("proxyClient", "*") + serverCore.crlfString).getBytes()); + return serverCore.TERMINATE_CONNECTION; + } + + // parse port + pos = arg.indexOf(":"); + int port = 443; + if (pos >= 0) { + port = Integer.parseInt(arg.substring(pos + 1)); + arg = arg.substring(0, pos); + } + + // arg is now the host string + + // parse remaining lines + httpHeader header = readHeader(); + + if (port != 443) { + // security: connection only to ssl port + // we send a 403 (forbidden) error back + session.out.write((httpVersion + " 403 Connection to non-443 forbidden" + + serverCore.crlfString + serverCore.crlfString).getBytes()); + return serverCore.TERMINATE_CONNECTION; + } + + // prepare to pass values + Properties prop = new Properties(); + prop.setProperty("HOST", arg); + prop.setProperty("PORT", "" + port); + prop.setProperty("HTTP", httpVersion); + + // pass to proxy + if (allowProxy) { + if (proxyAccountBase64MD5 == null) proxyAccountBase64MD5 = switchboard.getConfig("proxyAccountBase64MD5", ""); + if ((proxyAccountBase64MD5.length() == 0) || + (proxyAccountBase64MD5.equals(serverCodings.standardCoder.encodeMD5Hex(((String) header.get("Authorization", "xxxxxx")).trim().substring(6))))) { + // we are authorized or no authenticate requested + if (proxyHandler != null) proxyHandler.doConnect(prop, header, (InputStream) this.session.in, this.session.out); + } else { + // ask for authenticate + session.out.write((httpVersion + " 407 Proxy Authentication Required" + serverCore.crlfString + + "WWW-Authenticate: Basic realm=\"log-in\"" + serverCore.crlfString + + serverCore.crlfString).getBytes()); + } + } else { + // not authorized through firewall blocking (ip does not match filter) + session.out.write((httpVersion + " 403 refused (IP not granted)" + serverCore.crlfString + serverCore.crlfString + "you are not allowed to connect to this proxy, because you are using the non-granted IP " + 
clientIP + ". allowed are only connections that match with the following filter: " + switchboard.getConfig("proxyClient", "*") + serverCore.crlfString).getBytes()); + } + + return serverCore.TERMINATE_CONNECTION; + } + + + private Properties parseQuery(String s) { + Properties prop = new Properties(); + + // this parses a whole URL + if (s.length() == 0) { + prop.setProperty("HOST", virtualHost); + prop.setProperty("PATH", "/"); + prop.setProperty("HTTP", "HTTP/0.9"); + prop.setProperty("EXT", ""); + return prop; + } + + // store the version propery "HTTP" and cut the query at both ends + int sep = s.indexOf(" "); + if (sep >= 0) { + // HTTP version is given + prop.setProperty("HTTP", s.substring(sep + 1).trim()); + s = s.substring(0, sep).trim(); // cut off HTTP version mark + } else { + // HTTP version is not given, it will be treated as ver 0.9 + prop.setProperty("HTTP", "HTTP/0.9"); + } + + // properties of the query are stored with the prefix "&" + // additionally, the values URL and ARGC are computed + + String argsString = ""; + sep = s.indexOf("?"); + if (sep >= 0) { + // there are values attached to the query string + argsString = s.substring(sep + 1); // cut haed from tail of query + s = s.substring(0, sep); + } + prop.setProperty("URL", s); // store URL + //System.out.println("HTTPD: ARGS=" + argsString); + if (argsString.length() != 0) prop.setProperty("ARGS", argsString); // store arguments in original form + + // find out file extension + sep = s.lastIndexOf("."); + if (sep >= 0) { + if (s.indexOf("?", sep + 1) >= sep) + prop.setProperty("EXT", s.substring(sep + 1, s.indexOf("?", sep + 1)).toLowerCase()); + else if (s.indexOf("#", sep + 1) >= sep) + prop.setProperty("EXT", s.substring(sep + 1, s.indexOf("#", sep + 1)).toLowerCase()); + else + prop.setProperty("EXT", s.substring(sep + 1).toLowerCase()); + } else { + prop.setProperty("EXT", ""); + } + + // finally find host string + if (s.toUpperCase().startsWith("HTTP://")) { + // a host was given. 
extract it and set path + s = s.substring(7); + sep = s.indexOf("/"); + if (sep < 0) { + // this is a malformed url, something like + // http://index.html + // we are lazy and guess that it means + // /index.html + // which is a localhost access to the file servlet + prop.setProperty("HOST", virtualHost); + prop.setProperty("PATH", "/" + s); + } else { + // THIS IS THE "GOOD" CASE + // a perfect formulated url + prop.setProperty("HOST", s.substring(0, sep)); + prop.setProperty("PATH", s.substring(sep)); // yes, including beginning "/" + } + } else { + // no host in url. set path + if (s.startsWith("/")) { + // thats also fine, its a perfect localhost access + // in this case, we simulate a + // http://localhost/s + // access by setting a virtual host + prop.setProperty("HOST", virtualHost); + prop.setProperty("PATH", s); + } else { + // the client 'forgot' to set a leading '/' + // this is the same case as above, with some lazyness + prop.setProperty("HOST", virtualHost); + prop.setProperty("PATH", "/" + s); + } + } + return prop; + } + + + // some static methods that needs to be used from any CGI + // and also by the httpdFileHandler + // but this belongs to the protocol handler, this class. + + + public static int parseArgs(serverObjects args, PushbackInputStream in, int length) throws IOException { + // this is a quick hack using a previously coded parseMultipart based on a buffer + // should be replaced sometime by a 'right' implementation + byte[] buffer = new byte[length]; + in.read(buffer); + int argc = parseArgs(args, new String(buffer)); + buffer = null; + return argc; + } + + public static int parseArgs(serverObjects args, String argsString) { + // this parses a arg string that can either be attached to a URL query + // or can be given as result of a post method + // the String argsString is supposed to be constructed as + // ='&'='&'= + // the calling function must strip off a possible leading '?' 
char + if (argsString.length() == 0) return 0; + argsString = argsString + "&"; // for technical reasons + int sep; + int eqp; + int argc = 0; + // Textfield1=default+value+Textfield+1&Textfield2=default+value+Textfield+2&selection1=sel1&selection2=othervalue1&selection2=sel2&selection3=sel3&Menu1=SubEnry11&radio1=button1&check1=button2&check1=button3&hidden1=&sButton1=enter+%281%29 + while (argsString.length() > 0) { + eqp = argsString.indexOf("="); + sep = argsString.indexOf("&"); + if ((eqp <= 0) || (sep <= 0)) break; + // resulting equations are inserted into the property args with leading '&' + args.put(parseArg(argsString.substring(0, eqp)), parseArg(argsString.substring(eqp + 1, sep))); + argsString = argsString.substring(sep + 1); + argc++; + } + // we return the number of parsed arguments + return argc; + } + + private static String parseArg(String s) { + // this parses a given value-string from a http property + // we replace all "+" by spaces + // and resolve %-escapes with two-digit hex attributes + int pos = 0; + String result = ""; + while (pos < s.length()) { + if (s.charAt(pos) == '+') { + result += " "; pos++; + } else if (s.charAt(pos) == '%') { + result += (char) Integer.parseInt(s.substring(pos + 1, pos + 3), 16); + pos += 3; + } else { + result += s.charAt(pos++); + } + } + return result; + } + + + public static HashMap parseMultipart(httpHeader header, serverObjects args, PushbackInputStream in, int length) throws IOException { + // this is a quick hack using a previously coded parseMultipart based on a buffer + // should be replaced sometime by a 'right' implementation + byte[] buffer = new byte[length]; + int c, a = 0; + while (a < length) { + c = in.read(buffer, a, length - a); + if (c <= 0) break; + a += c; + } + //System.out.println("MULTIPART-BUFFER=" + new String(buffer)); + HashMap files = parseMultipart(header, args, buffer); + buffer = null; + return files; + } + + public static HashMap parseMultipart(httpHeader header, serverObjects 
args, byte[] buffer) throws IOException { + // we parse a multipart message and put results into the properties + // find/identify boundary marker + //System.out.println("DEBUG parseMultipart = <<" + new String(buffer) + ">>"); + String s = (String) header.get("CONTENT-TYPE"); + if (s == null) return null; + int q; + int p = s.toLowerCase().indexOf("boundary="); + if (p < 0) throw new IOException("boundary marker in multipart not found"); + // boundaries start with additional leading "--", see RFC1867 + byte[] boundary = ("--" + s.substring(p + 9)).getBytes(); + + // eat up first boundary + // the buffer must start with a boundary + byte[] line = readLine(0, buffer); + int pos = nextPos; + if ((line == null) || (!(equals(line, 0, boundary, 0, boundary.length)))) + throw new IOException("boundary not recognized: " + ((line == null) ? "NULL" : new String(line)) + ", boundary = " + new String(boundary)); + + // we need some constants + byte[] namec = (new String("name=")).getBytes(); + byte[] filenamec = (new String("filename=")).getBytes(); + byte[] semicolonc = (new String(";")).getBytes(); + byte[] quotec = new byte[] {(byte) '"'}; + + // now loop over boundaries + byte [] name; + byte [] filename; + HashMap files = new HashMap(); + int argc = 0; + //System.out.println("DEBUG: parsing multipart body:" + new String(buffer)); + while (pos < buffer.length) { // boundary enumerator + // here the 'pos' marker points to the first line in a section after a boundary line + line = readLine(pos, buffer); pos = nextPos; + // termination if line is empty + if (line.length == 0) break; + // find name tag in line + p = indexOf(0, line, namec); + if (p < 0) throw new IOException("tag name in marker section not found: '" + new String(line) + "'"); // a name tag must always occur + p += namec.length + 1; // first position of name value + q = indexOf(p, line, quotec); + if (q < 0) throw new IOException("missing quote in name tag: '" + new String(line) + "'"); + name = new byte[q - 
p]; + java.lang.System.arraycopy(line, p, name, 0, q - p); + // if this line has also a filename attribute, read it + p = indexOf(q, line, filenamec); + if (p > 0) { + p += filenamec.length + 1; // first position of name value + q = indexOf(p, line, quotec); + if (q < 0) throw new IOException("missing quote in filename tag: '" + new String(line) + "'"); + filename = new byte[q - p]; + java.lang.System.arraycopy(line, p, filename, 0, q - p); + } else filename = null; + // we have what we need. more information lines may follow, but we omit parsing them + // we just skip until an empty line is reached + while (pos < buffer.length) { // line skiping + line = readLine(pos, buffer); pos = nextPos; + if ((line == null) || (line.length == 0)) break; + } + // depending on the filename tag exsistence, read now either a value for the name + // or a complete uploaded file + // to know the exact length of the value, we must identify the next boundary + p = indexOf(pos, buffer, boundary); + + // if we can't find another boundary, then this is an error in the input + if (p < 0) { + serverLog.logError("HTTPD", "ERROR in PUT body: no ending boundary. 
probably missing values"); + break; + } + + // we don't know if the value is terminated by lf, cr or crlf + // (it's suppose to be crlf, but we want to be lazy about wrong terminations) + if (buffer[p - 2] == serverCore.cr) // ERROR: IndexOutOfBounds: -2 + /* crlf */ q = p - 2; + else + /* cr or lf only */ q = p - 1; + // the above line is wrong if we uploaded a file that has a cr as it's last byte + // and the client's line termination symbol is only a cr or lf (which would be incorrect) + // the value is between 'pos' and 'q', while the next marker is 'p' + line = new byte[q - pos]; + java.lang.System.arraycopy(buffer, pos, line, 0, q - pos); + // in the 'line' variable we have now either a normal value or an uploadef file + if (filename == null) { + args.put(new String(name), new String(line, "ISO-8859-1")); + } else { + // we store the file in a hashtable. + // we use the same key to address the file in the hashtable as we + // use to address the filename in the properties, but without leading '&' + args.put(new String(name), new String(filename)); + files.put(new String(name), line); + } + argc++; + // finally, read the next boundary line + line = readLine(p, buffer); + pos = nextPos; + } + header.put("ARGC", ("" + argc)); // store argument count + return files; + } + +/* +------------1090358578442 +Content-Disposition: form-data; name="youare" + +Ty2F86ekSWM5 +------------1090358578442 +Content-Disposition: form-data; name="key" + +6EkPPOl7 +------------1090358578442 +Content-Disposition: form-data; name="iam" + +HnTvzwV7SCJR +------------1090358578442 +Content-Disposition: form-data; name="process" + +permission +------------1090358578442 + +*/ + + static int nextPos = -1; + private static byte[] readLine(int start, byte[] array) { + // read a string from an array; line ending is always CRLF + // but we are also fuzzy with that: may also be only CR or LF + // if no remaining cr, crlf or lf can be found, return null + if (start > array.length) return null; + 
int pos = indexOf(start, array, serverCore.crlf); nextPos = pos + 2; + if (pos < 0) {pos = indexOf(start, array, new byte[] {serverCore.cr}); nextPos = pos + 1;} + if (pos < 0) {pos = indexOf(start, array, new byte[] {serverCore.lf}); nextPos = pos + 1;} + if (pos < 0) {nextPos = start; return null;} + byte[] result = new byte[pos - start]; + java.lang.System.arraycopy(array, start, result, 0, pos - start); + return result; + } + + public static int indexOf(int start, byte[] array, byte[] pattern) { + // return a position of a pattern in an array + if (start > array.length - pattern.length) return -1; + if (pattern.length == 0) return start; + int i; + for (int pos = start; pos <= array.length - pattern.length; pos++) + if ((array[pos] == pattern[0]) && (equals(array, pos, pattern, 0, pattern.length))) + return pos; + return -1; + } + + public static boolean equals(byte[] a, int aoff, byte[] b, int boff, int len) { + //System.out.println("equals: a = " + new String(a) + ", aoff = " + aoff + ", b = " + new String(b) + ", boff = " + boff + ", length = " + len); + if ((aoff + len > a.length) || (boff + len > b.length)) return false; + for (int i = 0; i < len; i++) if (a[aoff + i] != b[boff + i]) return false; + //System.out.println("TRUE!"); + return true; + } + + public Object clone() { + return new httpd(this.switchboard, this.fileHandler, this.proxyHandler); + } + + public static boolean isTextMime(String mime, Set whitelist) { + if (whitelist.contains(mime)) return true; + // some mime-types are given as "text/html; charset=...", so look for ";" + if (mime.length() == 0) return false; + int pos = mime.indexOf(';'); + if (pos < 0) return false; + return whitelist.contains(mime.substring(0, pos)); + } + +} + +/* +### +### Messages of the Server +### + +# success Messages +HTTPStatus200 = OK; The URL was found. It contents follows. +HTTPStatus201 = Created; A URL was created in response to a POST. 
+HTTPStatus202 = Accepted; The request was accepted for processing later. +HTTPStatus203 = Non-Authoritative; The information here is unofficial. +HTTPStatus204 = No Response; The request is successful, but there is no data to send. + +# redirection +HTTPStatus300 = Moved; The URL has permanently moved to a new location. +HTTPStatus301 = Found; The URL can be temporarily found at a new location. + +# client errors +HTTPStatus400 = Bad Request; Syntax error in the request. +HTTPStatus401 = Unauthorized; The client is not authorized to access this web page. +HTTPStatus402 = Payment Required; A payment is required to access this web page. +HTTPStatus403 = Forbidden; This URL is forbidden. No authorization is required, it won't help. +HTTPStatus404 = Not Found; This page is not on the server. + +# server errors +HTTPStatus500 = Internal Error; The server encountered an unexpected error. +HTTPStatus501 = Not Implemented; The client requested an unimplemented feature. +HTTPStatus502 = Service Overloaded; The server reached the maximum number of connections. +HTTPStatus503 = Gateway timeout; Fetching data from remote service failed. +*/ diff --git a/source/de/anomic/http/httpdAbstractHandler.java b/source/de/anomic/http/httpdAbstractHandler.java new file mode 100644 index 000000000..1f1c6be91 --- /dev/null +++ b/source/de/anomic/http/httpdAbstractHandler.java @@ -0,0 +1,74 @@ +// httpdAbstractHandler.java +// ----------------------- +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2004 +// last major change: 25.10.2004 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. 
+// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. +// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. + +/* + Documentation: + the servlet interface + an actual servlet for the AnomicHTTPD must provide a class that implements + this interface. The resulting class is then placed in a folder that contains + all servlets and is configured in the httpd.conf configuration file. 
+ servlet classes in that directory are then automatically selected as CGI + extensions to the server. + The core functionality of file serving is also implemented as servlet. +*/ + +package de.anomic.http; + +import java.io.*; +import java.util.*; +import java.text.*; + +public abstract class httpdAbstractHandler { + + // static tools + + private static int fileCounter = 0; // for unique file names + + private static SimpleDateFormat DateFileNameFormatter = + new SimpleDateFormat("yyyyMMddHHmmss"); + + protected static String uniqueDateString() { + String c = "" + fileCounter; + fileCounter++; if (fileCounter>9999) fileCounter = 0; + while (c.length() < 4) { c = "0" + c; } + return "FILE" + DateFileNameFormatter.format(httpc.nowDate()) + c; + } + +} diff --git a/source/de/anomic/http/httpdFileHandler.java b/source/de/anomic/http/httpdFileHandler.java new file mode 100644 index 000000000..32d1fdd55 --- /dev/null +++ b/source/de/anomic/http/httpdFileHandler.java @@ -0,0 +1,481 @@ +// httpdFileHandler.java +// ----------------------- +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2004 +// last change: 22.06.2004 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. 
+// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. +// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. + + +/* + Class documentation: + this class provides a file servlet and CGI interface + for the httpd server. + Whenever this server is addressed to load a local file, + this class searches for the file in the local path as + configured in the setting property 'rootPath' + The servlet loads the file and returns it to the client. + Every file can also act as an template for the built-in + CGI interface. There is no specific path for CGI functions. 
+ CGI functionality is triggered, if for the file to-be-served + 'template.html' also a file 'template.class' exists. Then, + the class file is called with the GET/POST properties that + are attached to the http call. + Possible variable hand-over are: + - form method GET + - form method POST, enctype text/plain + - form method POST, enctype multipart/form-data + The class that creates the CGI respond must have at least one + static method of the form + public static java.util.Hashtable respond(java.util.HashMap, serverSwitch) + In the HashMap, the GET/POST variables are handed over. + The return value is a Property object that contains replacement + key/value pairs for the patterns in the template file. + The templates must have the form + either '#['']#' for single attributes, or + '#{''}#' and '#{/''}#' for enumerations of + values '#['']#'. + A single value in repetitions/enumerations in the template has + the property key '_''_' + Please see also the example files 'test.html' and 'test.java' +*/ + +package de.anomic.http; + +import java.io.*; +import java.util.*; +import java.text.*; +import java.lang.reflect.*; +import de.anomic.server.*; + +public class httpdFileHandler extends httpdAbstractHandler implements httpdHandler { + + // class variables + private Properties mimeTable = null; + private serverClassLoader provider = null; + private File htRootPath = null; + private File htDocsPath = null; + private File htTemplatePath = null; + private HashMap templates = null; + private String[] defaultFiles = null; + + private serverSwitch switchboard; + private String adminAccountBase64MD5; + + public httpdFileHandler(serverSwitch switchboard) { + this.switchboard = switchboard; + + if (mimeTable == null) { + // load the mime table + mimeTable = new Properties(); + String mimeTablePath = switchboard.getConfig("mimeConfig",""); + try { + serverLog.logSystem("HTTPDFiles", "Loading mime mapping file " + mimeTablePath); + mimeTable.load(new FileInputStream(new 
File(switchboard.getRootPath(), mimeTablePath))); + } catch (Exception e) { + serverLog.logError("HTTPDFiles", "ERROR: path to configuration file or configuration invalid\n" + e); + System.exit(1); + } + } + + // create default files array + defaultFiles = switchboard.getConfig("defaultFiles","index.html").split(","); + if (defaultFiles.length == 0) defaultFiles = new String[] {"index.html"}; + + // create a htRootPath: system pages + if (htRootPath == null) { + htRootPath = new File(switchboard.getRootPath(), switchboard.getConfig("htRootPath","htroot")); + if (!(htRootPath.exists())) htRootPath.mkdir(); + } + + // create a htDocsPath: user defined pages + if (htDocsPath == null) { + htDocsPath = new File(switchboard.getRootPath(), switchboard.getConfig("htDocsPath", "htdocs")); + if (!(htDocsPath.exists())) htDocsPath.mkdir(); + } + + // create a htTemplatePath + if (htTemplatePath == null) { + htTemplatePath = new File(switchboard.getRootPath(), switchboard.getConfig("htTemplatePath","htroot/env/templates")); + if (!(htTemplatePath.exists())) htTemplatePath.mkdir(); + } + + if (templates == null) templates = loadTemplates(htTemplatePath); + + // create a class loader + if (provider == null) { + provider = new serverClassLoader(/*this.getClass().getClassLoader()*/); + // debug + /* + Package[] ps = ((cachedClassLoader) provider).packages(); + for (int i = 0; i < ps.length; i++) System.out.println("PACKAGE IN PROVIDER: " + ps[i].toString()); + */ + } + adminAccountBase64MD5 = null; + + serverLog.logSystem("HTTPDFileHandler", "File Handler Initialized"); + } + + private void respondHeader(OutputStream out, int retcode, + String conttype, long contlength, + Date moddate, Date expires, + String cookie) throws IOException { + try { + out.write(("HTTP/1.1 " + retcode + " OK\r\n").getBytes()); + out.write(("Server: AnomicHTTPD (www.anomic.de)\r\n").getBytes()); + out.write(("Date: " + httpc.dateString(httpc.nowDate()) + "\r\n").getBytes()); + if (expires != null) 
out.write(("Expires: " + httpc.dateString(expires) + "\r\n").getBytes()); + out.write(("Content-type: " + conttype /* "image/gif", "text/html" */ + "\r\n").getBytes()); + out.write(("Last-modified: " + httpc.dateString(moddate) + "\r\n").getBytes()); + out.write(("Content-length: " + contlength +"\r\n").getBytes()); + out.write(("Pragma: no-cache\r\n").getBytes()); + // out.write(("Accept-ranges: bytes\r\n").getBytes()); + if (cookie != null) out.write(("Set-Cookie: " + cookie + "\r\n").getBytes()); + out.write(("\r\n").getBytes()); + out.flush(); + } catch (Exception e) { + // any interruption may be caused be network error or because the user has closed + // the windows during transmission. We simply pass it as IOException + throw new IOException(e.getMessage()); + } + } + + private void textMessage(OutputStream out, int retcode, String body) throws IOException { + respondHeader(out, retcode, "text/plain", body.length(), httpc.nowDate(), null, null); + out.write(body.getBytes()); + out.flush(); + } + + public void doGet(Properties conProp, httpHeader requestHeader, OutputStream response) throws IOException { + doResponse(conProp, requestHeader, response, null); + } + + public void doHead(Properties conProp, httpHeader requestHeader, OutputStream response) throws IOException { + doResponse(conProp, requestHeader, response, null); + } + + public void doPost(Properties conProp, httpHeader requestHeader, OutputStream response, PushbackInputStream body) throws IOException { + doResponse(conProp, requestHeader, response, body); + } + + public void doResponse(Properties conProp, httpHeader requestHeader, OutputStream out, PushbackInputStream body) throws IOException { + + String userAgent = (String) requestHeader.get("USER-AGENT"); + if (userAgent == null) userAgent = ""; + userAgent = userAgent.trim().toLowerCase(); + //userAgent = "portalmmm n400i"; // debug + //boolean iMode = (userAgent.startsWith("portalmmm")); + //if (iMode) System.out.println("DETECTED IMODE"); + 
+ //System.out.println("HTTPD-REQUEST FROM CLIENT: " + userAgent); // DEBUG + + // prepare response + String method = conProp.getProperty("METHOD"); + String path = conProp.getProperty("PATH"); + String argsString = conProp.getProperty("ARGS"); // is null if no args were given + + // check hack attacks in path + if (path.indexOf("..") >= 0) { + out.write(("HTTP/1.0 403 bad path\r\n").getBytes()); + out.write(("\r\n").getBytes()); + out.flush(); + return; + } + + // check permission/granted access + if ((path.endsWith("_p.html")) && + ((adminAccountBase64MD5 = switchboard.getConfig("adminAccountBase64MD5", "")).length() != 0)) { + // authentication required + String auth = (String) requestHeader.get("Authorization"); + if (auth == null) { + // no authorization given in response. Ask for that + out.write(("HTTP/1.1 401 log-in required\r\n").getBytes()); + out.write(("WWW-Authenticate: Basic realm=\"admin log-in\"\r\n").getBytes()); + out.write(("\r\n").getBytes()); + out.flush(); + return; + } else if (!(adminAccountBase64MD5.equals(serverCodings.standardCoder.encodeMD5Hex(auth.trim().substring(6))))) { + // a wrong authentication was given. 
Ask again + serverLog.logInfo("HTTPD", "Wrong log-in for account 'admin' in http file handler for path '" + path + "' from host '" + conProp.getProperty("CLIENTIP", "unknown-IP") + "'"); + try {Thread.currentThread().sleep(3000);} catch (InterruptedException e) {} // add a delay to make brute-force harder + out.write(("HTTP/1.1 401 log-in required\r\n").getBytes()); + out.write(("WWW-Authenticate: Basic realm=\"admin log-in\"\r\n").getBytes()); + out.write(("\r\n").getBytes()); + out.flush(); + return; + } + } + + // parse arguments + serverObjects args = new serverObjects(); + int argc; + if (argsString == null) { + // no args here, maybe a POST with multipart extension + int length; + //System.out.println("HEADER: " + requestHeader.toString()); // DEBUG + if ((method.equals("POST")) && + (requestHeader.containsKey("CONTENT-LENGTH"))) { + // if its a POST, it can be either multipart or as args in the body + length = Integer.parseInt((String) requestHeader.get("CONTENT-LENGTH")); + if ((requestHeader.containsKey("CONTENT-TYPE")) && + (((String) requestHeader.get("CONTENT-TYPE")).toLowerCase().startsWith("multipart"))) { + // parse multipart + HashMap files = httpd.parseMultipart(requestHeader, args, body, length); + // integrate these files into the args + if (files != null) { + Iterator fit = files.entrySet().iterator(); + Map.Entry entry; + while (fit.hasNext()) { + entry = (Map.Entry) fit.next(); + args.put(((String) entry.getKey()) + "$file", entry.getValue()); + } + } + argc = Integer.parseInt((String) requestHeader.get("ARGC")); + } else { + // parse args in body + argc = httpd.parseArgs(args, body, length); + } + } else { + // no args + argsString = null; + args = null; + argc = 0; + } + } else { + // simple args in URL (stuff after the "?") + argc = httpd.parseArgs(args, argsString); + } + + //if (args != null) System.out.println("***ARGS=" + args.toString()); // DEBUG + + // check for cross site scripting - attacks in request arguments + if (argc > 0) { + 
// check all values for occurrences of script values + Enumeration e = args.elements(); // enumeration of values + Object val; + while (e.hasMoreElements()) { + val = e.nextElement(); + if ((val != null) && (val instanceof String) && (((String) val).indexOf("= 0)) { + // deny request + out.write(("HTTP/1.0 403 bad post values\r\n").getBytes()); + out.write(("\r\n").getBytes()); + out.flush(); + return; + } + } + } + + // we are finished with parsing + // the result of value hand-over is in args and argc + if (path.length() == 0) { + textMessage(out, 400, "Bad Request\r\n"); + out.flush(); + return; + } + + Date filedate; + long filelength; + File rc = null; + try { + // locate the file + if (!(path.startsWith("/"))) { + // attach leading slash + path = "/" + path; + } + + // find defaults + File file = null; + String testpath = path; + if (path.endsWith("/")) { + // attach default file name + for (int i = 0; i < defaultFiles.length; i++) { + testpath = path + defaultFiles[i]; + file = new File(htRootPath, testpath); + if (!(file.exists())) file = new File(htDocsPath, testpath); + if (file.exists()) {path = testpath; break;} + } + } else { + file = new File(htRootPath, path); + if (!(file.exists())) file = new File(htDocsPath, path); + } + + /* + if ((iMode) && (path.endsWith(".html"))) { + file = new File(htRootPath, path.substring(0, path.length() - 4) + "ihtml"); + if (!(file.exists())) file = new File(htDocsPath, path.substring(0, path.length() - 4) + "ihtml"); + if (!(file.exists())) file = new File(htRootPath, path); + if (!(file.exists())) file = new File(htDocsPath, path); + //System.out.println("IMODE PATH = " + file.toString()); + } + */ + + if ((file.exists()) && (file.canRead())) { + // we have found a file that can be written to the client + // if this file uses templates, then we use the template + // re-write - method to create an result + serverObjects tp = new serverObjects(); + filedate = new Date(file.lastModified()); + String mimeType = 
mimeTable.getProperty(conProp.getProperty("EXT",""),"text/html"); + byte[] result; + if (path.endsWith("html") || path.endsWith("xml") || path.endsWith("rss") || path.endsWith("csv")) { + rc = rewriteClassFile(file); + if (rc != null) { + // CGI-class: call the class to create a property for rewriting + try { + requestHeader.put("CLIENTIP", conProp.getProperty("CLIENTIP")); + requestHeader.put("PATH", path); + // in case that there are no args given, args = null or empty hashmap + tp = (serverObjects) rewriteMethod(rc).invoke(null, new Object[] {requestHeader, args, switchboard}); + // if no args given , then tp will be an empty Hashtable object (not null) + if (tp == null) tp = new serverObjects(); + // check if the servlets requests authentification + if (tp.containsKey("AUTHENTICATE")) { + String account = tp.get("AUTHENTICATE", ""); + out.write(("HTTP/1.1 401 log-in required\r\n").getBytes()); + out.write(("WWW-Authenticate: Basic realm=\"" + account + "\"\r\n").getBytes()); + out.write(("\r\n").getBytes()); + out.flush(); + return; + } + // add the application version to every rewrite table + tp.put("version", switchboard.getConfig("version", "")); + tp.put("uptime", ((System.currentTimeMillis() - Long.parseLong(switchboard.getConfig("startupTime","0"))) / 1000) / 60); // uptime in minutes + //System.out.println("respond props: " + ((tp == null) ? 
"null" : tp.toString())); // debug + } catch (InvocationTargetException e) { + System.out.println("INTERNAL ERROR: " + e.toString() + ":" + + e.getMessage() + + " target exception at " + rc + ": " + + e.getTargetException().toString() + ":" + + e.getTargetException().getMessage()); + e.printStackTrace(); + rc = null; + } + filedate = new Date(System.currentTimeMillis()); + } + // read templates + tp.putAll(templates); + // rewrite the file + ByteArrayOutputStream o = new ByteArrayOutputStream(); + FileInputStream fis = new FileInputStream(file); + httpTemplate.writeTemplate(fis, o, tp, "-UNRESOLVED_PATTERN-".getBytes()); + o.close(); + result = o.toByteArray(); + } else { // no html + // write the file to the client + result = serverFileUtils.read(file); + } + // check mime type again using the result array: these are 'magics' + if (serverByteBuffer.equals(result, 1, "PNG".getBytes())) mimeType = mimeTable.getProperty("png","text/html"); + else if (serverByteBuffer.equals(result, 0, "GIF89".getBytes())) mimeType = mimeTable.getProperty("gif","text/html"); + else if (serverByteBuffer.equals(result, 6, "JFIF".getBytes())) mimeType = mimeTable.getProperty("jpg","text/html"); + //System.out.print("MAGIC:"); for (int i = 0; i < 10; i++) System.out.print(Integer.toHexString((int) result[i]) + ","); System.out.println(); + // write the array to the client + respondHeader(out, 200, mimeType, result.length, filedate, null, null); + Thread.currentThread().sleep(200); // this solved the message problem (!!) 
+ serverFileUtils.write(result, out); + } else { + textMessage(out, 404, "404 File not Found\r\n"); // would be a possible vuln to return original the original path + } + } catch (Exception e) { + //textMessage(out, 503, "Exception with query: " + path + "; '" + e.toString() + ":" + e.getMessage() + "'\r\n"); + //e.printStackTrace(); + System.out.println("ERROR: Exception with query: " + path + "; '" + e.toString() + ":" + e.getMessage() + "'\r\n"); + } + out.flush(); + if (!(requestHeader.get("Connection", "close").equals("keep-alive"))) { + // wait a little time until everything closes so that clients can read from the streams/sockets + try {Thread.currentThread().sleep(1000);} catch (InterruptedException e) {} + } + } + + private static HashMap loadTemplates(File path) { + // reads all templates from a path + // we use only the folder from the given file path + HashMap result = new HashMap(); + if (path == null) return result; + if (!(path.isDirectory())) path = path.getParentFile(); + if ((path == null) || (!(path.isDirectory()))) return result; + String[] templates = path.list(); + int c; + for (int i = 0; i < templates.length; i++) { + if (templates[i].endsWith(".template")) try { + //System.out.println("TEMPLATE " + templates[i].substring(0, templates[i].length() - 9) + ": " + new String(buf, 0, c)); + result.put(templates[i].substring(0, templates[i].length() - 9), + new String(serverFileUtils.read(new File(path, templates[i])))); + } catch (Exception e) {} + } + return result; + } + + private File rewriteClassFile(File template) { + try { + String f = template.getCanonicalPath(); + int p = f.lastIndexOf("."); + if (p < 0) return null; + f = f.substring(0, p) + ".class"; + //System.out.println("constructed class path " + f); + File cf = new File(f); + if (cf.exists()) return cf; + return null; + } catch (IOException e) { + return null; + } + } + + private Method rewriteMethod(File classFile) { + Method m = null; + // now make a class out of the stream + try 
{ + //System.out.println("**DEBUG** loading class file " + classFile); + Class c = provider.loadClass(classFile); + Class[] params = new Class[] { + Class.forName("de.anomic.http.httpHeader"), + Class.forName("de.anomic.server.serverObjects"), + Class.forName("de.anomic.server.serverSwitch")}; + m = c.getMethod("respond", params); + } catch (ClassNotFoundException e) { + System.out.println("INTERNAL ERROR: class " + classFile + " is missing:" + e.getMessage()); + } catch (NoSuchMethodException e) { + System.out.println("INTERNAL ERROR: method respond not found in class " + classFile + ": " + e.getMessage()); + } + //System.out.println("found method: " + m.toString()); + return m; + } + + public void doConnect(Properties conProp, httpHeader requestHeader, InputStream clientIn, OutputStream clientOut) { + throw new UnsupportedOperationException(); + } + +} diff --git a/source/de/anomic/http/httpdHandler.java b/source/de/anomic/http/httpdHandler.java new file mode 100644 index 000000000..a63c98a02 --- /dev/null +++ b/source/de/anomic/http/httpdHandler.java @@ -0,0 +1,154 @@ +// httpdHandler.java +// ----------------------- +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2004 +// last major change: 03.01.2004 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. 
+// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. +// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. + +/* + Documentation: + the servlet interface + an actual servlet for the AnomicHTTPD must provide a class that implements + this interface. The resulting class is then placed in a folder that contains + all servlets and is configured in the httpd.conf configuration file. + servlet classes in that directory are then automatically selected as CGI + extensions to the server. + The core functionality of file serving is also implemented as servlet. 
+*/ + +package de.anomic.http; + +import java.io.*; +import java.util.*; + +public interface httpdHandler { + + void doGet(Properties conProp, httpHeader header, OutputStream response) throws IOException; + /* + The GET method means retrieve whatever information (in the form of an + entity) is identified by the Request-URI. If the Request-URI refers + to a data-producing process, it is the produced data which shall be + returned as the entity in the response and not the source text of the + process, unless that text happens to be the output of the process. + + The semantics of the GET method change to a "conditional GET" if the + request message includes an If-Modified-Since, If-Unmodified-Since, + If-Match, If-None-Match, or If-Range header field. A conditional GET + method requests that the entity be transferred only under the + circumstances described by the conditional header field(s). The + conditional GET method is intended to reduce unnecessary network + usage by allowing cached entities to be refreshed without requiring + multiple requests or transferring data already held by the client. + + The semantics of the GET method change to a "partial GET" if the + request message includes a Range header field. A partial GET requests + that only part of the entity be transferred, as described in section + 14.35. The partial GET method is intended to reduce unnecessary + network usage by allowing partially-retrieved entities to be + completed without transferring data already held by the client. + */ + + void doHead(Properties conProp, httpHeader header, OutputStream response) throws IOException; + /* + The HEAD method is identical to GET except that the server MUST NOT + return a message-body in the response. The metainformation contained + in the HTTP headers in response to a HEAD request SHOULD be identical + to the information sent in response to a GET request. 
This method can + be used for obtaining metainformation about the entity implied by the + request without transferring the entity-body itself. This method is + often used for testing hypertext links for validity, accessibility, + and recent modification. + + The response to a HEAD request MAY be cacheable in the sense that the + information contained in the response MAY be used to update a + previously cached entity from that resource. If the new field values + indicate that the cached entity differs from the current entity (as + would be indicated by a change in Content-Length, Content-MD5, ETag + or Last-Modified), then the cache MUST treat the cache entry as + stale. + */ + + void doPost(Properties conProp, httpHeader header, OutputStream response, PushbackInputStream body) throws IOException; + /* + The POST method is used to request that the origin server accept the + entity enclosed in the request as a new subordinate of the resource + identified by the Request-URI in the Request-Line. POST is designed + to allow a uniform method to cover the following functions: + + - Annotation of existing resources; + + - Posting a message to a bulletin board, newsgroup, mailing list, + or similar group of articles; + + - Providing a block of data, such as the result of submitting a + form, to a data-handling process; + + - Extending a database through an append operation. + + The actual function performed by the POST method is determined by the + server and is usually dependent on the Request-URI. The posted entity + is subordinate to that URI in the same way that a file is subordinate + to a directory containing it, a news article is subordinate to a + newsgroup to which it is posted, or a record is subordinate to a + database. + + The action performed by the POST method might not result in a + resource that can be identified by a URI. 
In this case, either 200 + (OK) or 204 (No Content) is the appropriate response status, + depending on whether or not the response includes an entity that + describes the result. + + If a resource has been created on the origin server, the response + SHOULD be 201 (Created) and contain an entity which describes the + status of the request and refers to the new resource, and a Location + header (see section 14.30). + + Responses to this method are not cacheable, unless the response + includes appropriate Cache-Control or Expires header fields. However, + the 303 (See Other) response can be used to direct the user agent to + retrieve a cacheable resource. + + POST requests MUST obey the message transmission requirements set out + in section 8.2. + */ + + void doConnect(Properties conProp, httpHeader requestHeader, InputStream clientIn, OutputStream clientOut) throws IOException; + /* this is only needed for https proxies. http daemons should throw a + * UnsupportedOperationException + */ + + //public long getLastModified(Properties conProp); +} diff --git a/source/de/anomic/http/httpdProxyHandler.java b/source/de/anomic/http/httpdProxyHandler.java new file mode 100644 index 000000000..7cbd696ee --- /dev/null +++ b/source/de/anomic/http/httpdProxyHandler.java @@ -0,0 +1,1060 @@ +// httpdProxyHandler.java +// ----------------------- +// part of YACY +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2004 +// last major change: 10.05.2004 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. +// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. + +// Contributions: +// [AS] Alexander Schier: Blacklist (404 response for AGIS hosts) +// [TL] Timo Leise: url-wildcards for blacklists + +/* + Class documentation: + This class is a servlet to the httpd daemon. It is accessed each time + an URL in a GET, HEAD or POST command contains the whole host information + or a host is given in the header host field of an HTTP/1.0 / HTTP/1.1 + command. + Transparency is maintained, whenever appropriate. 
We change header + atributes if necessary for the indexing mechanism; i.e. we do not + support gzip-ed encoding. We also do not support unrealistic + 'expires' values that would force a cache to be flushed immediately + pragma non-cache attributes are supported +*/ + + +package de.anomic.http; + +import java.io.*; +import java.net.*; +import java.util.*; +import java.text.*; +import de.anomic.htmlFilter.*; +import de.anomic.server.*; +import de.anomic.tools.*; +import de.anomic.yacy.*; +import de.anomic.http.*; +import de.anomic.plasma.*; + + +public class httpdProxyHandler extends httpdAbstractHandler implements httpdHandler { + + // static variables + // can only be instantiated upon first instantiation of this class object + private static plasmaSwitchboard switchboard = null; + private static plasmaHTCache cacheManager = null; + public static serverLog log; + public static HashSet yellowList = null; + public static TreeMap blackListURLs = null; + private static int timeout = 30000; + private static boolean yacyTrigger = true; + public static boolean remoteProxyUse = false; + public static String remoteProxyHost = ""; + public static int remoteProxyPort = -1; + public static String remoteProxyNoProxy = ""; + public static String[] remoteProxyNoProxyPatterns = null; + private static HashSet remoteProxyAllowProxySet = new HashSet(); + private static HashSet remoteProxyDisallowProxySet = new HashSet(); + private static htmlFilterTransformer transformer = null; + public static String userAgent = "yacy (" + httpc.systemOST +") yacy.net"; + private File htRootPath = null; + + // class methods + public httpdProxyHandler(serverSwitch sb) { + if (switchboard == null) { + switchboard = (plasmaSwitchboard) sb; + cacheManager = switchboard.getCacheManager(); + + // load remote proxy data + remoteProxyHost = switchboard.getConfig("remoteProxyHost",""); + try { + remoteProxyPort = Integer.parseInt(switchboard.getConfig("remoteProxyPort","3128")); + } catch 
(NumberFormatException e) { + remoteProxyPort = 3128; + } + remoteProxyUse = switchboard.getConfig("remoteProxyUse","false").equals("true"); + remoteProxyNoProxy = switchboard.getConfig("remoteProxyNoProxy",""); + remoteProxyAllowProxySet = new HashSet(); + remoteProxyDisallowProxySet = new HashSet(); + remoteProxyNoProxyPatterns = remoteProxyNoProxy.split(","); + + // set loglevel + int loglevel = Integer.parseInt(switchboard.getConfig("proxyLoglevel", "2")); + log = new serverLog("HTTPDProxy", loglevel); + + // set timeout + timeout = Integer.parseInt(switchboard.getConfig("clientTimeout", "10000")); + + // create a htRootPath: system pages + if (htRootPath == null) { + htRootPath = new File(switchboard.getRootPath(), switchboard.getConfig("htRootPath","htroot")); + if (!(htRootPath.exists())) htRootPath.mkdir(); + } + + // load a transformer + try { + ClassLoader cp = new serverClassLoader(this.getClass().getClassLoader()); + Class transformerClass = cp.loadClass(switchboard.getConfig("pageTransformerClass", "")); + transformer = (htmlFilterTransformer) transformerClass.newInstance(); + transformer.init(switchboard.getConfig("pageTransformerArg", "")); // this is usually the blueList + } catch (Exception e) { + transformer = null; + } + + String f; + // load the yellow-list + f = switchboard.getConfig("proxyYellowList", null); + if (f != null) yellowList = loadSet("yellow", f); else yellowList = new HashSet(); + + // load the black-list / inspired by [AS] + f = switchboard.getConfig("proxyBlackListsActive", null); + if (f != null) blackListURLs = loadBlacklist("black", f, "/"); else blackListURLs = new TreeMap(); + log.logSystem("Proxy Handler Initialized"); + } + }
+
+
+    /**
+     * Reads a list file into a set. Each line is trimmed; empty lines and
+     * lines starting with "#" are ignored; all other lines are added in
+     * lower case.
+     *
+     * @param setname  name of the set, used only in log messages
+     * @param filename path of the file to read
+     * @return the entries read; if the file cannot be read (completely), the
+     *         entries read up to that point (possibly none), never null
+     */
+    private static HashSet loadSet(String setname, String filename) {
+        HashSet set = new HashSet();
+        BufferedReader br = null;
+        try {
+            br = new BufferedReader(new InputStreamReader(new FileInputStream(filename)));
+            String line;
+            while ((line = br.readLine()) != null) {
+                line = line.trim();
+                if ((line.length() > 0) && (!(line.startsWith("#")))) set.add(line.toLowerCase());
+            }
+            serverLog.logInfo("PROXY", "read " + setname + " set from file " + filename);
+        } catch (IOException e) {
+            // best effort: keep what was read so far, but do not fail silently
+            serverLog.logError("PROXY", "cannot read " + setname + " set from file " + filename + ": " + e.getMessage());
+        } finally {
+            // close the reader on every path, also if readLine() failed
+            if (br != null) try { br.close(); } catch (IOException e) {}
+        }
+        return set;
+    }
+
+    /**
+     * Reads a key/value list file into a sorted map. Each line is trimmed;
+     * empty lines, lines starting with "#" and lines where the separator is
+     * missing or at the first position are ignored. The part before the first
+     * separator becomes the key (trimmed, lower case), the part after it the
+     * value (trimmed).
+     *
+     * @param mapname  name of the map, used only in log messages
+     * @param filename path of the file to read
+     * @param sep      separator between key and value
+     * @return the entries read; if the file cannot be read (completely), the
+     *         entries read up to that point (possibly none), never null
+     */
+    private static TreeMap loadMap(String mapname, String filename, String sep) {
+        TreeMap map = new TreeMap();
+        BufferedReader br = null;
+        try {
+            br = new BufferedReader(new InputStreamReader(new FileInputStream(filename)));
+            String line;
+            int pos;
+            while ((line = br.readLine()) != null) {
+                line = line.trim();
+                if ((line.length() > 0) && (!(line.startsWith("#"))) && ((pos = line.indexOf(sep)) > 0))
+                    map.put(line.substring(0, pos).trim().toLowerCase(), line.substring(pos + sep.length()).trim());
+            }
+            serverLog.logInfo("PROXY", "read " + mapname + " map from file " + filename);
+        } catch (IOException e) {
+            // best effort: keep what was read so far, but do not fail silently
+            serverLog.logError("PROXY", "cannot read " + mapname + " map from file " + filename + ": " + e.getMessage());
+        } finally {
+            // close the reader on every path, also if readLine() failed
+            if (br != null) try { br.close(); } catch (IOException e) {}
+        }
+        return map;
+    }
+
+    /**
+     * Loads one or more blacklist files and merges them into a single map.
+     * The file names are resolved against the configured "listsPath" folder
+     * (default "DATA/LISTS") below the application root path.
+     *
+     * @param mapname   name of the list, used only in log messages
+     * @param filenames comma-separated list of file names; null or empty
+     *                  entries are ignored
+     * @param sep       separator between host and path pattern within a line
+     * @return the merged entries of all files; empty if the switchboard is
+     *         not initialized yet or no file could be read, never null
+     */
+    public static TreeMap loadBlacklist(String mapname, String filenames, String sep) {
+        TreeMap map = new TreeMap();
+        if (switchboard == null) return map; // not initialized yet
+        if (filenames == null) return map;
+        File listsPath = new File(switchboard.getRootPath(), switchboard.getConfig("listsPath", "DATA/LISTS"));
+        String[] filenamesarray = filenames.split(",");
+        for (int i = 0; i < filenamesarray.length; i++) {
+            String name = filenamesarray[i].trim();
+            // an empty entry would resolve to the lists folder itself, which is not a readable file
+            if (name.length() == 0) continue;
+            map.putAll(loadMap(mapname, (new File(listsPath, name)).toString(), sep));
+        }
+        return map;
+    }
+
+    /**
+     * Extracts the second-to-last dot-separated label of a host name:
+     * the part after the last "." is cut off, and from the remainder only
+     * the part after its last "." is kept. Examples: "www.example.com" and
+     * "example.com" both yield "example"; a host without "." is returned
+     * unchanged.
+     *
+     * @param host the host name
+     * @return the label as described above
+     */
+    private static String domain(String host) {
+        String domain = host;
+        int pos = domain.lastIndexOf(".");
+        if (pos >= 0) {
+            // truncate from last part
+            domain = domain.substring(0, pos);
+            pos = domain.lastIndexOf(".");
+            if (pos >= 0) {
+                // truncate from first part
+                domain = domain.substring(pos + 1);
+            }
+        }
+        return domain;
+    }
+
+ private boolean blacklistedURL(String hostlow, String path) { + if (blackListURLs == null) return false; + + int index = 0; + + // [TL] While "."
are found within the string + while ((index = hostlow.indexOf(".", index + 1)) != -1) { + if (blackListURLs.get(hostlow.substring(0, index + 1) + "*") != null) { + //System.out.println("Host blocked: " + hostlow.substring(0, index+1) + "*"); + return true; + } + } + + index = hostlow.length(); + while ((index = hostlow.lastIndexOf(".", index - 1)) != -1) { + if (blackListURLs.get("*" + hostlow.substring(index, hostlow.length())) != null) { + //System.out.println("Host blocked: " + "*" + hostlow.substring(index, host.length())); + return true; + } + } + + String pp = ""; // path-pattern + return (((pp = (String) blackListURLs.get(hostlow)) != null) && + ((pp.equals("*")) || (path.substring(1).matches(pp)))); + } + + public void handleOutgoingCookies(httpHeader requestHeader, String targethost, String clienthost) { + // request header may have double-entries: they are accumulated in one entry + // by the httpd and separated by a "#" in the value field + /* + The syntax for the header is: + + cookie = "Cookie:" cookie-version + 1*((";" | ",") cookie-value) + cookie-value = NAME "=" VALUE [";" path] [";" domain] + cookie-version = "$Version" "=" value + NAME = attr + VALUE = value + path = "$Path" "=" value + domain = "$Domain" "=" value + */ + if (requestHeader.containsKey("Cookie")) { + Object[] entry = new Object[]{new Date(), clienthost, requestHeader.get("Cookie")}; + switchboard.outgoingCookies.put(targethost, entry); + } + } + + public void handleIncomingCookies(httpHeader respondHeader, String serverhost, String targetclient) { + // respond header may have double-entries: they are accumulated in one entry + // by the httpc and separated by a "#" in the value field + /* + The syntax for the Set-Cookie response header is + + set-cookie = "Set-Cookie:" cookies + cookies = 1#cookie + cookie = NAME "=" VALUE *(";" cookie-av) + NAME = attr + VALUE = value + cookie-av = "Comment" "=" value + | "Domain" "=" value + | "Max-Age" "=" value + | "Path" "=" value + | 
"Secure" + | "Version" "=" 1*DIGIT + */ + if (respondHeader.containsKey("Set-Cookie")) { + Object[] entry = new Object[]{new Date(), targetclient, respondHeader.get("Set-Cookie")}; + switchboard.incomingCookies.put(serverhost, entry); + } + } + + public void doGet(Properties conProp, httpHeader requestHeader, OutputStream respond) throws IOException { + // prepare response + // conProp : a collection of properties about the connection, like URL + // requestHeader : The header lines of the connection from the request + // args : the argument values of a connection, like &-values in GET and values within boundaries in POST + // files : files within POST boundaries, same key as in args + + if (yacyTrigger) de.anomic.yacy.yacyCore.triggerOnlineAction(); + + Date requestDate = new Date(); // remember the time... + String method = conProp.getProperty("METHOD"); + String host = conProp.getProperty("HOST"); + String path = conProp.getProperty("PATH"); // always starts with leading '/' + String args = conProp.getProperty("ARGS"); // may be null if no args were given + String ip = conProp.getProperty("CLIENTIP"); // the ip from the connecting peer + + int port; + int pos; + + if ((pos = host.indexOf(":")) < 0) { + port = 80; + } else { + port = Integer.parseInt(host.substring(pos + 1)); + host = host.substring(0, pos); + } + + String ext; + if ((pos = path.lastIndexOf('.')) < 0) { + ext = ""; + } else { + ext = path.substring(pos + 1).toLowerCase(); + } + + URL url = null; + try { + if (args == null) + url = new URL("http", host, port, path); + else + url = new URL("http", host, port, path + "?" 
+ args); + } catch (MalformedURLException e) { + serverLog.logError("PROXY", "ERROR: internal error with url generation: host=" + + host + ", port=" + port + ", path=" + path + ", args=" + args); + url = null; + } + //System.out.println("GENERATED URL:" + url.toString()); // debug + + // check the blacklist + // blacklist idea inspired by [AS]: + // respond a 404 for all AGIS ("all you get is shit") servers + String hostlow = host.toLowerCase(); + if (blacklistedURL(hostlow, path)) { + try { + respondHeader(respond,"404 Not Found (AGIS)", new httpHeader(null)); + respond.write(("404 (generated): URL '" + hostlow + "' blocked by yacy proxy (blacklisted)\r\n").getBytes()); + respond.flush(); + serverLog.logInfo("PROXY", "AGIS blocking of host '" + hostlow + "'"); // debug + return; + } catch (Exception ee) {} + } + + // handle outgoing cookies + handleOutgoingCookies(requestHeader, host, ip); + + // set another userAgent, if not yellowlisted + if ((yellowList != null) && (!(yellowList.contains(domain(hostlow))))) { + // change the User-Agent + requestHeader.put("User-Agent", userAgent); + } + + // set a scraper and a htmlFilter + OutputStream hfos = null; + htmlFilterContentScraper scraper = null; + + // resolve yacy and yacyh domains + String yAddress = yacyCore.seedDB.resolveYacyAddress(host); + + // re-calc the url path + String remotePath = (args == null) ? path : (path + "?" 
+ args); // with leading '/' + + // attach possible yacy-sublevel-domain + if ((yAddress != null) && + ((pos = yAddress.indexOf("/")) >= 0) && + (!(remotePath.startsWith("/env"))) // this is the special path, staying always at root-level + ) remotePath = yAddress.substring(pos) + remotePath; + + // decide wether to use a cache entry or connect to the network + File cacheFile = cacheManager.getCachePath(url); + String urlHash = plasmaCrawlLURL.urlHash(url); + httpHeader cachedResponseHeader = null; + boolean cacheExists = ((cacheFile.exists()) && (cacheFile.isFile()) && + ((cachedResponseHeader = cacheManager.getCachedResponse(urlHash)) != null)); + + // why are files unzipped upon arrival? why not zip all files in cache? + // This follows from the following premises + // (a) no file shall be unzip-ed more than once to prevent unnessesary computing time + // (b) old cache entries shall be comparable with refill-entries to detect/distiguish case 3+4 + // (c) the indexing mechanism needs files unzip-ed, a schedule could do that later + // case b and c contradicts, if we use a scheduler, because files in a stale cache would be unzipped + // and the newly arrival would be zipped and would have to be unzipped upon load. But then the + // scheduler is superfluous. Therefore the only reminding case is + // (d) cached files shall be either all zipped or unzipped + // case d contradicts with a, because files need to be unzipped for indexing. Therefore + // the only remaining case is to unzip files right upon load. Thats what we do here. + + // finally use existing cache if appropriate + // here we must decide weather or not to save the data + // to a cache + // we distinguish four CACHE STATE cases: + // 1. cache fill + // 2. cache fresh - no refill + // 3. cache stale - refill - necessary + // 4. 
cache stale - refill - superfluous + // in two of these cases we trigger a scheduler to handle newly arrived files: + // case 1 and case 3 + plasmaHTCache.Entry hpc; + if (cacheExists) { + // we respond on the request by using the cache + + hpc = cacheManager.newEntry(requestDate, 0, url, requestHeader, "200 OK", cachedResponseHeader, null, null, switchboard.defaultProxyProfile); + + if (hpc.shallUseCache()) { + // the cache is fresh + + try { + // replace date field in old header by actual date, this is according to RFC + cachedResponseHeader.put("Date", httpc.dateString(httpc.nowDate())); + + // maybe the content length is missing + if (!(cachedResponseHeader.containsKey("CONTENT-LENGTH"))) + cachedResponseHeader.put("CONTENT-LENGTH", (String) ("" + cacheFile.length())); + + // check if we can send a 304 instead the complete content + if (requestHeader.containsKey("IF-MODIFIED-SINCE")) { + // conditional request: freshness of cache for that condition was already + // checked within shallUseCache(). 
Now send only a 304 response + log.logInfo("CACHE HIT/304 " + cacheFile.toString()); + + // send cached header with replaced date and added length + respondHeader(respond, "304 OK", cachedResponseHeader); // respond with 'not modified' + + } else { + // unconditional request: send content of cache + log.logInfo("CACHE HIT/203 " + cacheFile.toString()); + + // send cached header with replaced date and added length + respondHeader(respond, "203 OK", cachedResponseHeader); // respond with 'non-authoritative' + + // make a transformer + if (((ext == null) || (!(switchboard.extensionBlack.contains(ext)))) && + ((cachedResponseHeader == null) || (httpd.isTextMime(cachedResponseHeader.mime(), switchboard.mimeWhite)))) { + hfos = new htmlFilterOutputStream(respond, null, transformer, (ext.length() == 0)); + } else { + hfos = respond; + } + + // send also the complete body now from the cache + // simply read the file and transfer to out socket + InputStream is = new FileInputStream(cacheFile); + byte[] buffer = new byte[2048]; + int l; + while ((l = is.read(buffer)) > 0) {hfos.write(buffer, 0, l);} + is.close(); + if (hfos instanceof htmlFilterOutputStream) ((htmlFilterOutputStream) hfos).finalize(); + } + // that's it! 
+ } catch (SocketException e) { + // this happens if the client stops loading the file + // we do nothing here + respondError(respond, "111 socket error: " + e.getMessage(), 1, url.toString()); + } + } else { + // the cache is (supposed to be) stale + + // delete the cache + long sizeBeforeDelete = cacheFile.length(); + cacheFile.delete(); + + // take a new file from the server + httpc remote = null; + httpc.response res = null; + + try { + // open the connection + if (yAddress == null) { + remote = newhttpc(host, port, timeout); + } else { + remote = newhttpc(yAddress, timeout); + } + //System.out.println("HEADER: CLIENT TO PROXY = " + requestHeader.toString()); // DEBUG + + // send request + res = remote.GET(remotePath, requestHeader); + long contentLength = res.responseHeader.contentLength(); + + // make a scraper and transformer + if (((ext == null) || (!(switchboard.extensionBlack.contains(ext)))) && + (httpd.isTextMime(res.responseHeader.mime(), switchboard.mimeWhite))) { + scraper = new htmlFilterContentScraper(url); + hfos = new htmlFilterOutputStream(respond, scraper, transformer, (ext.length() == 0)); + if (((htmlFilterOutputStream) hfos).binarySuspect()) { + scraper = null; // forget it, may be rubbish + log.logDebug("Content of " + url + " is probably binary. deleted scraper."); + } + } else { + log.logDebug("Resource " + url + " has wrong extension (" + ext + ") or wrong mime-type (" + res.responseHeader.mime() + "). not scraped"); + scraper = null; + hfos = respond; + } + + // reserver cache entry + hpc = cacheManager.newEntry(requestDate, 0, url, requestHeader, res.status, res.responseHeader, scraper, null, switchboard.defaultProxyProfile); + + // handle incoming cookies + handleIncomingCookies(res.responseHeader, host, ip); + + // request has been placed and result has been returned. 
work off response + try { + respondHeader(respond, res.status, res.responseHeader); + String storeError; + if ((storeError = hpc.shallStoreCache()) == null) { + // we write a new cache entry + if ((contentLength > 0) && // known + (contentLength < 1048576)) // 1 MB + { + byte[] cacheArray = res.writeContent(hfos); + if (hfos instanceof htmlFilterOutputStream) ((htmlFilterOutputStream) hfos).finalize(); + // before we came here we deleted a cache entry + if (sizeBeforeDelete == cacheArray.length) { + cacheArray = null; + hpc.status = plasmaHTCache.CACHE_STALE_RELOAD_BAD; + cacheManager.stackProcess(hpc); // unnecessary update + } else { + hpc.status = plasmaHTCache.CACHE_STALE_RELOAD_GOOD; + cacheManager.stackProcess(hpc, cacheArray); // necessary update, write response header to cache + } + } else { + cacheFile.getParentFile().mkdirs(); + res.writeContent(hfos, cacheFile); + if (hfos instanceof htmlFilterOutputStream) ((htmlFilterOutputStream) hfos).finalize(); + // before we came here we deleted a cache entry + if (sizeBeforeDelete == cacheFile.length()) { + hpc.status = plasmaHTCache.CACHE_STALE_RELOAD_BAD; + cacheManager.stackProcess(hpc); // unnecessary update + } else { + hpc.status = plasmaHTCache.CACHE_STALE_RELOAD_GOOD; + cacheManager.stackProcess(hpc); // necessary update, write response header to cache + } + } + } else { + // no caching + log.logDebug(cacheFile.toString() + " not cached: " + storeError); + res.writeContent(hfos, null); + if (hfos instanceof htmlFilterOutputStream) ((htmlFilterOutputStream) hfos).finalize(); + // before we came here we deleted a cache entry + hpc.status = plasmaHTCache.CACHE_STALE_NO_RELOAD; + cacheManager.stackProcess(hpc); + } + } catch (SocketException e) { + // this may happen if the client suddenly closes its connection + // maybe the user has stopped loading + // in that case, we are not responsible and just forget it + // but we clean the cache also, since it may be only partial + // and most possible corrupted + if 
(cacheFile.exists()) cacheFile.delete(); + } + remote.close(); + } catch (Exception e) { + // this may happen if the targeted host does not exist or anything with the + // remote server was wrong. + // in any case, sending a 404 is appropriate + try { + if ((e.toString().indexOf("unknown host")) > 0) { + respondHeader(respond,"404 unknown host", new httpHeader(null)); + } else { + respondHeader(respond,"404 Not Found", new httpHeader(null)); + respond.write(("Exception occurred:\r\n").getBytes()); + respond.write((e.toString() + "\r\n").getBytes()); + respond.write(("[TRACE: ").getBytes()); + e.printStackTrace(new PrintStream(respond)); + respond.write(("]\r\n").getBytes()); + } + } catch (Exception ee) {} + } + } + } else { + // we take a new file from the net and respond with that + try { + // open the connection + //httpc remote = newhttpc(host, port, timeout); + httpc remote; + if (yAddress == null) { + remote = newhttpc(host, port, timeout); + } else { + remote = newhttpc(yAddress, timeout); + } + //System.out.println("HEADER: CLIENT TO PROXY = " + requestHeader.toString()); // DEBUG + + // send request + httpc.response res = remote.GET(remotePath, requestHeader); + long contentLength = res.responseHeader.contentLength(); + + // make a scraper and transformer + if (((ext == null) || (!(switchboard.extensionBlack.contains(ext)))) && + (httpd.isTextMime(res.responseHeader.mime(), switchboard.mimeWhite))) { + scraper = new htmlFilterContentScraper(url); + hfos = new htmlFilterOutputStream(respond, scraper, transformer, (ext.length() == 0)); + if (((htmlFilterOutputStream) hfos).binarySuspect()) { + scraper = null; // forget it, may be rubbish + log.logDebug("Content of " + url + " is probably binary. deleted scraper."); + } + } else { + log.logDebug("Resource " + url + " has wrong extension (" + ext + ") or wrong mime-type (" + res.responseHeader.mime() + "). 
not scraped"); + scraper = null; + hfos = respond; + } + + // reserve cache entry + hpc = cacheManager.newEntry(requestDate, 0, url, requestHeader, res.status, res.responseHeader, scraper, null, switchboard.defaultProxyProfile); + + // handle incoming cookies + handleIncomingCookies(res.responseHeader, host, ip); + + // request has been placed and result has been returned. work off response + try { + //System.out.println("HEADER: SERVER TO PROXY = [" + res.status + "] " + ((httpHeader) res.responseHeader).toString()); // DEBUG + respondHeader(respond, res.status, res.responseHeader); + String storeError; + if ((storeError = hpc.shallStoreCache()) == null) { + // we write a new cache entry + if ((contentLength > 0) && (contentLength < 1048576)) { + // write to buffer + byte[] cacheArray = res.writeContent(hfos); + if (hfos instanceof htmlFilterOutputStream) ((htmlFilterOutputStream) hfos).finalize(); + // enQueue new entry with response header and file as byte[] + hpc.status = plasmaHTCache.CACHE_FILL; + cacheManager.stackProcess(hpc, cacheArray); + } else try { + // write to file system directly + cacheFile.getParentFile().mkdirs(); + res.writeContent(hfos, cacheFile); + if (hfos instanceof htmlFilterOutputStream) ((htmlFilterOutputStream) hfos).finalize(); + // enQueue new entry with response header + hpc.status = plasmaHTCache.CACHE_FILL; + cacheManager.stackProcess(hpc); + } catch (FileNotFoundException e) { + // this may happen if there are no write rights whatsoever + // (do nothing) + /* + Exception occurred: + java.io.FileNotFoundException: + /opt/yacy_pre_v0.314_20041219/DATA/HTCACHE/www.spiegel.de/fotostrecke/0,5538,PB64-SUQ9NDYwNyZucj0z,00.html + (Permission denied) + */ + } + } else { + // no caching + //System.out.println("DEBUG: " + res.status + " " + cacheFile.toString()); // debug + log.logDebug(cacheFile.toString() + " not cached: " + storeError); + res.writeContent(hfos, null); + if (hfos instanceof htmlFilterOutputStream) ((htmlFilterOutputStream) 
hfos).finalize(); + // no old file and no load. just data passing + hpc.status = plasmaHTCache.CACHE_PASSING; + cacheManager.stackProcess(hpc); + } + } catch (SocketException e) { + // this may happen if the client suddenly closes its connection + // maybe the user has stopped loading + // in that case, we are not responsible and just forget it + // but we clean the cache also, since it may be only partial + // and most possible corrupted + if (cacheFile.exists()) cacheFile.delete(); + respondHeader(respond,"404 client unexpectedly closed connection", new httpHeader(null)); + } + remote.close(); + } catch (Exception e) { + // this may happen if the targeted host does not exist or anything with the + // remote server was wrong. + // in any case, sending a 404 is appropriate + try { + if ((e.toString().indexOf("unknown host")) > 0) { + respondHeader(respond,"404 unknown host", new httpHeader(null)); + } else { + respondHeader(respond,"404 resource not available (generic exception: " + e.toString() + ")", new httpHeader(null)); + //respond.write(("Exception occurred:\r\n").getBytes()); + //respond.write((e.toString() + "\r\n").getBytes()); + //respond.write(("[TRACE: ").getBytes()); + //e.printStackTrace(new PrintStream(respond)); + //respond.write(("]\r\n").getBytes()); + /* http://www.geocrawler.com/archives/3/201/1999/8/50/2505805/ + > java.net.ConnectException: Connection refused + */ + e.printStackTrace(); + } + } catch (Exception ee) {} + } + } + respond.flush(); + } + + + private void respondError(OutputStream respond, String origerror, int errorcase, String url) { + try { + // set rewrite values + serverObjects tp = new serverObjects(); + tp.put("errormessage", errorcase); + tp.put("httperror", origerror); + tp.put("url", url); + + // rewrite the file + File file = new File(htRootPath, "/proxymsg/error.html"); + byte[] result; + ByteArrayOutputStream o = new ByteArrayOutputStream(); + FileInputStream fis = new FileInputStream(file); + 
httpTemplate.writeTemplate(fis, o, tp, "-UNRESOLVED_PATTERN-".getBytes()); + o.close(); + result = o.toByteArray(); + + // return header + httpHeader header = new httpHeader(); + header.put("Date", httpc.dateString(httpc.nowDate())); + header.put("Content-type", "text/html"); + header.put("Content-length", "" + o.size()); + header.put("Pragma", "no-cache"); + + // write the array to the client + respondHeader(respond, origerror, header); + serverFileUtils.write(result, respond); + respond.flush(); + } catch (IOException e) { + + } + } + + + + + public void doHead(Properties conProp, httpHeader requestHeader, OutputStream respond) throws IOException { + String method = conProp.getProperty("METHOD"); + String host = conProp.getProperty("HOST"); + String path = conProp.getProperty("PATH"); + String args = conProp.getProperty("ARGS"); // may be null if no args were given + int port; + int pos; + if ((pos = host.indexOf(":")) < 0) { + port = 80; + } else { + port = Integer.parseInt(host.substring(pos + 1)); + host = host.substring(0, pos); + } + + // check the blacklist, inspired by [AS]: respond a 404 for all AGIS (all you get is shit) servers + String hostlow = host.toLowerCase(); + if (blacklistedURL(hostlow, path)) { + try { + respondHeader(respond,"404 Not Found (AGIS)", new httpHeader(null)); + respond.write(("404 (generated): URL '" + hostlow + "' blocked by yacy proxy (blacklisted)\r\n").getBytes()); + respond.flush(); + serverLog.logInfo("PROXY", "AGIS blocking of host '" + hostlow + "'"); // debug + return; + } catch (Exception ee) {} + } + + // set another userAgent, if not yellowlisted + if (!(yellowList.contains(domain(hostlow)))) { + // change the User-Agent + requestHeader.put("User-Agent", userAgent); + } + + // resolve yacy and yacyh domains + String yAddress = yacyCore.seedDB.resolveYacyAddress(host); + + // re-calc the url path + String remotePath = (args == null) ? path : (path + "?" 
+ args); + + // attach possible yacy-sublevel-domain + if ((yAddress != null) && ((pos = yAddress.indexOf("/")) >= 0)) remotePath = yAddress.substring(pos) + remotePath; + + httpc remote = null; + httpc.response res = null; + + try { + // open the connection + if (yAddress == null) { + remote = newhttpc(host, port, timeout); + } else { + remote = newhttpc(yAddress, timeout); // with [AS] patch + } + res = remote.HEAD(remotePath, requestHeader); + respondHeader(respond, res.status, res.responseHeader); + } catch (Exception e) { + try { + respondHeader(respond,"404 Not Found", new httpHeader(null)); + respond.write(("Exception occurred:\r\n").getBytes()); + respond.write((e.toString() + "\r\n").getBytes()); + respond.write(("[TRACE: ").getBytes()); + e.printStackTrace(new PrintStream(respond)); + respond.write(("]\r\n").getBytes()); + } catch (Exception ee) {} + } + respond.flush(); + } + + public void doPost(Properties conProp, httpHeader requestHeader, OutputStream respond, PushbackInputStream body) throws IOException { + String host = conProp.getProperty("HOST"); + String path = conProp.getProperty("PATH"); + String args = conProp.getProperty("ARGS"); // may be null if no args were given + int port; + int pos; + if ((pos = host.indexOf(":")) < 0) { + port = 80; + } else { + port = Integer.parseInt(host.substring(pos + 1)); + host = host.substring(0, pos); + } + + // set another userAgent, if not yellowlisted + if (!(yellowList.contains(domain(host).toLowerCase()))) { + // change the User-Agent + requestHeader.put("User-Agent", userAgent); + } + + // resolve yacy and yacyh domains + String yAddress = yacyCore.seedDB.resolveYacyAddress(host); + + // re-calc the url path + String remotePath = (args == null) ? path : (path + "?" 
+ args); + + // attach possible yacy-sublevel-domain + if ((yAddress != null) && ((pos = yAddress.indexOf("/")) >= 0)) remotePath = yAddress.substring(pos) + remotePath; + + httpc remote = null; + httpc.response res = null; + + try { + if (yAddress == null) { + remote = newhttpc(host, port, timeout); + } else { + remote = newhttpc(yAddress, timeout); + } + res = remote.POST(remotePath, requestHeader, body); + respondHeader(respond, res.status, res.responseHeader); + res.writeContent(respond, null); + remote.close(); + } catch (Exception e) { + try { + respondHeader(respond,"404 Not Found", new httpHeader(null)); + respond.write(("Exception occurred:\r\n").getBytes()); + respond.write((e.toString() + "\r\n").getBytes()); + respond.write(("[TRACE: ").getBytes()); + e.printStackTrace(new PrintStream(respond)); + respond.write(("]\r\n").getBytes()); + } catch (Exception ee) {} + } + respond.flush(); + } + + + + public void doConnect(Properties conProp, de.anomic.http.httpHeader requestHeader, InputStream clientIn, OutputStream clientOut) throws IOException { + String host = conProp.getProperty("HOST"); + int port = Integer.parseInt(conProp.getProperty("PORT")); + String httpVersion = conProp.getProperty("HTTP"); + int timeout = Integer.parseInt(switchboard.getConfig("clientTimeout", "10000")); + + // possibly branch into PROXY-PROXY connection + if (remoteProxyUse) { + httpc remoteProxy = new httpc(host, port, timeout, false, remoteProxyHost, remoteProxyPort); + httpc.response response = remoteProxy.CONNECT(host, port, requestHeader); + response.print(); + if (response.success()) { + // replace connection details + host = remoteProxyHost; + port = remoteProxyPort; + // go on (see below) + } else { + // pass error response back to client + respondHeader(clientOut, response.status, response.responseHeader); + return; + } + } + + // try to establish connection to remote host + Socket sslSocket = new Socket(host, port); + sslSocket.setSoTimeout(timeout); // waiting time 
for write + sslSocket.setSoLinger(true, timeout); // waiting time for read + InputStream promiscuousIn = sslSocket.getInputStream(); + OutputStream promiscuousOut = sslSocket.getOutputStream(); + + // now then we can return a success message + clientOut.write((httpVersion + " 200 Connection established" + serverCore.crlfString + + "Proxy-agent: YACY" + serverCore.crlfString + + serverCore.crlfString).getBytes()); + + log.logInfo("SSL CONNECTION TO " + host + ":" + port + " ESTABLISHED"); + + // start stream passing with mediate processes + try { + Mediate cs = new Mediate(sslSocket, clientIn, promiscuousOut); + Mediate sc = new Mediate(sslSocket, promiscuousIn, clientOut); + cs.start(); + sc.start(); + while ((sslSocket != null) && + (sslSocket.isBound()) && + (!(sslSocket.isClosed())) && + (sslSocket.isConnected()) && + ((cs.isAlive()) || (sc.isAlive()))) { + // idle + try {Thread.currentThread().sleep(1000);} catch (InterruptedException e) {} // wait a while + } + // set stop mode + cs.pleaseTerminate(); + sc.pleaseTerminate(); + // wake up thread + cs.interrupt(); + sc.interrupt(); + // ...hope they have terminated... 
+ } catch (IOException e) { + //System.out.println("promiscuous termination: " + e.getMessage()); + } + + } + + public class Mediate extends Thread { + + boolean terminate; + Socket socket; + InputStream in; + OutputStream out; + + public Mediate(Socket socket, InputStream in, OutputStream out) throws IOException { + this.terminate = false; + this.in = in; + this.out = out; + this.socket = socket; + } + + public void run() { + byte[] buffer = new byte[512]; + int len; + try { + while ((socket != null) && + (socket.isBound()) && + (!(socket.isClosed())) && + (socket.isConnected()) && + (!(terminate)) && + (in != null) && + (out != null) && + ((len = in.read(buffer)) >= 0) + ) { + out.write(buffer, 0, len); + } + } catch (IOException e) {} + } + + public void pleaseTerminate() { + terminate = true; + } + } + + private httpc newhttpc(String server, int port, int timeout) throws IOException { + // a new httpc connection, combined with possible remote proxy + boolean useProxy = remoteProxyUse; + // check no-proxy rule + if ((useProxy) && (!(remoteProxyAllowProxySet.contains(server)))) { + if (remoteProxyDisallowProxySet.contains(server)) { + useProxy = false; + } else { + // analyse remoteProxyNoProxy; + // set either remoteProxyAllowProxySet or remoteProxyDisallowProxySet accordingly + int i = 0; + while (i < remoteProxyNoProxyPatterns.length) { + if (server.matches(remoteProxyNoProxyPatterns[i])) { + // disallow proxy for this server + remoteProxyDisallowProxySet.add(server); + useProxy = false; + break; + } + i++; + } + if (i == remoteProxyNoProxyPatterns.length) { + // no pattern matches: allow server + remoteProxyAllowProxySet.add(server); + } + } + } + // branch to server/proxy + if (useProxy) { + return new httpc(server, port, timeout, false, remoteProxyHost, remoteProxyPort); + } else { + return new httpc(server, port, timeout, false); + } + } + + private httpc newhttpc(String address, int timeout) throws IOException { + // a new httpc connection for :/ syntax + 
// this is called when a '.yacy'-domain is used + int p = address.indexOf(":"); + if (p < 0) return null; + String server = address.substring(0, p); + address = address.substring(p + 1); + // remove possible path elements (may occur for 'virtual' subdomains + p = address.indexOf("/"); + if (p >= 0) address = address.substring(0, p); // cut it off + int port = Integer.parseInt(address); + // normal creation of httpc object + return newhttpc(server, port, timeout); + } + + private void respondHeader(OutputStream respond, String status, httpHeader header) throws IOException, SocketException { + String s; + + // prepare header + //header.put("Server", "AnomicHTTPD (www.anomic.de)"); + if (!(header.containsKey("date"))) header.put("Date", httpc.dateString(httpc.nowDate())); + if (!(header.containsKey("content-type"))) header.put("Content-type", "text/html"); // fix this + + // write status line + respond.write(("HTTP/1.1 " + status + "\r\n").getBytes()); + + //System.out.println("HEADER: PROXY TO CLIENT = " + header.toString()); // DEBUG + + // write header + Iterator i = header.keySet().iterator(); + String key; + String value; + int pos; + //System.out.println("vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv"); + while (i.hasNext()) { + key = (String) i.next(); + if (!(key.startsWith("#"))) { // '#' in key is reserved for proxy attributes as artificial header values + value = (String) header.get(key); + if (!(key.equals("Location"))) while ((pos = value.lastIndexOf("#")) >= 0) { + // special handling is needed if a key appeared several times, which is valid. 
+ // all lines with same key are combined in one value, separated by a "#" + respond.write((key + ": " + value.substring(pos + 1).trim() + "\r\n").getBytes()); + //System.out.println("#" + key + ": " + value.substring(pos + 1).trim()); + value = value.substring(0, pos).trim(); + } + respond.write((key + ": " + value + "\r\n").getBytes()); + //System.out.println("#" + key + ": " + value); + } + } + + // end header + respond.write(("\r\n").getBytes()); + respond.flush(); + } + + + private void textMessage(OutputStream out, String body) throws IOException { + out.write(("HTTP/1.1 200 OK\r\n").getBytes()); + out.write(("Server: AnomicHTTPD (www.anomic.de)\r\n").getBytes()); + out.write(("Date: " + httpc.dateString(httpc.nowDate()) + "\r\n").getBytes()); + out.write(("Content-type: text/plain\r\n").getBytes()); + out.write(("Content-length: " + body.length() +"\r\n").getBytes()); + out.write(("\r\n").getBytes()); + out.flush(); + out.write(body.getBytes()); + out.flush(); + } + + private void transferFile(OutputStream out, File f) throws IOException { + InputStream source = new FileInputStream(f); + byte[] buffer = new byte[4096]; + int bytes_read; + while ((bytes_read = source.read(buffer)) > 0) out.write(buffer, 0, bytes_read); + out.flush(); + source.close(); + } + +} + +/* +proxy test: + +http://www.chipchapin.com/WebTools/cookietest.php? 
+http://xlists.aza.org/moderator/cookietest/cookietest1.php +http://vancouver-webpages.com/proxy/cache-test.html + +*/ diff --git a/source/de/anomic/http/httpdSwitchboard.java b/source/de/anomic/http/httpdSwitchboard.java new file mode 100644 index 000000000..e36e58ea9 --- /dev/null +++ b/source/de/anomic/http/httpdSwitchboard.java @@ -0,0 +1,72 @@ +// httpdSwitchboard.java +// ------------------------------------- +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2004 +// last major change: 15.07.2004 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. 
The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. +// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. + +package de.anomic.http; + +import java.io.*; +import java.util.*; +import de.anomic.server.*; + +public class httpdSwitchboard extends serverAbstractSwitch implements serverSwitch { + + private LinkedList cacheStack; + + public httpdSwitchboard(String rootPath, String initPath, String configPath) throws IOException { + super(rootPath, initPath, configPath); + cacheStack = new LinkedList(); + } + + public int queueSize() { + return cacheStack.size(); + } + + public void enQueue(Object job) { + cacheStack.addLast(job); + } + + public void deQueue() { + System.out.println("Process: " + cacheStack.removeFirst().toString()); + } + + public serverObjects action(String actionName, serverObjects actionInput) { + return null; + } + +} diff --git a/source/de/anomic/kelondro/kelondroAbstractRA.java b/source/de/anomic/kelondro/kelondroAbstractRA.java new file mode 100644 index 000000000..90a2abae1 --- /dev/null +++ b/source/de/anomic/kelondro/kelondroAbstractRA.java @@ -0,0 +1,216 @@ +// kelondroAbstractRA.java +// ----------------------- +// part of The Kelondro Database +// (C) by Michael Peter Christen; 
mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2004 +// last major change: 09.02.2004 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. +// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. 
+// Contributions and changes to the program code must be marked as such. + +package de.anomic.kelondro; + +import java.io.*; +import java.util.*; + +import de.anomic.server.*; + +abstract class kelondroAbstractRA implements kelondroRA { + + // logging support + protected String name = null; + public String name() { + return name; + } + + // pseudo-native methods: + abstract public int read() throws IOException; + abstract public void write(int b) throws IOException; + + abstract public int read(byte[] b, int off, int len) throws IOException; + abstract public void write(byte[] b, int off, int len) throws IOException; + + abstract public void seek(long pos) throws IOException; + abstract public void close() throws IOException; + + // derivated methods: + public byte readByte() throws IOException { + int ch = this.read(); + if (ch < 0) throw new IOException(); + return (byte)(ch); + } + + public void writeByte(int v) throws IOException { + this.write(v); + } + + public short readShort() throws IOException { + int ch1 = this.read(); + int ch2 = this.read(); + if ((ch1 | ch2) < 0) throw new IOException(); + return (short) ((ch1 << 8) + (ch2 << 0)); + } + + public void writeShort(int v) throws IOException { + this.write((v >>> 8) & 0xFF); this.write((v >>> 0) & 0xFF); + } + + public int readInt() throws IOException { + int ch1 = this.read(); + int ch2 = this.read(); + int ch3 = this.read(); + int ch4 = this.read(); + if ((ch1 | ch2 | ch3 | ch4) < 0) throw new IOException("kelondroAbstractRA.readInt: wrong values; ch1=" + ch1 + ", ch2=" + ch2 + ", ch3=" + ch3 + ", ch4=" + ch4); + return ((ch1 << 24) + (ch2 << 16) + (ch3 << 8) + (ch4 << 0)); + } + + public void writeInt(int v) throws IOException { + this.write((v >>> 24) & 0xFF); this.write((v >>> 16) & 0xFF); + this.write((v >>> 8) & 0xFF); this.write((v >>> 0) & 0xFF); + } + + public long readLong() throws IOException { + return ((long) (readInt()) << 32) + (readInt() & 0xFFFFFFFFL); + } + + public void writeLong(long 
v) throws IOException { + this.write((int) (v >>> 56) & 0xFF); this.write((int) (v >>> 48) & 0xFF); + this.write((int) (v >>> 40) & 0xFF); this.write((int) (v >>> 32) & 0xFF); + this.write((int) (v >>> 24) & 0xFF); this.write((int) (v >>> 16) & 0xFF); + this.write((int) (v >>> 8) & 0xFF); this.write((int) (v >>> 0) & 0xFF); + } + + public void write(byte[] b) throws IOException { + this.write(b, 0, b.length); + } + + private static final byte cr = 13; + private static final byte lf = 10; + private static final String crlf = new String(new byte[] {cr, lf}); + + public void writeLine(String line) throws IOException { + this.write((line + crlf).getBytes()); + } + + public String readLine() throws IOException { + // with these functions, we consider a line as always terminated by CRLF + serverByteBuffer sb = new serverByteBuffer(); + int c; + while (true) { + c = read(); + if (c < 0) { + if (sb.length() == 0) return null; else return sb.toString(); + } + if (c == cr) continue; + if (c == lf) return sb.toString(); + sb.append((byte) c); + } + } + + public void writeProperties(Properties props, String comment) throws IOException { + this.seek(0); + writeLine("# " + comment); + Enumeration e = props.propertyNames(); + String key, value; + while (e.hasMoreElements()) { + key = (String) e.nextElement(); + value = props.getProperty(key, ""); + writeLine(key + "=" + value); + } + writeLine("# EOF"); + } + + public Properties readProperties() throws IOException { + this.seek(0); + Properties props = new Properties(); + String line; + int pos; + while ((line = readLine()) != null) { + line = line.trim(); + if (line.equals("# EOF")) return props; + if ((line.length() == 0) || (line.startsWith("#"))) continue; + pos = line.indexOf("="); + if (pos < 0) continue; + props.setProperty(line.substring(0, pos).trim(), line.substring(pos + 1).trim()); + } + return props; + } + + public void writeMap(Map map, String comment) throws IOException { + this.seek(0); + writeLine("# " + 
comment); + Iterator i = map.keySet().iterator(); + String key, value; + while (i.hasNext()) { + key = (String) i.next(); + value = (String) map.get(key); + writeLine(key + "=" + value); + } + writeLine("# EOF"); + } + + public Map readMap() throws IOException { + this.seek(0); + TreeMap map = new TreeMap(); + String line; + int pos; + while ((line = readLine()) != null) { // very slow readLine???? + line = line.trim(); + if (line.equals("# EOF")) return map; + if ((line.length() == 0) || (line.startsWith("#"))) continue; + pos = line.indexOf("="); + if (pos < 0) continue; + map.put(line.substring(0, pos), line.substring(pos + 1)); + } + return map; + } + + public void writeArray(byte[] b) throws IOException { + // this does not write the content to the see position + // but to the very beginning of the record + // some additional bytes will ensure that we know the correct content size later on + seek(0); + writeInt(b.length); + write(b); + } + + public byte[] readArray() throws IOException { + seek(0); + int l = readInt(); + byte[] b = new byte[l]; + read(b, 0, l); + return b; + } + +} diff --git a/source/de/anomic/kelondro/kelondroBufferedRA.java b/source/de/anomic/kelondro/kelondroBufferedRA.java new file mode 100644 index 000000000..83439e0c7 --- /dev/null +++ b/source/de/anomic/kelondro/kelondroBufferedRA.java @@ -0,0 +1,189 @@ +// kelondroBufferedRA.java +// ----------------------- +// part of The Kelondro Database +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2004 +// last major change: 06.10.2004 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. 
+// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. +// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. 
+ +package de.anomic.kelondro; + +import java.io.*; +import java.util.*; + +public class kelondroBufferedRA extends kelondroAbstractRA implements kelondroRA { + + protected kelondroRA ra; + protected kelondroMScoreCluster bufferScore; + protected HashMap bufferMemory; + private int bufferMaxElements; + private int bufferElementSize; + private long seekpos; + + public kelondroBufferedRA(kelondroRA ra, int buffersize, int elementsize) throws FileNotFoundException { + this.ra = ra; + this.name = ra.name(); + this.bufferMemory = new HashMap(); + this.bufferScore = new kelondroMScoreCluster(); + this.bufferElementSize = elementsize; + this.bufferMaxElements = (int) (buffersize / bufferElementSize); + this.seekpos = 0; + } + + private int bufferElementNumber(long address) { + return (int) address / bufferElementSize; + } + + private int bufferElementOffset(long address) { + return (int) address % bufferElementSize; + } + + private byte[] readBuffer(int bufferNr) throws IOException { + Integer bufferNrI = new Integer(bufferNr); + byte[] buffer = (byte[]) bufferMemory.get(bufferNrI); + if (buffer == null) { + if (bufferMemory.size() >= bufferMaxElements) { + // delete elements in buffer if buffer too big + Iterator it = bufferScore.scores(true); + Integer element = (Integer) it.next(); + bufferMemory.remove(element); + int age = bufferScore.deleteScore(element); + int minclean = bufferMaxElements / 8; + int cleaned = 1; + while ((cleaned++ < minclean) && (it.hasNext())) { + element = (Integer) it.next(); + bufferMemory.remove(element); + age = bufferScore.deleteScore(element); + } + de.anomic.server.serverLog.logDebug("CACHE: " + name, "GC; cleaned=" + (cleaned - 1) + ", age=" + ((((int) (0xFFFFFFFFL & System.currentTimeMillis())) - age) / 1000)); + } + // add new element + buffer = new byte[bufferElementSize]; + //System.out.println("buffernr=" + bufferNr + ", elSize=" + bufferElementSize); + ra.seek(bufferNr * bufferElementSize); + ra.read(buffer, 0, bufferElementSize); 
+ bufferMemory.put(bufferNrI, buffer); + } + bufferScore.setScore(bufferNrI, (int) (0xFFFFFFFFL & System.currentTimeMillis())); + return buffer; + } + + /* + private static int log2i(int x) { + int log = 0; + while (x != 0) {x >>= 1; log++;} + return log; + } + */ + + private void writeBuffer(byte[] buffer, int bufferNr) throws IOException { + if (buffer == null) return; + Integer bufferNrI = new Integer(bufferNr); + ra.seek(bufferNr * bufferElementSize); + ra.write(buffer, 0, bufferElementSize); + bufferScore.setScore(bufferNrI, (int) (0xFFFFFFFFL & System.currentTimeMillis())); + } + + // pseudo-native method read + public int read() throws IOException { + int bn = bufferElementNumber(seekpos); + int offset = bufferElementOffset(seekpos); + seekpos++; + return 0xFF & readBuffer(bn)[offset]; + } + + // pseudo-native method write + public void write(int b) throws IOException { + int bn = bufferElementNumber(seekpos); + int offset = bufferElementOffset(seekpos); + byte[] buffer = readBuffer(bn); + seekpos++; + buffer[offset] = (byte) b; + writeBuffer(buffer, bn); + } + + public int read(byte[] b, int off, int len) throws IOException { + int bn1 = bufferElementNumber(seekpos); + int bn2 = bufferElementNumber(seekpos + len - 1); + int offset = bufferElementOffset(seekpos); + byte[] buffer = readBuffer(bn1); + if (bn1 == bn2) { + // simple case + //System.out.println("C1: bn1=" + bn1 + ", offset=" + offset + ", off=" + off + ", len=" + len); + System.arraycopy(buffer, offset, b, off, len); + seekpos += len; + return len; + } else { + // do recursively + int thislen = bufferElementSize - offset; + //System.out.println("C2: bn1=" + bn1 + ", bn2=" + bn2 +", offset=" + offset + ", off=" + off + ", len=" + len + ", thislen=" + thislen); + System.arraycopy(buffer, offset, b, off, thislen); + seekpos += thislen; + return thislen + read(b, thislen, len - thislen); + } + } + + public void write(byte[] b, int off, int len) throws IOException { + int bn1 = 
bufferElementNumber(seekpos); + int bn2 = bufferElementNumber(seekpos + len - 1); + int offset = bufferElementOffset(seekpos); + byte[] buffer = readBuffer(bn1); + if (bn1 == bn2) { + // simple case + System.arraycopy(b, off, buffer, offset, len); + seekpos += len; + writeBuffer(buffer, bn1); + } else { + // do recursively + int thislen = bufferElementSize - offset; + System.arraycopy(b, off, buffer, offset, thislen); + seekpos += thislen; + writeBuffer(buffer, bn1); + write(b, thislen, len - thislen); + } + } + + public void seek(long pos) throws IOException { + seekpos = pos; + } + + public void close() throws IOException { + ra.close(); + bufferScore = null; + bufferMemory = null; + } + +} diff --git a/source/de/anomic/kelondro/kelondroDyn.java b/source/de/anomic/kelondro/kelondroDyn.java new file mode 100644 index 000000000..0d2808290 --- /dev/null +++ b/source/de/anomic/kelondro/kelondroDyn.java @@ -0,0 +1,469 @@ +// kelondroDyn.java +// ----------------------- +// part of The Kelondro Database +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2004 +// last major change: 09.02.2004 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. 
+// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. +// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. + +/* + This class extends the kelondroTree and adds dynamic data handling + A dynamic content is created, by using several tree nodes and + combining them over a set of associated keys. + Example: a byte[] of length 1000 shall be stored in a kelondroTree + with node size 256. The key for the entry is 'entry'. + Then kelondroDyn stores the first part of four into the entry + 'entry00', the second into 'entry01', and so on. 
+ +*/ + +package de.anomic.kelondro; + +import java.io.*; +import java.util.*; +import de.anomic.server.*; + +public class kelondroDyn extends kelondroTree { + + private static final int counterlen = 8; + + private byte[] segmentCacheKey, segmentCacheContent; + private int keylen; + private int reclen; + private int segmentCount; + + public kelondroDyn(File file, long buffersize /*bytes*/, int key, int nodesize) throws IOException { + // creates a new dynamic tree + super(file, buffersize, new int[] {key + counterlen, nodesize}, 1, 8); + this.keylen = columnSize(0) - counterlen; + this.reclen = columnSize(1); + this.segmentCacheKey = null; + this.segmentCacheContent = null; + // init counter: write into text field + this.segmentCount = 0; + writeSegmentCount(); + } + + public kelondroDyn(File file, long buffersize) throws IOException{ + // this opens a file with an existing dynamic tree + super(file, buffersize); + this.keylen = columnSize(0) - counterlen; + this.reclen = columnSize(1); + this.segmentCacheKey = null; + this.segmentCacheContent = null; + this.segmentCount = 0; + //Iterator i = keys(true); while (i.hasNext()) segmentCount++; + //writeSegmentCount(); + //readSegmentCount(); + } + + private void writeSegmentCount() { + try { + setText(0, serverCodings.enhancedCoder.encodeBase64Long((long) segmentCount, 8).getBytes()); + } catch (Exception e) { + + } + } + + private void readSegmentCount() { + try { + segmentCount = (int) serverCodings.enhancedCoder.decodeBase64Long(new String(getText(0))); + } catch (Exception e) { + segmentCount = 0; + writeSegmentCount(); + } + } + + public synchronized int sizeDyn() { + //this.segmentCount = 0; + //Iterator i = keys(true); while (i.hasNext()) segmentCount++; + //return segmentCount; + return super.size(); + } + + private static String counter(int c) { + String s = Integer.toHexString(c); + while (s.length() < counterlen) s = "0" + s; + return s; + } + + private byte[] dynKey(String key, int record) { + if 
(key.length() > keylen) throw new RuntimeException("key len out of limit:" + key.length()); + while (key.length() < keylen) key = key + "_"; + key = key + counter(record); + return key.getBytes(); + } + + private String origKey(byte[] rawKey) { + int n = keylen - 1; + if (n >= rawKey.length) n = rawKey.length - 1; + while ((n > 0) && (rawKey[n] == (byte) '_')) n--; + return new String(rawKey, 0, n + 1); + } + + public class dynKeyIterator implements Iterator { + // the iterator iterates all keys, which are byte[] objects + Iterator ri; + String nextKey; + public dynKeyIterator(Iterator iter) { + ri = iter; + nextKey = n(); + } + public boolean hasNext() { + return nextKey != null; + } + public Object next() { + String result = nextKey; + nextKey = n(); + return origKey(result.getBytes()); + } + public void remove() { + throw new UnsupportedOperationException("no remove in RawKeyIterator"); + } + private String n() { + byte[] g; + String k; + String v; + int c; + while (ri.hasNext()) { + g = ((byte[][]) ri.next())[0]; + if (g == null) return null; + k = new String(g, 0, keylen); + v = new String(g, keylen, counterlen); + try {c = Integer.parseInt(v, 16);} catch (NumberFormatException e) {c = -1;} + if (c == 0) return k; + } + return null; + } + } + + public synchronized dynKeyIterator dynKeys(boolean up, boolean rotating) throws IOException { + // iterates only the keys of the Nodes + // enumerated objects are of type String + return new dynKeyIterator(super.rows(up, rotating)); + } + + public synchronized dynKeyIterator dynKeys(boolean up, boolean rotating, byte[] firstKey) throws IOException { + return new dynKeyIterator(super.rows(up, rotating, firstKey)); + } + + private byte[] getValueCached(byte[] key) throws IOException { + + if ((segmentCacheKey != null) && (serverByteBuffer.equals(key, segmentCacheKey))) { + // use cache + return segmentCacheContent; + } else { + // read from db + byte[][] r = get(key); + if (r == null) return null; + + // update cache + 
segmentCacheKey = key; + segmentCacheContent = r[1]; + + // return result + return r[1]; + } + } + + private synchronized void setValueCached(byte[] key, byte[] value) throws IOException { + + // update cache + segmentCacheKey = key; + segmentCacheContent = value; + + // update storage + put(key, value); + } + + public synchronized byte[] getDyn(String key, int pos, int len) throws IOException { + int recpos = pos % reclen; + int reccnt = pos / reclen; + byte[] segment1; + // read first within a single record + if ((recpos == 0) && (reclen == len)) { + segment1 = getValueCached(dynKey(key, reccnt)); + if (segment1 == null) return null; + } else { + byte[] buf = getValueCached(dynKey(key, reccnt)); + if (buf == null) return null; + //System.out.println("read: buf.length="+buf.length+",recpos="+recpos+",len="+len); + if (len < (reclen - recpos)) { + segment1 = new byte[len]; + System.arraycopy(buf, recpos, segment1, 0, len); + } else { + segment1 = new byte[reclen - recpos]; + System.arraycopy(buf, recpos, segment1, 0, reclen - recpos); + } + } + // if this is all, return + if (recpos + len <= reclen) return segment1; + // read from several records + // we combine recursively all participating records + // we have two segments: the one in the starting record, and the remaining + // segment 1 in record : start = recpos, length = reclen - recpos + // segment 2 in record +1: start = 0, length = len - reclen + recpos + // recursively step further + byte[] segment2 = getDyn(key, pos + segment1.length, len - segment1.length); + if (segment2 == null) return null; + // now combine the two segments into the result + byte[] result = new byte[len]; + System.arraycopy(segment1, 0, result, 0, segment1.length); + System.arraycopy(segment2, 0, result, segment1.length, segment2.length); + return result; + } + + public synchronized void putDyn(String key, int pos, byte[] b, int off, int len) throws IOException { + int recpos = pos % reclen; + int reccnt = pos / reclen; + byte[] buf; 
+ // first write current record + if ((recpos == 0) && (reclen == len)) { + if (off == 0) { + setValueCached(dynKey(key, reccnt), b); + } else { + buf = new byte[len]; + System.arraycopy(b, off, buf, 0, len); + setValueCached(dynKey(key, reccnt), b); + } + } else { + buf = getValueCached(dynKey(key, reccnt)); + if (buf == null) buf = new byte[reclen]; + //System.out.println("write: b.length="+b.length+",off="+off+",len="+(reclen-recpos)); + if (len < (reclen - recpos)) + System.arraycopy(b, off, buf, recpos, len); + else + System.arraycopy(b, off, buf, recpos, reclen - recpos); + setValueCached(dynKey(key, reccnt), buf); + } + // if more records are necessary, write to them also recursively + if (recpos + len > reclen) { + putDyn(key, pos + reclen - recpos, b, off + reclen - recpos, len - reclen + recpos); + } + } + + public synchronized void remove(String key) throws IOException { + // remove value in cache and tree + if (key == null) return; + int recpos = 0; + byte[] k; + while (super.get(k = dynKey(key, recpos)) != null) { + segmentCacheKey = null; + segmentCacheContent = null; + super.remove(k); + recpos++; + } + //segmentCount--; writeSegmentCount(); + } + + public synchronized boolean existsDyn(String key) throws IOException { + return (getValueCached(dynKey(key, 0)) != null); + } + + public synchronized kelondroRA getRA(String filekey) throws IOException { + // this returns always a RARecord, even if no existed bevore + return new RARecord(filekey); + } + + public class RARecord extends kelondroAbstractRA implements kelondroRA { + + int seekpos = 0; + String filekey; + + public RARecord(String filekey) { + this.filekey = filekey; + } + + public int read() throws IOException { + byte[] b = getDyn(filekey, seekpos++, 1); + return (b == null) ? 
-1 : b[0] & 0xFF; + } + + public void write(int i) throws IOException { + byte[] b = new byte[1]; + b[0] = (byte) i; + putDyn(filekey, seekpos++, b, 0, 1); + } + + public int read(byte[] b, int off, int len) throws IOException { + byte[] buf = getDyn(filekey, seekpos, len); + if (buf == null) return 0; + System.arraycopy(buf, 0, b, off, len); + seekpos += len; + return len; + } + + public void write(byte[] b, int off, int len) throws IOException { + putDyn(filekey, seekpos, b, off, len); + seekpos += len; + } + + public void seek(long pos) throws IOException { + seekpos = (int) pos; + } + + public void close() throws IOException { + // no need to do anything here + } + + } + + public synchronized void writeFile(String key, File f) throws IOException { + // reads a file from the FS and writes it into the database + kelondroRA kra = getRA(key); + byte[] buffer = new byte[1024]; + byte[] result = new byte[(int) f.length()]; + FileInputStream fis = new FileInputStream(f); + int i; + int pos = 0; + while ((i = fis.read(buffer)) > 0) { + System.arraycopy(buffer, 0, result, pos, i); + pos += i; + } + fis.close(); + kra.writeArray(result); + kra.close(); + } + + public synchronized void readFile(String key, File f) throws IOException { + // reads a file from the DB and writes it to the FS + kelondroRA kra = getRA(key); + byte[] result = kra.readArray(); + FileOutputStream fos = new FileOutputStream(f); + fos.write(result); + fos.close(); + kra.close(); + } + + public static void main(String[] args) { + // test app for DB functions + // reads/writes files to a database table + // arguments: + // {-f2db/-db2f} + + if (args.length == 0) { + randomtest(20); + } else if (args.length == 1) { + // open a db and list keys + try { + kelondroDyn kd = new kelondroDyn(new File(args[0]), 0x100000); + System.out.println(kd.size() + " elements in DB"); + Iterator i = kd.dynKeys(true, false); + while (i.hasNext()) System.out.println((String) i.next()); + kd.close(); + } catch (IOException 
e) { + e.printStackTrace(); + } + } + if (args.length == 4) { + boolean writeFile = (args[0].equals("-db2f")); + File db = new File(args[1]); + String key = args[2]; + File f = new File(args[3]); + kelondroDyn kd; + try { + if (db.exists()) kd = new kelondroDyn(db, 0x100000); else kd = new kelondroDyn(db, 0x100000, 80, 200); + if (writeFile) kd.readFile(key, f); else kd.writeFile(key, f); + } catch (IOException e) { + System.out.println("ERROR: " + e.toString()); + } + } + } + + public static void randomtest(int elements) { + System.out.println("random " + elements + ":"); + String s = "ABCDEFGHIJKLMNOPQRSTUVWXYZ".substring(0, elements); + String t, d; + char c; + kelondroDyn tt; + File testFile = new File("test.db"); + byte[] b; + byte[] cont; + try { + int steps = 0; + while (true) { + if (testFile.exists()) testFile.delete(); + tt = new kelondroDyn(testFile, 0, 4 ,100); + steps = ((int) System.currentTimeMillis() % 7) * (((int) System.currentTimeMillis() + 17) % 11); + t = s; + d = ""; + System.out.println("NEW SESSION"); + for (int i = 0; i < steps; i++) { + if ((d.length() < 3) || ((t.length() > 0) && (((int) System.currentTimeMillis() % 7) < 3))) { + // add one + c = t.charAt((int) (System.currentTimeMillis() % (long) t.length())); + b = testWord(c); + cont = new byte[(int) (System.currentTimeMillis() % (long) 777)]; + tt.putDyn(new String(b), 0, cont, 0, cont.length); + d = d + c; + t = t.substring(0, t.indexOf(c)) + t.substring(t.indexOf(c) + 1); + System.out.println("added " + new String(b) + ", " + cont.length + " bytes"); + } else { + // delete one + c = d.charAt((int) (System.currentTimeMillis() % (long) d.length())); + b = testWord(c); + tt.remove(new String(b)); + d = d.substring(0, d.indexOf(c)) + d.substring(d.indexOf(c) + 1); + t = t + c; + System.out.println("removed " + new String(b)); + } + if (countElementsDyn(tt) != tt.sizeDyn()) { + System.out.println("wrong size: count=" + countElementsDyn(tt) + ", size=" + tt.sizeDyn() + "; Tree:"); + 
//tt.print(); + //break; + } + } + //tt.print(); + tt.close(); + } + + } catch (Exception e) { + e.printStackTrace(); + System.out.println("TERMINATED"); + } + } + + public static int countElementsDyn(kelondroDyn t) { + int count = 0; + try { + Iterator iter = t.dynKeys(true, false); + while (iter.hasNext()) {count++; if (iter.next() == null) System.out.println("ERROR! null element found");} + return count; + } catch (IOException e) { + return -1; + } + } +} diff --git a/source/de/anomic/kelondro/kelondroDynTree.java b/source/de/anomic/kelondro/kelondroDynTree.java new file mode 100644 index 000000000..28e12c4f1 --- /dev/null +++ b/source/de/anomic/kelondro/kelondroDynTree.java @@ -0,0 +1,366 @@ +// kelondroDynTree.java +// ----------------------- +// part of The Kelondro Database +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2004 +// last major change: 20.09.2004 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. 
The usage of this software +// is on your own risk. The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. +// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. + +/* + */ + +package de.anomic.kelondro; + +import java.io.*; +import java.util.*; + +public class kelondroDynTree { + + // basic data structures + private int[] columns; + private kelondroDyn table; + private Hashtable treeRAHandles; + private File file; + + // some properties to control caching and buffering + private int maxcountCache = 1000, maxsizeCache = 100; + private int maxcountBuffer = 1000, maxsizeBuffer = 100; + private long maxageCache = 60000, cycletimeCache = 10000; + private long maxageBuffer = 60000, cycletimeBuffer = 10000; + private long buffersize = 0; + + // data structures for the cache and buffer + private Hashtable buffer, cache; + private long cycleCache, cycleBuffer; + + public kelondroDynTree(File file, long buffersize, int keylength, int nodesize, int[] columns) throws IOException { + // creates a new DynTree + this.file = file; + this.columns = columns; + this.buffer = new Hashtable(); + this.cache = new Hashtable(); + this.cycleCache = Long.MIN_VALUE; + this.cycleBuffer = Long.MIN_VALUE; + if (file.exists()) throw new 
IOException("DynTree " + file.toString() + " already exists"); + this.table = new kelondroDyn(file, buffersize, keylength, nodesize); + this.treeRAHandles = new Hashtable(); + } + + public kelondroDynTree(File file, long buffersize) throws IOException { + // opens an existing DynTree + this.file = file; + this.buffer = new Hashtable(); + this.cache = new Hashtable(); + this.cycleCache = Long.MIN_VALUE; + this.cycleBuffer = Long.MIN_VALUE; + if (!(file.exists())) throw new IOException("DynTree " + file.toString() + " does not exist"); + this.table = new kelondroDyn(file, buffersize); + // read one element to measure the size of columns + if (table.size() == 0) throw new IOException("DynTree " + file.toString() + " is empty. Should not."); + this.treeRAHandles = new Hashtable(); + Iterator i = table.dynKeys(true, false); + String onekey = (String) i.next(); + kelondroTree onetree = getTree(onekey); + this.columns = new int[onetree.columns()]; + for (int j = 0; j < columns.length; j++) columns[j] = onetree.columnSize(j); + closeTree(onekey); + } + + public void close() throws IOException { + Enumeration e = treeRAHandles.keys(); + while (e.hasMoreElements()) closeTree((String) e.nextElement()); + int size = table.size(); + table.close(); + if (size == 0) this.file.delete(); + } + + + public void setReadCacheAttr(int maxcount, int maxsize, long maxage, long cycletime) { + maxcountCache = maxcount; + maxsizeCache = maxsize; + maxageCache = maxage; + cycletimeCache = cycletime; + } + + public void setWriteBufferAttr(int maxcount, int maxsize, long maxage, long cycletime) { + maxcountBuffer = maxcount; + maxsizeBuffer = maxsize; + maxageBuffer = maxage; + cycletimeBuffer = cycletime; + } + + protected boolean existsTree(String key) throws IOException { + return table.existsDyn(key); + } + + protected kelondroTree newTree(String key) throws IOException { + if (table.existsDyn(key)) throw new IOException("table " + key + " already exists."); + kelondroRA ra = 
table.getRA(key); // works always, even with no-existing entry + treeRAHandles.put(key, ra); + return new kelondroTree(ra, buffersize, columns); + } + + protected kelondroTree getTree(String key) throws IOException { + if (table.existsDyn(key)) { + kelondroRA ra = table.getRA(key); + treeRAHandles.put(key, ra); + return new kelondroTree(ra, buffersize); + } else { + return null; + } + } + + protected void closeTree(String key) throws IOException { + kelondroRA ra = (kelondroRA) treeRAHandles.get(key); + if (ra != null) { + ra.close(); + treeRAHandles.remove(key); + } + } + + protected void removeTree(String key) throws IOException { + kelondroRA ra = (kelondroRA) treeRAHandles.get(key); + if (ra != null) { + ra.close(); + treeRAHandles.remove(key); + } + table.remove(key); + } + + + /*******************************************************/ + + protected class treeCache { + + private String tablename; + private Hashtable cache; + public long timestamp; + + treeCache(String tablename) { + this.tablename = tablename; + this.cache = new Hashtable(); // for key-row relations + this.timestamp = Long.MAX_VALUE; // to flag no-update + } + + public byte[][] get(byte[] key) throws IOException { + byte[][] entry = (byte[][]) cache.get(key); + if (entry == null) { + kelondroTree t = getTree(this.tablename); + entry = t.get(key); + t.close(); + this.cache.put(key, entry); + this.timestamp = System.currentTimeMillis(); + } + return entry; + } + + protected void put(byte[][] entry) { // this is only used internal + this.cache.put(entry[0], entry); + this.timestamp = System.currentTimeMillis(); + } + + protected void remove(byte[] key) { + this.cache.remove(key); + this.timestamp = System.currentTimeMillis(); + } + } + + protected class treeBuffer { + + private String tablename; + private Hashtable buffer; + public long timestamp; + + treeBuffer(String tablename) { + this.tablename = tablename; + this.buffer = new Hashtable(); // for key-row relations + this.timestamp = 
Long.MAX_VALUE; // to flag no-update + } + + public void put(byte[][] entry) { + this.buffer.put(entry[0], entry); + this.timestamp = System.currentTimeMillis(); + } + + public void remove(byte[] key) { + this.buffer.remove(key); + this.timestamp = System.currentTimeMillis(); + } + + protected void flush() throws IOException { + this.timestamp = System.currentTimeMillis(); + if (this.buffer.size() == 0) return; + Enumeration e = this.buffer.keys(); + kelondroTree t = getTree(this.tablename); + byte[][] entry; + byte[] key; + while (e.hasMoreElements()) { + key = (byte[]) e.nextElement(); + entry = (byte[][]) this.buffer.get(key); + t.put(entry); + } + t.close(); + } + } + + /*******************************************************/ + + + // read cached + public synchronized byte[][] get(String tablename, byte[] key) throws IOException { + treeCache tc = (treeCache) cache.get(table); + if (tc == null) { + tc = new treeCache(tablename); + cache.put(tablename, tc); + } + return tc.get(key); + } + + + // clean-up method for cache: + private void flushCache() { + if ((System.currentTimeMillis() - this.cycleCache < this.cycletimeCache) && + (cache.size() < this.maxcountCache)) return; + this.cycleCache = System.currentTimeMillis(); + // collect all caches which have a time > maxagecache + Enumeration e = cache.keys(); + String tablename; + treeCache tc; + while (e.hasMoreElements()) { + tablename = (String) e.nextElement(); + tc = (treeCache) cache.get(tablename); + if ((System.currentTimeMillis() - tc.timestamp > this.maxageCache) || + (tc.cache.size() > this.maxsizeCache) || + (cache.size() > this.maxcountCache)) { + cache.remove(tablename); + } + } + } + + // write buffered + public synchronized void put(String tablename, byte[][] newrow) { + treeBuffer tb = (treeBuffer) buffer.get(tablename); + if (tb == null) { + tb = new treeBuffer(tablename); + } + treeCache tc = (treeCache) cache.get(table); + if (tc == null) { + tc = new treeCache(tablename); + 
cache.put(tablename, tc); + } + tb.put(newrow); + tc.put(newrow); + flushBuffer(); + } + + public synchronized void remove(String tablename, byte[] key) { + treeBuffer tb = (treeBuffer) buffer.get(tablename); + if (tb == null) { + tb = new treeBuffer(tablename); + } + treeCache tc = (treeCache) cache.get(table); + if (tc == null) { + tc = new treeCache(tablename); + cache.put(tablename, tc); + } + tb.remove(key); + tc.remove(key); + flushBuffer(); + } + + public synchronized void removeAll(String tablename) throws IOException { + buffer.remove(table); + cache.remove(table); + kelondroTree t = getTree(tablename); + t.removeAll(); + flushBuffer(); + } + + // clean-up method for buffer: + private void flushBuffer() { + if ((System.currentTimeMillis() - this.cycleBuffer < this.cycletimeBuffer) && + (buffer.size() < this.maxcountBuffer)) return; + this.cycleBuffer = System.currentTimeMillis(); + // collect all buffers which have a time > maxageBuffer + Enumeration e = buffer.keys(); + String tablename; + treeBuffer tb; + while (e.hasMoreElements()) { + tablename = (String) e.nextElement(); + tb = (treeBuffer) buffer.get(tablename); + if ((System.currentTimeMillis() - tb.timestamp > this.maxageBuffer) || + (tb.buffer.size() > this.maxsizeBuffer) || + (buffer.size() > this.maxcountBuffer)) { + try {tb.flush();} catch (IOException ee) {} + tb = null; + buffer.remove(tablename); + } + } + } + + + /*******************************************************/ + + public static void main(String[] args) { + // test app + try { + System.out.println("start"); + File file = new File("D:\\bin\\testDyn.db"); + if (file.exists()) { + kelondroDynTree dt = new kelondroDynTree(file, 0x100000L); + System.out.println("opened: table keylength=" + dt.table.columnSize(0) + ", sectorsize=" + dt.table.columnSize(1) + ", " + dt.table.size() + " entries."); + } else { + kelondroDynTree dt = new kelondroDynTree(file, 0x100000L, 16, 512, new int[] {10,20,30}); + String name; + kelondroTree t; + 
byte[][] line = new byte[][] {"".getBytes(), "abc".getBytes(), "def".getBytes()}; + for (int i = 1; i < 100; i++) { + name = "test" + i; + t = dt.newTree(name); + for (int j = 1; j < 10; j++) { + line[0] = ("entry" + j).getBytes(); + t.put(line); + } + dt.closeTree(name); + } + } + System.out.println("finished"); + } catch (IOException e) { + e.printStackTrace(); + } + } + +} diff --git a/source/de/anomic/kelondro/kelondroFScoreCluster.java b/source/de/anomic/kelondro/kelondroFScoreCluster.java new file mode 100644 index 000000000..f1de79d2f --- /dev/null +++ b/source/de/anomic/kelondro/kelondroFScoreCluster.java @@ -0,0 +1,145 @@ +// kelondroScore.java +// ----------------------- +// part of The Kelondro Database +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2004 +// last major change: 28.09.2004 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. 
The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. +// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. + +/* + * This class manages counted words, + * in a word-count table. + * word counts can be increased, and the words can be enumerated + * in order of their count. 
+ */ + + +package de.anomic.kelondro; + +import de.anomic.server.*; +import java.io.*; +import java.util.*; + +public class kelondroFScoreCluster { + + private static final int wordlength = 32; + private static final int countlength = 6; + //private static final int nodesize = 4048; + private kelondroTree refcountDB; + private kelondroTree countrefDB; + + public kelondroFScoreCluster(File refcountDBfile, File countrefDBfile) throws IOException { + if ((refcountDBfile.exists()) && (countrefDBfile.exists())) { + refcountDB = new kelondroTree(refcountDBfile, 0x100000L); + refcountDB.setText(0, serverCodings.enhancedCoder.encodeBase64Long(0, countlength).getBytes()); // counter of all occurrences + countrefDB = new kelondroTree(countrefDBfile, 0x100000L); + countrefDB.setText(0, serverCodings.enhancedCoder.encodeBase64Long(0, countlength).getBytes()); + } else if ((!(refcountDBfile.exists())) && (!(countrefDBfile.exists()))) { + refcountDB = new kelondroTree(refcountDBfile, 0x100000L, new int[] {wordlength, countlength}, 1, countlength); + countrefDB = new kelondroTree(countrefDBfile, 0x100000L, new int[] {countlength + wordlength, 4}, 1, countlength); + } else { + throw new IOException("both word/count db files must exists"); + } + } + + public void addScore(String word) throws IOException { + word = word.toLowerCase(); + byte[][] record = refcountDB.get(word.getBytes()); + long c; + String cs; + if (record == null) { + // new entry + c = 0; + } else { + // delete old entry + c = serverCodings.enhancedCoder.decodeBase64Long(new String(record[1])); + cs = serverCodings.enhancedCoder.encodeBase64Long(c, countlength); + countrefDB.remove((cs + word).getBytes()); + c++; + } + cs = serverCodings.enhancedCoder.encodeBase64Long(c, countlength); + refcountDB.put(word.getBytes(), cs.getBytes()); + countrefDB.put((cs + word).getBytes(), new byte[] {0,0,0,0}); + // increase overall counter + refcountDB.setText(0, serverCodings.enhancedCoder.encodeBase64Long(getTotalCount() + 1, 
countlength).getBytes()); + } + + public long getTotalCount() throws IOException { + return serverCodings.enhancedCoder.decodeBase64Long(new String(refcountDB.getText(0))); + } + + public int getElementCount() { + return refcountDB.size(); + } + + public long getScore(String word) throws IOException { + word = word.toLowerCase(); + byte[][] record = refcountDB.get(word.getBytes()); + if (record == null) { + return 0; + } else { + return serverCodings.enhancedCoder.decodeBase64Long(new String(record[1])); + } + } + + public Iterator scores(boolean up) throws IOException { + // iterates '-' Strings + return new scoreIterator(up, false); + } + + private class scoreIterator implements Iterator { + // iteration of score objects + + kelondroTree.rowIterator iterator; + + public scoreIterator(boolean up, boolean rotating) throws IOException { + iterator = countrefDB.rows(up, rotating); + } + + public boolean hasNext() { + return iterator.hasNext(); + } + + public Object next() { + String s = new String(((byte[][]) iterator.next())[0]); + return s.substring(countlength) + "-" + serverCodings.enhancedCoder.decodeBase64Long(s.substring(0, countlength)); + } + + public void remove() { + } + + } +} diff --git a/source/de/anomic/kelondro/kelondroFileRA.java b/source/de/anomic/kelondro/kelondroFileRA.java new file mode 100644 index 000000000..37fb429cd --- /dev/null +++ b/source/de/anomic/kelondro/kelondroFileRA.java @@ -0,0 +1,118 @@ +// kelondroFileRA.java +// ----------------------- +// part of The Kelondro Database +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2004 +// last major change: 05.02.2004 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. 
+// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. +// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. 
+ +package de.anomic.kelondro; + +import java.io.*; +import java.util.*; + +public class kelondroFileRA extends kelondroAbstractRA implements kelondroRA { + + protected RandomAccessFile RAFile; + + public kelondroFileRA(String file) throws FileNotFoundException { + this(new File(file)); + } + + public kelondroFileRA(File file) throws FileNotFoundException { + this.name = file.getName(); + RAFile = new RandomAccessFile(file, "rw"); + } + + // pseudo-native method read + public int read() throws IOException { + return RAFile.read(); + } + + // pseudo-native method write + public void write(int b) throws IOException { + RAFile.write(b); + } + + public int read(byte[] b, int off, int len) throws IOException { + return RAFile.read(b, off, len); + } + + public void write(byte[] b, int off, int len) throws IOException { + RAFile.write(b, off, len); + } + + public void seek(long pos) throws IOException { + RAFile.seek(pos); + } + + public void close() throws IOException { + RAFile.close(); + } + + + // some static tools + public static void writeProperties(File f, Properties props, String comment) throws IOException { + File fp = f.getParentFile(); + if (fp != null) fp.mkdirs(); + kelondroRA kra = new kelondroFileRA(f); + kra.writeProperties(props, comment); + kra.close(); + } + + public static Properties readProperties(File f) throws IOException { + kelondroRA kra = new kelondroFileRA(f); + Properties props = kra.readProperties(); + kra.close(); + return props; + } + + public static void writeMap(File f, Map map, String comment) throws IOException { + File fp = f.getParentFile(); + if (fp != null) fp.mkdirs(); + kelondroRA kra = new kelondroFileRA(f); + kra.writeMap(map, comment); + kra.close(); + } + + public static Map readMap(File f) throws IOException { + kelondroRA kra = new kelondroFileRA(f); + Map map = kra.readMap(); + kra.close(); + return map; + } + +} diff --git a/source/de/anomic/kelondro/kelondroMScoreCluster.java 
b/source/de/anomic/kelondro/kelondroMScoreCluster.java new file mode 100644 index 000000000..f1898ce0b --- /dev/null +++ b/source/de/anomic/kelondro/kelondroMScoreCluster.java @@ -0,0 +1,315 @@ +// kelondroMScoreCluster.java +// ----------------------- +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2004 +// last major change: 28.09.2004 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. 
+// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. + +package de.anomic.kelondro; + +import java.util.*; +import java.text.*; + +public class kelondroMScoreCluster { + + private TreeMap refkeyDB; + private TreeMap keyrefDB; + private long gcount; + private int encnt; + + public kelondroMScoreCluster() { + refkeyDB = new TreeMap(); + keyrefDB = new TreeMap(); + gcount = 0; + encnt = 0; + } + + public static SimpleDateFormat shortFormatter = new SimpleDateFormat("yyyyMMddHHmmss"); + + public static int string2score(String s) { + // this can be used to calculate a score from a string + try { // try a number + return Integer.parseInt(s); + } catch (NumberFormatException e) { + try { // try a date + return (int) ((90000000 + System.currentTimeMillis() - shortFormatter.parse(s).getTime()) / 60000); + } catch (ParseException ee) { + // try it lex + int len = s.length(); + if (len > 5) len = 5; + int c = 0; + for (int i = 0; i < len; i++) { + c <<= 6; + c += plainByteArray[(byte) s.charAt(i)]; + } + for (int i = len; i < 5; i++) c <<= 6; + return c; + } + } + } + + private static byte[] plainByteArray; + static { + plainByteArray = new byte[256]; + for (int i = 0; i < 32; i++) plainByteArray[i] = (byte) i; + for (int i = 32; i < 96; i++) plainByteArray[i] = (byte) (i - 32); + for (int i = 96; i < 128; i++) plainByteArray[i] = (byte) (i - 64); + for (int i = 128; i < 256; i++) plainByteArray[i] = (byte) (i & 0X20); + } + + private long scoreKey(int elementNr, int elementCount) { + return (((long) (elementCount & 0xFFFFFFFFL)) << 32) | ((long) (elementNr & 0xFFFFFFFFL)); + } + + public long totalCount() { 
+ return gcount; + } + + public int size() { + return refkeyDB.size(); + } + + public void incScore(Object[] objs) { + addScore(objs, 1); + } + + public void addScore(Object[] objs, int count) { + if (objs != null) + for (int i = 0; i < objs.length; i++) + addScore(objs[i], count); + } + + public void setScore(Object[] objs, int count) { + if (objs != null) + for (int i = 0; i < objs.length; i++) + setScore(objs[i], count); + } + + public void incScore(Object obj) { + addScore(obj, 1); + } + + public void addScore(Object obj, int count) { + if (obj == null) return; + Long cs = (Long) refkeyDB.get(obj); + long c; + int ec = count; + int en; + if (cs == null) { + // new entry + en = encnt++; + } else { + // delete old entry + keyrefDB.remove(cs); + c = cs.longValue(); + ec += (int) ((c & 0xFFFFFFFF00000000L) >> 32); + //System.out.println("Debug:" + ec); + en = (int) (c & 0xFFFFFFFFL); + } + + // set new value + c = scoreKey(en, ec); + cs = new Long(c); + refkeyDB.put(obj, cs); + keyrefDB.put(cs, obj); + + // increase overall counter + gcount += count; + } + + public void setScore(Object obj, int count) { + if (obj == null) return; + //System.out.println("setScore " + obj.getClass().getName()); + Long cs = (Long) refkeyDB.get(obj); + long c; + int ec = count; + int en; + if (cs == null) { + // new entry + en = encnt++; + } else { + // delete old entry + keyrefDB.remove(cs); + c = cs.longValue(); + gcount -= (c & 0xFFFFFFFF00000000L) >> 32; + en = (int) (c & 0xFFFFFFFFL); + // decrease overall counter + gcount -= c; + } + + // set new value + c = scoreKey(en, ec); + cs = new Long(c); + refkeyDB.put(obj, cs); + keyrefDB.put(cs, obj); + + // increase overall counter + gcount += count; + } + + public int deleteScore(Object obj) { + if (obj == null) return -1; + Long cs = (Long) refkeyDB.get(obj); + if (cs == null) { + return -1; + } else { + // delete entry + keyrefDB.remove(cs); + refkeyDB.remove(obj); + // decrease overall counter + gcount -= cs.longValue(); + return 
(int) ((cs.longValue() & 0xFFFFFFFF00000000L) >> 32); + } + } + + public boolean existsScore(Object obj) { + return (refkeyDB.get(obj) != null); + } + + public int getScore(Object obj) { + if (obj == null) return 0; + Long cs = (Long) refkeyDB.get(obj); + if (cs == null) { + return 0; + } else { + return (int) ((cs.longValue() & 0xFFFFFFFF00000000L) >> 32); + } + } + + public Object getMaxObject() { + if (refkeyDB.size() == 0) return null; + //return getScores(1, false)[0]; + return keyrefDB.get((Long) keyrefDB.lastKey()); + } + + public Object getMinObject() { + if (refkeyDB.size() == 0) return null; + //return getScores(1, true)[0]; + return keyrefDB.get((Long) keyrefDB.firstKey()); + } + + public Object[] getScores(int maxCount, boolean up) { + return getScores(maxCount, up, Integer.MIN_VALUE, Integer.MAX_VALUE); + } + + public Object[] getScores(int maxCount, boolean up, int minScore, int maxScore) { + if (maxCount > refkeyDB.size()) maxCount = refkeyDB.size(); + Object[] s = new Object[maxCount]; + Iterator it = scores(up, minScore, maxScore); + int i = 0; + while ((i < maxCount) && (it.hasNext())) s[i++] = (Object) it.next(); + if (i < maxCount) { + // re-copy the result array + Object[] sc = new Object[i]; + System.arraycopy(s, 0, sc, 0, i); + s = sc; + sc = null; + } + return s; + } + + public Iterator scores(boolean up) { + return scores(up, Integer.MIN_VALUE, Integer.MAX_VALUE); + } + + public Iterator scores(boolean up, int minScore, int maxScore) { + return new scoreIterator(up, minScore, maxScore); + } + + private class scoreIterator implements Iterator { + + boolean up; + TreeMap keyrefDBcopy; + Object n; + int min, max; + + public scoreIterator(boolean up, int minScore, int maxScore) { + this.up = up; + this.min = minScore; + this.max = maxScore; + this.keyrefDBcopy = (TreeMap) keyrefDB.clone(); // NoSuchElementException here? 
+ internalNext(); + } + + public boolean hasNext() { + return (n != null); + } + + private void internalNext() { + Long key; + int score = (max + min) / 2; + while (keyrefDBcopy.size() > 0) { + key = (Long) ((up) ? keyrefDBcopy.firstKey() : keyrefDBcopy.lastKey()); + n = keyrefDBcopy.get(key); + keyrefDBcopy.remove(key); + score = (int) ((key.longValue() & 0xFFFFFFFF00000000L) >> 32); + if ((score >= min) && (score <= max)) return; + if (((up) && (score > max)) || ((!(up)) && (score < min))) { + keyrefDBcopy = new TreeMap(); + n = null; + return; + } + } + n = null; + } + + public Object next() { + Object o = n; + internalNext(); + return o; + } + + public void remove() { + if (n != null) deleteScore(n); + } + + } + + public static void main(String[] args) { + System.out.println("Test for Score: start"); + long time = System.currentTimeMillis(); + kelondroMScoreCluster s = new kelondroMScoreCluster(); + for (int i = 0; i < 10000; i++) s.addScore("score#" + i + "xxx" + i + "xxx" + i + "xxx" + i + "xxx", i/10); + System.out.println("result:"); + Object[] result; + result = s.getScores(s.size(), true); + for (int i = 0; i < s.size(); i++) System.out.println("up: " + result[i]); + result = s.getScores(s.size(), false); + for (int i = 0; i < s.size(); i++) System.out.println("down: " + result[i]); + System.out.println("Test for Score: finish. 
time = " + (System.currentTimeMillis() - time)); + System.out.println("total=" + s.totalCount() + ", elements=" + s.size()); + } +} diff --git a/source/de/anomic/kelondro/kelondroMScoreIndex.java b/source/de/anomic/kelondro/kelondroMScoreIndex.java new file mode 100644 index 000000000..53b920095 --- /dev/null +++ b/source/de/anomic/kelondro/kelondroMScoreIndex.java @@ -0,0 +1,213 @@ +// kelondroMScoreIndex.java +// ----------------------- +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2004 +// last major change: 28.09.2004 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. 
The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. +// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. + +package de.anomic.kelondro; + +import java.util.*; + +public class kelondroMScoreIndex { + + private kelondroMScoreCluster scoreCluster; + private HashMap objects; // encnt/object - relation + private TreeMap handles; // encnt/encnt - relation, ordered by objects + private int encnt; + + protected class objcomp implements Comparator { + private HashMap os; + public objcomp(HashMap objs) { + os = objs; + } + public int compare(Object o1, Object o2) { + if (o1 instanceof Integer) o1 = os.get(o1); + if (o2 instanceof Integer) o2 = os.get(o2); + return ((Comparable) o1).compareTo(o2); + } + public boolean equals(Object obj) { + return false; + } + } + + public kelondroMScoreIndex() { + encnt = 0; + objects = new HashMap(); // storage space for values + handles = new TreeMap(new objcomp(objects)); // int-handle/value relation + scoreCluster = new kelondroMScoreCluster(); // scores for int-handles + } + + public long totalCount() { + return scoreCluster.totalCount(); + } + + public int size() { + return handles.size(); + } + + public void incScore(Object[] objs) { + addScore(objs, 1); + } + + public void 
addScore(Object[] objs, int count) { + if (objs != null) + for (int i = 0; i < objs.length; i++) + addScore(objs[i], count); + } + + public void setScore(Object[] objs, int count) { + if (objs != null) + for (int i = 0; i < objs.length; i++) + setScore(objs[i], count); + } + + public void incScore(Object obj) { + addScore(obj, 1); + } + + public void addScore(Object obj, int count) { + // get handle + Integer handle = (Integer) handles.get(obj); + if (handle == null) { + // new object + handle = new Integer(encnt++); + objects.put(handle, obj); + handles.put(handle, handle); + } + // add score + scoreCluster.addScore(handle, count); + } + + public void setScore(Object obj, int count) { + // get handle + Integer handle = (Integer) handles.get(obj); + if (handle == null) { + // new object + handle = new Integer(encnt++); + objects.put(handle, obj); + handles.put(handle, handle); + } + // set score + scoreCluster.setScore(handle, count); + } + + public void deleteScore(Object obj) { + // get handle + Integer handle = (Integer) handles.get(obj); + if (handle != null) { + objects.remove(handle); + handles.remove(handle); + scoreCluster.deleteScore(handle); + } + } + + public int getScore(Object obj) { + // get handle + Integer handle = (Integer) handles.get(obj); + if (handle == null) return -1; + return scoreCluster.getScore(handle); + } + + public Object[] getScores(int count, boolean up, boolean weight, char weightsep) { + return new Object[1]; + } + + public Object[] getScores(int maxCount, boolean up) { + return getScores(maxCount, up, Integer.MIN_VALUE, Integer.MAX_VALUE); + } + + public Object[] getScores(int maxCount, boolean up, int minScore, int maxScore) { + if (maxCount > handles.size()) maxCount = handles.size(); + Object[] s = new Object[maxCount]; + Iterator it = scores(up, minScore, maxScore); + int i = 0; + while ((i < maxCount) && (it.hasNext())) s[i++] = (Object) it.next(); + if (i < maxCount) { + // re-copy the result array + Object[] sc = new 
Object[i]; + System.arraycopy(s, 0, sc, 0, i); + s = sc; + sc = null; + } + return s; + } + + public Iterator scores(boolean up) { + return scores(up, Integer.MIN_VALUE, Integer.MAX_VALUE); + } + + public Iterator scores(boolean up, int minScore, int maxScore) { + return new scoreIterator(up, minScore, maxScore); + } + + private class scoreIterator implements Iterator { + + Iterator scoreClusterIterator; + + public scoreIterator(boolean up, int minScore, int maxScore) { + this.scoreClusterIterator = scoreCluster.scores(up, minScore, maxScore); + } + + public boolean hasNext() { + return scoreClusterIterator.hasNext(); + } + + public Object next() { + return objects.get(scoreClusterIterator.next()); + } + + public void remove() { + scoreClusterIterator.remove(); + } + } + + public static void main(String[] args) { + System.out.println("Test for Score: start"); + long time = System.currentTimeMillis(); + kelondroMScoreIndex s = new kelondroMScoreIndex(); + for (int i = 0; i < 10000; i++) s.addScore("score#" + i + "xxx" + i + "xxx" + i + "xxx" + i + "xxx", i/10); + System.out.println("result:"); + Object[] result; + result = s.getScores(s.size(), true); + for (int i = 0; i < s.size(); i++) System.out.println("up: " + result[i]); + result = s.getScores(s.size(), false); + for (int i = 0; i < s.size(); i++) System.out.println("down: " + result[i]); + System.out.println("Test for Score: finish. 
time = " + (System.currentTimeMillis() - time)); + System.out.println("total=" + s.totalCount() + ", element=" + s.size()); + } + +} diff --git a/source/de/anomic/kelondro/kelondroMSetTools.java b/source/de/anomic/kelondro/kelondroMSetTools.java new file mode 100644 index 000000000..cbe1df19c --- /dev/null +++ b/source/de/anomic/kelondro/kelondroMSetTools.java @@ -0,0 +1,394 @@ +// kelondroMSetTools.java +// ------------------------------------- +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2004 +// last major change: 28.12.2004 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. 
The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. +// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. + +package de.anomic.kelondro; + +import java.util.*; + +public class kelondroMSetTools { + + + public static Comparator fastStringComparator = fastStringComparator(true); + + // ------------------------------------------------------------------------------------------------ + // helper methods + + private static int compare(Object a, Object b, Comparator c) { + if (c != null) return c.compare(a,b); + if ((a instanceof String) && (b instanceof String)) return ((String) a).compareTo((String) b); + throw new ClassCastException(); + } + + private static int log2(int x) { + int l = 0; + while (x > 0) {x = x >> 1; l++;} + return l; + } + + // ------------------------------------------------------------------------------------------------ + // join + // We distinguish two principal solutions + // - constructive join (generate new data structure) + // - destructive join (remove non-valid elements from given data structure) + // The alogorithm to perform the join can be also of two kind: + // - join by pairvise enumeration + // - join by iterative tests (where we distinguish left-right and right-left tests) + + 
public static TreeMap joinConstructive(TreeMap map, TreeSet set) { + // comparators must be equal + if ((map == null) || (set == null)) return null; + if (map.comparator() != set.comparator()) return null; + if ((map.size() == 0) || (set.size() == 0)) return new TreeMap(map.comparator()); + + // decide which method to use + int high = ((map.size() > set.size()) ? map.size() : set.size()); + int low = ((map.size() > set.size()) ? set.size() : map.size()); + int stepsEnum = 10 * (high + low - 1); + int stepsTest = 12 * log2(high) * low; + + // start most efficient method + if (stepsEnum > stepsTest) { + if (map.size() < set.size()) + return joinConstructiveByTestSetInMap(map, set); + else + return joinConstructiveByTestMapInSet(map, set); + } else { + return joinConstructiveByEnumeration(map, set); + } + } + + private static TreeMap joinConstructiveByTestSetInMap(TreeMap map, TreeSet set) { + Iterator si = set.iterator(); + TreeMap result = new TreeMap(map.comparator()); + Object o; + while (si.hasNext()) { + o = si.next(); + if (map.containsKey(o)) result.put(o, map.get(o)); + } + return result; + } + + private static TreeMap joinConstructiveByTestMapInSet(TreeMap map, TreeSet set) { + Iterator mi = map.keySet().iterator(); + TreeMap result = new TreeMap(map.comparator()); + Object o; + while (mi.hasNext()) { + o = mi.next(); + if (set.contains(o)) result.put(o, map.get(o)); + } + return result; + } + + private static TreeMap joinConstructiveByEnumeration(TreeMap map, TreeSet set) { + // implement pairvise enumeration + Comparator comp = map.comparator(); + Iterator mi = map.keySet().iterator(); + Iterator si = set.iterator(); + TreeMap result = new TreeMap(map.comparator()); + int c; + if ((mi.hasNext()) && (si.hasNext())) { + Object mobj = mi.next(); + Object sobj = si.next(); + while (true) { + c = compare(mobj, sobj, comp); + if (c < 0) { + if (mi.hasNext()) mobj = mi.next(); else break; + } else if (c > 0) { + if (si.hasNext()) sobj = si.next(); else break; + } 
else { + result.put(mobj, map.get(mobj)); + if (mi.hasNext()) mobj = mi.next(); else break; + if (si.hasNext()) sobj = si.next(); else break; + } + } + } + return result; + } + + // now the same for set-set + public static TreeSet joinConstructive(TreeSet set1, TreeSet set2) { + // comparators must be equal + if ((set1 == null) || (set2 == null)) return null; + if (set1.comparator() != set2.comparator()) return null; + if ((set1.size() == 0) || (set2.size() == 0)) return new TreeSet(set1.comparator()); + + // decide which method to use + int high = ((set1.size() > set2.size()) ? set1.size() : set2.size()); + int low = ((set1.size() > set2.size()) ? set2.size() : set1.size()); + int stepsEnum = 10 * (high + low - 1); + int stepsTest = 12 * log2(high) * low; + + // start most efficient method + if (stepsEnum > stepsTest) { + if (set1.size() < set2.size()) + return joinConstructiveByTest(set1, set2); + else + return joinConstructiveByTest(set2, set1); + } else { + return joinConstructiveByEnumeration(set1, set2); + } + } + + private static TreeSet joinConstructiveByTest(TreeSet small, TreeSet large) { + Iterator mi = small.iterator(); + TreeSet result = new TreeSet(small.comparator()); + Object o; + while (mi.hasNext()) { + o = mi.next(); + if (large.contains(o)) result.add(o); + } + return result; + } + + private static TreeSet joinConstructiveByEnumeration(TreeSet set1, TreeSet set2) { + // implement pairvise enumeration + Comparator comp = set1.comparator(); + Iterator mi = set1.iterator(); + Iterator si = set2.iterator(); + TreeSet result = new TreeSet(set1.comparator()); + int c; + if ((mi.hasNext()) && (si.hasNext())) { + Object mobj = mi.next(); + Object sobj = si.next(); + while (true) { + c = compare(mobj, sobj, comp); + if (c < 0) { + if (mi.hasNext()) mobj = mi.next(); else break; + } else if (c > 0) { + if (si.hasNext()) sobj = si.next(); else break; + } else { + result.add(mobj); + if (mi.hasNext()) mobj = mi.next(); else break; + if (si.hasNext()) sobj = 
si.next(); else break; + } + } + } + return result; + } + + + + // ------------------------------------------------------------------------------------------------ + // exclude + + public static TreeMap excludeConstructive(TreeMap map, TreeSet set) { + // comparators must be equal + if (map == null) return null; + if (set == null) return map; + if (map.comparator() != set.comparator()) return null; + if ((map.size() == 0) || (set.size() == 0)) return map; + + return excludeConstructiveByTestMapInSet(map, set); + //return excludeConstructiveByEnumeration(map, set); + } + + private static TreeMap excludeConstructiveByTestMapInSet(TreeMap map, TreeSet set) { + Iterator mi = map.keySet().iterator(); + TreeMap result = new TreeMap(map.comparator()); + Object o; + while (mi.hasNext()) { + o = mi.next(); + if (!(set.contains(o))) result.put(o, map.get(o)); + } + return result; + } + + private static TreeMap excludeConstructiveByEnumeration(TreeMap map, TreeSet set) { + // returns map without the elements in set + // enumerates objects + Comparator comp = map.comparator(); + Iterator mi = map.keySet().iterator(); + Iterator si = set.iterator(); + TreeMap result = new TreeMap(map.comparator()); + int c; + if ((mi.hasNext()) && (si.hasNext())) { + Object mobj = mi.next(); + Object sobj = si.next(); + while (true) { + c = compare(mobj, sobj, comp); + if (c < 0) { + result.put(mobj, map.get(mobj)); + if (mi.hasNext()) mobj = mi.next(); else break; + } else if (c > 0) { + if (si.hasNext()) sobj = si.next(); else break; + } else { + if (mi.hasNext()) mobj = mi.next(); else break; + if (si.hasNext()) sobj = si.next(); else { + // final flush + result.put(mobj, map.get(mobj)); + while (mi.hasNext()) { + mobj = mi.next(); + result.put(mobj, map.get(mobj)); + } + break; + } + } + } + } + return result; + } + + public static void excludeDestructive(TreeMap map, TreeSet set) { + // comparators must be equal + if (map == null) return; + if (set == null) return; + if (map.comparator() 
!= set.comparator()) return; + if ((map.size() == 0) || (set.size() == 0)) return; + + if (map.size() < set.size()) + excludeDestructiveByTestMapInSet(map, set); + else + excludeDestructiveByTestSetInMap(map, set); + } + + private static void excludeDestructiveByTestMapInSet(TreeMap map, TreeSet set) { + Iterator mi = map.keySet().iterator(); + while (mi.hasNext()) if (set.contains(mi.next())) mi.remove(); + } + + private static void excludeDestructiveByTestSetInMap(TreeMap map, TreeSet set) { + Iterator si = set.iterator(); + while (si.hasNext()) map.remove(si.next()); + } + + // and the same again with set-set + public static void excludeDestructive(TreeSet set1, TreeSet set2) { + // comparators must be equal + if (set1 == null) return; + if (set2 == null) return; + if (set1.comparator() != set2.comparator()) return; + if ((set1.size() == 0) || (set2.size() == 0)) return; + + if (set1.size() < set2.size()) + excludeDestructiveByTestSmallInLarge(set1, set2); + else + excludeDestructiveByTestLargeInSmall(set1, set2); + } + + private static void excludeDestructiveByTestSmallInLarge(TreeSet small, TreeSet large) { + Iterator mi = small.iterator(); + while (mi.hasNext()) if (large.contains(mi.next())) mi.remove(); + } + + private static void excludeDestructiveByTestLargeInSmall(TreeSet large, TreeSet small) { + Iterator si = small.iterator(); + while (si.hasNext()) large.remove(si.next()); + } + + // ------------------------------------------------------------------------------------------------ + + public static Comparator fastStringComparator(boolean ascending) { + return new stringComparator(ascending); + } + + private static class stringComparator implements Comparator { + // fast ordering + boolean asc = true; + public stringComparator(boolean ascending) { + asc = ascending; + } + public int compare(Object o1, Object o2) { + // returns o1o2:1 + int l1 = ((String) o1).length(); + int l2 = ((String) o2).length(); + if (l1 == l2) { + for (int i = 0; i < l1; i++) { + 
if (((byte) ((String) o1).charAt(i)) < ((byte) ((String) o2).charAt(i))) return (asc) ? -1 : 1; + if (((byte) ((String) o1).charAt(i)) > ((byte) ((String) o2).charAt(i))) return (asc) ? 1 : -1; + } + return 0; + //return ((String) o1).compareTo((String) o2); + } else { + return l1 < l2 ? ((asc) ? -1 : 1) : ((asc) ? 1 : -1); + } + } + public boolean equals(Object obj) { + return false; + } + } + + // ------------------------------------------------------------------------------------------------ + + public static void main(String[] args) { + TreeMap m = new TreeMap(); + TreeSet s = new TreeSet(); + m.put("a", "a"); + m.put("x", "x"); + m.put("f", "f"); + m.put("h", "h"); + m.put("w", "w"); + m.put("7", "7"); + m.put("t", "t"); + m.put("k", "k"); + m.put("y", "y"); + m.put("z", "z"); + s.add("a"); + s.add("b"); + s.add("c"); + s.add("k"); + s.add("l"); + s.add("m"); + s.add("n"); + s.add("o"); + s.add("p"); + s.add("q"); + s.add("r"); + s.add("s"); + s.add("t"); + s.add("x"); + System.out.println("Compare " + m.toString() + " with " + s.toString()); + System.out.println("Join=" + joinConstructiveByEnumeration(m, s)); + System.out.println("Join=" + joinConstructiveByTestMapInSet(m, s)); + System.out.println("Join=" + joinConstructiveByTestSetInMap(m, s)); + System.out.println("Join=" + joinConstructive(m, s)); + System.out.println("Exclude=" + excludeConstructiveByEnumeration(m, s)); + + /* + for (int low = 0; low < 10; low++) + for (int high = 0; high < 100; high=high + 10) { + int stepsEnum = 10 * high; + int stepsTest = 12 * log2(high) * low; + System.out.println("low=" + low + ", high=" + high + ", stepsEnum=" + stepsEnum + ", stepsTest=" + stepsTest + "; best method is " + ((stepsEnum < stepsTest) ? 
"joinByEnumeration" : "joinByTest")); + } + */ + + } + +} diff --git a/source/de/anomic/kelondro/kelondroMap.java b/source/de/anomic/kelondro/kelondroMap.java new file mode 100644 index 000000000..a92186c41 --- /dev/null +++ b/source/de/anomic/kelondro/kelondroMap.java @@ -0,0 +1,438 @@ +// kelondroMap.java +// ----------------------- +// part of The Kelondro Database +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2004 +// last major change: 26.10.2004 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. 
The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. +// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. + + +package de.anomic.kelondro; + +import java.io.*; +import java.util.*; + +public class kelondroMap { + + private static final int cachesize = 500; + + private kelondroDyn dyn; + private kelondroMScoreCluster cacheScore; + private HashMap cache; + private long startup; + private String[] sortfields, accfields; + private HashMap sortClusterMap; // a String-kelondroMScoreCluster - relation + private HashMap accMap; // to store accumulations of specific fields + private int elementCount; + private writeQueue writeWorker; + + public kelondroMap(kelondroDyn dyn) { + this(dyn, null, null); + } + + public kelondroMap(kelondroDyn dyn, String[] sortfields, String[] accfields) { + this.dyn = dyn; + this.cache = new HashMap(); + this.cacheScore = new kelondroMScoreCluster(); + this.startup = System.currentTimeMillis(); + this.elementCount = 0; + + // create fast ordering clusters and acc fields + this.sortfields = sortfields; + this.accfields = accfields; + + kelondroMScoreCluster[] cluster = null; + if (sortfields == null) sortClusterMap = null; else { + sortClusterMap = new HashMap(); + cluster = new 
kelondroMScoreCluster[sortfields.length]; + for (int i = 0; i < sortfields.length; i++) cluster[i] = new kelondroMScoreCluster(); + } + + Long[] accumulator = null; + if (accfields == null) accMap = null; else { + accMap = new HashMap(); + accumulator = new Long[accfields.length]; + for (int i = 0; i < accfields.length; i++) accumulator[i] = new Long(0); + } + + // fill cluster and accumulator with values + if ((sortfields != null) || (accfields != null)) try { + kelondroDyn.dynKeyIterator it = dyn.dynKeys(true, false); + String key, value; + long valuel; + Map map; + while (it.hasNext()) { + key = (String) it.next(); + map = get(key); + + if (sortfields != null) for (int i = 0; i < sortfields.length; i++) { + value = (String) map.get(sortfields[i]); + if (value != null) cluster[i].setScore(key, kelondroMScoreCluster.string2score(value)); + } + + if (accfields != null) for (int i = 0; i < accfields.length; i++) { + value = (String) map.get(sortfields[i]); + if (value != null) try { + valuel = Long.parseLong(value); + accumulator[i] = new Long(accumulator[i].longValue() + valuel); + } catch (NumberFormatException e) {} + } + elementCount++; + } + } catch (IOException e) {} + + // fill cluster + if (sortfields != null) for (int i = 0; i < sortfields.length; i++) sortClusterMap.put(sortfields[i], cluster[i]); + + // fill acc map + if (accfields != null) for (int i = 0; i < accfields.length; i++) accMap.put(accfields[i], accumulator[i]); + + // initialize a writeQueue and start it + writeWorker = new writeQueue(); + writeWorker.start(); + } + + class writeQueue extends Thread { + + LinkedList queue; + boolean run; + + public writeQueue() { + run = true; + queue = new LinkedList(); + } + + public void stack(String key) { + //System.out.println("kelondroMap: stack(" + dyn.entryFile.name() + ") " + key); + if (this.isAlive()) + queue.addLast(key); + else + workoff(key); + } + + public synchronized void workoff() { + if (queue.size() > 0) workoff((String) 
queue.removeFirst()); + } + + public synchronized void dequeue(String key) { + // take out one entry + ListIterator i = queue.listIterator(); + String k; + while (i.hasNext()) { + k = (String) i.next(); + if (k.equals(key)) { + i.remove(); + return; + } + } + } + + public void workoff(String key) { + //System.out.println("kelondroMap: workoff(" + dyn.entryFile.name() + ") " + key); + Map map = (Map) cache.get(key); + if (map == null) return; + try { + writeKra(key, map, ""); + } catch (IOException e) { + System.out.println("PANIC! Critical Error in kelondroMap.writeQueue.workoff(" + dyn.entryFile.name() + "): " + e.getMessage()); + e.printStackTrace(); + run = false; + } + } + + public void run() { + try {sleep(((System.currentTimeMillis() / 3) % 10) * 10000);} catch (InterruptedException e) {} // offset start + + //System.out.println("XXXX! " + (System.currentTimeMillis() / 1000) + " " + dyn.entryFile.name()); + int c; + while (run) { + c = 0; while ((run) && (c++ < 10)) try {sleep(1000);} catch (InterruptedException e) {} + //System.out.println("PING! 
" + (System.currentTimeMillis() / 1000) + " " + dyn.entryFile.name()); + while (queue.size() > 0) { + if (run) try {sleep(5000 / queue.size());} catch (InterruptedException e) {} + workoff(); + } + } + while (queue.size() > 0) workoff(); + } + + public void terminate(boolean waitFor) { + run = false; + if (waitFor) while (this.isAlive()) try {sleep(500);} catch (InterruptedException e) {} + } + } + + /* + public synchronized boolean has(String key) throws IOException { + return (cache.containsKey(key)) || (dyn.existsDyn(key)); + } + */ + + public synchronized void set(String key, Map newMap) throws IOException { + // update elementCount + if ((sortfields != null) || (accfields != null)) { + Map oldMap = get(key, false); + if (oldMap == null) { + // new element + elementCount++; + } else { + // element exists, update acc + if (accfields != null) updateAcc(oldMap, false); + } + } + + // stack to write queue + writeWorker.stack(key); + + // check for space in cache + checkCacheSpace(); + + // write map to cache + cacheScore.setScore(key, (int) ((System.currentTimeMillis() - startup) / 1000)); + cache.put(key, newMap); + + + // update sortCluster + if (sortClusterMap != null) updateSortCluster(key, newMap); + + // update accumulators with new values (add) + if (accfields != null) updateAcc(newMap, true); + } + + private synchronized void writeKra(String key, Map newMap, String comment) throws IOException { + // write map to kra + kelondroRA kra = dyn.getRA(key); + kra.writeMap(newMap, comment); + kra.close(); + } + + private void updateAcc(Map map, boolean add) { + String value; + long valuel; + Long accumulator; + for (int i = 0; i < accfields.length; i++) { + value = (String) map.get(accfields[i]); + if (value != null) try { + valuel = Long.parseLong(value); + accumulator = (Long) accMap.get(accfields[i]); + if (add) + accMap.put(accfields[i], new Long(accumulator.longValue() + valuel)); + else + accMap.put(accfields[i], new Long(accumulator.longValue() - valuel)); + 
} catch (NumberFormatException e) {} + } + } + + private void updateSortCluster(String key, Map map) { + String value; + kelondroMScoreCluster cluster; + for (int i = 0; i < sortfields.length; i++) { + value = (String) map.get(sortfields[i]); + if (value != null) { + cluster = (kelondroMScoreCluster) sortClusterMap.get(sortfields[i]); + cluster.setScore(key, kelondroMScoreCluster.string2score(value)); + sortClusterMap.put(sortfields[i], cluster); + } + } + } + + public synchronized void remove(String key) throws IOException { + // update elementCount + if ((sortfields != null) || (accfields != null)) { + Map map = get(key); + if (map != null) { + // update count + elementCount--; + + // update accumulators (subtract) + if (accfields != null) updateAcc(map, false); + + // remove from sortCluster + if (sortfields != null) deleteSortCluster(key); + } + } + + // remove from queue + writeWorker.dequeue(key); + + // remove from cache + cacheScore.deleteScore(key); + cache.remove(key); + + // remove from file + dyn.remove(key); + } + + private void deleteSortCluster(String key) { + kelondroMScoreCluster cluster; + for (int i = 0; i < sortfields.length; i++) { + cluster = (kelondroMScoreCluster) sortClusterMap.get(sortfields[i]); + cluster.deleteScore(key); + sortClusterMap.put(sortfields[i], cluster); + } + } + + public synchronized Map get(String key) throws IOException { + return get(key, true); + } + + private synchronized Map get(String key, boolean storeCache) throws IOException { + // load map from cache + Map map = (Map) cache.get(key); + if (map != null) return map; + + // load map from kra + if (!(dyn.existsDyn(key))) return null; + kelondroRA kra = dyn.getRA(key); + map = kra.readMap(); + kra.close(); + + if (storeCache) { + // cache it also + checkCacheSpace(); + // write map to cache + cacheScore.setScore(key, (int) ((System.currentTimeMillis() - startup) / 1000)); + cache.put(key, map); + } + + // return value + return map; + } + + private synchronized void 
checkCacheSpace() { + // check for space in cache + if (cache.size() >= cachesize) { + // delete one entry + String delkey = (String) cacheScore.getMinObject(); + cacheScore.deleteScore(delkey); + cache.remove(delkey); + } + } + + public synchronized kelondroDyn.dynKeyIterator keys(boolean up, boolean rotating) throws IOException { + // simple enumeration of key names without special ordering + return dyn.dynKeys(up, rotating); + } + + public synchronized kelondroDyn.dynKeyIterator keys(boolean up, boolean rotating, byte[] firstKey) throws IOException { + // simple enumeration of key names without special ordering + return dyn.dynKeys(up, rotating, firstKey); + } + + public synchronized Iterator keys(boolean up, /* sorted by */ String field) { + // sorted iteration using the sortClusters + if (sortClusterMap == null) return null; + kelondroMScoreCluster cluster = (kelondroMScoreCluster) sortClusterMap.get(field); + if (cluster == null) return null; // sort field does not exist + return cluster.scores(up); + } + + public synchronized mapIterator maps(boolean up, boolean rotating) throws IOException { + return new mapIterator(keys(up, rotating)); + } + + public synchronized mapIterator maps(boolean up, boolean rotating, byte[] firstKey) throws IOException { + return new mapIterator(keys(up, rotating, firstKey)); + } + + public synchronized mapIterator maps(boolean up, String field) { + return new mapIterator(keys(up, field)); + } + + public synchronized long getAcc(String field) { + Long accumulator = (Long) accMap.get(field); + if (accumulator == null) return -1; else return accumulator.longValue(); + } + + public synchronized int size() { + if ((sortfields != null) || (accfields != null)) return elementCount; else return dyn.size(); + } + + public void close() throws IOException { + // finish queue + writeWorker.terminate(true); + + // close cluster + if (sortClusterMap != null) { + for (int i = 0; i < sortfields.length; i++) sortClusterMap.remove(sortfields[i]); + 
sortClusterMap = null; + } + cache = null; + cacheScore = null; + + // close file + dyn.close(); + } + + + public class mapIterator implements Iterator { + // enumerates Map-Type elements + // the key is also included in every map that is returned; it's key is 'key' + + Iterator keyIterator; + boolean finish; + + public mapIterator(Iterator keyIterator) { + this.keyIterator = keyIterator; + this.finish = false; + } + + public boolean hasNext() { + return (!(finish)) && (keyIterator.hasNext()); + } + + public Object next() { + String nextKey = (String) keyIterator.next(); + if (nextKey == null) { + finish = true; + return null; + } + try { + Map map = get(nextKey); + map.put("key", nextKey); + return map; + } catch (IOException e) { + finish = true; + return null; + } + } + + public void remove() { + throw new UnsupportedOperationException(); + } + + } +} diff --git a/source/de/anomic/kelondro/kelondroRA.java b/source/de/anomic/kelondro/kelondroRA.java new file mode 100644 index 000000000..f81c32771 --- /dev/null +++ b/source/de/anomic/kelondro/kelondroRA.java @@ -0,0 +1,99 @@ +// kelondroRA.java +// ----------------------- +// part of The Kelondro Database +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2004 +// last major change: 09.02.2004 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. 
+// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. +// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. + + +/* + The random access interface for the kelondro database. + kelondro always stores data through the kelondroRecords class, + which in turn needs a random access file or something similar + to store the database structure. To provide more than + only file-based random access, we need an abstract interface. 
+*/ + +package de.anomic.kelondro; + +import java.io.*; +import java.util.*; + +public interface kelondroRA { + + // logging support + public String name(); + + // pseudo-native methods: + public int read() throws IOException; + public void write(int b) throws IOException; + + public int read(byte[] b, int off, int len) throws IOException; + public void write(byte[] b, int off, int len) throws IOException; + + public void seek(long pos) throws IOException; + public void close() throws IOException; + + // derivated methods: + public byte readByte() throws IOException; + public void writeByte(int v) throws IOException; + + public short readShort() throws IOException; + public void writeShort(int v) throws IOException; + + public int readInt() throws IOException; + public void writeInt(int v) throws IOException; + + public long readLong() throws IOException; + public void writeLong(long v) throws IOException; + + public void write(byte[] b) throws IOException; + + public void writeLine(String line) throws IOException; + public String readLine() throws IOException; + + // tool methods that organize the complete content + public void writeProperties(Properties props, String comment) throws IOException; + public Properties readProperties() throws IOException; + + public void writeMap(Map props, String comment) throws IOException; + public Map readMap() throws IOException; + + public void writeArray(byte[] b) throws IOException; + public byte[] readArray() throws IOException; + +} diff --git a/source/de/anomic/kelondro/kelondroRecords.java b/source/de/anomic/kelondro/kelondroRecords.java new file mode 100644 index 000000000..30d9955cd --- /dev/null +++ b/source/de/anomic/kelondro/kelondroRecords.java @@ -0,0 +1,869 @@ +// kelondroRecords.java +// ----------------------- +// part of The Kelondro Database +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2003, 2004 +// last major change: 11.01.2004 +// +// 
This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. +// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. 
+ +/* + The Kelondro Database + + Kelondro Records are the basis for the tree structures of the database. + The name is inspired by the Greek words 'fakelo'=file and 'dentro'=tree. + We omitted the 'fa' and 'de' in 'fakelodentro' + and made it sound better by replacing the 't' by 'd'. + The Kelondro Records are also used for non-tree structures like the KelondroStack. + The purpose of these structures is file-based storage of lists/stacks and + indexable information. + + We use the following structure: + + Handle : handles are simply the abstraction of integer indexes. + We don't want to mix up integer values as node pointers + with handles. This makes node indexes more robust against + manipulation by code that does not think in terms of records. + Simply think that handles are like cardinals that are used + like pointers. + Node : The elementary storage piece for one information fragment. + All Records, which are essentially files with a definite + structure, are constructed of a list of Node elements, but + the Node Handles that are carried within the Node overhead + prefix construct a specific structure, like trees or stacks. 
+*/ + + +package de.anomic.kelondro; + +import java.io.*; +import java.util.*; + +public class kelondroRecords { + + // constants + private static int NUL = Integer.MIN_VALUE; // the meta value for the kelondroRecords' NUL abstraction + + // static seek pointers + private static long POS_MAGIC = 0; // 1 byte, byte: file type magic + private static long POS_BUSY = POS_MAGIC + 1; // 1 byte, byte: marker for synchronization + private static long POS_PORT = POS_BUSY + 1; // 2 bytes, short: hint for remote db access + private static long POS_DESCR = POS_PORT + 2; // 60 bytes, string: any description string + private static long POS_COLUMNS = POS_DESCR + 60; // 2 bytes, short: number of columns in one entry + private static long POS_OHBYTEC = POS_COLUMNS + 2; // 2 bytes, number of extra bytes on each Node + private static long POS_OHHANDLEC = POS_OHBYTEC + 2; // 2 bytes, number of Handles on each Node + private static long POS_USEDC = POS_OHHANDLEC + 2; // 4 bytes, int: used counter + private static long POS_FREEC = POS_USEDC + 4; // 4 bytes, int: free counter + private static long POS_FREEH = POS_FREEC + 4; // 4 bytes, int: free pointer (to free chain start) + private static long POS_MD5PW = POS_FREEH + 4; // 16 bytes, string (encrypted password to this file) + private static long POS_ENCRYPTION = POS_MD5PW + 16; // 16 bytes, string (method description) + private static long POS_OFFSET = POS_ENCRYPTION + 16; // 8 bytes, long (seek position of first record) + private static long POS_INTPROPC = POS_OFFSET + 8; // 4 bytes, int: number of INTPROP elements + private static long POS_TXTPROPC = POS_INTPROPC + 4; // 4 bytes, int: number of TXTPROP elements + private static long POS_TXTPROPW = POS_TXTPROPC + 4; // 4 bytes, int: width of TXTPROP elements + private static long POS_COLWIDTHS = POS_TXTPROPW + 4; // array of 4 bytes, int[]: sizes of columns + // after this configuration field comes: + // POS_HANDLES: INTPROPC * 4 bytes : INTPROPC Integer properties, randomly 
accessible + // POS_TXTPROPS: TXTPROPC * TXTPROPW : an array of TXTPROPC byte arrays of width TXTPROPW that can hold any string + // POS_NODES : (USEDC + FREEC) * (overhead + sum(all: COLWIDTHS)) : Node Objects + + // values that are only present at run-time + private String filename; // the database's file name + protected kelondroRA entryFile; // the database file + private int overhead; // OHBYTEC + 4 * OHHANDLEC = size of additional control bytes + private int recordsize; // (overhead + sum(all: COLWIDTHS)) = the overall size of a record + + // dynamic run-time seek pointers + private long POS_HANDLES = 0; // starts after end of POS_COLWIDHS which is POS_COLWIDTHS + COLWIDTHS.length * 4 + private long POS_TXTPROPS = 0; // starts after end of POS_HANDLES which is POS_HANDLES + HANDLES.length * 4 + private long POS_NODES = 0; // starts after end of POS_TXTPROPS which is POS_TXTPROPS + TXTPROPS.length * TXTPROPW + + // dynamic variables that are back-ups of stored values in file; read/defined on instantiation + private int USEDC; // counter of used elements + private int FREEC; // counter of free elements in list of free Nodes + private Handle FREEH; // pointer to first element in list of free Nodes, empty = NUL + private short OHBYTEC; // number of extra bytes in each node + private short OHHANDLEC; // number of handles in each node + private int COLWIDTHS[]; // array with widths of columns + private Handle HANDLES[]; // array with handles + private byte[] TXTPROPS[]; // array with text properties + private int TXTPROPW; // size of a single TXTPROPS element + + // caching buffer + private HashMap cache; // the cache; holds Node objects + private int cachesize; // number of cache records + private long startup; // startup time; for cache aging + private kelondroMScoreCluster cacheScore; // controls cache aging + + + public kelondroRecords(File file, long buffersize /* bytes */, + short ohbytec, short ohhandlec, + int[] columns, int FHandles, int txtProps, int 
txtPropWidth) throws IOException { + // creates a new file + // file: the file that shall be created + // oha : overhead size array of four bytes: oha[0]=# of bytes, oha[1]=# of shorts, oha[2]=# of ints, oha[3]=# of longs, + // columns: array with size of column width; columns.length is number of columns + // FHandles: number of integer properties + // txtProps: number of text properties + + if (file.exists()) + throw new IOException("kelondroRecords: tree file " + file + " already exist"); + this.filename = file.getCanonicalPath(); + kelondroRA raf = new kelondroFileRA(this.filename); + init(raf, ohbytec, ohhandlec, columns, FHandles, txtProps, txtPropWidth); + this.cachesize = (int) (buffersize / ((long) (overhead + recordsize))); + if (cachesize <= 0) { + cachesize = 0; + this.cache = null; + this.cacheScore = null; + } else { + this.cache = new HashMap(); + this.cacheScore = new kelondroMScoreCluster(); + } + this.startup = System.currentTimeMillis(); + } + + public kelondroRecords(kelondroRA ra, long buffersize /* bytes */, + short ohbytec, short ohhandlec, + int[] columns, int FHandles, int txtProps, int txtPropWidth) throws IOException { + this.filename = null; + init(ra, ohbytec, ohhandlec, columns, FHandles, txtProps, txtPropWidth); + this.cachesize = (int) (buffersize / ((long) (overhead + recordsize))); + if (cachesize <= 0) { + cachesize = 0; + this.cache = null; + this.cacheScore = null; + } else { + this.cache = new HashMap(); + this.cacheScore = new kelondroMScoreCluster(); + } + this.startup = System.currentTimeMillis(); + } + + private void init(kelondroRA ra, short ohbytec, short ohhandlec, + int[] columns, int FHandles, int txtProps, int txtPropWidth) throws IOException { + + // store dynamic run-time data + this.entryFile = ra; + this.overhead = ohbytec + 4 * ohhandlec; + this.recordsize = this.overhead; + for (int i = 0; i < columns.length; i++) this.recordsize += columns[i]; + + // store dynamic run-time seek pointers + POS_HANDLES = 
POS_COLWIDTHS + columns.length * 4; + POS_TXTPROPS = POS_HANDLES + FHandles * 4; + POS_NODES = POS_TXTPROPS + txtProps * txtPropWidth; + + // store dynamic back-up variables + USEDC = 0; + FREEC = 0; + FREEH = new Handle(NUL); + OHBYTEC = ohbytec; + OHHANDLEC = ohhandlec; + COLWIDTHS = columns; + HANDLES = new Handle[FHandles]; for (int i = 0; i < FHandles; i++) HANDLES[i] = new Handle(NUL); + TXTPROPS = new byte[txtProps][]; for (int i = 0; i < txtProps; i++) TXTPROPS[i] = new byte[0]; + TXTPROPW = txtPropWidth; + + // write data to file + entryFile.seek(POS_MAGIC); entryFile.writeByte(4); // magic marker for this file type + entryFile.seek(POS_BUSY); entryFile.writeByte(0); // unlock: default + entryFile.seek(POS_PORT); entryFile.writeShort(4444); // default port (not used yet) + entryFile.seek(POS_DESCR); entryFile.write("--AnomicRecords file structure--".getBytes()); + entryFile.seek(POS_COLUMNS); entryFile.writeShort(this.COLWIDTHS.length); + entryFile.seek(POS_OHBYTEC); entryFile.writeShort(OHBYTEC); + entryFile.seek(POS_OHHANDLEC); entryFile.writeShort(OHHANDLEC); + entryFile.seek(POS_USEDC); entryFile.writeInt(this.USEDC); + entryFile.seek(POS_FREEC); entryFile.writeInt(this.FREEC); + entryFile.seek(POS_FREEH); entryFile.writeInt(this.FREEH.index); + entryFile.seek(POS_MD5PW); entryFile.write("PASSWORDPASSWORD".getBytes()); + entryFile.seek(POS_ENCRYPTION); entryFile.write("ENCRYPTION!#$%&?".getBytes()); + entryFile.seek(POS_OFFSET); entryFile.writeLong(POS_NODES); + entryFile.seek(POS_INTPROPC); entryFile.writeInt(FHandles); + entryFile.seek(POS_TXTPROPC); entryFile.writeInt(txtProps); + entryFile.seek(POS_TXTPROPW); entryFile.writeInt(txtPropWidth); + + // write configuration arrays + for (int i = 0; i < this.COLWIDTHS.length; i++) { + entryFile.seek(POS_COLWIDTHS + 4 * i); + entryFile.writeInt(COLWIDTHS[i]); + } + for (int i = 0; i < this.HANDLES.length; i++) { + entryFile.seek(POS_HANDLES + 4 * i); + entryFile.writeInt(NUL); + HANDLES[i] = new 
Handle(NUL); + } + for (int i = 0; i < this.TXTPROPS.length; i++) { + entryFile.seek(POS_TXTPROPS + TXTPROPW * i); + for (int j = 0; j < TXTPROPW; j++) entryFile.writeByte(0); + } + + // thats it! + } + + public kelondroRecords(File file, long buffersize) throws IOException{ + // opens an existing tree + if (!file.exists()) throw new IOException("kelondroRecords: tree file " + file + " does not exist"); + + this.filename = file.getCanonicalPath(); + kelondroRA raf = new kelondroFileRA(this.filename); + init(raf); + this.cachesize = (int) (buffersize / ((long) (overhead + recordsize))); + if (cachesize <= 0) { + cachesize = 0; + this.cache = null; + this.cacheScore = null; + } else { + this.cache = new HashMap(); + this.cacheScore = new kelondroMScoreCluster(); + } + this.startup = System.currentTimeMillis(); + } + + public kelondroRecords(kelondroRA ra, long buffersize) throws IOException{ + this.filename = null; + init(ra); + this.cachesize = (int) (buffersize / ((long) (overhead + recordsize))); + if (cachesize <= 0) { + cachesize = 0; + this.cache = null; + this.cacheScore = null; + } else { + this.cache = new HashMap(); + this.cacheScore = new kelondroMScoreCluster(); + } + this.startup = System.currentTimeMillis(); + } + + private void init(kelondroRA ra) throws IOException{ + // assign values that are only present at run-time + this.entryFile = ra; + + // read dynamic variables that are back-ups of stored values in file; read/defined on instantiation + entryFile.seek(POS_USEDC); this.USEDC = entryFile.readInt(); + entryFile.seek(POS_FREEC); this.FREEC = entryFile.readInt(); + entryFile.seek(POS_FREEH); this.FREEH = new Handle(entryFile.readInt()); + + entryFile.seek(POS_OHBYTEC); OHBYTEC = entryFile.readShort(); + entryFile.seek(POS_OHHANDLEC); OHHANDLEC = entryFile.readShort(); + + entryFile.seek(POS_COLUMNS); this.COLWIDTHS = new int[entryFile.readShort()]; + entryFile.seek(POS_INTPROPC); this.HANDLES = new Handle[entryFile.readInt()]; + 
entryFile.seek(POS_TXTPROPC); this.TXTPROPS = new byte[entryFile.readInt()][]; + entryFile.seek(POS_TXTPROPW); this.TXTPROPW = entryFile.readInt(); + + // calculate dynamic run-time seek pointers + POS_HANDLES = POS_COLWIDTHS + COLWIDTHS.length * 4; + POS_TXTPROPS = POS_HANDLES + HANDLES.length * 4; + POS_NODES = POS_TXTPROPS + TXTPROPS.length * TXTPROPW; + + // read configuration arrays + for (int i = 0; i < COLWIDTHS.length; i++) { + entryFile.seek(POS_COLWIDTHS + 4 * i); + COLWIDTHS[i] = entryFile.readInt(); + } + for (int i = 0; i < HANDLES.length; i++) { + entryFile.seek(POS_HANDLES + 4 * i); + HANDLES[i] = new Handle(entryFile.readInt()); + } + for (int i = 0; i < TXTPROPS.length; i++) { + entryFile.seek(POS_TXTPROPS + TXTPROPW * i); + TXTPROPS[i] = new byte[TXTPROPW]; + entryFile.read(TXTPROPS[i], 0, TXTPROPS[i].length); + } + + // assign remaining values that are only present at run-time + this.overhead = OHBYTEC + 4 * OHHANDLEC; + this.recordsize = this.overhead; for (int i = 0; i < COLWIDTHS.length; i++) this.recordsize += COLWIDTHS[i]; + } + + + protected Node newNode(byte[][] v) { + return new Node(v); + } + + protected Node getNode(Handle handle, Node parentNode, int referenceInParent) { + if (cachesize == 0) return new Node(handle, parentNode, referenceInParent); + Node n = (Node) cache.get(handle); + if (n == null) { + return new Node(handle, parentNode, referenceInParent); + } else { + cacheScore.setScore(handle, (int) ((System.currentTimeMillis() - startup) / 1000)); + return n; + } + } + + protected void deleteNode(Handle handle) throws IOException { + if (cachesize != 0) { + Node n = (Node) cache.get(handle); + if (n != null) { + cacheScore.deleteScore(handle); + cache.remove(handle); + } + } + dispose(handle); + } + + private void checkCacheSpace() { + // check for space in cache + if (cachesize == 0) return; + if (cache.size() >= cachesize) { + // delete one entry + try { + Handle delkey = (Handle) cacheScore.getMinObject(); // error (see 
below) here + cacheScore.deleteScore(delkey); + cache.remove(delkey); + } catch (NoSuchElementException e) { + System.out.println("strange kelondroRecords error: " + e.getMessage() + "; cachesize=" + cachesize + ", cache.size()=" + cache.size() + ", cacheScore.size()=" + cacheScore.size()); + // this is a strange error and could be caused by internal java problems + // we simply clear the cache + this.cacheScore = new kelondroMScoreCluster(); + this.cache = new HashMap(); + + /* + java.util.NoSuchElementException + at java.util.TreeMap.key(TreeMap.java:431) + at java.util.TreeMap.firstKey(TreeMap.java:286) + at de.anomic.kelondro.kelondroMScoreCluster$scoreIterator.internalNext(kelondroMScoreCluster.java:235) + at de.anomic.kelondro.kelondroMScoreCluster$scoreIterator.(kelondroMScoreCluster.java:224) + at de.anomic.kelondro.kelondroMScoreCluster.scores(kelondroMScoreCluster.java:209) + at de.anomic.kelondro.kelondroMScoreCluster.getScores(kelondroMScoreCluster.java:191) + at de.anomic.kelondro.kelondroMScoreCluster.getScores(kelondroMScoreCluster.java:185) + at de.anomic.kelondro.kelondroMScoreCluster.getMinObject(kelondroMScoreCluster.java:181) + at de.anomic.kelondro.kelondroRecords.checkCacheSpace(kelondroRecords.java:344) + at de.anomic.kelondro.kelondroRecords.access$1500(kelondroRecords.java:74) + at de.anomic.kelondro.kelondroRecords$Node.updateNode(kelondroRecords.java:624) + at de.anomic.kelondro.kelondroRecords$Node.(kelondroRecords.java:399) + at de.anomic.kelondro.kelondroRecords$Node.(kelondroRecords.java:350) + at de.anomic.kelondro.kelondroRecords.getNode(kelondroRecords.java:321) + at de.anomic.kelondro.kelondroTree$Search.searchproc(kelondroTree.java:265) + at de.anomic.kelondro.kelondroTree$Search.(kelondroTree.java:246) + at de.anomic.kelondro.kelondroTree.get(kelondroTree.java:317) + at de.anomic.plasma.plasmaURL.exists(plasmaURL.java:106) + at de.anomic.plasma.plasmaSwitchboard.stackCrawl(plasmaSwitchboard.java:389) + at 
de.anomic.plasma.plasmaSwitchboard.processResourceStack(plasmaSwitchboard.java:304) + at de.anomic.plasma.plasmaSwitchboard.deQueue(plasmaSwitchboard.java:262) + at yacyProxy.main(yacyProxy.java:228) + */ + } + } + } + + public class Node { + // an Node holds all information of one row of data. This includes the key to the entry + // which is stored as entry element at position 0 + // an Node object can be created in two ways: + // 1. instantiation with an index number. After creation the Object does not hold any + // value information until such is retrieved using the getValue() method + // 2. instantiation with a value array. the values are not directly written into the + // file. Expanding the tree structure is then done using the save() method. at any + // time it is possible to verify the save state using the saved() predicate. + // Therefore an entry object has three modes: + // a: holding an index information only (saved() = true) + // b: holding value information only (saved() = false) + // c: holding index and value information at the same time (saved() = true) + // which can be the result of one of the two processes as follow: + // (i) created with index and after using the getValue() method, or + // (ii) created with values and after calling the save() method + // the method will therefore throw an IllegalStateException when the following + // process step is performed: + // - create the Node with index and call then the save() method + // this case can be decided with + // ((index != NUL) && (values == null)) + // The save() method represents the insert function for the tree. Balancing functions + // are applied automatically. While balancing, the Node does never change its index key, + // but its parent/child keys. 
+ private byte[] ohBytes = null; // the overhead bytes, OHBYTEC values + private Handle[] ohHandle= null; // the overhead handles, OHHANDLEC values + private byte[][] values = null; // an array of byte[] nodes is the value vector + private Handle handle = new Handle(NUL); // index of the entry, by default NUL means undefined + private Node(byte[][] v) { + // this defines an entry, but it does not lead to writing these entry values to the file + // storing this entry can be done using the 'save()' command + if (v == null) throw new IllegalArgumentException("Node values = NULL"); + if ((v.length != COLWIDTHS.length) || (v.length < 1)) + throw new IllegalArgumentException("Node value vector has wrong length"); + this.values = v; + this.handle = new Handle(NUL); + this.ohBytes = null; + this.ohHandle = null; + } + private Node(Handle handle, Node parentNode, int referenceInParent) { + // this creates an entry with an pre-reserved entry position + // values can be written using the setValues() method + // but we expect that values are already there in the file ready to be read which we do not here + if (handle == null) throw new IllegalArgumentException("INTERNAL ERROR: node handle is null."); + + // the parentNode can be given if an auto-fix in the following case is wanted + if (handle.index > USEDC + FREEC) { + if (parentNode == null) { + throw new IllegalArgumentException("INTERNAL ERROR: node handle index exceeds size. No auto-fix node was submitted. This is a serious failure."); + } else { + try { + Handle[] handles = parentNode.getOHHandle(); + handles[referenceInParent] = null; + parentNode.setOHHandle(handles); + throw new IllegalArgumentException("INTERNAL ERROR: node handle index exceeds size. The bad node has been auto-fixed"); + } catch (IOException ee) { + throw new IllegalArgumentException("INTERNAL ERROR: node handle index exceeds size. 
It was tried to fix the bad node, but failed with an IOException: " + ee.getMessage()); + } + } + } + + // set values and read node + this.values = null; + this.handle.index = handle.index; + this.ohBytes = null; + this.ohHandle = null; + updateNode(); + } + protected Handle handle() { + // if this entry has an index, return it + if (this.handle.index == NUL) throw new IllegalStateException("the entry has no index assigned"); + return new Handle(this.handle.index); + } + protected synchronized void setOHByte(byte[] b) throws IOException { + if (b == null) throw new IllegalArgumentException("setOHByte: setting null value does not make any sense"); + if (b.length != OHBYTEC) throw new IllegalArgumentException("setOHByte: wrong array size"); + if (this.handle.index == NUL) throw new IllegalStateException("setOHByte: no handle assigned"); + if (this.ohBytes == null) this.ohBytes = new byte[OHBYTEC]; + entryFile.seek(seekpos(this.handle)); + for (int j = 0; j < ohBytes.length; j++) { + ohBytes[j] = b[j]; + entryFile.writeByte(b[j]); + } + updateNode(); + } + protected synchronized void setOHHandle(Handle[] i) throws IOException { + if (i == null) throw new IllegalArgumentException("setOHint: setting null value does not make any sense"); + if (i.length != OHHANDLEC) throw new IllegalArgumentException("setOHHandle: wrong array size"); + if (this.handle.index == NUL) throw new IllegalStateException("setOHHandle: no handle assigned"); + if (this.ohHandle == null) this.ohHandle = new Handle[OHHANDLEC]; + entryFile.seek(seekpos(this.handle) + OHBYTEC); + for (int j = 0; j < ohHandle.length; j++) { + ohHandle[j] = i[j]; + if (i[j] == null) + entryFile.writeInt(NUL); + else + entryFile.writeInt(i[j].index); + } + updateNode(); + } + protected synchronized byte[] getOHByte() throws IOException { + if (ohBytes == null) { + if (this.handle.index == NUL) throw new IllegalStateException("Cannot load OH values"); + ohBytes = new byte[OHBYTEC]; + entryFile.seek(seekpos(this.handle)); 
+ for (int j = 0; j < ohBytes.length; j++) { + ohBytes[j] = entryFile.readByte(); + } + updateNode(); + } + return ohBytes; + } + protected synchronized Handle[] getOHHandle() throws IOException { + if (ohHandle == null) { + if (this.handle.index == NUL) throw new IllegalStateException("Cannot load OH values"); + ohHandle = new Handle[OHHANDLEC]; + entryFile.seek(seekpos(this.handle) + OHBYTEC); + int i; + for (int j = 0; j < ohHandle.length; j++) { + i = entryFile.readInt(); + ohHandle[j] = (i == NUL) ? null : new Handle(i); + } + updateNode(); + } + return ohHandle; + } + public synchronized byte[][] setValues(byte[][] row) throws IOException { + // if the index is defined, then write values directly to the file, else only to the object + byte[][] result = getValues(); // previous value (this loads the values if not already happened) + if (this.values == null) this.values = new byte[COLWIDTHS.length][]; + for (int i = 0; i < values.length; i++) { + this.values[i] = row[i]; + } + if (this.handle.index != NUL) { + // store data directly to database + long seek = seekpos(this.handle) + overhead; + for (int i = 0; i < values.length; i++) { + entryFile.seek(seek); + if (values[i] == null) { + for (int j = 0; j < COLWIDTHS[i]; j++) entryFile.writeByte(0); + } else if (values[i].length >= COLWIDTHS[i]) { + entryFile.write(values[i], 0 , COLWIDTHS[i]); + } else { + entryFile.write(values[i]); + for (int j = values[i].length; j < COLWIDTHS[i]; j++) entryFile.writeByte(0); + } + seek = seek + COLWIDTHS[i]; + } + } + //System.out.print("setValues result: "); for (int i = 0; i < values.length; i++) System.out.print(new String(result[i]) + " "); System.out.println("."); + updateNode(); + return result; // return previous value + } + + public synchronized byte[] getKey() throws IOException { + if ((values == null) || (values[0] == null)) { + // load from database, but ONLY the key! 
+ if (this.handle.index == NUL) { + throw new IllegalStateException("Cannot load Key"); + } else { + values = new byte[COLWIDTHS.length][]; + entryFile.seek(seekpos(this.handle) + overhead); + values[0] = new byte[COLWIDTHS[0]]; + entryFile.read(values[0], 0, values[0].length); + for (int i = 1; i < COLWIDTHS.length; i++) values[i] = null; + updateNode(); + return values[0]; + } + } else { + return values[0]; + } + } + + public synchronized byte[][] getValues() throws IOException { + if ((values == null) || (values[0] == null)) { + // load ALL values from database + if (this.handle.index == NUL) { + throw new IllegalStateException("Cannot load values"); + } else { + values = new byte[COLWIDTHS.length][]; + long seek = seekpos(this.handle) + overhead; + for (int i = 0; i < COLWIDTHS.length; i++) { + entryFile.seek(seek); + values[i] = new byte[COLWIDTHS[i]]; + entryFile.read(values[i], 0, values[i].length); + seek = seek + COLWIDTHS[i]; + } + updateNode(); + return values; + } + } else if ((values.length > 1) && (values[1] == null)) { + // only the key has been read; load the remaining + long seek = seekpos(this.handle) + overhead + COLWIDTHS[0]; + for (int i = 1; i < COLWIDTHS.length; i++) { + entryFile.seek(seek); + values[i] = new byte[COLWIDTHS[i]]; + entryFile.read(values[i], 0, values[i].length); + seek = seek + COLWIDTHS[i]; + } + updateNode(); + return values; + } else { + return values; + } + } + protected void save() throws IOException { + // this is called when an entry was defined with values only and not by retrieving with an index + // if this happens, nothing of the internal array values have been written to the file + // then writing to the file is done here + // can only be called if the index has not been defined yet + if (this.handle.index != NUL) { + throw new IllegalStateException("double assignement of handles"); + } + // create new index by expanding the file at the end + // or by recycling used records + this.handle = new Handle(); + // place 
the data to the file + if ((values == null) || ((values != null) && (values.length > 1) && (values[1] == null))) { + // there is nothing to save + throw new IllegalStateException("no values to save"); + } + entryFile.seek(seekpos(this.handle)); + if (ohBytes == null) {for (int i = 0; i < OHBYTEC; i++) entryFile.writeByte(0);} + else {for (int i = 0; i < OHBYTEC; i++) entryFile.writeByte(ohBytes[i]);} + if (ohHandle == null) {for (int i = 0; i < OHHANDLEC; i++) entryFile.writeInt(0);} + else {for (int i = 0; i < OHHANDLEC; i++) entryFile.writeInt(ohHandle[i].index);} + long seek = seekpos(this.handle) + overhead; + for (int i = 0; i < values.length; i++) { + entryFile.seek(seek); + if (values[i] == null) { + for (int j = 0; j < COLWIDTHS[i]; j++) entryFile.writeByte(0); + } else if (values[i].length >= COLWIDTHS[i]) { + entryFile.write(values[i], 0, COLWIDTHS[i]); + } else { + entryFile.write(values[i]); + for (int j = values[i].length; j < COLWIDTHS[i]; j++) entryFile.writeByte(0); + } + seek = seek + COLWIDTHS[i]; + } + updateNode(); + } + public String toString() { + if (this.handle.index == NUL) return "NULL"; + String s = Integer.toHexString(this.handle.index); + while (s.length() < 4) s = "0" + s; + try { + byte[] b = getOHByte(); + for (int i = 0; i < b.length; i++) s = s + ":b" + b[i]; + Handle[] h = getOHHandle(); + for (int i = 0; i < h.length; i++) if (h[i] == null) s = s + ":hNULL"; else s = s + ":h" + h[i].toString(); + byte[][] content = getValues(); + for (int i = 0; i < content.length; i++) s = s + ":" + (new String(content[i])).trim(); + } catch (IOException e) { + s = s + ":***LOAD ERROR***:" + e.getMessage(); + } + return s; + } + private void updateNode() { + if (cachesize != 0) { + if (!(cache.containsKey(handle))) checkCacheSpace(); + cache.put(handle, this); + cacheScore.setScore(handle, (int) ((System.currentTimeMillis() - startup) / 1000)); + } + } + } + + public synchronized int columns() { + return this.COLWIDTHS.length; + } + + public 
synchronized int columnSize(int column) { + if ((column < 0) || (column >= this.COLWIDTHS.length)) return -1; + return this.COLWIDTHS[column]; + } + + private long seekpos(Handle handle) { + return POS_NODES + ((long) recordsize * handle.index); + } + + // additional properties + protected void setHandle(int pos, Handle handle) throws IOException { + if (pos >= HANDLES.length) throw new IllegalArgumentException("setHandle: handle array exceeded"); + if (handle == null) handle = new Handle(NUL); + HANDLES[pos] = handle; + entryFile.seek(POS_HANDLES + 4 * pos); + entryFile.writeInt(handle.index); + } + + protected Handle getHandle(int pos) throws IOException { + if (pos >= HANDLES.length) throw new IllegalArgumentException("getHandle: handle array exceeded"); + return (HANDLES[pos].index == NUL) ? null : HANDLES[pos]; + } + + // custom texts + public void setText(int pos, byte[] text) throws IOException { + if (pos >= TXTPROPS.length) throw new IllegalArgumentException("setText: text array exceeded"); + if (text.length > TXTPROPW) throw new IllegalArgumentException("setText: text lemgth exceeded"); + if (text == null) text = new byte[0]; + TXTPROPS[pos] = text; + entryFile.seek(POS_TXTPROPS + TXTPROPW * pos); + entryFile.write(text); + } + + public byte[] getText(int pos) throws IOException { + if (pos >= TXTPROPS.length) throw new IllegalArgumentException("getText: text array exceeded"); + return TXTPROPS[pos]; + } + + // Removes all mappings from this map (optional operation). + public synchronized void clear() { + throw new UnsupportedOperationException("clear not supported"); + } + + // Returns true if this map contains no key-value mappings. + public synchronized boolean isEmpty() { + return (USEDC == 0); + } + + // Returns the number of key-value mappings in this map. 
+ public synchronized int size() { + return this.USEDC; + } + + protected int free() { + return this.FREEC; + } + + private void dispose(Handle h) throws IOException { + // delete element with handle h + // this element is then connected to the deleted-chain and can be re-used + // change counter + USEDC--; entryFile.seek(POS_USEDC); entryFile.writeInt(USEDC); + FREEC++; entryFile.seek(POS_FREEC); entryFile.writeInt(FREEC); + // change pointer + if (this.FREEH.index == NUL) { + // the first entry + entryFile.seek(seekpos(h)); entryFile.writeInt(NUL); // write null link at end of free-list + } else { + // another entry + entryFile.seek(seekpos(h)); entryFile.writeInt(this.FREEH.index); // extend free-list + } + // write new FREEH Handle link + this.FREEH = h; + entryFile.seek(POS_FREEH); entryFile.writeInt(this.FREEH.index); + } + + public synchronized void close() throws IOException { + if (this.entryFile != null) this.entryFile.close(); + this.entryFile = null; + } + + protected static String[] line2args(String line) { + // parse the command line + if ((line == null) || (line.length() == 0)) return null; + String args[]; + StringTokenizer st = new StringTokenizer(line); + + args = new String[st.countTokens()]; + for (int i = 0; st.hasMoreTokens(); i++) { + args[i] = st.nextToken(); + } + st = null; + return args; + } + + protected static boolean equals(byte[] a, byte[] b) { + if (a == b) return true; + if ((a == null) || (b == null)) return false; + if (a.length != b.length) return false; + for (int n = 0; n < a.length; n++) if (a[n] != b[n]) return false; + return true; + } + + public static byte[] long2bytes(long x, int length) { + byte[] b = new byte[length]; + for (int i = length - 1; i >= 0; i--) { + b[i] = (byte) (x & 0XFF); + x >>= 8; + } + return b; + } + + public static long bytes2long(byte[] b) { + long x = 0; + for (int i = 0; i < b.length; i++) x = (x << 8) | (0xff & (int) b[i]); + return x; + } + + public synchronized void print(boolean records) { + 
System.out.println("REPORT FOR FILE '" + this.filename + "':"); + System.out.println("--"); + System.out.println("CONTROL DATA"); + System.out.print( " HANDLES : " + HANDLES.length + " int-values"); + if (HANDLES.length == 0) System.out.println(); else { + System.out.print(" {" + HANDLES[0].toString()); + for (int i = 1; i < HANDLES.length; i++) System.out.print(", " + HANDLES[i].toString()); + System.out.println("}"); + } + System.out.print( " TXTPROPS : " + TXTPROPS.length + " strings, max length " + TXTPROPW + " bytes"); + if (TXTPROPS.length == 0) System.out.println(); else { + System.out.print(" {'" + (new String(TXTPROPS[0])).trim()); System.out.print("'"); + for (int i = 1; i < TXTPROPS.length; i++) System.out.print(", '" + (new String(TXTPROPS[i])).trim() + "'"); + System.out.println("}"); + } + System.out.println(" USEDC : " + this.USEDC); + System.out.println(" FREEC : " + this.FREEC); + System.out.println(" FREEH : " + FREEH.toString()); + System.out.println(" Data Offset: 0x" + Long.toHexString(POS_NODES)); + System.out.println("--"); + System.out.println("RECORDS"); + System.out.print( " Columns : " + columns() + " columns {" + COLWIDTHS[0]); + for (int i = 1; i < columns(); i++) System.out.print(", " + COLWIDTHS[i]); + System.out.println("}"); + System.out.println(" Overhead : " + this.overhead + " bytes ("+ OHBYTEC + " OH bytes, " + OHHANDLEC + " OH Handles)"); + System.out.println(" Recordsize : " + this.recordsize + " bytes"); + System.out.println("--"); + + if (!(records)) return; + // print also all records + for (int i = 0; i < USEDC + FREEC; i++) System.out.println("NODE: " + new Node(new Handle(i), null, 0).toString()); + } + + public String toString() { + return size() + " RECORDS IN FILE " + filename; + } + + protected class Handle implements Comparable { + private int index; + private Handle() throws IOException { + // reserves a new record and returns index of record + // the return value is not a seek position + // the seek position can 
be retrieved using the seekpos() function + if (FREEC == 0) { + // generate new entry + index = USEDC + FREEC; + USEDC++; entryFile.seek(POS_USEDC); entryFile.writeInt(USEDC); + } else { + // re-use record from free-list + USEDC++; entryFile.seek(POS_USEDC); entryFile.writeInt(USEDC); + FREEC--; entryFile.seek(POS_FREEC); entryFile.writeInt(FREEC); + // take link + if (FREEH.index == NUL) { + System.out.println("INTERNAL ERROR (DATA INCONSISTENCY): re-use of records failed, lost " + (FREEC + 1) + " records. Affected file: " + filename); + // try to heal.. + USEDC = USEDC + FREEC + 1; entryFile.seek(POS_USEDC); entryFile.writeInt(USEDC); + FREEC = 0; entryFile.seek(POS_FREEC); entryFile.writeInt(FREEC); + index = USEDC - 1; + } else { + index = FREEH.index; + // read link to next element to FREEH chain + entryFile.seek(seekpos(FREEH)); FREEH.index = entryFile.readInt(); + // write new FREEH link + entryFile.seek(POS_FREEH); entryFile.writeInt(FREEH.index); + } + } + } + private Handle(int index) { + this.index = index; + } + public String toString() { + if (index == NUL) return "NULL"; + String s = Integer.toHexString(index); + while (s.length() < 4) s = "0" + s; + return s; + } + public boolean equals(Handle h) { + return (this.index == h.index); + } + public boolean equals(Object h) { + return (this.index == ((Handle) h).index); + } + public int compare(Object h0, Object h1) { + if (((Handle) h0).index < ((Handle) h1).index) return -1; + if (((Handle) h0).index > ((Handle) h1).index) return 1; + return 0; + } + public int compareTo(Object h) { // this is needed for a treeMap compare + if (index < ((Handle) h).index) return -1; + if (index > ((Handle) h).index) return 1; + return 0; + } + + } + + +} diff --git a/source/de/anomic/kelondro/kelondroStack.java b/source/de/anomic/kelondro/kelondroStack.java new file mode 100644 index 000000000..8b9df2e86 --- /dev/null +++ b/source/de/anomic/kelondro/kelondroStack.java @@ -0,0 +1,351 @@ +// kelondroStack.java +// 
----------------------- +// part of The Kelondro Database +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2004 +// last change: 11.01.2004 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. 
+// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. + +/* + This class extends the kelondroRecords and adds a stack structure +*/ + +package de.anomic.kelondro; + +import java.io.*; +import java.util.*; + +public class kelondroStack extends kelondroRecords { + + // define the Over-Head-Array + private static short thisOHBytes = 0; // our record definition does not need extra bytes + private static short thisOHHandles = 2; // and two handles overhead for a double-chained list + private static short thisFHandles = 2; // two file handles for root handle and handle to last lement + + // define pointers for OH array access + private static int left = 0; // pointer for OHHandle-array: handle()-Value of left child Node + private static int right = 1; // pointer for OHHandle-array: handle()-Value of right child Node + private static int root = 0; // pointer for FHandles-array: pointer to root node + private static int toor = 1; // pointer for FHandles-array: pointer to root node + + public kelondroStack(File file, long buffersize, int key, int value) throws IOException { + this(file, buffersize, new int[] {key, value}); + } + + public kelondroStack(File file, long buffersize, int[] columns) throws IOException { + // this creates a new tree + super(file, buffersize, thisOHBytes, thisOHHandles, columns, thisFHandles, columns.length /*txtProps*/, 80 /*txtPropWidth*/); + setHandle(root, null); // define the root value + setHandle(toor, null); // define the toor value + } + + public kelondroStack(File file, long buffersize) throws IOException{ + // this opens a file with an existing tree + super(file, 
buffersize); + } + + public class Counter implements Iterator { + Handle nextHandle = null; + public Counter() throws IOException { + nextHandle = getHandle(root); + } + public boolean hasNext() { + return (nextHandle != null); + } + public Object next() { + Handle ret = nextHandle; + try { + nextHandle = getNode(nextHandle, null, 0).getOHHandle()[right]; + } catch (IOException e) { + System.err.println("IO error at Counter:next()"); + } + return getNode(ret, null, 0); + } + public void remove() { + throw new UnsupportedOperationException("no remove here.."); + } + } + + + public synchronized void push(byte[][] row) throws IOException { + if (row.length != columns()) throw new IllegalArgumentException("push: wrong row length " + row.length + "; must be " + columns()); + // check if there is already a stack + if (getHandle(toor) == null) { + if (getHandle(root) != null) throw new RuntimeException("push: internal organisation of root and toor"); + // create node + Node n = newNode(row); + n.save(); + n.setOHHandle(new Handle[] {null, null}); + n.setValues(row); + // assign handles + setHandle(root, n.handle()); + setHandle(toor, n.handle()); + // thats it + } else { + // expand the list at the end + Node n = newNode(row); + n.save(); + n.setOHHandle(new Handle[] {getHandle(toor),null}); + Node n1 = getNode(getHandle(toor), null, 0); + n1.setOHHandle(new Handle[] {n1.getOHHandle()[left], n.handle()}); + // assign handles + setHandle(toor, n.handle()); + // thats it + } + } + + public synchronized byte[][] pop() throws IOException { + // return row ontop of the stack and shrink stack by one + Handle h = getHandle(toor); + if (h == null) return null; + Node n = getNode(h, null, 0); + byte[][] ret = n.getValues(); + // shrink stack + Handle l = n.getOHHandle()[left]; + if (l == null) { + // the stack will be empty, write the root handle + setHandle(root, null); + } else { + // un-link the previous record + Node k = getNode(l, null, 0); + k.setOHHandle(new Handle[] 
{k.getOHHandle()[left], null}); + } + setHandle(toor, l); + deleteNode(h); + return ret; + } + + public synchronized byte[][] top() throws IOException { + // return row ontop of the stack + Handle h = getHandle(toor); + if (h == null) return null; + return getNode(h, null, 0).getValues(); + } + + public synchronized byte[][] top(int dist) throws IOException { + // return row ontop of the stack + // with dist == 0 this is the same function as with top() + Handle h = getHandle(toor); + if (h == null) return null; + if (dist >= size()) return null; // that would exceed the stack + while (dist-- > 0) h = getNode(h, null, 0).getOHHandle()[left]; // track through elements + return getNode(h, null, 0).getValues(); + } + + public synchronized byte[][] pot() throws IOException { + // return row on the bottom of the stack and remove record + Handle h = getHandle(root); + if (h == null) return null; + Node n = getNode(h, null, 0); + byte[][] ret = n.getValues(); + // shrink stack + Handle r = n.getOHHandle()[right]; + if (r == null) { + // the stack will be empty, write the toor handle + setHandle(toor, null); + } else { + // un-link the next record + Node k = getNode(r, null, 0); + k.setOHHandle(new Handle[] {null, k.getOHHandle()[right]}); + } + setHandle(root, r); + deleteNode(h); + return ret; + } + + public synchronized byte[][] bot() throws IOException { + // return row on the bottom of the stack + Handle h = getHandle(root); + if (h == null) return null; + return getNode(h, null, 0).getValues(); + } + + public synchronized byte[][] bot(int dist) throws IOException { + // return row on bottom of the stack + // with dist == 0 this is the same function as with bot() + Handle h = getHandle(root); + if (h == null) return null; + if (dist >= size()) return null; // that would exceed the stack + while (dist-- > 0) h = getNode(h, null, 0).getOHHandle()[right]; // track through elements + return getNode(h, null, 0).getValues(); + } + + /* + public synchronized byte[][] 
seekPop(byte[] key, long maxdepth) throws IOException { + } + + public synchronized byte[][] seekPot(byte[] key, long maxdepth) throws IOException { + } + */ + + public Iterator iterator() { + // iterates the elements in an ordered way. returns Node - type Objects + try { + return new Counter(); + } catch (IOException e) { + throw new RuntimeException("error creating an iteration: " + e.getMessage()); + } + } + + public int imp(File file, String separator) throws IOException { + // imports a value-separated file, returns number of records that have been read + RandomAccessFile f = new RandomAccessFile(file,"r"); + String s; + StringTokenizer st; + int recs = 0; + byte[][] buffer = new byte[columns()][]; + int c; + int line = 0; + while ((s = f.readLine()) != null) { + s = s.trim(); + line++; + if ((s.length() > 0) && (!(s.startsWith("#")))) { + st = new StringTokenizer(s, separator); + // buffer the entry + c = 0; + while ((c < columns()) && (st.hasMoreTokens())) { + buffer[c++] = st.nextToken().trim().getBytes(); + } + if ((st.hasMoreTokens()) || (c != columns())) { + System.err.println("inapropriate number of entries in line " + line); + } else { + push(buffer); + recs++; + } + + } + } + return recs; + } + + public String hp(Handle h) { + if (h == null) return "NULL"; else return h.toString(); + } + + public void print() { + super.print(false); + Handle h; + Node n; + try { + Iterator it = iterator(); + while (it.hasNext()) { + h = (Handle) it.next(); + n = getNode(h, null, 0); + System.out.println("> NODE " + hp(h) + + "; left " + hp(n.getOHHandle()[left]) + ", right " + hp(n.getOHHandle()[right])); + System.out.print(" KEY:'" + (new String(n.getValues()[0])).trim() + "'"); + for (int j = 1; j < columns(); j++) + System.out.print(", V[" + j + "]:'" + (new String(n.getValues()[j])).trim() + "'"); + System.out.println(); + } + System.out.println(); + } catch (IOException e) { + System.out.println("File error: " + e.getMessage()); + } + } + + private static void 
cmd(String[] args) { + System.out.print("kelondroStack "); + for (int i = 0; i < args.length; i++) System.out.print(args[i] + " "); + System.out.println(""); + byte[] ret = null; + try { + if ((args.length > 4) || (args.length < 2)) { + System.err.println("usage: kelondroStack -c|-p|-v|-g|-i|-s [file]|[key [value]] "); + System.err.println("( create, push, view, (g)pop, imp, shell)"); + System.exit(0); + } else if (args.length == 2) { + kelondroStack fm = new kelondroStack(new File(args[1]), 0x100000); + if (args[0].equals("-v")) { + fm.print(); + ret = null; + } else if (args[0].equals("-g")) { + fm = new kelondroStack(new File(args[1]), 0x100000); + byte[][] ret2 = fm.pop(); + ret = ((ret2 == null) ? null : ret2[1]); + fm.close(); + } + fm.close(); + } else if (args.length == 3) { + if (args[0].equals("-i")) { + kelondroStack fm = new kelondroStack(new File(args[2]), 0x100000); + int i = fm.imp(new File(args[1]),";"); + fm.close(); + ret = (i + " records imported").getBytes(); + } else if (args[0].equals("-s")) { + String db = args[2]; + BufferedReader f = new BufferedReader(new FileReader(args[1])); + String m; + while (true) { + m = f.readLine(); + if (m == null) break; + if ((m.length() > 1) && (!m.startsWith("#"))) { + m = m + " " + db; + cmd(line2args(m)); + } + } + ret = null; + } + } else if (args.length == 4) { + if (args[0].equals("-c")) { + // create + File f = new File(args[3]); + if (f.exists()) f.delete(); + int[] lens = new int[2]; + lens[0] = Integer.parseInt(args[1]); + lens[1] = Integer.parseInt(args[2]); + kelondroStack fm = new kelondroStack(f, 0x100000, lens); + fm.close(); + } else if (args[0].equals("-p")) { + kelondroStack fm = new kelondroStack(new File(args[3]), 0x100000); + fm.push(new byte[][] {args[1].getBytes(), args[2].getBytes()}); + fm.close(); + } + } + if (ret == null) + System.out.println("NULL"); + else + System.out.println(new String(ret)); + } catch (Exception e) { + e.printStackTrace(); + } + } + + public static void 
main(String[] args) { + cmd(args); + } + +} diff --git a/source/de/anomic/kelondro/kelondroTables.java b/source/de/anomic/kelondro/kelondroTables.java new file mode 100644 index 000000000..9010e7536 --- /dev/null +++ b/source/de/anomic/kelondro/kelondroTables.java @@ -0,0 +1,205 @@ +// kelondroTables.java +// ----------------------- +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2005 +// last major change: 13.03.2005 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. 
The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. +// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. + + +// this is mainly a convenience class to bundle many kelondroMap Objects + +package de.anomic.kelondro; +import java.io.*; +import java.util.*; + +public class kelondroTables { + + HashMap mTables, tTables, sTables; + File tablesPath; + + public kelondroTables(File tablesPath) { + this.mTables = new HashMap(); + this.tTables = new HashMap(); + this.sTables = new HashMap(); + this.tablesPath = tablesPath; + if (!(tablesPath.exists())) tablesPath.mkdirs(); + } + + public void declareMaps(String tablename, int keysize, int nodesize) throws IOException { + declareMaps(tablename, keysize, nodesize, null, null); + } + + public void declareMaps(String tablename, int keysize, int nodesize, String[] sortfields, String[] accfields) throws IOException { + declareMaps(tablename, keysize, nodesize, sortfields, accfields, 0x800); + } + + public void declareMaps(String tablename, int keysize, int nodesize, String[] sortfields, String[] accfields, long buffersize /*bytes*/) throws IOException { + if (mTables.containsKey(tablename)) throw new RuntimeException("kelondroTables.declareMap: table '" + tablename + "' 
declared twice."); + if (tTables.containsKey(tablename)) throw new RuntimeException("kelondroTables.declareMap: table '" + tablename + "' declared already in other context."); + File tablefile = new File(tablesPath, "table." + tablename + ".mdb"); + kelondroDyn dyn; + if (tablefile.exists()) { + dyn = new kelondroDyn(tablefile, buffersize); + } else { + tablefile.getParentFile().mkdirs(); + dyn = new kelondroDyn(tablefile, buffersize, keysize, nodesize); + } + kelondroMap map = new kelondroMap(dyn, sortfields, accfields); + mTables.put(tablename, map); + } + + public void declareTree(String tablename, int[] columns, long buffersize /*bytes*/) throws IOException { + if (mTables.containsKey(tablename)) throw new RuntimeException("kelondroTables.declareTree: table '" + tablename + "' declared already in other context."); + if (tTables.containsKey(tablename)) throw new RuntimeException("kelondroTables.declareTree: table '" + tablename + "' declared twice."); + File tablefile = new File(tablesPath, "table." 
+ tablename + ".tdb"); + kelondroTree Tree; + if (tablefile.exists()) { + Tree = new kelondroTree(tablefile, buffersize); + } else { + tablefile.getParentFile().mkdirs(); + Tree = new kelondroTree(tablefile, buffersize, columns); + } + tTables.put(tablename, Tree); + } + + public synchronized void update(String tablename, String key, Map map) throws IOException { + kelondroMap table = (kelondroMap) mTables.get(tablename); + if (table == null) throw new RuntimeException("kelondroTables.update: map table '" + tablename + "' does not exist."); + table.set(key, map); + mTables.put(tablename, table); + } + + public synchronized void update(String tablename, byte[][] row /* first element is the unique key = index */) throws IOException { + kelondroTree tree = (kelondroTree) tTables.get(tablename); + if (tree == null) throw new RuntimeException("kelondroTables.update: tree table '" + tablename + "' does not exist."); + tree.put(row); + tTables.put(tablename, tree); + } + + public synchronized void update(String tablename, String key, long[] row) throws IOException { + kelondroTree tree = (kelondroTree) tTables.get(tablename); + if (tree == null) throw new RuntimeException("kelondroTables.update: tree table '" + tablename + "' does not exist."); + tree.putLong(key.getBytes(), row); + tTables.put(tablename, tree); + } + + public synchronized Map selectMap(String tablename, String key) throws IOException { + kelondroMap table = (kelondroMap) mTables.get(tablename); + if (table == null) throw new RuntimeException("kelondroTables.selectMap: map table '" + tablename + "' does not exist."); + return table.get(key); + } + + public synchronized byte[][] selectByte(String tablename, String key) throws IOException { + kelondroTree tree = (kelondroTree) tTables.get(tablename); + if (tree == null) throw new RuntimeException("kelondroTables.selectByte: tree table '" + tablename + "' does not exist."); + return tree.get(key.getBytes()); + } + + public synchronized long[] 
selectLong(String tablename, String key) throws IOException { + kelondroTree tree = (kelondroTree) tTables.get(tablename); + if (tree == null) throw new RuntimeException("kelondroTables.selectLong: tree table '" + tablename + "' does not exist."); + return tree.getLong(key.getBytes()); + } + + public synchronized kelondroMap.mapIterator /* of Map-Elements */ maps(String tablename, boolean up, boolean rotating) throws IOException { + kelondroMap table = (kelondroMap) mTables.get(tablename); + if (table == null) throw new RuntimeException("kelondroTables.maps: map table '" + tablename + "' does not exist."); + return table.maps(up, rotating); + } + + public synchronized kelondroMap.mapIterator /* of Map-Elements */ maps(String tablename, boolean up, boolean rotating, byte[] firstKey) throws IOException { + kelondroMap table = (kelondroMap) mTables.get(tablename); + if (table == null) throw new RuntimeException("kelondroTables.maps: map table '" + tablename + "' does not exist."); + return table.maps(up, rotating, firstKey); + } + + public synchronized kelondroMap.mapIterator /* of Map-Elements */ maps(String tablename, boolean up, String field) { + kelondroMap table = (kelondroMap) mTables.get(tablename); + if (table == null) throw new RuntimeException("kelondroTables.maps: map table '" + tablename + "' does not exist."); + return table.maps(up, field); + } + + public synchronized Iterator /* of byte[][]-Elements */ rows(String tablename, boolean up, boolean rotating, byte[] firstKey) throws IOException { + kelondroTree tree = (kelondroTree) tTables.get(tablename); + if (tree == null) throw new RuntimeException("kelondroTables.bytes: tree table '" + tablename + "' does not exist."); + return tree.rows(up, rotating, firstKey); + } + + public synchronized Iterator /* of byte[][]-Elements */ rows(String tablename, boolean up, boolean rotating) throws IOException { + kelondroTree tree = (kelondroTree) tTables.get(tablename); + if (tree == null) throw new 
RuntimeException("kelondroTables.selectOrderBy: tree table '" + tablename + "' does not exist."); + return tree.rows(up, rotating); + } + + // if you need the long-values from a row-iteration, please use kelondroRecords.bytes2long to convert from byte[] to long + + public synchronized void delete(String tablename, String key) throws IOException { + kelondroMap table = (kelondroMap) mTables.get(tablename); + if (table != null) {table.remove(key); mTables.put(tablename, table); return;} + + kelondroTree Tree = (kelondroTree) tTables.get(tablename); + if (Tree != null) {Tree.remove(key.getBytes()); tTables.put(tablename, Tree); return;} + + throw new RuntimeException("kelondroTables.delete: table '" + tablename + "' does not exist."); + } + + public synchronized long accumulator(String tablename, String field) { + kelondroMap table = (kelondroMap) mTables.get(tablename); + if (table == null) throw new RuntimeException("kelondroTables.accumulator: map table '" + tablename + "' does not exist."); + return table.getAcc(field); + } + + public synchronized int size(String tablename) { + kelondroMap table = (kelondroMap) mTables.get(tablename); + if (table != null) return table.size(); + + kelondroTree Tree = (kelondroTree) tTables.get(tablename); + if (Tree != null) return Tree.size(); + + throw new RuntimeException("kelondroTables.accumulator: table '" + tablename + "' does not exist."); + } + + public void close() throws IOException { + Iterator tablesIt = mTables.values().iterator(); + while (tablesIt.hasNext()) ((kelondroMap) tablesIt.next()).close(); + mTables = null; + + Iterator TreeIt = tTables.values().iterator(); + while (TreeIt.hasNext()) ((kelondroTree) TreeIt.next()).close(); + tTables = null; + } + +} diff --git a/source/de/anomic/kelondro/kelondroTree.java b/source/de/anomic/kelondro/kelondroTree.java new file mode 100644 index 000000000..24a91f9a9 --- /dev/null +++ b/source/de/anomic/kelondro/kelondroTree.java @@ -0,0 +1,1389 @@ +// kelondroTree.java +// 
----------------------- +// part of The Kelondro Database +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2004 +// last major change: 07.02.2005 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. 
+// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. + +/* + This class extends the kelondroRecords and adds a tree structure +*/ + +package de.anomic.kelondro; + +import java.io.*; +import java.util.*; + +public class kelondroTree extends kelondroRecords implements Comparator { + + // define the Over-Head-Array + private static short thisOHBytes = 2; // our record definition of two bytes + private static short thisOHHandles = 3; // and three handles overhead + private static short thisFHandles = 1; // file handles: one for a root pointer + + // define pointers for OH array access + private static int magic = 0; // pointer for OHByte-array: marker for Node purpose; defaults to 1 + private static int balance = 1; // pointer for OHByte-array: balance value of tree node; balanced = 0 + + private static int parent = 0; // pointer for OHHandle-array: handle()-Value of parent Node + private static int leftchild = 1; // pointer for OHHandle-array: handle()-Value of left child Node + private static int rightchild = 2; // pointer for OHHandle-array: handle()-Value of right child Node + + private static int root = 0; // pointer for FHandles-array: pointer to root node + + public kelondroTree(File file, long buffersize, int key, int value) throws IOException { + this(file, buffersize, new int[] {key, value}, 1, 8); + } + + public kelondroTree(kelondroRA ra, long buffersize, int key, int value) throws IOException { + this(ra, buffersize, new int[] {key, value}, 1, 8); + } + + public kelondroTree(File file, long buffersize, int[] columns) throws IOException { + // this creates a new tree + this(file, 
buffersize, columns, columns.length /*txtProps*/, 80 /*txtPropWidth*/); + } + + public kelondroTree(File file, long buffersize, + int[] columns, int txtProps, int txtPropsWidth) throws IOException { + // this creates a new tree + super(file, buffersize, + thisOHBytes, thisOHHandles, + columns, thisFHandles, columns.length /*txtProps*/, 80 /*txtPropWidth*/); + setHandle(root, null); // define the root value + } + + public kelondroTree(kelondroRA ra, long buffersize, int[] columns) throws IOException { + // this creates a new tree + this(ra, buffersize, columns, columns.length /*txtProps*/, 80 /*txtPropWidth*/); + } + + public kelondroTree(kelondroRA ra, long buffersize, int[] columns, int txtProps, int txtPropsWidth) throws IOException { + // this creates a new tree + super(ra, buffersize, thisOHBytes, thisOHHandles, columns, thisFHandles, txtProps, txtPropsWidth); + setHandle(root, null); // define the root value + } + + public kelondroTree(File file, long buffersize) throws IOException{ + // this opens a file with an existing tree + super(file, buffersize); + } + + public kelondroTree(kelondroRA ra, long buffersize) throws IOException{ + // this opens a file with an existing tree + super(ra, buffersize); + } + + private static byte abs(byte b) { + // for height computation + if (b < 0) return (byte) -b; else return b; + } + + // Returns the value to which this map maps the specified key. 
+ public synchronized byte[][] get(byte[] key) throws IOException { + Search search = new Search(key); + if (search.found()) { + return search.getMatcher().getValues(); + } else { + return null; + } + } + + public synchronized long[] getLong(byte[] key) throws IOException { + byte[][] row = get(key); + long[] longs = new long[columns() - 1]; + if (row == null) { + for (int i = 0; i < columns() - 1; i++) { + longs[i] = 0; + } + } else { + for (int i = 0; i < columns() - 1; i++) { + longs[i] = bytes2long(row[i + 1]); + } + } + return longs; + } + + public class Search { + + // a search object combines the results of a search in the tree, which are + // - the searched object is found, an index pointing to the node can be returned + // - the object was not found, an index pointing to an appropriate possible parent node can + // be returned, together with the information wether the new key shall be left or right child. + // + + private byte[] key; + private Node thenode, parentnode; + private boolean found; // property if node was found + private byte child; // -1: left child; 0: root node; 1: right child + + protected Search(byte[] key) throws IOException { + this.key = key; + searchproc(); + } + protected Search(Node node) throws IOException { + this.key = node.getKey(); + searchproc(); + } + + private void searchproc() throws IOException { + // searchs the database for the key and stores the result in the thisHandle + // if the key was found, then found=true, thisHandle and leftchild is set; + // else found=false and thisHandle and leftchild is undefined + Handle thisHandle = getHandle(root); + parentnode = null; + if (key == null) { + child = 0; + if (thisHandle == null) { + thenode = null; + found = false; + } else { + thenode = getNode(thisHandle, null, 0); + found = true; + } + } else { + thenode = null; + child = 0; + found = false; + int c; + Handle[] handles; + while (thisHandle != null) { + try { + parentnode = thenode; + thenode = getNode(thisHandle, 
thenode, (child == -1) ? leftchild : rightchild); + } catch (IllegalArgumentException e) { + System.out.println("WARNING: kelondroTree.Search.searchproc: fixed a broken handle"); + found = false; + return; + } + //System.out.print("Comparing key = '" + new String(key) + "' with '" + new String(thenode.node().getKey()) + "':"); // debug + c = compare(key, thenode.getKey()); + //System.out.println(c); // debug + if (c == 0) { + found = true; + return; + } else if (c < 0) { + child = -1; + thisHandle = thenode.getOHHandle()[leftchild]; + } else { + child = 1; + thisHandle = thenode.getOHHandle()[rightchild]; + } + } + } + // we reached a node where we must insert the new value + // all values are set, just return + } + + public boolean found() { + return found; + } + + public Node getMatcher() throws IOException { + if (found) return thenode; + else throw new IllegalArgumentException("wrong access of matcher"); + } + + public Node getParent() throws IOException { + if (found) return parentnode; else return thenode; + } + + public boolean isRoot() throws IOException { + if (found) throw new IllegalArgumentException("wrong access of isRoot"); + else return (child == 0); + } + + public boolean isLeft() throws IOException { + if (found) throw new IllegalArgumentException("wrong access of leftchild"); + else return (child == -1); + } + + public boolean isRight() throws IOException { + if (found) throw new IllegalArgumentException("wrong access of leftchild"); + else return (child == 1); + } + } + + public synchronized boolean isChild(Node childn, Node parentn, int child) throws IOException { + if (childn == null) throw new IllegalArgumentException("isLeftChild: Node parameter is NULL"); + Handle lc = parentn.getOHHandle()[child]; + if (lc == null) return false; + return (lc.equals(childn.handle())); + } + + private class nodeIterator implements Iterator { + // we implement an iteration! (not a recursive function as the structure would suggest...) 
+ // the iterator iterates Handle objects + Node nextNode = null; + boolean up, rot; + LinkedList nodeStack; + + public nodeIterator(boolean up, boolean rotating) throws IOException { + this(up, rotating, (up) ? firstNode() : lastNode()); + } + + public nodeIterator(boolean up, boolean rotating, Node start) throws IOException { + this.up = up; + this.rot = rotating; + this.nextNode = start; + + // fill node stack for start node + nodeStack = new LinkedList(); + + Handle searchHandle = getHandle(root); + if (searchHandle == null) {nextNode = null; return;} + + Node searchNode = getNode(searchHandle, null, 0); + byte[] startKey = start.getKey(); + int c, ct; + while ((c = compare(startKey, searchNode.getKey())) != 0) { + // the current 'thisNode' is not the start node, put it on the stack + ct = (c < 0) ? leftchild : rightchild; + nodeStack.addLast(new Object[]{searchNode, new Integer(ct)}); + + // go to next node + searchHandle = searchNode.getOHHandle()[ct]; + if (searchHandle == null) throw new IllegalArgumentException("start node does not exist (handle null)"); + searchNode = getNode(searchHandle, searchNode, ct); + if (searchNode == null) throw new IllegalArgumentException("start node does not exist (node null)"); + } + // now every parent node to the start node is on the stack + } + + public boolean hasNext() { + return nextNode != null; + } + + public Object next() { + if (nextNode == null) return null; + Object ret = nextNode; + + // middle-case + + try { + int childtype = (up) ? rightchild : leftchild; + Handle childHandle = nextNode.getOHHandle()[childtype]; + if (childHandle != null) { + //System.out.println("go to other leg, stack size=" + nodeStack.size()); + // we have walked one leg of the tree; now go to the other one: step down to next child + nodeStack.addLast(new Object[]{nextNode, new Integer(childtype)}); + nextNode = getNode(childHandle, nextNode, childtype); + childtype = (up) ? 
leftchild : rightchild; + while ((childHandle = nextNode.getOHHandle()[childtype]) != null) { + try { + nodeStack.addLast(new Object[]{nextNode, new Integer(childtype)}); + nextNode = getNode(childHandle, nextNode, childtype); + } catch (IllegalArgumentException e) { + // return what we have + nodeStack.removeLast(); + return ret; + } + } + // thats it: we are at a place where we can't go further + // nextNode is correct + } else { + //System.out.println("go up"); + // we have walked along both legs of the child-trees. + + // Now step up. + if (nodeStack.size() == 0) { + nextNode = null; + } else { + Object[] stacktop; + Node parent = null; + int parentpointer = (up) ? rightchild : leftchild; + while ((nodeStack.size() != 0) && (parentpointer == ((up) ? rightchild : leftchild))) { + //System.out.println("step up"); + // go on, walk up further + stacktop = (Object[]) nodeStack.removeLast(); // top of stack: Node/parentpointer pair + parent = (Node) stacktop[0]; + parentpointer = ((Integer) stacktop[1]).intValue(); + } + if ((nodeStack.size() == 0) && (parentpointer == ((up) ? 
rightchild : leftchild))) { + nextNode = null; + } else { + nextNode = parent; + } + } + } + } catch (IOException e) { + nextNode = null; + } + + return ret; + } + + public void remove() { + throw new java.lang.UnsupportedOperationException("kelondroTree: remove in kelondro Tables not yet supported"); + } + } + + public synchronized long[] putLong(byte[] key, long[] newlongs) throws IOException { + byte[][] newrow = new byte[newlongs.length + 1][]; + newrow[0] = key; + for (int i = 0; i < newlongs.length; i++) { + newrow[i + 1] = long2bytes(newlongs[i], columnSize(i + 1)); + } + byte[][] oldrow = put(newrow); + long[] oldlongs = new long[columns() - 1]; + if (oldrow == null) { + for (int i = 0; i < columns() - 1; i++) { + oldlongs[i] = 0; + } + } else { + for (int i = 0; i < columns() - 1; i++) { + oldlongs[i] = bytes2long(oldrow[i + 1]); + } + } + return oldlongs; + } + + // Associates the specified value with the specified key in this map + public synchronized byte[][] put(byte[][] newrow) throws IOException { + if (newrow.length != columns()) throw new IllegalArgumentException("put: wrong row length " + newrow.length + "; must be " + columns()); + // first try to find the key element in the database + Search searchResult = new Search(newrow[0]); + if (searchResult.found()) { + // a node with this key exist. 
simply overwrite the content and return old content + Node e = searchResult.getMatcher(); + byte[][] result = e.setValues(newrow); + return result; + } else if (searchResult.isRoot()) { + // a node with this key does not exist and there is no node at all + // this therefore creates the root node if an only if there was no root Node yet + if (getHandle(root) != null) + throw new IllegalArgumentException("tried to create root node twice"); + // we dont have any Nodes in the file, so start here to create one + Node e = newNode(newrow); + e.save(); + // write the propetries + e.setOHByte(new byte[] {1, 0}); // {magic, balance} + e.setOHHandle(new Handle[] {null, null, null}); // {parent, leftchild, rightchild} + // do updates + setHandle(root, e.handle()); + return null; + } else { + // a node with this key does not exist + // this creates a new node if there is already at least a root node + // to create the new node, it is necessary to assign it to a parent + // it must also be defined weather this new node is a left child of the + // parent or not. It is checked if the parent node already has a child on + // that side, but not if the assigned position is appropriate. 
+ + // create new node and assign values + Node theNode = newNode(newrow); theNode.save(); + Node parentNode = searchResult.getParent(); + Handle[] parentOHHandle; + byte[] parentOHByte; + + theNode.setOHByte(new byte[] {1, 0}); // fresh {magic, balance} + theNode.setOHHandle(new Handle[] {parentNode.handle(), null, null}); // {parent, leftchild, rightchild} + + // check consistency and link new node to parent node + parentOHHandle = parentNode.getOHHandle(); // {parent, leftchild, rightchild} + if (searchResult.isLeft()) { + if (parentOHHandle[leftchild] != null) throw new IllegalArgumentException("tried to create leftchild node twice"); + parentOHHandle[leftchild] = theNode.handle(); + } else if (searchResult.isRight()) { + if (parentOHHandle[rightchild] != null) throw new IllegalArgumentException("tried to create rightchild node twice"); + parentOHHandle[rightchild] = theNode.handle(); + } else { + throw new IllegalArgumentException("neither left nor right child"); + } + parentNode.setOHHandle(parentOHHandle); + + // now update recursively the node balance of the parentNode + // what do we have: + // - new Node, called 'theNode' + // - parent Node + + // set balance factor in parent node(s) + boolean increasedHight = true; + byte prevHight; + String path = ""; + while (increasedHight) { + + // update balance + parentOHByte = parentNode.getOHByte(); // {magic, balance} + parentOHHandle = parentNode.getOHHandle(); // {parent, leftchild, rightchild} + prevHight = parentOHByte[balance]; + if ((parentOHHandle[leftchild] != null) && (parentOHHandle[leftchild].equals(theNode.handle()))) { + //isLeftchild + parentOHByte[balance]++; + path = "L" + path; + } + if ((parentOHHandle[rightchild] != null) && (parentOHHandle[rightchild].equals(theNode.handle()))) { + parentOHByte[balance]--; + path = "R" + path; + } + increasedHight = ((abs(parentOHByte[balance]) - abs(prevHight)) > 0); + parentNode.setOHByte(parentOHByte); + + // here we either stop because we had no increased 
hight, + // or we have a balance greater then 1 or less than -1 and we do rotation + // or we crawl up the tree and change the next balance + if (!(increasedHight)) break; // finished + + // check rotation need + if (abs(parentOHByte[balance]) > 1) { + // rotate and stop then + //System.out.println("* DB DEBUG: " + path.substring(0,2) + " ROTATION AT NODE " + parentNode.handle().toString() + ": BALANCE=" + parentOHByte[balance]); + if (path.startsWith("LL")) { + LL_RightRotation(parentNode, theNode); + break; + } + if (path.startsWith("RR")) { + RR_LeftRotation(parentNode, theNode); + break; + } + if (path.startsWith("RL")) { + Handle parentHandle = parentNode.handle(); + LL_RightRotation(theNode, getNode(theNode.getOHHandle()[leftchild], theNode, leftchild)); + parentNode = getNode(parentHandle, null, 0); // reload the parent node + RR_LeftRotation(parentNode, getNode(parentNode.getOHHandle()[rightchild], parentNode, rightchild)); + break; + } + if (path.startsWith("LR")) { + Handle parentHandle = parentNode.handle(); + RR_LeftRotation(theNode, getNode(theNode.getOHHandle()[rightchild], theNode, rightchild)); + parentNode = getNode(parentHandle, null, 0); // reload the parent node + LL_RightRotation(parentNode, getNode(parentNode.getOHHandle()[leftchild], parentNode, leftchild)); + break; + } + break; + } else { + // crawl up the tree + if (parentOHHandle[parent] == null) { + // root reached: stop + break; + } else { + theNode = parentNode; + parentNode = getNode(parentOHHandle[parent] /*previous handles*/, null, 0); + } + } + } + return null; // that means: no previous stored value present + } + } + + private void assignChild(Node parentNode, Node childNode, int childType) throws IOException { + Handle[] parentHandle = parentNode.getOHHandle(); + Handle[] childHandle = childNode.getOHHandle(); + + parentHandle[childType] = childNode.handle(); + childHandle[parent] = parentNode.handle(); + + parentNode.setOHHandle(parentHandle); + 
childNode.setOHHandle(childHandle); + } + + private void replace(Node oldNode, Node oldNodeParent, Node newNode) throws IOException { + // this routine looks where the oldNode is connected to, and replaces + // the anchor's link to the oldNode by the newNode-link + // the new link gets the anchor as parent link assigned + // the oldNode will not be updated, so this must be done outside this routine + Handle[] oldHandle = oldNode.getOHHandle(); // {parent, leftchild, rightchild} + // distinguish case where the oldNode is the root node + if (oldNodeParent == null) { + // this is the root, update root + setHandle(root, newNode.handle()); + // update new Node + Handle[] newHandle = newNode.getOHHandle(); + newHandle[parent] = null; + newNode.setOHHandle(newHandle); + } else { + // not the root, find parent + Handle[] parentHandle = oldNodeParent.getOHHandle(); + // ok, we have the parent, but for updating the child link we must know + // if the oldNode was left or right child + if ((parentHandle[leftchild] != null) && (parentHandle[leftchild].equals(oldNode.handle()))) { + // update left node from parent + parentHandle[leftchild] = newNode.handle(); + } + if ((parentHandle[rightchild] != null) && (parentHandle[rightchild].equals(oldNode.handle()))) { + // update right node from parent + parentHandle[rightchild] = newNode.handle(); + } + oldNodeParent.setOHHandle(parentHandle); + // update new Node + Handle[] newHandle = newNode.getOHHandle(); + newHandle[parent] = oldNodeParent.handle(); + newNode.setOHHandle(newHandle); + } + // finished. remember that we did not set the links to the oldNode + // we have also not set the children of the newNode. + // this must be done somewhere outside this function. + // if the oldNode is not needed any more, it can be disposed (check childs first). 
+ } + + private static byte max0(byte b) { + if (b > 0) return b; else return 0; + } + + private static byte min0(byte b) { + if (b < 0) return b; else return 0; + } + + private void LL_RightRotation(Node parentNode, Node childNode) throws IOException { + // replace the parent node; the parent is afterwards unlinked + Handle p2Handle = parentNode.getOHHandle()[parent]; + Node p2Node = (p2Handle == null) ? null : getNode(p2Handle, null, 0); + replace(parentNode, p2Node, childNode); + + // set the left son of the parent to the right son of the childNode + Handle childOfChild = childNode.getOHHandle()[rightchild]; + if (childOfChild == null) { + Handle[] parentHandle = parentNode.getOHHandle(); + parentHandle[leftchild] = null; + parentNode.setOHHandle(parentHandle); + } else { + assignChild(parentNode, getNode(childOfChild, childNode, rightchild), leftchild); + } + + // link the old parent node as the right child of childNode + assignChild(childNode, parentNode, rightchild); + + // - newBal(parent) = oldBal(parent) - 1 - max(oldBal(leftChild), 0) + // - newBal(leftChild) = oldBal(leftChild) - 1 + min(newBal(parent), 0) + byte[] parentBytes = parentNode.getOHByte(); + byte[] childBytes = childNode.getOHByte(); + byte oldBalParent = parentBytes[balance]; + byte oldBalChild = childBytes[balance]; + parentBytes[balance] = (byte) (oldBalParent - 1 - max0(oldBalChild)); + childBytes[balance] = (byte) (oldBalChild - 1 + min0(parentBytes[balance])); + parentNode.setOHByte(parentBytes); + childNode.setOHByte(childBytes); + } + + private void RR_LeftRotation(Node parentNode, Node childNode) throws IOException { + // replace the parent node; the parent is afterwards unlinked + Handle p2Handle = parentNode.getOHHandle()[parent]; + Node p2Node = (p2Handle == null) ? 
null : getNode(p2Handle, null, 0); + replace(parentNode, p2Node, childNode); + + // set the left son of the parent to the right son of the childNode + Handle childOfChild = childNode.getOHHandle()[leftchild]; + if (childOfChild == null) { + Handle[] parentHandle = parentNode.getOHHandle(); + parentHandle[rightchild] = null; + parentNode.setOHHandle(parentHandle); + } else { + assignChild(parentNode, getNode(childOfChild, childNode, leftchild), rightchild); + } + + // link the old parent node as the left child of childNode + assignChild(childNode, parentNode, leftchild); + + // - newBal(parent) = oldBal(parent) + 1 - min(oldBal(rightChild), 0) + // - newBal(rightChild) = oldBal(rightChild) + 1 + max(newBal(parent), 0) + byte[] parentBytes = parentNode.getOHByte(); + byte[] childBytes = childNode.getOHByte(); + byte oldBalParent = parentBytes[balance]; + byte oldBalChild = childBytes[balance]; + parentBytes[balance] = (byte) (oldBalParent + 1 - min0(oldBalChild)); + childBytes[balance] = (byte) (oldBalChild + 1 + max0(parentBytes[balance])); + parentNode.setOHByte(parentBytes); + childNode.setOHByte(childBytes); + } + + // Associates the specified value with the specified key in this map + public synchronized byte[] put(byte[] key, byte[] value) throws IOException { + byte[][] row = new byte[2][]; + row[0] = key; + row[1] = value; + byte[][] ret = put(row); + if (ret == null) return null; else return ret[1]; + } + + // Removes the mapping for this key from this map if present (optional operation). 
+ public synchronized byte[][] remove(byte[] key) throws IOException { + Search search = new Search(key); + if (search.found()) { + Node result = search.getMatcher(); + byte[][] values = result.getValues(); + remove(result, search.getParent()); + return values; + } else { + return null; + } + } + + public synchronized void removeAll() throws IOException { + while (size() > 0) remove(lastNode(), null); + } + + public synchronized void remove(Node node, Node parentOfNode) throws IOException { + // there are three cases when removing a node + // - the node is a leaf - it can be removed easily + // - the node has one child - the child replaces the node + // - the node has two childs - it can be replaced either + // by the greatest node of the left child or the smallest + // node of the right child + + Handle[] handles = node.getOHHandle(); + Node childnode; + if ((handles[leftchild] == null) && (handles[rightchild] == null)) { + // easy case: the node is a leaf + if (parentOfNode == null) { + // this is the root! 
+ setHandle(root, null); + } else { + Handle[] h = parentOfNode.getOHHandle(); + if ((h[leftchild] != null) && (h[leftchild].equals(node.handle()))) h[leftchild] = null; + if ((h[rightchild] != null) && (h[rightchild].equals(node.handle()))) h[rightchild] = null; + parentOfNode.setOHHandle(h); + } + } else if ((handles[leftchild] != null) && (handles[rightchild] == null)) { + replace(node, parentOfNode, getNode(handles[leftchild], node, leftchild)); + } else if ((handles[leftchild] == null) && (handles[rightchild] != null)) { + replace(node, parentOfNode, getNode(handles[rightchild], node, rightchild)); + } else { + // difficult case: node has two children + Node repl = lastNode(getNode(handles[leftchild], node, leftchild)); + //System.out.println("last node is " + repl.toString()); + // we remove that replacement node and put it where the node was + // this seems to be recursive, but is not since the replacement + // node cannot have two children (it would not have been the smallest or greatest) + Handle[] replha = repl.getOHHandle(); + Node n; + Handle[] h; + // remove leaf + if ((replha[leftchild] == null) && (replha[rightchild] == null)) { + // the replacement cannot be the root, so simply remove from parent node + n = getNode(replha[parent], null, 0); // parent node of replacement node + h = n.getOHHandle(); + if ((h[leftchild] != null) && (h[leftchild].equals(repl.handle()))) h[leftchild] = null; + if ((h[rightchild] != null) && (h[rightchild].equals(repl.handle()))) h[rightchild] = null; + n.setOHHandle(h); + } else if ((replha[leftchild] != null) && (replha[rightchild] == null)) { + try { + childnode = getNode(replha[leftchild], repl, leftchild); + replace(repl, getNode(replha[parent], null, 0), childnode); + } catch (IllegalArgumentException e) { + // now treat the situation as if that link had been null before + n = getNode(replha[parent], null, 0); // parent node of replacement node + h = n.getOHHandle(); + if ((h[leftchild] != null) && 
(h[leftchild].equals(repl.handle()))) h[leftchild] = null; + if ((h[rightchild] != null) && (h[rightchild].equals(repl.handle()))) h[rightchild] = null; + n.setOHHandle(h); + } + } else if ((replha[leftchild] == null) && (replha[rightchild] != null)) { + try { + childnode = getNode(replha[rightchild], repl, rightchild); + replace(repl, getNode(replha[parent], null, 0), childnode); + } catch (IllegalArgumentException e) { + // now treat the situation as if that link had been null before + n = getNode(replha[parent], null, 0); // parent node of replacement node + h = n.getOHHandle(); + if ((h[leftchild] != null) && (h[leftchild].equals(repl.handle()))) h[leftchild] = null; + if ((h[rightchild] != null) && (h[rightchild].equals(repl.handle()))) h[rightchild] = null; + n.setOHHandle(h); + } + } + //System.out.println("node before reload is " + node.toString()); + node = getNode(node.handle(), null, 0); // reload the node, it is possible that it has been changed + //System.out.println("node after reload is " + node.toString()); + + // now plant in the replha node + byte[] b = node.getOHByte(); // save bytes of disappearing node + handles = node.getOHHandle(); // save handles of disappearing node + replace(node, parentOfNode, repl); + repl.setOHByte(b); // restore bytes + repl.setOHHandle(handles); // restore handles + // last thing to do: change uplinks of children to this new node + if (handles[leftchild] != null) { + n = getNode(handles[leftchild], node, leftchild); + h = n.getOHHandle(); + h[parent] = repl.handle(); + n.setOHHandle(h); + } + if (handles[rightchild] != null) { + n = getNode(handles[rightchild], node, rightchild); + h = n.getOHHandle(); + h[parent] = repl.handle(); + n.setOHHandle(h); + } + } + deleteNode(node.handle()); + } + + private Node firstNode() throws IOException { + Handle h = getHandle(root); + if (h == null) return null; + return firstNode(getNode(h, null, 0)); + } + + private Node firstNode(Node node) throws IOException { + if (node == 
null) throw new IllegalArgumentException("firstNode: node=null"); + Handle h = node.getOHHandle()[leftchild]; + while (h != null) { + try { + node = getNode(h, node, leftchild); + } catch (IllegalArgumentException e) { + // return what we have + return node; + } + h = node.getOHHandle()[leftchild]; + } + return node; + } + + private Node lastNode() throws IOException { + Handle h = getHandle(root); + if (h == null) return null; + return lastNode(getNode(h, null, 0)); + } + + private Node lastNode(Node node) throws IOException { + if (node == null) throw new IllegalArgumentException("lastNode: node=null"); + Handle h = node.getOHHandle()[rightchild]; + while (h != null) { + try { + node = getNode(h, node, rightchild); + } catch (IllegalArgumentException e) { + // return what we have + return node; + } + h = node.getOHHandle()[rightchild]; + } + return node; + } + + public synchronized Iterator nodeIterator(boolean up, boolean rotating) { + // iterates the elements in a sorted way. returns Node - type Objects + try { + return new nodeIterator(up, rotating); + } catch (IOException e) { + throw new RuntimeException("error creating an iteration: " + e.getMessage()); + } + } + + public synchronized Iterator nodeIterator(boolean up, boolean rotating, byte[] firstKey) { + // iterates the elements in a sorted way. returns Node - type Objects + try { + Search s = new Search(firstKey); + if (s.found()) { + return new nodeIterator(up, rotating, s.getMatcher()); + } else { + Node nn = s.getParent(); + if (nn == null) { + return (new HashSet()).iterator(); // an empty iterator + } else { + return new nodeIterator(up, rotating, nn); + } + } + } catch (IOException e) { + throw new RuntimeException("error creating an iteration: " + e.getMessage()); + } + } + + /* + public synchronized keyIterator keys(boolean up, boolean rotating) throws IOException { + // iterates only the keys of the Nodes + // enumerated objects are of type byte[] + // iterates the elements in a sorted way. 
+ return new keyIterator(new nodeIterator(up, rotating)); + } + + public synchronized keyIterator keys(boolean up, boolean rotating, byte[] firstKey) throws IOException { + Search s = new Search(firstKey); + if (s.found()) { + return new keyIterator(new nodeIterator(up, rotating, s.getMatcher())); + } else { + Node nn = s.getParent(); + if (nn == null) { + return (keyIterator) (new HashSet()).iterator(); + } else { + return new keyIterator(new nodeIterator(up, rotating, nn)); + } + } + } + + public class keyIterator implements Iterator { + // the iterator iterates all keys, which are byte[] objects + Iterator nodeIterator; + public keyIterator(nodeIterator nodeIterator) { + this.nodeIterator = nodeIterator; + } + public boolean hasNext() { + return nodeIterator.hasNext(); + } + public Object next() { + try { + return ((Node) nodeIterator.next()).getKey(); + } catch (IOException e) { + return null; + } + } + public void remove() { + nodeIterator.remove(); + } + } + */ + + public synchronized rowIterator rows(boolean up, boolean rotating) throws IOException { + // iterates only the keys of the Nodes + // enumerated objects are of type byte[] + // iterates the elements in a sorted way. 
+ return new rowIterator(new nodeIterator(up, rotating)); + } + + public synchronized Iterator rows(boolean up, boolean rotating, byte[] firstKey) throws IOException { + Search s = new Search(firstKey); + if (s.found()) { + return new rowIterator(new nodeIterator(up, rotating, s.getMatcher())); + } else { + Node nn = s.getParent(); + if (nn == null) { + return (Iterator) (new HashSet()).iterator(); + } else { + return new rowIterator(new nodeIterator(up, rotating, nn)); + } + } + } + + public class rowIterator implements Iterator { + + Iterator nodeIterator; + + public rowIterator(Iterator nodeIterator) { + this.nodeIterator = nodeIterator; + } + + public boolean hasNext() { + return (nodeIterator.hasNext()); + } + + public Object next() { + try { + return ((Node) nodeIterator.next()).getValues(); + } catch (IOException e) { + return null; + } + } + + public void remove() { + } + + } + + public synchronized int imp(File file, String separator) throws IOException { + // imports a value-separated file, returns number of records that have been read + + RandomAccessFile f = new RandomAccessFile(file,"r"); + String s; + StringTokenizer st; + int recs = 0; + byte[][] buffer = new byte[columns()][]; + int c; + int line = 0; + while ((s = f.readLine()) != null) { + s = s.trim(); + line++; + if ((s.length() > 0) && (!(s.startsWith("#")))) { + st = new StringTokenizer(s, separator); + // buffer the entry + c = 0; + while ((c < columns()) && (st.hasMoreTokens())) { + buffer[c++] = st.nextToken().trim().getBytes(); + } + if ((st.hasMoreTokens()) || (c != columns())) { + System.err.println("inapropriate number of entries in line " + line); + } else { + put(buffer); + recs++; + } + + } + } + return recs; + } + + public synchronized int height() { + try { + Handle h = getHandle(root); + if (h == null) return 0; + return height(getNode(h, null, 0)); + } catch (IOException e) { + return 0; + } + } + + private int height(Node node) throws IOException { + if (node == null) return 0; 
+ Handle[] childs = node.getOHHandle(); + int hl = (childs[leftchild] == null) ? 0 : height(getNode(childs[leftchild], node, leftchild)); + int hr = (childs[rightchild] == null) ? 0 : height(getNode(childs[rightchild], node, rightchild)); + if (hl > hr) return hl + 1; else return hr + 1; + } + + public String np(Object n) { + if (n == null) return "NULL"; else return n.toString(); + } + + public void print() throws IOException { + super.print(false); + int height = height(); + System.out.println("HEIGHT = " + height); + Vector thisline = new Vector(); + thisline.add(getHandle(root)); + Vector nextline; + Handle handle; + Node node; + int linelength, width = (1 << (height - 1)) * (columnSize(0) + 1); + Handle[] childs; + String key; + for (int h = 1; h < height; h++) { + linelength = width / (thisline.size() * 2); + nextline = new Vector(); + for (int i = 0; i < thisline.size(); i++) { + handle = (Handle) thisline.elementAt(i); + if (handle == null) { + node = null; + key = "[..]"; + } else { + node = getNode(handle, null, 0); + if (node == null) key = "NULL"; else key = new String(node.getKey()); + } + System.out.print(key); + for (int j = 0; j < (linelength - key.length()); j++) System.out.print("-"); + System.out.print("+"); + for (int j = 0; j < (linelength - 1); j++) System.out.print(" "); + if (node == null) { + nextline.add(null); + nextline.add(null); + } else { + childs = node.getOHHandle(); + nextline.add(childs[leftchild]); + nextline.add(childs[rightchild]); + } + } + System.out.println(); + for (int i = 0; i < thisline.size(); i++) { + System.out.print("|"); + for (int j = 0; j < (linelength - 1); j++) System.out.print(" "); + System.out.print("|"); + for (int j = 0; j < (linelength - 1); j++) System.out.print(" "); + } + System.out.println(); + thisline = nextline; + nextline = null; + } + // now print last line + if ((thisline != null) && (width >= 0)) { + linelength = width / thisline.size(); + for (int i = 0; i < thisline.size(); i++) { + handle = 
(Handle) thisline.elementAt(i); + if (handle == null) { + node = null; + key = "NULL"; + } else { + node = getNode(handle, null, 0); + if (node == null) key = "NULL"; else key = new String(node.getKey()); + } + System.out.print(key); + for (int j = 0; j < (linelength - key.length()); j++) System.out.print(" "); + } + } + System.out.println(); + } + /* + public void print() { + super.print(false); + Handle h; + Node n; + Iterator it = iterator(true); + while (it.hasNext()) { + n = (Node) it.next(); + System.out.println("> NODE " + np(n)); + try { + System.out.println(" magic " + n.getOHByte()[magic] + + ", balance " + n.getOHByte()[balance] + + ", parent " + np(n.getOHHandle()[parent]) + + ", left " + np(n.getOHHandle()[leftchild]) + + ", right " + np(n.getOHHandle()[rightchild])); + } catch (IOException e) { + System.out.println("File error: " + e.getMessage()); + } + + System.out.print(" KEY:'" + (new String(n.getValue(0))).trim() + "'"); + for (int j = 1; j < columns(); j++) + System.out.print(", V[" + j + "]:'" + (new String(n.getValue(j))).trim() + "'"); + + //System.out.println(); + } + System.out.println(); + } + */ + + private static void cmd(String[] args) { + System.out.print("kelondroTree "); + for (int i = 0; i < args.length; i++) System.out.print(args[i] + " "); + System.out.println(""); + byte[] ret = null; + try { + if ((args.length > 4) || (args.length < 1)) { + System.err.println("usage: kelondroTree -c|-u|-v|-g|-d|-i|-s [file]|[key [value]] "); + System.err.println("( create, update, view, get, delete, imp, shell)"); + System.exit(0); + } else if (args.length == 1) { + if (args[0].equals("-t")) { + // test script + File testFile = new File("test.db"); + while (testFile.exists()) testFile.delete(); + kelondroTree fm = new kelondroTree(testFile, 0x100000, 4, 4); + byte[] dummy = "".getBytes(); + fm.put("abc0".getBytes(), dummy); fm.put("bcd0".getBytes(), dummy); + fm.put("def0".getBytes(), dummy); fm.put("bab0".getBytes(), dummy); + 
fm.put("abc1".getBytes(), dummy); fm.put("bcd1".getBytes(), dummy); + fm.put("def1".getBytes(), dummy); fm.put("bab1".getBytes(), dummy); + fm.put("abc2".getBytes(), dummy); fm.put("bcd2".getBytes(), dummy); + fm.put("def2".getBytes(), dummy); fm.put("bab2".getBytes(), dummy); + fm.put("abc3".getBytes(), dummy); fm.put("bcd3".getBytes(), dummy); + fm.put("def3".getBytes(), dummy); fm.put("bab3".getBytes(), dummy); + fm.print(); + fm.remove("def1".getBytes()); fm.remove("bab1".getBytes()); + fm.remove("abc2".getBytes()); fm.remove("bcd2".getBytes()); + fm.remove("def2".getBytes()); fm.remove("bab2".getBytes()); + fm.put("def1".getBytes(), dummy); fm.put("bab1".getBytes(), dummy); + fm.put("abc2".getBytes(), dummy); fm.put("bcd2".getBytes(), dummy); + fm.put("def2".getBytes(), dummy); fm.put("bab2".getBytes(), dummy); + fm.print(); + fm.close(); + ret = null; + } + } else if (args.length == 2) { + kelondroTree fm = new kelondroTree(new File(args[1]), 0x100000); + if (args[0].equals("-v")) { + fm.print(); + ret = null; + } + fm.close(); + } else if (args.length == 3) { + if (args[0].equals("-d")) { + kelondroTree fm = new kelondroTree(new File(args[1]), 0x100000); + fm.remove(args[2].getBytes()); + fm.close(); + } else if (args[0].equals("-i")) { + kelondroTree fm = new kelondroTree(new File(args[1]), 0x100000); + int i = fm.imp(new File(args[1]),";"); + fm.close(); + ret = (i + " records imported").getBytes(); + } else if (args[0].equals("-s")) { + String db = args[2]; + BufferedReader f = new BufferedReader(new FileReader(args[1])); + String m; + while (true) { + m = f.readLine(); + if (m == null) break; + if ((m.length() > 1) && (!m.startsWith("#"))) { + m = m + " " + db; + cmd(line2args(m)); + } + } + ret = null; + } else if (args[0].equals("-g")) { + kelondroTree fm = new kelondroTree(new File(args[1]), 0x100000); + byte[][] ret2 = fm.get(args[2].getBytes()); + ret = ((ret2 == null) ? 
null : ret2[1]); + fm.close(); + } else if (args[0].equals("-n")) { + kelondroTree fm = new kelondroTree(new File(args[1]), 0x100000); + //byte[][] keys = fm.getSequentialKeys(args[2].getBytes(), 500, true); + Iterator rowIt = fm.rows(true, false, args[2].getBytes()); + Vector v = new Vector(); + while (rowIt.hasNext()) v.add(new String(((byte[][]) rowIt.next())[0])); + ret = v.toString().getBytes(); + fm.close(); + } + } else if (args.length == 4) { + if (args[0].equals("-c")) { + // create + File f = new File(args[3]); + if (f.exists()) f.delete(); + int[] lens = new int[2]; + lens[0] = Integer.parseInt(args[1]); + lens[1] = Integer.parseInt(args[2]); + kelondroTree fm = new kelondroTree(f, 0x100000, lens); + fm.close(); + } else if (args[0].equals("-u")) { + kelondroTree fm = new kelondroTree(new File(args[3]), 0x100000); + ret = fm.put(args[1].getBytes(), args[2].getBytes()); + fm.close(); + } + } + if (ret == null) + System.out.println("NULL"); + else + System.out.println(new String(ret)); + } catch (Exception e) { + e.printStackTrace(); + } + } + + + public synchronized int compare(Object a, Object b) { + try { + if ((a instanceof byte[]) && (b instanceof byte[])) { + return compare((byte[]) a, (byte[]) b); + } else if ((a instanceof Node) && (b instanceof Node)) { + return compare(((Node) a).getKey(), ((Node) b).getKey()); + } else throw new IllegalArgumentException("Object type or Object type combination not supported"); + } catch (IOException e) { + throw new IllegalStateException("IOException: " + e.getMessage()); + } + } + + // Compares its two arguments for order. + // Returns -1, 0, or 1 as the first argument + // is less than, equal to, or greater than the second. + // two arrays are also equal if one array is a subset of the other's array with filled-up char(0)-values + public synchronized int compare(byte[] a, byte[] b) { + int i = 0; + int al = a.length; + int bl = b.length; + int len = (al > bl) ? 
bl : al; + while (i < len) { + if (a[i] > b[i]) return 1; + if (a[i] < b[i]) return -1; + // else the bytes are equal and it may go on yet undecided + i++; + } + // check if we have a zero-terminated equality + if ((i == al) && (i < bl) && (b[i] == 0)) return 0; + if ((i == bl) && (i < al) && (a[i] == 0)) return 0; + // no, decide by length + if (al > bl) return 1; + if (al < bl) return -1; + // no, they are equal + return 0; + } + + // Returns the comparator used to order this map, + // or null if this map uses its keys' natural order. + + public synchronized Comparator comparator() { + return this; + } + + public static void main(String[] args) { + //cmd(args); + bigtest(Integer.parseInt(args[0])); + //randomtest(Integer.parseInt(args[0])); + //smalltest(); + } + + public static String[] permutations(int letters) { + String p = ""; + for (int i = 0; i < letters; i++) p = p + ((char) (((int)'A') + i)); + return permutations(p); + } + public static String[] permutations(String source) { + if (source.length() == 0) return new String[0]; + if (source.length() == 1) return new String[]{source}; + char c = source.charAt(0); + String[] recres = permutations(source.substring(1)); + String[] result = new String[source.length() * recres.length]; + for (int perm = 0; perm < recres.length; perm++) { + result[perm * source.length()] = c + recres[perm]; + for (int pos = 1; pos < source.length() - 1; pos++) { + result[perm * source.length() + pos] = recres[perm].substring(0, pos) + c + recres[perm].substring(pos); + } + result[perm * source.length() + source.length() - 1] = recres[perm] + c; + } + return result; + } + + public static byte[] testWord(char c) { + return new byte[]{(byte) c, 32, 32, 32}; + } + + public static kelondroTree testTree(File f, String testentities) throws IOException { + if (f.exists()) f.delete(); + kelondroTree tt = new kelondroTree(f, 0, 4, 4); + byte[] b; + for (int i = 0; i < testentities.length(); i++) { + b = testWord(testentities.charAt(i)); + 
tt.put(b, b); + } + return tt; + } + + public static void randomtest(int elements) { + System.out.println("random " + elements + ":"); + String s = "ABCDEFGHIJKLMNOPQRSTUVWXYZ".substring(0, elements); + String t, d; + char c; + kelondroTree tt; + File testFile = new File("test.db"); + byte[] b; + try { + int steps = 0; + while (true) { + if (testFile.exists()) testFile.delete(); + tt = new kelondroTree(testFile, 20000, 4 ,4); + steps = 10 + ((int) System.currentTimeMillis() % 7) * (((int) System.currentTimeMillis() + 17) % 11); + t = s; + d = ""; + System.out.println("NEW SESSION"); + for (int i = 0; i < steps; i++) { + if ((d.length() < 3) || ((t.length() > 0) && (((int) System.currentTimeMillis() % 7) < 2))) { + // add one + c = t.charAt((int) (System.currentTimeMillis() % (long) t.length())); + b = testWord(c); + tt.put(b, b); + d = d + c; + t = t.substring(0, t.indexOf(c)) + t.substring(t.indexOf(c) + 1); + System.out.println("added " + new String(b)); + } else { + // delete one + c = d.charAt((int) (System.currentTimeMillis() % (long) d.length())); + b = testWord(c); + tt.remove(b); + d = d.substring(0, d.indexOf(c)) + d.substring(d.indexOf(c) + 1); + t = t + c; + System.out.println("removed " + new String(b)); + } + if (countElements(tt) != tt.size()) { + System.out.println("wrong size for "); + tt.print(); + } + // check all words within + for (int j = 0; j < d.length(); j++) { + if (tt.get(testWord(d.charAt(j))) == null) { + System.out.println("missing entry " + d.charAt(j)); + tt.print(); + } + } + // check all words outside + for (int j = 0; j < t.length(); j++) { + if (tt.get(testWord(t.charAt(j))) != null) { + System.out.println("superfluous entry " + t.charAt(j)); + tt.print(); + } + } + if (tt.get(testWord('z')) != null) { + System.out.println("superfluous entry z"); + tt.print(); + } + } + tt.print(); + tt.close(); + } + + } catch (Exception e) { + e.printStackTrace(); + System.out.println("TERMINATED"); + } + } + + public static void smalltest() { + 
File f = new File("test.db"); + if (f.exists()) f.delete(); + try { + kelondroTree tt = new kelondroTree(f, 0, 4, 4); + byte[] b; + b = testWord('b'); tt.put(b, b); + b = testWord('c'); tt.put(b, b); + b = testWord('a'); tt.put(b, b); + System.out.println("elements: " + countElements(tt)); + tt.print(); + } catch (IOException e) { + e.printStackTrace(); + } + } + + public static void bigtest(int elements) { + System.out.println("perm " + elements + ":"); + String[] s = permutations(elements); + kelondroTree tt; + File testFile = new File("test.db"); + byte[] b; + try { + for (int i = 0; i < s.length; i++) { + System.out.println("probing tree " + i + " for permutation " + s[i]); + // generate tree and delete elements + tt = testTree(testFile, s[i]); + //tt.print(); + if (countElements(tt) != tt.size()) { + System.out.println("wrong size for " + s[i]); + tt.print(); + } + tt.close(); + for (int j = 0; j < s.length; j++) { + tt = testTree(testFile, s[i]); + //tt.print(); + // delete by permutation j + for (int elt = 0; elt < s[j].length(); elt++) { + tt.remove(testWord(s[j].charAt(elt))); + //tt.print(); + if (countElements(tt) != tt.size()) { + System.out.println("ERROR! wrong size for probe tree " + s[i] + "; probe delete " + s[j] + "; position " + elt); + tt.print(); + } + } + // add another one + //tt.print(); + /* + b = testWord('0'); tt.put(b, b); + b = testWord('z'); tt.put(b, b); + b = testWord('G'); tt.put(b, b); + b = testWord('t'); tt.put(b, b); + if (countElements(tt) != tt.size()) { + System.out.println("ERROR! 
wrong size for probe tree " + s[i] + "; probe delete " + s[j] + "; final add"); + tt.print(); + } + tt.print(); + */ + // close this + tt.close(); + } + } + System.out.println("FINISHED"); + } catch (Exception e) { + e.printStackTrace(); + System.out.println("TERMINATED"); + } + } + + public static int countElements(kelondroTree t) { + int count = 0; + Iterator iter = t.nodeIterator(true, false); + while (iter.hasNext()) {count++; if (iter.next() == null) System.out.println("ERROR! null element found");} + return count; + } +} diff --git a/source/de/anomic/net/ftpc.java b/source/de/anomic/net/ftpc.java new file mode 100644 index 000000000..dbf75c447 --- /dev/null +++ b/source/de/anomic/net/ftpc.java @@ -0,0 +1,1976 @@ +// ftpc.java +// ------------------------------------- +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2004 +// main implementation finished: 28.05.2002 +// last major change: 06.05.2004 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. 
+// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. +// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. + +package de.anomic.net; + +import java.lang.reflect.*; +import java.util.*; +import java.io.*; +import java.text.DateFormat; +import java.net.*; +import de.anomic.server.*; + +public class ftpc { + + private static final String vDATE = "20040506"; + private static final String logPrefix = "FTPC: "; + + + private InputStream in; + private PrintStream out; + private PrintStream err; + private boolean glob = true; // glob = false -> filenames are taken literally for mget, .. 
+ + // for time measurement + private static final TimeZone GMTTimeZone = TimeZone.getTimeZone("PST"); // the GMT Time Zone + + // transfer type + private static final char transferType = 'i'; // transfer binary + + // block size [1K by default] + private static final int blockSize = 1024; + + // client socket for commands + private Socket ControlSocket = null; + + // socket for data transactions + private ServerSocket DataSocketActive = null; + private Socket DataSocketPassive = null; + private boolean DataSocketPassiveMode = true; + + // output and input streams for client control connection + private BufferedReader clientInput = null; + private DataOutputStream clientOutput = null; + + // server this client is connected to + private String account = null; + + // client prompt + private String prompt = "ftp [local]>"; + + String[] cmd; + + File currentPath; + + public ftpc() { + this(System.in, System.out, System.err); + } + + public ftpc(java.io.InputStream ins, java.io.PrintStream outs, java.io.PrintStream errs) { + + try { + System.setSecurityManager(new sm()); + } catch (java.security.AccessControlException e) { + } + + this.in = ins; + this.out = outs; + this.err = errs; + + this.currentPath = new File(System.getProperty("user.dir")); + try { + this.currentPath = new File(this.currentPath.getCanonicalPath()); + } catch (IOException e) {} + + } + + public void shell(String server) { + String command; + + java.io.PrintWriter pw = null; + if (out != null) { + pw = new java.io.PrintWriter(out); + } + + try { + java.io.BufferedReader stdin = + new java.io.BufferedReader(new java.io.InputStreamReader(in)); + if (server != null) exec("open " + server, true); + while (true) { + + // prompt + if (pw != null) {pw.print(prompt); pw.flush();} + + // read a line + while ((command = stdin.readLine()) == null) + if (pw != null) {pw.print(prompt); pw.flush();} + + // execute + if (!exec(command, false)) break; + + } + } catch (Exception e) { + err.println(logPrefix + "---- 
Error - ftp exception: " + e); + e.printStackTrace(err); + } + } + + public boolean exec(String command, boolean promptIt) { + if ((command == null) || (command.length() == 0)) return true; + int pos; + String com; + boolean ret = true; + while (command.length() > 0) { + pos = command.indexOf(";"); if (pos < 0) pos = command.indexOf("\n"); + if (pos < 0) { + com = command; + command = ""; + } else { + com = command.substring(0,pos); + command = command.substring(pos + 1); + } + if (promptIt) out.println(logPrefix + prompt + com); + cmd = line2args(com); + try { + ret = (((Boolean) getClass().getMethod( + cmd[0].toUpperCase(), + new Class[0] + ).invoke(this, new Object[0])).booleanValue()); + } catch (InvocationTargetException e) { + if (e.getMessage() == null) {} + else if (ControlSocket == null) { + // the error was probably caused because there is no connection + err.println(logPrefix + "---- not connected. no effect."); + e.printStackTrace(); + return ret; + } else { + err.println(logPrefix + "---- ftp internal exception: target exception " + e); + return ret; + } + } catch (IllegalAccessException e) { + err.println(logPrefix + "---- ftp internal exception: wrong access " + e); + return ret; + } catch (NoSuchMethodException e) { + // consider first that the user attempted to execute a java command from + // the current path; either local or remote + if (ControlSocket == null) { + // try a local exec + try { + javaexec(cmd); + } catch (Exception ee) { + err.println(logPrefix + "---- Command '" + cmd[0] + "' not supported. 
Try 'HELP'."); + } + } else { + // try a remote exec + exec("java " + com, false); + } + return ret; + } + } + return ret; + } + + + private String[] line2args(String line) { + // parse the command line + if ((line == null) || (line.length() == 0)) return null; + // pre-parse + String line1=""; + boolean quoted = false; + for (int i = 0; i < line.length(); i++) { + if (quoted) { + if (line.charAt(i) == '"') { + quoted = false; + } else { + line1 = line1 + line.charAt(i); + } + } else { + if (line.charAt(i) == '"') { + quoted = true; + } else if (line.charAt(i) == ' ') { + line1 = line1 + '|'; + } else { + line1 = line1 + line.charAt(i); + } + } + } + // construct StringTokenizer + String args[]; + StringTokenizer st = new StringTokenizer(line1,"|"); + // read tokens from string + args = new String[st.countTokens()]; + for (int i = 0; st.hasMoreTokens(); i++) { + args[i] = st.nextToken(); + } + st = null; + return args; + } + + private static String[] shift(String args[]) { + if ((args == null) || (args.length == 0)) return args; else { + String newArgs[] = new String[args.length-1]; + System.arraycopy(args, 1, newArgs, 0, args.length-1); + return newArgs; + } + } + + class cl extends ClassLoader { + + public cl() { + super(); + } + + public Class loadClass(String classname, boolean resolve) throws ClassNotFoundException { + Class c = findLoadedClass(classname); + if (c == null) try { + // second try: ask the system + c = findSystemClass(classname); + } catch (ClassNotFoundException e) { + // third try: load myself + File f = new File(System.getProperty("user.dir"), classname + ".class"); + int length = (int)f.length(); + byte[] classbytes = new byte[length]; + try { + DataInputStream in = new DataInputStream(new FileInputStream(f)); + in.readFully(classbytes); + in.close(); + c = defineClass(classname, classbytes, 0, classbytes.length); + } catch (FileNotFoundException ee) { + throw new ClassNotFoundException(); + } catch (IOException ee) { + throw new 
ClassNotFoundException(); + } + } + if (resolve) resolveClass(c); + return c; + } + + } + + private void javaexec(String[] inArgs) { + String obj = inArgs[0]; + String args[] = new String[inArgs.length-1]; + + // remove the object name from the array of arguments + System.arraycopy(inArgs, 1, args, 0, inArgs.length-1); + + // Build the argument list for invoke() method. + Object argList[] = new Object[1]; + argList[0] = args; + + Properties pr = System.getProperties(); + String origPath = (String) pr.get("java.class.path"); + try { + + // set the user.dir to the actual local path + pr.put("user.dir", this.currentPath.toString()); + + // add the current path to the classpath + //pr.put("java.class.path", "" + pr.get("user.dir") + pr.get("path.separator") + origPath); + + //err.println(logPrefix + "System Properties: " + pr.toString()); + + System.setProperties(pr); + + // locate object + Class c = (new cl()).loadClass(obj); + //Class c = this.getClass().getClassLoader().loadClass(obj); + + // locate public static main(String[]) method + Class[] parameterType = new Class[1]; + parameterType[0] = Class.forName("[Ljava.lang.String;"); + Method m = c.getMethod("main", parameterType); + + // invoke object.main() + Object result = m.invoke(null, argList); + parameterType = null; + m = null; + + // handle result + if (result != null) out.println(logPrefix + "returns " + result); + + // set the local path to the user.dir (which may have changed) + this.currentPath = new File((String) pr.get("user.dir")); + + } catch (ClassNotFoundException e) { + // err.println(logPrefix + "---- cannot find class file " + obj + ".class"); + // class file does not exist, go silently over it to not show everybody that the + // system attempted to load a class file + err.println(logPrefix + "---- Command '" + obj + "' not supported. 
Try 'HELP'."); + } catch (NoSuchMethodException e) { + err.println(logPrefix + "---- no \"public static main(String args[])\" in " + obj); + } catch (InvocationTargetException e) { + Throwable orig = e.getTargetException(); + if (orig.getMessage() == null) {} else { + err.println(logPrefix + "---- Exception from " + obj + ": " + orig.getMessage()); + orig.printStackTrace(err); + } + } catch (IllegalAccessException e) { + err.println(logPrefix + "---- Illegal access for " + obj + ": class is probably not declared as public"); + e.printStackTrace(err); + } catch (NullPointerException e) { + err.println(logPrefix + "---- main(String args[]) is not defined as static for " + obj); +/* + } catch (IOException e) { + // class file does not exist, go silently over it to not show everybody that the + // system attempted to load a class file + err.println(logPrefix + "---- Command '" + obj + "' not supported. Try 'HELP'."); +*/ + } catch (Exception e) { + err.println(logPrefix + "---- Exception caught: " + e); + e.printStackTrace(err); + } + + // set the classpath to its original definition + pr.put("java.class.path", origPath); + + } + + // FTP CLIENT COMMANDS ------------------------------------ + + public boolean ASCII() { + if (cmd.length != 1) { + err.println(logPrefix + "---- Syntax: ASCII (no parameter)"); + return true; + } + try { + literal("TYPE A"); + } catch (IOException e) { + err.println(logPrefix + "---- Error: ASCII transfer type not supported by server."); + } + return true; + } + + public boolean BINARY() { + if (cmd.length != 1) { + err.println(logPrefix + "---- Syntax: BINARY (no parameter)"); + return true; + } + try { + literal("TYPE I"); + } catch (IOException e) { + err.println(logPrefix + "---- Error: BINARY transfer type not supported by server."); + } + return true; + } + + public boolean BYE() { + return QUIT(); + } + + public boolean CD() { + if (cmd.length != 2) { + err.println(logPrefix + "---- Syntax: CD "); + return true; + } + if 
(ControlSocket == null) return LCD(); + try { + // send cwd command + send("CWD " + cmd[1]); + + String reply = receive(); + if (Integer.parseInt(reply.substring(0, 1)) != 2) throw new IOException(reply); + } catch (IOException e) { + err.println(logPrefix + "---- Error: change of working directory to path " + cmd[1] + " failed."); + } + return true; + } + + public boolean CLOSE() { + return DISCONNECT(); + } + + private void rmForced(String path) throws IOException { + // first try: send DELE command (to delete a file) + send("DELE " + path); + // read reply + String reply1 = receive(); + if (Integer.parseInt(reply1.substring(0, 1)) != 2) { + // second try: send a RMD command (to delete a directory) + send("RMD " + path); + // read reply + String reply2 = receive(); + if (Integer.parseInt(reply2.substring(0, 1)) != 2) { + // third try: test if this thing is a directory or file and send appropriate error message + if (isFolder(path)) + throw new IOException(reply2); + else + throw new IOException(reply1); + } + } + } + + public boolean DEL() { + if (cmd.length != 2) { + err.println(logPrefix + "---- Syntax: DEL "); + return true; + } + if (ControlSocket == null) return LDEL(); + try { + rmForced(cmd[1]); + } catch (IOException e) { + err.println(logPrefix + "---- Error: deletion of file " + cmd[1] + " failed."); + } + return true; + } + + + public boolean RM() { + return DEL(); + } + + public boolean DIR() { + if (cmd.length > 2) { + err.println(logPrefix + "---- Syntax: DIR [|]"); + return true; + } + if (ControlSocket == null) return LDIR(); + try { + Vector l; + if (cmd.length == 2) l = list(cmd[1],false); else l = list(".",false); + Enumeration x = l.elements(); + out.println(logPrefix + "---- v---v---v---v---v---v---v---v---v---v---v---v---v---v---v---v---v---v---v"); + while (x.hasMoreElements()) out.println(logPrefix + (String) x.nextElement()); + out.println(logPrefix + "---- ^---^---^---^---^---^---^---^---^---^---^---^---^---^---^---^---^---^---^"); + } 
catch (IOException e) { + err.println(logPrefix + "---- Error: remote list not available"); + } + return true; + } + + public boolean DISCONNECT() { + try { + // send delete command + send("QUIT"); + + // read status reply + String reply = receive(); + if (Integer.parseInt(reply.substring(0, 1)) != 2) throw new IOException(reply); + + // cleanup + if (ControlSocket != null) { + clientOutput.close(); + clientInput.close(); + ControlSocket.close(); + } + + if (DataSocketActive != null) DataSocketActive.close(); + if (DataSocketPassive != null) DataSocketPassive.close(); + + out.println(logPrefix + "---- Connection closed."); + } catch (IOException e) { + err.println(logPrefix + "---- Connection to server lost."); + } + this.account = null; + this.ControlSocket = null; + this.DataSocketActive = null; + this.DataSocketPassive = null; + this.clientInput = null; + this.clientOutput = null; + this.prompt = "ftp [local]>"; + return true; + } + + public boolean EXIT() { + return QUIT(); + } + + + public boolean GET() { + if ((cmd.length < 2) || (cmd.length > 3)) { + err.println(logPrefix + "---- Syntax: GET []"); + return true; + } + String remote = (new File(cmd[1])).getName(); + File local; + File l; + if (cmd.length == 2) { + l = new File(remote); + if (l.isAbsolute()) local = l; else local = new File(this.currentPath, remote); + } else { + l = new File(cmd[2]); + if (l.isAbsolute()) local = l; else local = new File(this.currentPath, cmd[2]); + } + if (local.exists()) { + err.println(logPrefix + "---- Error: local file " + local.toString() + " already exists."); + err.println(logPrefix + " File " + remote + " not retrieved. Local file unchanged."); + } else { + if (cmd.length == 2) + retrieveFilesRecursively(remote, false); + else try { + get(local.getAbsolutePath(), remote); + } catch (IOException e) { + err.println(logPrefix + "---- Error: retrieving file " + remote + " failed. 
(" + e.getMessage() + ")"); + } + } + return true; + } + + + private void retrieveFilesRecursively(String remote, boolean delete) { + File local; + File l = new File(remote); + if (l.isAbsolute()) local = l; else local = new File(this.currentPath, remote); + try { + get(local.getAbsolutePath(), remote); + try {if (delete) rmForced(remote);} catch (IOException eee) { + err.println(logPrefix + "---- Warning: remote file or path " + remote + " cannot be removed."); + } + } catch (IOException e) { + if (e.getMessage().startsWith("550")) { + // maybe it's a "not a plain file" error message", then it can be a folder + // test if this exists (then it should be a folder) + if (isFolder(remote)) { + // copy the whole directory + exec("cd \"" + remote + "\";lmkdir \"" + remote + "\";lcd \"" + remote + "\"",true); + //exec("mget *",true); + try { + Enumeration files = list(".",false).elements(); + while (files.hasMoreElements()) retrieveFilesRecursively((String) files.nextElement(), delete); + } catch (IOException ee) {} + exec("cd ..;lcd ..", true); + try {if (delete) rmForced(remote);} catch (IOException eee) { + err.println(logPrefix + "---- Warning: remote file or path " + remote + " cannot be removed."); + } + } else { + err.println(logPrefix + "---- Error: remote file or path " + remote + " does not exist."); + } + } else { + err.println(logPrefix + "---- Error: retrieving file " + remote + " failed. (" + e.getMessage() + ")"); + } + } + } + + private boolean isFolder(String path) { + try { + send("CWD " + path); + String reply = receive(); + if (Integer.parseInt(reply.substring(0, 1)) != 2) throw new IOException(reply); + send("CWD .."); + reply = receive(); + return true; + } catch (IOException e) { + return false; + } + } + + public boolean GLOB() { + if (cmd.length != 1) { + err.println(logPrefix + "---- Syntax: GLOB (no parameter)"); + return true; + } + this.glob = !this.glob; + out.println(logPrefix + "---- globbing is now turned " + ((this.glob) ? 
"ON" : "OFF")); + return true; + } + + public boolean HASH() { + err.println(logPrefix + "---- no games implemented"); + return true; + } + + public boolean JAR() { + sun.tools.jar.Main.main(shift(cmd)); + return true; + } + + + public boolean JJENCODE() { + if (cmd.length != 2) { + err.println(logPrefix + "---- Syntax: JJENCODE "); + return true; + } + String path = cmd[1]; + + File dir = new File(path); + File newPath = dir.isAbsolute() ? dir : new File(this.currentPath, path); + if (newPath.exists()) { + if (newPath.isDirectory()) { +// exec("cd \"" + remote + "\";lmkdir \"" + remote + "\";lcd \"" + remote + "\"",true); +/* +if not exist %1\nul goto :error +cd %1 +c:\jdk1.2.2\bin\jar -cfM0 ..\%1.jar *.* +cd .. +c:\jdk1.2.2\bin\jar -cfM %1.jj %1.jar +del %1.jar +*/ + String s = ""; + String[] l = newPath.list(); + for (int i = 0; i < l.length; i++) s = s + " \"" + l[i] + "\""; + exec("cd \"" + path + "\";jar -cfM0 ../\"" + path + ".jar\"" + s, true); + exec("cd ..;jar -cfM \"" + path + ".jj\" \"" + path + ".jar\"", true); + exec("rm \"" + path + ".jar\"", true); + } else { + err.println(logPrefix + "---- Error: local path " + newPath.toString() + " denotes not to a directory."); + } + } else { + err.println(logPrefix + "---- Error: local path " + newPath.toString() + " does not exist."); + } + return true; + } + + public boolean JJDECODE() { + if (cmd.length != 2) { + err.println(logPrefix + "---- Syntax: JJENCODE "); + return true; + } + String path = cmd[1]; + File dir = new File(path); + File newPath = dir.isAbsolute() ? dir : new File(this.currentPath, path); + File newFolder = new File(newPath.toString() + ".dir"); + if (newPath.exists()) { + if (!newPath.isDirectory()) { + if (!newFolder.mkdir()) { +/* +if not exist %1.jj goto :error +mkdir %1.dir +copy %1.jj %1.dir\ > %1.dummy && del %1.dummy +cd %1.dir +c:\jdk1.2.2\bin\jar -xf %1.jj +del %1.jj +c:\jdk1.2.2\bin\jar -xf %1.jar +del %1.jar +cd .. 
+*/ + exec("mkdir \"" + path + ".dir\"", true); + + } else { + err.println(logPrefix + "---- Error: target dir " + newFolder.toString() + " cannot be created"); + } + } else { + err.println(logPrefix + "---- Error: local path " + newPath.toString() + " must denote to jar/jar file"); + } + } else { + err.println(logPrefix + "---- Error: local path " + newPath.toString() + " does not exist."); + } + return true; + } + + private static String[] argList2StringArray(String argList) { + // command line parser + StringTokenizer tokens = new StringTokenizer(argList); + String[] args = new String[tokens.countTokens()]; + for (int i = 0; tokens.hasMoreTokens(); i++) args[i] = tokens.nextToken(); + tokens = null; // free mem + return args; + } + + public boolean JOIN(String[] args) { + + // make sure the specified dest file does not exist + String dest_name = args[1]; + File dest_file = new File(dest_name); + if (dest_file.exists()) { + err.println(logPrefix + "join: destination file " + dest_name + " already exists"); + return true; + } + + // prepare or search file names of the input files to be joined + String source_name; + File source_file; + int pc = -1; + // create new string array with file names + // scan first for the files + pc = 0; + source_name = dest_name + ".000"; + String argString = ""; + source_file = new File(source_name); + while ((source_file.exists()) && (source_file.isFile()) && (source_file.canRead())) { + argString = argString + " " + source_name; + pc++; + source_name = dest_name + (pc < 10 ? ".00"+pc : (pc < 100 ? 
".0"+pc : "."+pc)); + source_file = new File(source_name); + } + args = argList2StringArray(argString.substring(1)); + + // do the join + FileOutputStream dest = null; + FileInputStream source = null; + byte[] buffer; + int bytes_read = 0; + + try { + // open output file + dest = new FileOutputStream(dest_file); + buffer = new byte[1024]; + + // append all source files + for (pc = 0; pc < args.length; pc++) { + // open the source file + source_name = args[pc]; + source_file = new File(source_name); + source = new FileInputStream(source_file); + + // start with the copy of one source file + while (true) { + bytes_read = source.read(buffer); + if (bytes_read == -1) break; + dest.write(buffer, 0, bytes_read); + } + + // copy finished. close source file + if (source != null) try { source.close(); } catch (IOException e) {} + } + // close the output file + if (dest != null) try { dest.close(); } catch (IOException e) {} + + // if we come to this point then everything went fine + // if the user wanted to delete the source it is save to do so now + for (pc = 0; pc < args.length; pc++) { + try { + if (!(new File(args[pc])).delete()) + System.err.println(logPrefix + "join: unable to delete file " + args[pc]); + } catch (SecurityException e) { + System.err.println(logPrefix + "join: no permission to delete file " + args[pc]); + } + } + } catch (FileNotFoundException e) { + } catch (IOException e) { + } + + // clean up + finally { + // close any opened streams + if (dest != null) try { dest.close(); } catch (IOException e) {} + if (source != null) try { source.close(); } catch (IOException e) {} + + // print appropriate message + System.err.println(logPrefix + "join created output from " + args.length + " source files"); + } + return true; + } + + public boolean COPY(String[] args) { + File dest_file = new File(args[2]); + if (dest_file.exists()) { + err.println(logPrefix + "copy: destination file " + args[2] + " already exists"); + return true; + } + int bytes_read = 0; + 
try { + // open the source file first, so that a missing source does not leave an empty destination behind + FileInputStream source = new FileInputStream(new File(args[1])); + try { + // open output file + FileOutputStream dest = new FileOutputStream(dest_file); + try { + // copy the source file block by block + byte[] buffer = new byte[1024]; + while ((bytes_read = source.read(buffer)) != -1) dest.write(buffer, 0, bytes_read); + } finally { + // close the output file, also when the copy was interrupted + try { dest.close(); } catch (IOException e) {} + } + } finally { + // close the source file, also when the copy was interrupted + try { source.close(); } catch (IOException e) {} + } + } catch (IOException e) { + // also covers FileNotFoundException; report the failure instead of ignoring it + err.println(logPrefix + "copy: failed (" + e.getMessage() + ")"); + } + return true; + } + + /** + * JAVA command: sends the word JAVA followed by all command arguments to the + * server and reads one reply. I/O errors are ignored. + * + * @return always true + */ + public boolean JAVA() { + String s = "JAVA"; + for (int i = 1; i< cmd.length; i++) s = s + " " + cmd[i]; + try { + send(s); + String reply = receive(); + } catch (IOException e) {} + return true; + } + + /** + * LCD command: changes the current local path of this shell to cmd[1]. + * + * @return always true + */ + public boolean LCD() { + if (cmd.length != 2) { + err.println(logPrefix + "---- Syntax: LCD "); + return true; + } + String path = cmd[1]; + File dir = new File(path); + File newPath = dir.isAbsolute() ?
dir : new File(this.currentPath, path); + try {newPath = new File(newPath.getCanonicalPath());} catch (IOException e) {} + if (newPath.exists()) { + if (newPath.isDirectory()) { + this.currentPath = newPath; + out.println(logPrefix + "---- New local path: " + this.currentPath.toString()); + } else { + err.println(logPrefix + "---- Error: local path " + newPath.toString() + " denotes not a directory."); + } + } else { + err.println(logPrefix + "---- Error: local path " + newPath.toString() + " does not exist."); + } + return true; + } + + public boolean LDEL() { + return LRM(); + } + + public boolean LDIR() { + if (cmd.length != 1) { + err.println(logPrefix + "---- Syntax: LDIR (no parameter)"); + return true; + } + String[] name = this.currentPath.list(); + for (int n = 0; n < name.length; ++ n) out.println(logPrefix + ls(new File(this.currentPath, name[n]))); + return true; + } + + private String ls(File inode) { + if ((inode == null) || (!inode.exists())) return ""; + String s = ""; + if (inode.isDirectory()) s = s + "d"; + else if (inode.isFile()) s = s + "-"; + //else if (inode.isHidden()) s = s + "h"; + else s = s + "?"; + if (inode.canRead()) s = s + "r"; else s = s + "-"; + if (inode.canWrite()) s = s + "w"; else s = s + "-"; + s = s + " " + lenformatted("" + inode.length(),9); + DateFormat df = DateFormat.getDateTimeInstance(); + s = s + " " + df.format(new Date(inode.lastModified())); + s = s + " " + inode.getName(); + if (inode.isDirectory()) s = s + "/"; + return s; + } + + private String lenformatted(String s, int l) { + l = l - s.length(); + while (l > 0) {s = " " + s; l--;} + return s; + } + + public boolean LITERAL() { + if (cmd.length == 1) { + err.println(logPrefix + "---- Syntax: LITERAL [] (see RFC959)"); + return true; + } + String s = ""; + for (int i = 1; i < cmd.length; i++) s = s + " " + cmd[i]; + try { + literal(s.substring(1)); + } catch (IOException e) { + err.println(logPrefix + "---- Error: Syntax of FTP-command wrong. 
See RFC959 for details."); + } + return true; + } + + public boolean LLS() { + return LDIR(); + } + + public boolean LMD() { + return LMKDIR(); + } + + public boolean LMKDIR() { + if (cmd.length != 2) { + err.println(logPrefix + "---- Syntax: LMKDIR "); + return true; + } + File f = new File(this.currentPath, cmd[1]); + if (f.exists()) { + err.println(logPrefix + "---- Error: local file/folder " + cmd[1] + " already exists"); + } else { + if (!f.mkdir()) err.println(logPrefix + "---- Error: creation of local folder " + cmd[1] + " failed"); + } + return true; + } + + public boolean LMV() { + if (cmd.length != 3) { + err.println(logPrefix + "---- Syntax: LMV "); + return true; + } + File from = new File(cmd[1]); + File to = new File(cmd[2]); + if (!to.exists()) { + if (from.renameTo(to)) { + out.println(logPrefix + "---- \"" + from.toString() + "\" renamed to \"" + to.toString() + "\""); + } else err.println(logPrefix + "rename failed"); + } else err.println(logPrefix + "\"" + to.toString() + "\" already exists"); + return true; + } + + public boolean LPWD() { + if (cmd.length != 1) { + err.println(logPrefix + "---- Syntax: LPWD (no parameter)"); + return true; + } + out.println(logPrefix + "---- Local path: " + this.currentPath.toString()); + return true; + } + + public boolean LRD() { + return LMKDIR(); + } + + public boolean LRMDIR() { + if (cmd.length != 2) { + err.println(logPrefix + "---- Syntax: LRMDIR "); + return true; + } + File f = new File(this.currentPath, cmd[1]); + if (!f.exists()) { + err.println(logPrefix + "---- Error: local folder " + cmd[1] + " does not exist"); + } else { + if (!f.delete()) err.println(logPrefix + "---- Error: deletion of local folder " + cmd[1] + " failed"); + } + return true; + } + + public boolean LRM() { + if (cmd.length != 2) { + err.println(logPrefix + "---- Syntax: LRM "); + return true; + } + File f = new File(this.currentPath, cmd[1]); + if (!f.exists()) { + err.println(logPrefix + "---- Error: local file " + cmd[1] + " 
does not exist"); + } else { + if (!f.delete()) err.println(logPrefix + "---- Error: deletion of file " + cmd[1] + " failed"); + } + return true; + } + + public boolean LS() { + if (cmd.length > 2) { + err.println(logPrefix + "---- Syntax: LS [|]"); + return true; + } + if (ControlSocket == null) return LLS(); + try { + Vector l; + if (cmd.length == 2) l = list(cmd[1],true); else l = list(".",true); + Enumeration x = l.elements(); + out.println(logPrefix + "---- v---v---v---v---v---v---v---v---v---v---v---v---v---v---v---v---v---v---v"); + while (x.hasMoreElements()) out.println(logPrefix + (String) x.nextElement()); + out.println(logPrefix + "---- ^---^---^---^---^---^---^---^---^---^---^---^---^---^---^---^---^---^---^"); + } catch (IOException e) { + err.println(logPrefix + "---- Error: remote list not available"); + } + return true; + } + + + private Vector list(String path, boolean extended) throws IOException { + // prepare data channel + if (DataSocketPassiveMode) createPassiveDataPort(); else createActiveDataPort(); + + // send command to the control port + if (extended) + send("LIST " + path); + else + send("NLST " + path); + + // read status of the command from the control port + String reply = receive(); + + // get status code + int status = Integer.parseInt(reply.substring(0, 1)); + + // starting data transaction + if (status == 1) { + Socket data; + if (DataSocketPassiveMode) { + data = DataSocketPassive; + } else { + data = DataSocketActive.accept(); + } + BufferedReader ClientStream = new BufferedReader(new InputStreamReader(data.getInputStream())); + + // read file system data + String line; + int i = 0; + Vector files = new Vector(); + while ((line = ClientStream.readLine()) != null) + if (!line.startsWith("total ")) files.addElement(line); + + // after stream is empty we should get control completion echo + reply = receive(); + + boolean success = (Integer.parseInt(reply.substring(0, 1)) == 2); + + // shutdown connection + ClientStream.close(); + 
data.close(); + + if (!success) throw new IOException(reply); + + files.trimToSize(); + return files; + } else + throw new IOException(reply); + } + + public boolean MDIR() { + return MKDIR(); + } + + public boolean MKDIR() { + if (cmd.length != 2) { + err.println(logPrefix + "---- Syntax: MKDIR "); + return true; + } + if (ControlSocket == null) return LMKDIR(); + try { + // send mkdir command + send("MKD " + cmd[1]); + // read reply + String reply = receive(); + if (Integer.parseInt(reply.substring(0, 1)) != 2) throw new IOException(reply); + } catch (IOException e) { + err.println(logPrefix + "---- Error: creation of folder " + cmd[1] + " failed"); + } + return true; + } + + public boolean MGET() { + if (cmd.length != 2) { + err.println(logPrefix + "---- Syntax: MGET "); + return true; + } + try { + mget(cmd[1], false); + } catch (IOException e) { + err.println(logPrefix + "---- Error: mget failed (" + e.getMessage() + ")"); + } + return true; + } + + private void mget(String pattern, boolean remove) throws IOException { + Vector l = list(".",false); + Enumeration x = l.elements(); + String remote; + File local; + int idx; // the search for " " is only for improper lists from the server. 
this fails if the file name has a " " in it + while (x.hasMoreElements()) { + remote = (String) x.nextElement(); + //idx = remote.lastIndexOf(" "); + //if (idx >= 0) remote = remote.substring(idx + 1); + if (matches(remote, pattern)) { + local = new File(this.currentPath, remote); + if (local.exists()) { + err.println(logPrefix + "---- Warning: local file " + local.toString() + " overwritten."); + local.delete(); + } + retrieveFilesRecursively(remote, remove); + } + } + } + + public boolean MOVEDOWN() { + if (cmd.length != 2) { + err.println(logPrefix + "---- Syntax: MOVEDOWN "); + return true; + } + try { + mget(cmd[1], true); + } catch (IOException e) { + err.println(logPrefix + "---- Error: movedown failed (" + e.getMessage() + ")"); + } + return true; + } + +/* + public boolean MOVEUP() { + } +*/ + + public boolean MV() { + if (cmd.length != 3) { + err.println(logPrefix + "---- Syntax: MV "); + return true; + } + if (ControlSocket == null) return LMV(); + try { + // send rename commands + send("RNFR " + cmd[1]); + // read reply + String reply = receive(); + if (Integer.parseInt(reply.substring(0, 1)) != 2) throw new IOException(reply); + send("RNTO " + cmd[2]); + // read reply + reply = receive(); + if (Integer.parseInt(reply.substring(0, 1)) != 2) throw new IOException(reply); + } catch (IOException e) { + err.println(logPrefix + "---- Error: rename of " + cmd[1] + " to " + cmd[2] + " failed."); + } + return true; + } + + public boolean NOOP() { + if (cmd.length != 1) { + err.println(logPrefix + "---- Syntax: NOOP (no parameter)"); + return true; + } + try { + literal("NOOP"); + } catch (IOException e) { + err.println(logPrefix + "---- Error: server does not know how to do nothing"); + } + return true; + } + + public boolean OPEN() { + if ((cmd.length < 2) || (cmd.length > 3)) { + err.println(logPrefix + "---- Syntax: OPEN []"); + return true; + } + if (ControlSocket != null) exec("close",false); // close any existing connections first + int port = 21; + if 
(cmd.length == 3) { + try { + port = java.lang.Integer.parseInt(cmd[2]); + } catch (NumberFormatException e) {port = 21;} + } + if (cmd[1].indexOf(":") > 0) { + // port is given + port = java.lang.Integer.parseInt(cmd[1].substring(cmd[1].indexOf(":") + 1)); + cmd[1] = cmd[1].substring(0,cmd[1].indexOf(":")); + } + try { + ControlSocket = new Socket(cmd[1], port); + clientInput = new BufferedReader(new InputStreamReader(ControlSocket.getInputStream())); + clientOutput = new DataOutputStream(ControlSocket.getOutputStream()); + + // read greeting + receive(); + out.println(logPrefix + "---- Connection to " + cmd[1] + " established."); + prompt = "ftp [" + cmd[1] + "]>"; + } catch (IOException e) { + err.println(logPrefix + "---- Error: connecting " + cmd[1] + " on port " + port + " failed."); + } + return true; + } + + public boolean PROMPT() { + err.println(logPrefix + "---- prompt is always off"); + return true; + } + + public boolean PUT() { + if ((cmd.length < 2) || (cmd.length > 3)) { + err.println(logPrefix + "---- Syntax: PUT []"); + return true; + } + File local = new File(this.currentPath, cmd[1]); + String remote = (cmd.length == 2) ? 
local.getName() : cmd[2]; + if (!local.exists()) { + err.println(logPrefix + "---- Error: local file " + local.toString() + " does not exist."); + err.println(logPrefix + " Remote file " + remote + " not overwritten."); + } else { + try { + put(local.getAbsolutePath(), remote); + } catch (IOException e) { + err.println(logPrefix + "---- Error: transmitting file " + local.toString() + " failed."); + } + } + return true; + } + + public boolean PWD() { + if (cmd.length > 1) { + err.println(logPrefix + "---- Syntax: PWD (no parameter)"); + return true; + } + if (ControlSocket == null) return LPWD(); + try { + // send pwd command + send("PWD"); + + // read current directory + String reply = receive(); + if (Integer.parseInt(reply.substring(0, 1)) != 2) throw new IOException(reply); + + // parse directory name out of the reply + reply = reply.substring(5); + reply = reply.substring(0, reply.lastIndexOf('"')); + + out.println(logPrefix + "---- Current remote path is: " + reply); + } catch (IOException e) { + err.println(logPrefix + "---- Error: remote path not available"); + } + return true; + } + + public boolean REMOTEHELP() { + if (cmd.length != 1) { + err.println(logPrefix + "---- Syntax: REMOTEHELP (no parameter)"); + return true; + } + try { + literal("HELP"); + } catch (IOException e) { + err.println(logPrefix + "---- Error: remote help not supported by server."); + } + return true; + } + + public boolean RMDIR() { + if (cmd.length != 2) { + err.println(logPrefix + "---- Syntax: RMDIR "); + return true; + } + if (ControlSocket == null) return LRMDIR(); + try { + rmForced(cmd[1]); + } catch (IOException e) { + err.println(logPrefix + "---- Error: deletion of folder " + cmd[1] + " failed."); + } + return true; + } + + public boolean QUIT() { + if (ControlSocket != null) exec("close",false); + return false; + } + + public boolean RECV() { + return GET(); + } + + public boolean USER() { + if (cmd.length != 3) { + err.println(logPrefix + "---- Syntax: USER "); + return 
true; + } + try { + out.println(logPrefix + "---- Granted access for user " + login(cmd[1], cmd[2]) + "."); + } catch (IOException e) { + err.println(logPrefix + "---- Error: authorization of user " + cmd[1] + " failed."); + } + return true; + } + + public boolean APPEND() { + err.println(logPrefix + "---- not yet supported"); + return true; + } + public boolean HELP() { + out.println(logPrefix + "---- ftp HELP ----"); + out.println(logPrefix + ""); + out.println(logPrefix + "This ftp client shell can act as command shell for the local host as well for the"); + out.println(logPrefix + "remote host. Commands that point to the local host are preceded by 'L'."); + out.println(logPrefix + ""); + out.println(logPrefix + "Supported Commands:"); + out.println(logPrefix + "ASCII"); + out.println(logPrefix + " switch remote server to ASCII transfer mode"); + out.println(logPrefix + "BINARY"); + out.println(logPrefix + " switch remote server to BINARY transfer mode"); + out.println(logPrefix + "BYE"); + out.println(logPrefix + " quit the command shell (same as EXIT)"); + out.println(logPrefix + "CD "); + out.println(logPrefix + " change remote path"); + out.println(logPrefix + "CLOSE"); + out.println(logPrefix + " close connection to remote host (same as DISCONNECT)"); + out.println(logPrefix + "DEL "); + out.println(logPrefix + " delete file on remote server (same as RM)"); + out.println(logPrefix + "RM "); + out.println(logPrefix + " remove file from remote server (same as DEL)"); + out.println(logPrefix + "DIR [|] "); + out.println(logPrefix + " print file information for remote directory or file"); + out.println(logPrefix + "DISCONNECT"); + out.println(logPrefix + " disconnect from remote server (same as CLOSE)"); + out.println(logPrefix + "EXIT"); + out.println(logPrefix + " quit the command shell (same as BYE)"); + out.println(logPrefix + "GET []"); + out.println(logPrefix + " load from remote server and store it locally,"); + out.println(logPrefix + " optionally to . 
if the is a directory,"); + out.println(logPrefix + " then all files in that directory are retrieved,"); + out.println(logPrefix + " including recursively all subdirectories."); + out.println(logPrefix + "GLOB"); + out.println(logPrefix + " toggles globbing: matching with wild cards or not"); + out.println(logPrefix + "COPY"); + out.println(logPrefix + " copies local files"); + out.println(logPrefix + "LCD "); + out.println(logPrefix + " local directory change"); + out.println(logPrefix + "LDEL "); + out.println(logPrefix + " local file delete"); + out.println(logPrefix + "LDIR"); + out.println(logPrefix + " shows local directory content"); + out.println(logPrefix + "LITERAL []"); + out.println(logPrefix + " Sends FTP commands as documented in RFC959"); + out.println(logPrefix + "LLS"); + out.println(logPrefix + " as LDIR"); + out.println(logPrefix + "LMD"); + out.println(logPrefix + " as LMKDIR"); + out.println(logPrefix + "LMV "); + out.println(logPrefix + " copies local files"); + out.println(logPrefix + "LPWD"); + out.println(logPrefix + " prints local path"); + out.println(logPrefix + "LRD"); + out.println(logPrefix + " as LMKDIR"); + out.println(logPrefix + "LRMD "); + out.println(logPrefix + " deletes local directory "); + out.println(logPrefix + "LRM "); + out.println(logPrefix + " deletes local file "); + out.println(logPrefix + "LS [|]"); + out.println(logPrefix + " prints list of remote directory or information of file "); + out.println(logPrefix + "MDIR"); + out.println(logPrefix + " as MKDIR"); + out.println(logPrefix + "MGET "); + out.println(logPrefix + " copies files from remote server that fits into the"); + out.println(logPrefix + " pattern to the local path."); + out.println(logPrefix + "MOVEDOWN "); + out.println(logPrefix + " copies files from remote server as with MGET"); + out.println(logPrefix + " and deletes them afterwards on the remote server"); + out.println(logPrefix + "MV "); + out.println(logPrefix + " moves or renames files on the 
local host"); + out.println(logPrefix + "NOOP"); + out.println(logPrefix + " sends the NOOP command to the remote server (which does nothing)"); + out.println(logPrefix + " This command is usually used to measure the speed of the remote server."); + out.println(logPrefix + "OPEN []"); + out.println(logPrefix + " connects the ftp shell to the remote server . Optionally,"); + out.println(logPrefix + " a port number can be given, the default port number is 21."); + out.println(logPrefix + " Example: OPEN localhost:2121 or OPEN 192.168.0.1 2121"); + out.println(logPrefix + "PROMPT"); + out.println(logPrefix + " compatibility command, that usually toggles beween prompting on or off."); + out.println(logPrefix + " ftp has prompting switched off by default and cannot switched on."); + out.println(logPrefix + "PUT []"); + out.println(logPrefix + " copies the to the remote server to the current remote path or"); + out.println(logPrefix + " optionally to the given path."); + out.println(logPrefix + "PWD"); + out.println(logPrefix + " prints current path on the remote server."); + out.println(logPrefix + "REMOTEHELP"); + out.println(logPrefix + " asks the remote server to print the help text of the remote server"); + out.println(logPrefix + "RMDIR "); + out.println(logPrefix + " removes the directory on the remote server"); + out.println(logPrefix + "QUIT"); + out.println(logPrefix + " exits the ftp application"); + out.println(logPrefix + "RECV"); + out.println(logPrefix + " as GET"); + out.println(logPrefix + "USER "); + out.println(logPrefix + " logs into the remote server with the user "); + out.println(logPrefix + " and the password "); + out.println(logPrefix + ""); + out.println(logPrefix + ""); + out.println(logPrefix + "EXAMPLE:"); + out.println(logPrefix + "a standard sessions looks like this"); + out.println(logPrefix + ">open 192.168.0.1:2121"); + out.println(logPrefix + ">user anonymous bob"); + out.println(logPrefix + ">pwd"); + out.println(logPrefix + ">ls"); + 
out.println(logPrefix + ">....."); + out.println(logPrefix + ""); + out.println(logPrefix + ""); + return true; + } + public boolean QUOTE() { + err.println(logPrefix + "---- not yet supported"); + return true; + } + public boolean BELL() { + err.println(logPrefix + "---- not yet supported"); + return true; + } + public boolean MDELETE() { + err.println(logPrefix + "---- not yet supported"); + return true; + } + public boolean SEND() { + err.println(logPrefix + "---- not yet supported"); + return true; + } + public boolean DEBUG() { + err.println(logPrefix + "---- not yet supported"); + return true; + } + public boolean MLS() { + err.println(logPrefix + "---- not yet supported"); + return true; + } + public boolean TRACE() { + err.println(logPrefix + "---- not yet supported"); + return true; + } + public boolean MPUT() { + err.println(logPrefix + "---- not yet supported"); + return true; + } + public boolean TYPE() { + err.println(logPrefix + "---- not yet supported"); + return true; + } + public boolean CREATE() { + err.println(logPrefix + "---- not yet supported"); + return true; + } + + + // helper functions + + private boolean matches(String name, String pattern) { + // checks whether the string name matches with the pattern + // the pattern may contain characters '*' as wildcard for several + // characters (also none) and '?' 
to match exactly one characters + //out.println(logPrefix + "MATCH " + name + " " + pattern); + if (!this.glob) return name.equals(pattern); + if (pattern.equals("*")) return true; + if ((pattern.startsWith("*")) && (pattern.endsWith("*"))) + return // avoid recursion deadlock + ((matches(name, pattern.substring(1))) || + (matches(name, pattern.substring(0, pattern.length() - 1)))); + try { + int i = pattern.indexOf("?"); + if (i >= 0) { + if (!(matches(name.substring(0, i), pattern.substring(0, i)))) return false; + return (matches(name.substring(i + 1), pattern.substring(i + 1))); + } + i = pattern.indexOf("*"); + if (i >= 0) { + if (!(name.substring(0, i).equals(pattern.substring(0, i)))) return false; + if (pattern.length() == i + 1) return true; // pattern would be '*' + return (matches( + reverse(name.substring(i)), + reverse(pattern.substring(i + 1)) + "*")); + } + return name.equals(pattern); + } catch (java.lang.StringIndexOutOfBoundsException e) { + // this is normal. it's a lazy implementation + return false; + } + } + + private String reverse(String s) { + if (s.length() < 2) return s; + return reverse(s.substring(1)) + s.charAt(0); + } + + + // protocoll socket commands + + private void send(String buf) throws IOException { + clientOutput.writeBytes(buf); + clientOutput.write('\r'); + clientOutput.write('\n'); + clientOutput.flush(); + if (buf.startsWith("PASS")) { + out.println(logPrefix + "> PASS ********"); + } else { + out.println(logPrefix + "> " + buf); + } + } + + private String receive() throws IOException { + // last reply starts with 3 digit number followed by space + String reply; + + while(true) { + reply = clientInput.readLine(); + + // sanity check + if (reply == null) throw new IOException("Server has presumably shut down the connection."); + + out.println(logPrefix + "< " + reply); + //serverResponse.addElement(reply); + + if (reply.length() >= 4 && + Character.isDigit(reply.charAt(0)) && + Character.isDigit(reply.charAt(1)) && + 
Character.isDigit(reply.charAt(2)) && + (reply.charAt(3) == ' ')) + break; // end of reply + } + // return last reply line + return reply; + } + + + private void sendTransferType(char type) throws IOException { + send("TYPE " + type); + + String reply = receive(); + if (Integer.parseInt(reply.substring(0, 1)) != 2) throw new IOException(reply); + } + + + private void createActiveDataPort() throws IOException { + // create data socket and bind it to free port available + DataSocketActive = new ServerSocket(0); + + // get port socket has been bound to + int DataPort = DataSocketActive.getLocalPort(); + + // client ip + InetAddress LocalIp = serverCore.publicIP(); + // InetAddress LocalIp = DataSocketActive.getInetAddress().getLocalHost(); + + // save ip address in high byte order + byte[] Bytes = LocalIp.getAddress(); + + // bytes greater than 127 should not be printed as negative + short Shorts[] = new short[4]; + for (int i = 0; i < 4; i++) { + Shorts[i] = Bytes[i]; + if (Shorts[i] < 0) Shorts[i] += 256; + } + + // send port command via control socket: + // four ip address shorts encoded and two port shorts encoded + send("PORT " + + //"127,0,0,1," + + Shorts[0] + "," + Shorts[1] + "," + Shorts[2] + "," + Shorts[3] + "," + + ((DataPort & 0xff00) >> 8) + "," + (DataPort & 0x00ff)); + + // read status of the command from the control port + String reply = receive(); + + // check status code + if (Integer.parseInt(reply.substring(0, 1)) != 2) throw new IOException(reply); + + DataSocketPassiveMode = false; + } + + private void createPassiveDataPort() throws IOException { + // send port command via control socket: + // four ip address shorts encoded and two port shorts encoded + send("PASV"); + + // read status of the command from the control port + String reply = receive(); + + // check status code + if (!(reply.substring(0, 3).equals("227"))) throw new IOException(reply); + + // parse the status return: address should start at the first number + int pos = 4; + while 
((pos < reply.length()) && ((reply.charAt(pos) < '0') || (reply.charAt(pos) > '9'))) pos++; + if (pos >= reply.length()) throw new IOException(reply + " [could not parse return code]"); + reply = reply.substring(pos); pos = reply.length() - 1; + while ((pos >= 0) && ((reply.charAt(pos) < '0') || (reply.charAt(pos) > '9'))) pos--; + if (pos < 0) throw new IOException("[could not parse return code: no numbers]"); + reply = reply.substring(0, pos + 1); + StringTokenizer st = new StringTokenizer(reply, ","); + if (st.countTokens() != 6) throw new IOException("[could not parse return code: wrong number of numbers]"); + + // set the data host and port + int a = Integer.parseInt(st.nextToken()); + int b = Integer.parseInt(st.nextToken()); + int c = Integer.parseInt(st.nextToken()); + int d = Integer.parseInt(st.nextToken()); + InetAddress datahost = InetAddress.getByName(a + "." + b + "." + c + "." + d); + int high = Integer.parseInt(st.nextToken()); + int low = Integer.parseInt(st.nextToken()); + if (high < 0 || high > 255 || low < 0 || low > 255) throw new IOException("[could not parse return code: syntax error]"); + int dataport = (high << 8) + low; + + DataSocketPassive = new Socket(datahost, dataport); + DataSocketPassiveMode = true; + } + + private void get(String fileDest, String fileName) throws IOException { + // store time for statistics + long start = GregorianCalendar.getInstance(GMTTimeZone).getTime().getTime(); + + // prepare data channel + if (DataSocketPassiveMode) createPassiveDataPort(); else createActiveDataPort(); + + // set type of the transfer + sendTransferType(transferType); + + // send command to the control port + send("RETR " + fileName); + + // read status of the command from the control port + String reply = receive(); + + // get status code + int status = Integer.parseInt(reply.substring(0, 1)); + + // starting data transaction + if (status == 1) { + Socket data; + if (DataSocketPassiveMode) { + data = DataSocketPassive; + } else { + data = 
DataSocketActive.accept(); + } + InputStream ClientStream = data.getInputStream(); + + // create local file + RandomAccessFile outFile; + if (fileDest == null) + outFile = new RandomAccessFile(fileName, "rw"); + else + outFile = new RandomAccessFile(fileDest, "rw"); + + // write remote file to local file + byte block[] = new byte[blockSize]; + int numRead; + long length = 0; + boolean success; + + try { + // mode "rw" keeps the content of an existing file; cut it off so that no + // stale bytes remain behind the end of a shorter download + outFile.setLength(0); + + while ((numRead = ClientStream.read(block)) != -1) { + outFile.write(block, 0, numRead); + length = length + numRead; + } + + // after stream is empty we should get control completion echo + reply = receive(); + success = (Integer.parseInt(reply.substring(0, 1)) == 2); + } finally { + // shutdown connection, also when the transfer was interrupted; + // the nesting makes sure that all three resources are closed + try { + outFile.close(); + } finally { + try { + ClientStream.close(); + } finally { + data.close(); + } + } + } + + if (!success) throw new IOException(reply); + + // write statistics + long stop = GregorianCalendar.getInstance(GMTTimeZone).getTime().getTime(); + // divide before converting to text: casting the long byte count to int first + // would overflow for files of 2 GB and more + out.print("---- downloaded " + + ((length < 2048) ? length + " bytes" : (length / 1024) + " kbytes") + + " in " + + (((stop - start) < 2000) ?
(stop - start) + " milliseconds" : (((int) ((stop - start) / 100)) / 10) + " seconds")); + if (start == stop) err.println(logPrefix + ""); else + out.println(logPrefix + " (" + ((long) (length * 1000 / 1024 / (stop - start))) + " kbytes/second)"); + + } else + throw new IOException(reply); + } + + private void put(String fileName, String fileDest) throws IOException { + + // prepare data channel + if (DataSocketPassiveMode) createPassiveDataPort(); else createActiveDataPort(); + + // set type of the transfer + sendTransferType(transferType); + + // send command to the control port + if (fileDest == null) + send("STOR " + fileName); + else + send("STOR " + fileDest); + + // read status of the command from the control port + String reply = receive(); + + // starting data transaction + if (Integer.parseInt(reply.substring(0, 1)) == 1) { + // ftp server initiated client connection + Socket data; + if (DataSocketPassiveMode) { + data = DataSocketPassive; + } else { + data = DataSocketActive.accept(); + } + OutputStream ClientStream = data.getOutputStream(); + + // read from local file + RandomAccessFile inFile = new RandomAccessFile(fileName, "r"); + + // write remote file to local file + byte block[] = new byte[blockSize]; + int numRead; + + while ((numRead = inFile.read(block)) >= 0) { + ClientStream.write(block, 0, numRead); + } + + // shutdown and cleanup + inFile.close(); + ClientStream.close(); + + // after stream is empty we should get control completion echo + reply = receive(); + boolean success = (Integer.parseInt(reply.substring(0, 1)) == 2); + + // shutdown remote client connection + data.close(); + + if (!success) throw new IOException(reply); + + } else + throw new IOException(reply); + } + + + private String login(String account, String password) throws IOException { + + // send user name + send("USER " + account); + + String reply = receive(); + if (Integer.parseInt(reply.substring(0, 1)) == 4) throw new IOException(reply); + if 
(Integer.parseInt(reply.substring(0, 1)) == 2) return this.account = account; + + // send password + send("PASS " + password); + + reply = receive(); + if (Integer.parseInt(reply.substring(0, 1)) != 2) throw new IOException(reply); + + this.account = account; + return account; + } + + + private String login() throws IOException { + // force anonymous login if not already connected + if (this.account == null) { + login("anonymous", "bob@"); + return this.account; + } else + return this.account; + } + + private String sys() throws IOException { + // send system command + send("SYST"); + + // check completion + String systemType = receive(); + if (Integer.parseInt(systemType.substring(0, 1)) != 2) throw new IOException(systemType); + + // exclude status code from reply + return systemType.substring(4); + } + + + private void literal(String commandLine) throws IOException { + // send the complete line + send(commandLine); + + // read reply + String reply = receive(); + + if (Integer.parseInt(reply.substring(0, 1)) == 5) throw new IOException(reply); + } + + class ee extends SecurityException { + private int value = 0; + public ee() {} + public ee(int value) { + super(); + this.value = value; + } + public int value() { return value; } + } + + class sm extends SecurityManager { + public void checkCreateClassLoader() { } + public void checkAccess(Thread g) { } + public void checkAccess(ThreadGroup g) { } + public void checkExit(int status) { + //System.out.println(logPrefix + "ShellSecurityManager: object called System.exit(" + status + ")"); + // signal that someone is trying to terminate the JVM. 
+            throw new ee(status);
+        }
+        // every remaining check is a deliberate no-op: this manager forbids nothing
+        // except System.exit (see checkExit above)
+        public void checkExec(String cmd) { }
+        public void checkLink(String lib) { }
+        public void checkRead(FileDescriptor fd) { }
+        public void checkRead(String file) { }
+        public void checkRead(String file, Object context) { }
+        public void checkWrite(FileDescriptor fd) { }
+        public void checkWrite(String file) { }
+        public void checkDelete(String file) { }
+        public void checkConnect(String host, int port) { }
+        public void checkConnect(String host, int port, Object context) { }
+        public void checkListen(int port) { }
+        public void checkAccept(String host, int port) { }
+        public void checkMulticast(InetAddress maddr) { }
+        //public void checkMulticast(InetAddress maddr, byte ttl) { }
+        public void checkPropertiesAccess() { }
+        public void checkPropertyAccess(String key) { }
+        public void checkPropertyAccess(String key, String def) { }
+        public boolean checkTopLevelWindow(Object window) { return true; }
+        public void checkPrintJobAccess() { }
+        public void checkSystemClipboardAccess() { }
+        public void checkAwtEventQueueAccess() { }
+        public void checkPackageAccess(String pkg) { }
+        public void checkPackageDefinition(String pkg) { }
+        public void checkSetFactory() { }
+        public void checkMemberAccess(Class clazz, int which) { }
+        public void checkSecurityAccess(String provider) { }
+    }
+
+    /**
+     * Lists a remote directory: runs the client commands
+     * open / user / cd / ls / close / exit on a fresh ftpc instance.
+     *
+     * @param host       host string appended to the "open" command
+     * @param remotePath directory appended to the "cd" command
+     * @param account    login name
+     * @param password   login password
+     */
+    public static void dir(String host,
+                           String remotePath,
+                           String account, String password) {
+        try {
+            ftpc c = new ftpc();
+            c.exec("open " + host, false);
+            c.exec("user " + account + " " + password, false);
+            c.exec("cd " + remotePath, false);
+            c.exec("ls", true);
+            c.exec("close", false);
+            c.exec("exit", false);
+        } catch (java.security.AccessControlException e) {
+            // NOTE(review): swallowed silently here, while put() reports the same
+            // exception on System.out -- confirm that this is intended
+        }
+    }
+
+    /** Same as dir(...) with the fixed credentials "anonymous" / "anomic". */
+    public static void dirAnonymous(String host,
+                                    String remotePath) {
+        dir(host, remotePath, "anonymous", "anomic");
+    }
+
+    /**
+     * Uploads one local file in binary mode and returns the session log.
+     *
+     * @param host       host string appended to the "open" command
+     * @param localFile  file to upload
+     * @param remotePath remote directory to change into first; may be null (no "cd" is sent);
+     *                   backslashes are converted to forward slashes
+     * @param remoteName remote file name; null or "" sends "put" with the local name only
+     * @param account    login name
+     * @param password   login password
+     * @return everything the client printed to the in-memory stream it was constructed
+     *         with, or "" if an IOException or AccessControlException occurred
+     */
+    public static String put(String host,
+                             File localFile, String remotePath, String remoteName,
+                             String account, String password) {
+        // returns the log
+        try {
+            ByteArrayOutputStream baos = new ByteArrayOutputStream();
+            PrintStream out = new PrintStream(baos);
+            // the same stream is passed twice -- presumably as (in, out, err); confirm against the constructor
+            ftpc c = new ftpc(System.in, out, out);
+            c.exec("open " + host, false);
+            c.exec("user " + account + " " + password, false);
+            if (remotePath != null) {
+                // String.replace returns a new string; the result used to be thrown
+                // away, so Windows-style separators were sent to the server unchanged
+                remotePath = remotePath.replace('\\', '/');
+                c.exec("cd " + remotePath, false);
+            }
+            c.exec("binary", false);
+            // a null remoteName is treated like an empty one instead of raising a NullPointerException
+            c.exec("put " + localFile.toString() + (((remoteName == null) || (remoteName.length() == 0)) ? "" : (" " + remoteName)), false);
+            c.exec("close", false);
+            c.exec("exit", false);
+            out.close();
+            String log = baos.toString();
+            baos.close();
+            return log;
+        } catch (IOException e) {
+            return "";
+        } catch (java.security.AccessControlException e) {
+            System.out.println("ERROR: ftp put failed:" + e.getMessage());
+            e.printStackTrace();
+            return "";
+        }
+    }
+
+    /**
+     * Downloads one remote file in binary mode: runs the client commands
+     * open / user / lcd / binary / get / close / exit on a fresh ftpc instance.
+     *
+     * @param host       host string appended to the "open" command
+     * @param remoteFile remote file appended to the "get" command
+     * @param localPath  local directory appended to the "lcd" command
+     * @param account    login name
+     * @param password   login password
+     */
+    public static void get(String host,
+                           String remoteFile, File localPath,
+                           String account, String password) {
+        try {
+            ftpc c = new ftpc();
+            c.exec("open " + host, false);
+            c.exec("user " + account + " " + password, false);
+            c.exec("lcd " + localPath.toString(), false);
+            c.exec("binary", false);
+            c.exec("get " + remoteFile, false);
+            c.exec("close", false);
+            c.exec("exit", false);
+        } catch (java.security.AccessControlException e) {
+            // NOTE(review): swallowed silently, see dir(...)
+        }
+    }
+
+    /** Same as get(...) with the fixed credentials "anonymous" / "anomic". */
+    public static void getAnonymous(String host,
+                                    String remoteFile, File localPath) {
+        get(host, remoteFile, localPath, "anonymous", "anomic");
+    }
+
+
+    /** Runnable that calls put(...) with the arguments captured at construction time. */
+    public static class pt implements Runnable {
+        String host;
+        File localFile;
+        String remotePath;
+        String remoteName;
+        String account;
+        String password;
+        public pt(String h, File l, String rp, String rn, String a, String p) {
+            host = h; localFile = l; remotePath = rp; remoteName = rn; account = a; password = p;
+        }
+        public final void run() {
+            // the log returned by put(...) is discarded here
+            put(host, localFile, remotePath, remoteName, account, password);
+        }
+    }
+
+    // starts put(...) on a new thread, see pt
+    public static Thread putAsync(String host,
+                                  File localFile, String remotePath, String remoteName,
+                                  String account, String password) {
+        Thread t = new
Thread(new pt(host, localFile, remotePath, remoteName, account, password)); + t.start(); + return t; // return value can be used to determine status of transfer with isAlive() or join() + } + + private static void printHelp() { + System.out.println(logPrefix + "ftp help"); + System.out.println(logPrefix + "----------"); + System.out.println(logPrefix + ""); + System.out.println(logPrefix + "The following commands are supported"); + System.out.println(logPrefix + "java ftp -- (without arguments) starts the shell. Thy 'help' then for shell commands."); + System.out.println(logPrefix + "java ftp [':'] -- starts shell and connects to specified host"); + System.out.println(logPrefix + "java ftp -h -- prints this help"); + System.out.println(logPrefix + "java ftp -dir [':'] [ ]"); + System.out.println(logPrefix + "java ftp -get [':'] [ ]"); + System.out.println(logPrefix + "java ftp -put [':'] "); + System.out.println(logPrefix + ""); + } + + public static void main(String[] args) { + System.out.println(logPrefix + "WELCOME TO THE ANOMIC FTP CLIENT v" + vDATE); + System.out.println(logPrefix + "Visit http://www.anomic.de and support shareware!"); + System.out.println(logPrefix + "try -h for command line options"); + System.out.println(logPrefix + ""); + if (args.length == 0) { + (new ftpc()).shell(null); + } else if (args.length == 1) { + if (args[0].equals("-h")) { + printHelp(); + } else { + (new ftpc()).shell(args[0]); + } + } else if (args.length == 2) { + printHelp(); + } else if (args.length == 3) { + if (args[0].equals("-dir")) { + dirAnonymous(args[1], args[2]); + } else { + printHelp(); + } + } else if (args.length == 4) { + if (args[0].equals("-get")) { + getAnonymous(args[1], args[2], new File(args[3])); + } else { + printHelp(); + } + } else if (args.length == 5) { + if (args[0].equals("-dir")) { + dir(args[1], args[2], args[3], args[4]); + } else { + printHelp(); + } + } else if (args.length == 6) { + if (args[0].equals("-get")) { + get(args[1], args[2], new 
File(args[3]), args[4], args[5]); + } else if (args[0].equals("-put")) { + put(args[1], new File(args[2]), args[3], "", args[4], args[5]); + } else { + printHelp(); + } + } else { + printHelp(); + } + } + +} diff --git a/source/de/anomic/net/natLib.java b/source/de/anomic/net/natLib.java new file mode 100644 index 000000000..f0da54fd5 --- /dev/null +++ b/source/de/anomic/net/natLib.java @@ -0,0 +1,188 @@ +// natLib.java +// ------------------------------------- +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2004 +// last major change: 04.05.2004 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. 
The installation and usage (starting/running) of this
+// software may allow other people or application to access your computer and
+// any attached devices and is highly dependent on the configuration of the
+// software which must be done by the user of the software; the author(s) is
+// (are) also not responsible for proper configuration and usage of the
+// software, even if provoked by documentation provided together with
+// the software.
+//
+// Any changes to this file according to the GPL as documented in the file
+// gpl.txt aside this file in the shipment you received can be done to the
+// lines that follows this copyright notice here, but changes must not be
+// done inside the copyright notive above. A re-distribution must contain
+// the intact and unchanged copyright notice.
+// Contributions and changes to the program code must be marked as such.
+
+package de.anomic.net;
+
+import java.io.*;
+import java.util.*;
+import java.net.*;
+import de.anomic.tools.*;
+import de.anomic.server.*;
+import de.anomic.http.*;
+
+/**
+ * Static helpers that try to find an address of this host that is neither
+ * loopback nor private: first (optionally) the status page of a DI-604
+ * router, then serverCore.publicIP(), then three public web pages.
+ */
+public class natLib {
+
+    /**
+     * Downloads url, filters the response with nxTools.grep and returns
+     * nxTools.tail1 of the hits, or null when grep produced nothing.
+     * NOTE(review): 'context' is handed to nxTools.grep unchanged; judging by
+     * the shell transcript in getDI604 ("grep -A 1") it is presumably the
+     * number of lines kept after a match -- confirm against nxTools.
+     */
+    private static String lastHit(String url, String user, String password,
+                                  int context, String pattern) throws Exception {
+        // 5000 is presumably a timeout in milliseconds; the trailing (null, 0)
+        // arguments are the ones every original call used -- confirm against httpc.wget
+        Vector x = httpc.wget(new URL(url), 5000, user, password, null, 0);
+        x = nxTools.grep(x, context, pattern);
+        if ((x == null) || (x.size() == 0)) return null;
+        return nxTools.tail1(x);
+    }
+
+    /**
+     * Reads the address shown on the status page of a DI-604 router at 192.168.0.1.
+     *
+     * @param password password sent together with the user name "admin"
+     * @return the extracted string, or null if the page could not be fetched or parsed
+     */
+    public static String getDI604(String password) {
+        // this pulls off the ip number from the DI-604 router/nat
+        /*
+          wget --quiet --ignore-length http://admin:@192.168.0.1:80/status.htm > /dev/null
+          grep -A 1 "IP Address" status.htm | tail -1 | awk '{print $1}' | awk 'BEGIN{FS=">"} {print $2}'
+          rm status.htm
+        */
+        try {
+            String line = lastHit("http://192.168.0.1:80/status.htm", "admin", password, 1, "IP Address");
+            if (line == null) return null;
+            return nxTools.awk(nxTools.awk(line, " ", 1), ">", 2);
+        } catch (Exception e) {
+            // best effort: any failure simply means "no answer from this source"
+            return null;
+        }
+    }
+
+    // probe 0: http://www.whatismyip.com/ ; null when the page cannot be fetched or parsed
+    private static String getWhatIsMyIP() {
+        try {
+            String line = lastHit("http://www.whatismyip.com/", null, null, 0, "Your IP is");
+            if (line == null) return null;
+            return nxTools.awk(line, " ", 4);
+        } catch (Exception e) {
+            return null;
+        }
+    }
+
+    // probe 1: traceroute page at slac.stanford.edu ; null when the page cannot be fetched or parsed
+    private static String getStanford() {
+        try {
+            String line = lastHit("http://www.slac.stanford.edu/cgi-bin/nph-traceroute.pl", null, null, 0, "firewall protecting your browser");
+            if (line == null) return null;
+            return nxTools.awk(line, " ", 7);
+        } catch (Exception e) {
+            return null;
+        }
+    }
+
+    // probe 2: http://ipid.shat.net/ ; null when the page cannot be fetched or parsed
+    private static String getIPID() {
+        try {
+            String line = lastHit("http://ipid.shat.net/", null, null, 2, "Your IP address");
+            if (line == null) return null;
+            return nxTools.awk(nxTools.awk(nxTools.awk(line, " ", 5), ">", 2), "<", 1);
+        } catch (Exception e) {
+            return null;
+        }
+    }
+
+    /**
+     * Textual test for loopback / private / link-local addresses.
+     * The caller (isProper) guarantees ip != null.
+     */
+    private static boolean isNotLocal(String ip) {
+        // the original prefixes, kept exactly as they were
+        if ((ip.equals("localhost")) ||
+            (ip.startsWith("127")) ||
+            (ip.startsWith("192.168")) ||
+            (ip.startsWith("10."))
+            ) return false;
+        // link-local block 169.254.0.0/16 (previously reported as public)
+        if (ip.startsWith("169.254.")) return false;
+        // private block 172.16.0.0/12, i.e. second octet 16..31 (previously reported as public)
+        if (ip.startsWith("172.")) {
+            int dot = ip.indexOf('.', 4);
+            if (dot > 4) {
+                try {
+                    int second = Integer.parseInt(ip.substring(4, dot));
+                    if ((second >= 16) && (second <= 31)) return false;
+                } catch (NumberFormatException e) {
+                    // not a dotted quad, e.g. a host name that starts with "172." -- not local by this rule
+                }
+            }
+        }
+        return true;
+    }
+
+    /**
+     * True if InetAddress.getByName accepts the string. Note that getByName
+     * also resolves host names, so a resolvable name passes as well.
+     */
+    private static boolean isIP(String ip) {
+        if (ip == null) return false;
+        try {
+            InetAddress.getByName(ip);
+            return true;
+        } catch (Exception e) {
+            return false;
+        }
+    }
+
+    /**
+     * @param ip candidate address, may be null
+     * @return true if ip is non-null, contains no ':', is not local according
+     *         to isNotLocal and is accepted by isIP
+     */
+    public static boolean isProper(String ip) {
+        if (ip == null) return false;
+        if (ip.indexOf(":") >= 0) return false; // ipv6...
+        return ((isNotLocal(ip)) && (isIP(ip)));
+    }
+
+    // number of web probes known to retrieveFrom
+    private static int retrieveOptions() {
+        return 3;
+    }
+
+    // runs web probe number 'option' (0..retrieveOptions()-1); null for any other number
+    private static String retrieveFrom(int option) {
+        if ((option < 0) || (option >= retrieveOptions())) return null;
+        if (option == 0) return getWhatIsMyIP();
+        if (option == 1) return getStanford();
+        if (option == 2) return getIPID();
+        return null;
+    }
+
+    /**
+     * Tries the available sources in turn and returns the first answer that
+     * passes isProper.
+     *
+     * @param DI604    true to ask a DI-604 router first
+     * @param password router password, only used when DI604 is true
+     * @return the address, or null if no source produced a proper one
+     */
+    public static String retrieveIP(boolean DI604, String password) {
+        String ip;
+        if (DI604) {
+            // first try the simple way...
+            ip = getDI604(password);
+            if (isProper(ip)) {
+                //System.out.print("{DI604}");
+                return ip;
+            }
+        }
+
+        // maybe this is a dial-up connection (or LAN and DebugMode) and we can get it from java variables
+        InetAddress ia = serverCore.publicIP();
+        if (ia != null) {
+            ip = ia.getHostAddress();
+            if (isProper(ip)) return ip;
+        }
+
+        // now go the uneasy way and ask some web responder
+        // NOTE(review): disorderHeap presumably hands out 0..n-1 in random order -- confirm
+        disorderHeap random = new disorderHeap(retrieveOptions());
+        for (int i = 0; i < retrieveOptions(); i++) {
+            ip = retrieveFrom(random.number());
+            if (isProper(ip)) return ip;
+        }
+        return null;
+    }
+
+    // rDNS services:
+    // http://www.xdr2.net/reverse_DNS_lookup.asp
+    // http://remote.12dt.com/rns/
+    // http://bl.reynolds.net.au/search/
+    // http://www.declude.com/Articles.asp?ID=97
+    // http://www.dnsstuff.com/
+
+    // listlist: http://www.aspnetimap.com/help/welcome/dnsbl.html
+
+
+    /** Prints "yes" or "no" depending on isProper(args[0]). */
+    public static void main(String[] args) {
+        //System.out.println("PROBE DI604 : " + getDI604(""));
+        //System.out.println("PROBE whatismyip: " + getWhatIsMyIP());
+        //System.out.println("PROBE stanford : " + getStanford());
+        //System.out.println("PROBE ipid : " + getIPID());
+        //System.out.println("retrieveIP-NAT : " + retrieveIP(true,""));
+        //System.out.println("retrieveIP : " + retrieveIP(false,"12345"));
+
+        if (args.length < 1) {
+            // used to die with an ArrayIndexOutOfBoundsException
+            System.out.println("usage: natLib <address>");
+            return;
+        }
+        System.out.println(isProper(args[0]) ? "yes" : "no");
+    }
+
+}
diff --git a/source/de/anomic/net/whois.java b/source/de/anomic/net/whois.java
new file mode 100644
index 000000000..2eef2504d
--- /dev/null
+++ b/source/de/anomic/net/whois.java
@@ -0,0 +1,93 @@
+// whois.java
+// -------------------------------------
+// (C) by Michael Peter Christen; mc@anomic.de
+// first published on http://www.anomic.de
+// Frankfurt, Germany, 2005
+// last major change: 22.03.2005
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//
+// Using this software in any meaning (reading, learning, copying, compiling,
+// running) means that you agree that the Author(s) is (are) not responsible
+// for cost, loss of data or any harm that may be caused directly or indirectly
+// by usage of this softare or this documentation. The usage of this software
+// is on your own risk. The installation and usage (starting/running) of this
+// software may allow other people or application to access your computer and
+// any attached devices and is highly dependent on the configuration of the
+// software which must be done by the user of the software; the author(s) is
+// (are) also not responsible for proper configuration and usage of the
+// software, even if provoked by documentation provided together with
+// the software.
+//
+// Any changes to this file according to the GPL as documented in the file
+// gpl.txt aside this file in the shipment you received can be done to the
+// lines that follows this copyright notice here, but changes must not be
+// done inside the copyright notive above. A re-distribution must contain
+// the intact and unchanged copyright notice.
+// Contributions and changes to the program code must be marked as such.
+
+package de.anomic.net;
+
+
+import java.io.*;
+import java.util.*;
+
+/**
+ * Thin wrapper around the external "whois" command line program.
+ */
+public class whois {
+
+    /**
+     * Runs the external program "whois" for dom and collects every line of
+     * its standard output that contains a ':' after the first character.
+     * The text before the first ':' (trimmed, lower-cased) becomes the key,
+     * the text after it (trimmed) the value; values of a repeated key are
+     * joined with "; ".
+     *
+     * @param dom the name or address to look up
+     * @return the collected properties; null if dom is null, blank or starts
+     *         with '-', or if the program cannot be started or read
+     */
+    public static Properties whois(String dom) {
+        if (dom == null) return null;
+        dom = dom.trim();
+        // a leading '-' would be read by the whois program as an option, not as a query
+        if ((dom.length() == 0) || (dom.startsWith("-"))) return null;
+
+        Process p = null;
+        BufferedReader br = null;
+        try {
+            // argument array instead of one command string: exec(String) splits
+            // on whitespace, so a dom containing blanks could smuggle in extra
+            // arguments; as an array it is always exactly one argument
+            p = Runtime.getRuntime().exec(new String[] {"whois", dom});
+            br = new BufferedReader(new InputStreamReader(p.getInputStream()));
+            String line, key, value, oldValue;
+            int pos;
+            Properties result = new Properties();
+            while ((line = br.readLine()) != null) {
+                pos = line.indexOf(":");
+                if (pos > 0) {
+                    key = line.substring(0, pos).trim().toLowerCase();
+                    value = line.substring(pos + 1).trim();
+                    //System.out.println(key + ":" + value);
+                    oldValue = result.getProperty(key);
+                    result.setProperty(key, (oldValue == null) ? value : (oldValue + "; " + value));
+                }
+            }
+            return result;
+        } catch (IOException e) {
+            //e.printStackTrace();
+            return null;
+        } finally {
+            // release the pipe and the child process on every path (both used to leak)
+            if (br != null) try { br.close(); } catch (IOException e) {}
+            if (p != null) p.destroy();
+        }
+    }
+
+    /**
+     * Condenses a whois result into one short description.
+     *
+     * @param p properties as returned by whois(String); null is accepted
+     *          because that is what whois(String) returns on failure
+     * @return "netname / descr" if both keys exist; otherwise "Person: name"
+     *         if "type" starts with "person" (any case) and "name" exists;
+     *         otherwise "unknown"
+     */
+    public static String evaluateWhois(Properties p) {
+        if (p == null) return "unknown";
+        String info1, info2;
+        info1 = p.getProperty("netname");
+        info2 = p.getProperty("descr");
+        if ((info1 != null) && (info2 != null)) return info1 + " / " + info2;
+        info1 = p.getProperty("type");
+        info2 = p.getProperty("name");
+        if ((info1 != null) && (info2 != null) && (info1.toLowerCase().startsWith("person"))) return "Person: " + info2;
+        return "unknown";
+    }
+
+    /** Looks up args[0] and prints the raw properties followed by the evaluation. */
+    public static void main(String[] args) {
+        if (args.length < 1) {
+            // used to die with an ArrayIndexOutOfBoundsException
+            System.out.println("usage: whois <domain>");
+            return;
+        }
+        Properties p = whois(args[0]);
+        if (p != null) {
+            System.out.println(p);
+            System.out.println("---" + evaluateWhois(p));
+        } else {
+            System.out.println("whois cannot execute");
+        }
+    }
+}
diff --git a/source/de/anomic/plasma/plasmaCondenser.java b/source/de/anomic/plasma/plasmaCondenser.java
new file mode 100644
index 000000000..075f60e8a
--- /dev/null
+++ b/source/de/anomic/plasma/plasmaCondenser.java
@@ -0,0 +1,627 @@
+// plasmaCondenser.java
+// -----------------------
+// part of YaCy
+// (C) by Michael Peter Christen; mc@anomic.de
+// first published on http://www.anomic.de
+// Frankfurt, Germany, 2004
+// last change: 09.01.2004
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. +// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. 
+ + +package de.anomic.plasma; + +import java.io.*; +import java.util.*; +import de.anomic.htmlFilter.*; +import de.anomic.kelondro.*; + +public class plasmaCondenser { + + private final static int numlength = 5; + + private Properties analysis; + private TreeMap words; // a string (the words) to (statProp) - relation + private HashMap sentences; + private int wordminsize; + private int wordcut; + + public plasmaCondenser(InputStream text) throws IOException { + this(text, 3, 2); + } + + public plasmaCondenser(InputStream text, int wordminsize, int wordcut) throws IOException { + this.wordminsize = wordminsize; + this.wordcut = wordcut; + analysis = new Properties(); + words = new TreeMap(); + sentences = new HashMap(); + createCondensement(text); + } + + public Properties getAnalysis() { + return analysis; + } + + public int excludeWords(TreeSet stopwords) { + // subtracts the given stopwords from the word list + // the word list shrinkes. This returns the number of shrinked words + int oldsize = words.size(); + words = kelondroMSetTools.excludeConstructive(words, stopwords); + return oldsize - words.size(); + } + + public Set getWords() { + return words.keySet(); + } + + public int wordCount(String word) { + // number of occurrences of one word + // if the word did not occur, this simply returns 0 + statProp sp = (statProp) words.get(word); + if (sp == null) return 0; + return sp.count; + } + + public static class statProp { + public int count; + public int handle; + public HashSet hash; + public statProp(int handle) { + this.count = 1; + this.handle = handle; + this.hash = new HashSet(); + } + public void inc() {count++;} + public void check(int i) {hash.add("" + i);} + + } + + + + public static String intString(int number, int length) { + String s = "" + number; + while (s.length() < length) s = "0" + s; + return s; + } + + private void createCondensement(InputStream is) throws IOException { + + words = new TreeMap(kelondroMSetTools.fastStringComparator); + 
sentences = new HashMap(); + HashSet currsentwords = new HashSet(); + String sentence = ""; + String word = ""; + String k; + int wordlen; + statProp sp, sp1; + int wordHandle; + int wordHandleCount = 0; + int sentenceHandle; + int sentenceHandleCount = 0; + int allwordcounter = 0; + int allsentencecounter = 0; + int idx; + Iterator it, it1; + + // read source + sievedWordsEnum wordenum = new sievedWordsEnum(is, wordminsize); + while (wordenum.hasMoreElements()) { + word = ((String) wordenum.nextElement()).toLowerCase(); + wordlen = word.length(); + if ((wordlen == 1) && (punctuation(word.charAt(0)))) { + // store sentence + if (sentence.length() > 0) { + // we store the punctuation symbol as first element of the sentence vector + allsentencecounter++; + sentence = word + sentence; + if (sentences.containsKey(sentence)) { + // sentence already exists + sp = (statProp) sentences.get(sentence); + sp.inc(); + idx = sp.handle; + sentences.put(sentence, sp); + } else { + // create new sentence + idx = sentenceHandleCount++; + sentences.put(sentence, new statProp(idx)); + } + // store to the words a link to this sentence + it = currsentwords.iterator(); + while (it.hasNext()) { + k = (String) it.next(); + sp = (statProp) words.get(k); + sp.check(idx); + words.put(k,sp); + } + } + sentence = ""; + currsentwords.clear(); + } else { + // store word + allwordcounter++; + currsentwords.add(word); + if (words.containsKey(word)) { + // word already exists + sp = (statProp) words.get(word); + wordHandle = sp.handle; + sp.inc(); + } else { + // word does not yet exist, create new word entry + wordHandle = wordHandleCount++; + sp = new statProp(wordHandle); + } + words.put(word, sp); + // we now have the unique handle of the word, put it into the sentence: + sentence = sentence + intString(wordHandle, numlength); + } + } + // finnish last sentence + if (sentence.length() > 0) { + allsentencecounter++; + sentence = "." 
+ sentence; + if (sentences.containsKey(sentence)) { + sp = (statProp) sentences.get(sentence); + sp.inc(); + sentences.put(sentence, sp); + } else { + sentences.put(sentence, new statProp(sentenceHandleCount++)); + } + } + + //------------------- + + // we reconstruct the sentence hashtable + // and order the entries by the number of the sentence + // this structure is needed to replace double occurring words in sentences + Object[] orderedSentences = new Object[sentenceHandleCount]; + String[] s; + int wc; + it = sentences.keySet().iterator(); + while (it.hasNext()) { + sentence = (String) it.next(); + wc = (sentence.length() - 1) / numlength; + s = new String[wc + 2]; + sp = (statProp) sentences.get(sentence); + s[0] = intString(sp.count, numlength); // number of occurrences of this sentence + s[1] = sentence.substring(0,1); // the termination symbol of this sentence + for (int i = 0; i < wc; i++) { + k = sentence.substring(i * numlength + 1, (i + 1) * numlength + 1); + s[i + 2] = k; + } + orderedSentences[sp.handle] = s; + } + + Map.Entry entry; + // we search for similar words and reorganize the corresponding sentences + // a word is similar, if a shortened version is equal + it = words.entrySet().iterator(); // enumerates the keys in descending order + wordsearch: while (it.hasNext()) { + entry = (Map.Entry) it.next(); + word = (String) entry.getKey(); + wordlen = word.length(); + sp = (statProp) entry.getValue(); + for (int i = wordcut; i > 0; i--) { + if (wordlen > i) { + k = word.substring(0, wordlen - i); + if (words.containsKey(k)) { + // we will delete the word 'word' and repoint the corresponding links + // in sentences that use this word + sp1 = (statProp) words.get(k); + it1 = sp.hash.iterator(); // we iterate over all sentences that refer to this word + while (it1.hasNext()) { + idx = Integer.parseInt((String) it1.next()); // number of a sentence + s = (String[]) orderedSentences[idx]; + for (int j = 2; j < s.length; j++) { + if 
(s[j].equals(intString(sp.handle, numlength))) s[j] = intString(sp1.handle, numlength); + } + orderedSentences[idx] = s; + } + // update word counter + sp1.count = sp1.count + sp.count; + words.put(k, sp1); + // remove current word + it.remove(); + continue wordsearch; + } + } + } + } + + // depending on the orderedSentences structure, we rebuild the sentence HashMap to + // eliminate double occuring sentences + sentences = new HashMap(); + for (int i = 0; i < orderedSentences.length; i++) { + sentence = ""; + for (int j = 1; j < ((String[]) orderedSentences[i]).length; j++) sentence = sentence + ((String[]) orderedSentences[i])[j]; + if (sentences.containsKey(sentence)) { + // add sentence counter to counter of found sentence + sp = (statProp) sentences.get(sentence); + sp.count = sp.count + Integer.parseInt(((String[]) orderedSentences[i])[0]); + sentences.put(sentence, sp); + //System.out.println("Found double occurring sentence " + i + " = " + sp.handle); + } else { + // create new sentence entry + sp = new statProp(i); + sp.count = Integer.parseInt(((String[]) orderedSentences[i])[0]); + sentences.put(sentence, sp); + } + } + + //------------------- + + // what do we have here: + // sentences + // words + + // we now have the sentence structure and word list + // create properties with this information + + analysis.setProperty("NUMB_TEXT_BYTES", Long.toHexString(wordenum.count())); + analysis.setProperty("NUMB_WORDS", Long.toHexString(allwordcounter)); + analysis.setProperty("DIFF_WORDS", Long.toHexString(wordHandleCount)); + analysis.setProperty("SIMI_WORDS", Long.toHexString(words.size())); + analysis.setProperty("WORD_ENTROPHY", Long.toHexString((allwordcounter == 0) ? 
0 : (255 * words.size() / allwordcounter))); + analysis.setProperty("NUMB_SENTENCES", Long.toHexString(allsentencecounter)); + analysis.setProperty("DIFF_SENTENCES", Long.toHexString(sentenceHandleCount)); + analysis.setProperty("SIMI_SENTENCES", Long.toHexString(sentences.size())); + analysis.setProperty("AVERAGE_WORD_OCC", Long.toHexString((words.size() == 0) ? 0 : (allwordcounter / words.size()))); + analysis.setProperty("INFORMATION_VALUE", Long.toHexString((allwordcounter == 0) ? 0 : (wordenum.count() * words.size() / allwordcounter / 16))); + + // string, characterisation of text content (a guess) + + } + + public void reconstruct() { + // we reconstruct the word hashtable + // and order the entries by the number of the sentence + // this structure is only needed to reconstruct the text + String word; + statProp sp; + Map.Entry entry; + Iterator it; + String[] orderedWords = new String[words.size()+99]; // uuiiii, the '99' is only a quick hack... + it = words.entrySet().iterator(); // enumerates the keys in ascending order + while (it.hasNext()) { + entry = (Map.Entry) it.next(); + word = (String) entry.getKey(); + sp = (statProp) entry.getValue(); + orderedWords[sp.handle] = word; + } + + Object[] orderedSentences = makeOrderedSentences(); + + // printout a reconstruction of the text + for (int i = 0; i < orderedSentences.length; i++) { + if (orderedSentences[i] != null) { + System.out.print("#T " + intString(i, numlength) + " " + ((String[]) orderedSentences[i])[0] + " "); + for (int j = 2; j < ((String[]) orderedSentences[i]).length; j++) { + System.out.print(" " + + orderedWords[Integer.parseInt(((String[]) orderedSentences[i])[j])] + ); + } + System.out.println(((String[]) orderedSentences[i])[1]); + } + } + } + + private Object[] makeOrderedSentences() { + // we reconstruct the sentence hashtable again and create by-handle ordered entries + // this structure is needed to present the strings in the right order in a printout + int wc; + Iterator it; + 
statProp sp; + String[] s; + String sentence; + Object[] orderedSentences = new Object[sentences.size()]; + for (int i = 0; i < sentences.size(); i++) orderedSentences[i] = null; // this array must be initialized + it = sentences.keySet().iterator(); + while (it.hasNext()) { + sentence = (String) it.next(); + wc = (sentence.length() - 1) / numlength; + s = new String[wc + 2]; + sp = (statProp) sentences.get(sentence); + s[0] = intString(sp.count, numlength); // number of occurrences of this sentence + s[1] = sentence.substring(0,1); // the termination symbol of this sentence + for (int i = 0; i < wc; i++) s[i + 2] = sentence.substring(i * numlength + 1, (i + 1) * numlength + 1); + orderedSentences[sp.handle] = s; + } + return orderedSentences; + } + + public void writeMapToFile(File out) throws IOException { + Map.Entry entry; + String k; + String word; + Iterator it; + statProp sp; + + Object[] orderedSentences = makeOrderedSentences(); + + // we reconstruct the word hashtable + // and sort the entries by the number of occurrences + // this structure is needed to print out a sorted list of words + TreeMap sortedWords = new TreeMap(kelondroMSetTools.fastStringComparator); + it = words.entrySet().iterator(); // enumerates the keys in ascending order + while (it.hasNext()) { + entry = (Map.Entry) it.next(); + word = (String) entry.getKey(); + sp = (statProp) entry.getValue(); + sortedWords.put(intString(sp.count, numlength) + intString(sp.handle, numlength), word); + } + + // start writing of words and sentences + FileWriter writer = new FileWriter(out); + writer.write("\r\n"); + it = sortedWords.entrySet().iterator(); // enumerates the keys in descending order + while (it.hasNext()) { + entry = (Map.Entry) it.next(); + k = (String) entry.getKey(); + writer.write("#W " + k.substring(numlength) + " " + k.substring(0, numlength) + " " + + ((String) entry.getValue()) + "\r\n"); + } + for (int i = 0; i < orderedSentences.length; i++) { + if (orderedSentences[i] != null) 
{ + writer.write("#S " + intString(i, numlength) + " "); + for (int j = 0; j < ((String[]) orderedSentences[i]).length; j++) { + writer.write(((String[]) orderedSentences[i])[j] + " "); + } + writer.write("\r\n"); + } + } + writer.close(); + } + + private static boolean punctuation(char c) { + return ("!?.".indexOf(c) >= 0); + } + + public static boolean invisible(char c) { + if ((c < ' ') || (c > 'z')) return true; + return ("$%&/()=\"$%&/()=`^+*~#'-_:;,|<>[]\\".indexOf(c) >= 0); + } + + + public static Enumeration wordTokenizer(String s) { + try { + return new sievedWordsEnum(new ByteArrayInputStream(s.getBytes()), 3); + } catch (Exception e) { + return null; + } + } + + + public static class sievedWordsEnum implements Enumeration { + Object buffer = null; + unsievedWordsEnum e; + int ml; + + public sievedWordsEnum(InputStream is, int minLength) throws IOException { + e = new unsievedWordsEnum(is); + buffer = nextElement0(); + ml = minLength; + } + + private Object nextElement0() { + String s, r; + char c; + loop: while (e.hasMoreElements()) { + s = (String) e.nextElement(); + r = s.toLowerCase(); + if ((s.length() == 1) && (punctuation(s.charAt(0)))) return s; + if (s.length() < ml) continue loop; + for (int i = 0; i < r.length(); i++) { + c = r.charAt(i); + if (!(((c >= 'a') && (c <= 'z')) || + ((c >= '0') && (c <= '9')))) continue loop; // go to next while loop + //if ((c < 'a') || (c > 'z')) continue loop; // go to next while loop + } + return s; + } + return null; + } + + + public boolean hasMoreElements() { + return buffer != null; + } + + public Object nextElement() { + Object r = buffer; buffer = nextElement0(); return r; + } + + public int count() { + return e.count(); + } + } + + private static class unsievedWordsEnum implements Enumeration { + Object buffer = null; + linesFromFileEnum e; + String s; + + public unsievedWordsEnum(InputStream is) throws IOException { + e = new linesFromFileEnum(is); + s = ""; + buffer = nextElement0(); + } + + private 
Object nextElement0() { + String r; + while (s.length() == 0) { + if (e.hasMoreElements()) { + r = ((String) e.nextElement()).trim(); + s = ""; + for (int i = 0; i < r.length(); i++) { + if (invisible(r.charAt(i))) s = s + " "; + else if (punctuation(r.charAt(i))) s = s + " " + r.charAt(i) + " "; + else s = s + r.charAt(i); + } + s = s.trim(); + } else { + return null; + } + } + int p = s.indexOf(" "); + if (p < 0) {r = s; s = ""; return r;} + r = s.substring(0, p); + s = s.substring(p + 1).trim(); + return r; + } + + public boolean hasMoreElements() { + return buffer != null; + } + + public Object nextElement() { + Object r = buffer; buffer = nextElement0(); return r; + } + + public int count() { + return e.count(); + } + } + + private static class linesFromFileEnum implements Enumeration { + // read in lines from a given input stream + // every line starting with a '#' is treated as a comment. + + Object buffer = null; + BufferedReader raf; + int counter = 0; + + public linesFromFileEnum(InputStream is) throws IOException { + raf = new BufferedReader(new InputStreamReader(is)); + buffer = nextElement0(); + counter = 0; + } + + private Object nextElement0() { + try { + String s; + while (true) { + s = raf.readLine(); + if (s == null) {raf.close(); return null;} + if (!(s.startsWith("#"))) return s; + } + } catch (IOException e) { + try {raf.close();} catch (Exception ee) {} + return null; + } + } + + public boolean hasMoreElements() { + return buffer != null; + } + + public Object nextElement() { + if (buffer == null) { + return null; + } else { + counter = counter + ((String) buffer).length() + 1; + Object r = buffer; + buffer = nextElement0(); + return r; + } + } + + public int count() { + return counter; + } + } + + private static void addLineSearchProp(Properties prop, String s, String[] searchwords, HashSet foundsearch) { + // we store lines containing a key in search vector + int p; + String r; + s = " " + s.toLowerCase() + " "; + for (int i = 0; i < 
searchwords.length; i++) { + if (!(foundsearch.contains(searchwords[i]))) { + p = s.indexOf((String) searchwords[i]); + if (p >= 0) { + // we found one key in the result text + // prepare a line and put it to the property + r = s.substring(0, p) + "" + + s.substring(p, p + searchwords[i].length()) + "" + + s.substring(p + searchwords[i].length()); + prop.setProperty("key-" + searchwords[i], r); + // remember that we found this + foundsearch.add(searchwords[i]); + } + } + } + } + + public static void main(String[] args) { + if ((args.length == 0) || (args.length > 3)) System.out.println("wrong number of arguments: plasmaCondenser -text|-html "); else try { + + plasmaCondenser pc = null; + + // read and analyse file + File file = new File(args[1]); + InputStream textStream = null; + if (args[0].equals("-text")) { + // read a text file + textStream = new FileInputStream(file); + } else if (args[0].equals("-html")) { + // read a html file + htmlFilterContentScraper cs = new htmlFilterContentScraper(new java.net.URL("http://localhost/")); + htmlFilterOutputStream fos = new htmlFilterOutputStream(null, cs, null, false); + FileInputStream fis = new FileInputStream(file); + byte[] buffer = new byte[512]; + int i; + while ((i = fis.read(buffer)) > 0) fos.write(buffer, 0, i); + fis.close(); + fos.close(); + //cs.print(); + //System.out.println("TEXT:" + new String(cs.getText())); + textStream = new ByteArrayInputStream(cs.getText()); + } else { + System.out.println("first argument must be either '-text' or '-html'"); + System.exit(-1); + } + // call condenser + pc = new plasmaCondenser(textStream, 1, 0); + textStream.close(); + // output result + pc.writeMapToFile(new File(args[2])); + pc.reconstruct(); + System.out.println("ANALYSIS:" + pc.getAnalysis().toString()); + } catch (IOException e) { + System.out.println("Problem with input file: " + e.getMessage()); + } + } + + +} diff --git a/source/de/anomic/plasma/plasmaCrawlEURL.java 
b/source/de/anomic/plasma/plasmaCrawlEURL.java new file mode 100644 index 000000000..9f2519d0b --- /dev/null +++ b/source/de/anomic/plasma/plasmaCrawlEURL.java @@ -0,0 +1,273 @@ +// plasmaEURL.java +// ----------------------- +// part of YaCy +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2004 +// last major change: 09.08.2004 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. 
+// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. + +// EURL - noticed (known but not loaded) URL's + +package de.anomic.plasma; + +import java.io.*; +import java.net.*; +import java.util.*; +import de.anomic.kelondro.*; +import de.anomic.server.*; +import de.anomic.tools.*; +import de.anomic.http.*; + +public class plasmaCrawlEURL extends plasmaURL { + + private LinkedList rejectedStack = new LinkedList(); // strings: url + + public plasmaCrawlEURL(File cachePath, int bufferkb) throws IOException { + super(); + int[] ce = { + urlHashLength, // the url's hash + urlHashLength, // the url's referrer hash + urlHashLength, // the crawling initiator + urlHashLength, // the crawling executor + urlStringLength, // the url as string + urlNameLength, // the name of the url, from anchor tag name + urlDateLength, // the time when the url was first time appeared + urlDateLength, // the time when the url was last time tried to load + urlRetryLength, // number of load retries + urlErrorLength, // string describing load failure + urlFlagLength // extra space + }; + if (cachePath.exists()) { + // open existing cache + urlHashCache = new kelondroTree(cachePath, bufferkb * 0x400); + } else { + // create new cache + cachePath.getParentFile().mkdirs(); + urlHashCache = new kelondroTree(cachePath, bufferkb * 0x400, ce); + } + } + + public synchronized entry newEntry(URL url, String referrer, String initiator, String executor, + String name, String failreason, bitfield flags, boolean retry) { + if ((referrer == null) || (referrer.length() < urlHashLength)) referrer = dummyHash; + if ((initiator == null) || 
(initiator.length() < urlHashLength)) initiator = dummyHash; + if ((executor == null) || (executor.length() < urlHashLength)) executor = dummyHash; + if (failreason == null) failreason = "unknown"; + + // create a stack entry + HashMap map = new HashMap(); + map.put("url", url); + map.put("referrer", referrer); + map.put("initiator", initiator); + map.put("executor", executor); + map.put("name", name); + map.put("failreason", failreason); + map.put("flags", flags); + rejectedStack.add(map); + entry e = new entry(url, referrer, initiator, executor, name, failreason, flags); + + // put in table + if (retry) e.store(); + return e; + } + + public synchronized entry getEntry(String hash) { + return new entry(hash); + } + + public void clearStack() { + rejectedStack.clear(); + } + + public int stackSize() { + return rejectedStack.size(); + } + + public entry getStack(int pos) { + HashMap m = (HashMap) rejectedStack.get(pos); + return new entry((URL) m.get("url"), (String) m.get("referrer"), (String) m.get("initiator"), (String) m.get("executor"), + (String) m.get("name"), (String) m.get("failreason"), (bitfield) m.get("flags")); + } + + public class entry { + + private String hash; // the url's hash + private String referrer; // the url's referrer hash + private String initiator; // the crawling initiator + private String executor; // the crawling initiator + private URL url; // the url as string + private String name; // the name of the url, from anchor tag name + private Date initdate; // the time when the url was first time appeared + private Date trydate; // the time when the url was last time tried to load + private int trycount; // number of tryings + private String failreason; // string describing reason for load fail + private bitfield flags; // extra space + + public entry(URL url, String referrer, String initiator, String executor, String name, String failreason, bitfield flags) { + // create new entry and store it into database + this.hash = urlHash(url); + 
this.referrer = (referrer == null) ? dummyHash : referrer; + this.initiator = initiator; + this.executor = executor; + this.url = url; + this.name = name; + this.initdate = new Date(); + this.trydate = new Date(); + this.trycount = 0; + this.failreason = failreason; + this.flags = flags; + + } + + public entry(String hash) { + // generates an plasmaEURLEntry using the url hash + // to speed up the access, the url-hashes are buffered + // in the hash cache. + // we have two options to find the url: + // - look into the hash cache + // - look into the filed properties + // if the url cannot be found, this returns null + this.hash = hash; + try { + byte[][] entry = urlHashCache.get(hash.getBytes()); + if (entry != null) { + this.referrer = new String(entry[1]); + this.initiator = new String(entry[2]); + this.executor = new String(entry[3]); + this.url = new URL(new String(entry[4]).trim()); + this.name = new String(entry[5]).trim(); + this.initdate = new Date(86400000 * serverCodings.enhancedCoder.decodeBase64Long(new String(entry[6]))); + this.trydate = new Date(86400000 * serverCodings.enhancedCoder.decodeBase64Long(new String(entry[7]))); + this.trycount = (int) serverCodings.enhancedCoder.decodeBase64Long(new String(entry[8])); + this.failreason = new String(entry[9]); + this.flags = new bitfield(entry[10]); + return; + } + } catch (Exception e) {} + } + + private void store() { + // stores the values from the object variables into the database + String initdatestr = serverCodings.enhancedCoder.encodeBase64Long(initdate.getTime() / 86400000, urlDateLength); + String trydatestr = serverCodings.enhancedCoder.encodeBase64Long(trydate.getTime() / 86400000, urlDateLength); + + // store the hash in the hash cache + try { + // even if the entry exists, we simply overwrite it + byte[][] entry = new byte[][] { + this.hash.getBytes(), + this.referrer.getBytes(), + this.initiator.getBytes(), + this.executor.getBytes(), + this.url.toString().getBytes(), + 
this.name.getBytes(), + initdatestr.getBytes(), + trydatestr.getBytes(), + serverCodings.enhancedCoder.encodeBase64Long(this.trycount, urlRetryLength).getBytes(), + this.failreason.getBytes(), + this.flags.getBytes() + }; + urlHashCache.put(entry); + } catch (IOException e) { + System.out.println("INTERNAL ERROR AT plasmaEURL:url2hash:" + e.toString()); + } + } + + public String hash() { + // return a url-hash, based on the md5 algorithm + // the result is a String of 12 bytes within a 72-bit space + // (each byte has an 6-bit range) + // that should be enough for all web pages on the world + return this.hash; + } + + public String referrer() { + return this.referrer; + } + + public URL url() { + return url; + } + + public Date initdate() { + return trydate; + } + + public Date trydate() { + return trydate; + } + + public String initiator() { + // return the creator's hash + return initiator; + } + + public String executor() { + // return the creator's hash + return executor; + } + + public String name() { + // return the creator's hash + return name; + } + + public String failreason() { + return failreason; + } + + } + + public class kenum implements Enumeration { + // enumerates entry elements + kelondroTree.rowIterator i; + public kenum(boolean up, boolean rotating) throws IOException { + i = urlHashCache.rows(up, rotating); + } + public boolean hasMoreElements() { + return i.hasNext(); + } + public Object nextElement() { + return new entry(new String(((byte[][]) i.next())[0])); + } + } + + public Enumeration elements(boolean up, boolean rotating) throws IOException { + // enumerates entry elements + return new kenum(up, rotating); + } + +} diff --git a/source/de/anomic/plasma/plasmaCrawlLURL.java b/source/de/anomic/plasma/plasmaCrawlLURL.java new file mode 100644 index 000000000..09eb4656b --- /dev/null +++ b/source/de/anomic/plasma/plasmaCrawlLURL.java @@ -0,0 +1,596 @@ +// plasmaCrawlLURL.java +// ----------------------- +// part of YaCy +// (C) by Michael 
Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2004 +// last major change: 09.08.2004 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. +// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. 
A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. + +/* + This class provides storage functions for the plasma search engine. + - the url-specific properties, including condenser results + - the text content of the url + Both entities are accessed with a hash, which is based on the MD5 + algorithm. The MD5 is not encoded as a hex value, but a b64 value. +*/ + +package de.anomic.plasma; + +import java.io.*; +import java.net.*; +import java.util.*; +import java.text.*; +import de.anomic.kelondro.*; +import de.anomic.tools.*; +import de.anomic.server.*; +import de.anomic.http.*; +import de.anomic.yacy.*; + +public class plasmaCrawlLURL extends plasmaURL { + + // result stacks; + // these have all entries of form + // strings: urlHash + initiatorHash + ExecutorHash + private LinkedList externResultStack; // 1 - remote index: retrieved by other peer + private LinkedList searchResultStack; // 2 - partly remote/local index: result of search queries + private LinkedList transfResultStack; // 3 - partly remote/local index: result of index transfer + private LinkedList proxyResultStack; // 4 - local index: result of proxy fetch/prefetch + private LinkedList lcrawlResultStack; // 5 - local index: result of local crawling + private LinkedList gcrawlResultStack; // 6 - local index: triggered external + + public plasmaCrawlLURL(File cachePath, int bufferkb) throws IOException { + super(); + int[] ce = { + urlHashLength, + urlStringLength, + urlDescrLength, + urlDateLength, + urlDateLength, + urlHashLength, + urlCopyCountLength, + urlFlagLength, + urlQualityLength, + urlLanguageLength, + urlDoctypeLength, + urlSizeLength, + urlWordCountLength + }; + int segmentsize = 0; + for (int i = 0; i < ce.length; i++) segmentsize += ce[i]; + if (cachePath.exists()) { + // open existing cache + urlHashCache = new kelondroTree(cachePath, bufferkb * 0x400); + } else { + // create new cache + 
cachePath.getParentFile().mkdirs(); + + urlHashCache = new kelondroTree(cachePath, bufferkb * 0x400, ce); + } + + // init result stacks + externResultStack = new LinkedList(); + searchResultStack = new LinkedList(); + transfResultStack = new LinkedList(); + proxyResultStack = new LinkedList(); + lcrawlResultStack = new LinkedList(); + gcrawlResultStack = new LinkedList(); + + } + + public synchronized entry newEntry(URL url, String descr, Date moddate, Date loaddate, + String initiatorHash, String executorHash, + String referrerHash, int copyCount, boolean localNeed, + int quality, String language, char doctype, + long size, int wordCount, + int stackType) { + entry e = new entry(url, descr, moddate, loaddate, referrerHash, copyCount, localNeed, quality, language, doctype, size, wordCount); + if (initiatorHash == null) initiatorHash = dummyHash; + if (executorHash == null) executorHash = dummyHash; + switch (stackType) { + case 0: break; + case 1: externResultStack.add(e.urlHash + initiatorHash + executorHash); break; + case 2: searchResultStack.add(e.urlHash + initiatorHash + executorHash); break; + case 3: transfResultStack.add(e.urlHash + initiatorHash + executorHash); break; + case 4: proxyResultStack.add(e.urlHash + initiatorHash + executorHash); break; + case 5: lcrawlResultStack.add(e.urlHash + initiatorHash + executorHash); break; + case 6: gcrawlResultStack.add(e.urlHash + initiatorHash + executorHash); break; + + } + return e; + } + + public synchronized entry newEntry(String propStr, boolean setGlobal, String initiatorHash, String executorHash, int stackType) { + if ((propStr.startsWith("{")) && (propStr.endsWith("}"))) { + //System.out.println("DEBUG: propStr=" + propStr); + try { + entry e = new entry(s2p(propStr.substring(1, propStr.length() - 1)), setGlobal); + if (initiatorHash == null) initiatorHash = dummyHash; + if (executorHash == null) executorHash = dummyHash; + switch (stackType) { + case 0: break; + case 1: externResultStack.add(e.urlHash + 
initiatorHash + executorHash); break; + case 2: searchResultStack.add(e.urlHash + initiatorHash + executorHash); break; + case 3: transfResultStack.add(e.urlHash + initiatorHash + executorHash); break; + case 4: proxyResultStack.add(e.urlHash + initiatorHash + executorHash); break; + case 5: lcrawlResultStack.add(e.urlHash + initiatorHash + executorHash); break; + case 6: gcrawlResultStack.add(e.urlHash + initiatorHash + executorHash); break; + + } + return e; + } catch (Exception e) { + System.out.println("INTERNAL ERROR in newEntry/2: " + e.toString()); + return null; + } + } else { + return null; + } + } + + public void notifyGCrawl(String urlHash, String initiatorHash, String executorHash) { + gcrawlResultStack.add(urlHash + initiatorHash + executorHash); + } + + public synchronized entry getEntry(String hash) { + return new entry(hash); + } + + public int getStackSize(int stack) { + switch (stack) { + case 1: return externResultStack.size(); + case 2: return searchResultStack.size(); + case 3: return transfResultStack.size(); + case 4: return proxyResultStack.size(); + case 5: return lcrawlResultStack.size(); + case 6: return gcrawlResultStack.size(); + } + return -1; + } + + public String getUrlHash(int stack, int pos) { + switch (stack) { + case 1: return ((String) externResultStack.get(pos)).substring(0, urlHashLength); + case 2: return ((String) searchResultStack.get(pos)).substring(0, urlHashLength); + case 3: return ((String) transfResultStack.get(pos)).substring(0, urlHashLength); + case 4: return ((String) proxyResultStack.get(pos)).substring(0, urlHashLength); + case 5: return ((String) lcrawlResultStack.get(pos)).substring(0, urlHashLength); + case 6: return ((String) gcrawlResultStack.get(pos)).substring(0, urlHashLength); + } + return null; + } + + public String getInitiatorHash(int stack, int pos) { + switch (stack) { + case 1: return ((String) externResultStack.get(pos)).substring(urlHashLength, urlHashLength * 2); + case 2: return ((String) 
searchResultStack.get(pos)).substring(urlHashLength, urlHashLength * 2); + case 3: return ((String) transfResultStack.get(pos)).substring(urlHashLength, urlHashLength * 2); + case 4: return ((String) proxyResultStack.get(pos)).substring(urlHashLength, urlHashLength * 2); + case 5: return ((String) lcrawlResultStack.get(pos)).substring(urlHashLength, urlHashLength * 2); + case 6: return ((String) gcrawlResultStack.get(pos)).substring(urlHashLength, urlHashLength * 2); + } + return null; + } + + public String getExecutorHash(int stack, int pos) { + switch (stack) { + case 1: return ((String) externResultStack.get(pos)).substring(urlHashLength * 2, urlHashLength * 3); + case 2: return ((String) searchResultStack.get(pos)).substring(urlHashLength * 2, urlHashLength * 3); + case 3: return ((String) transfResultStack.get(pos)).substring(urlHashLength * 2, urlHashLength * 3); + case 4: return ((String) proxyResultStack.get(pos)).substring(urlHashLength * 2, urlHashLength * 3); + case 5: return ((String) lcrawlResultStack.get(pos)).substring(urlHashLength * 2, urlHashLength * 3); + case 6: return ((String) gcrawlResultStack.get(pos)).substring(urlHashLength * 2, urlHashLength * 3); + } + return null; + } + + public void removeStack(int stack, int pos) { + switch (stack) { + case 1: externResultStack.remove(pos); break; + case 2: searchResultStack.remove(pos); break; + case 3: transfResultStack.remove(pos); break; + case 4: proxyResultStack.remove(pos); break; + case 5: lcrawlResultStack.remove(pos); break; + case 6: gcrawlResultStack.remove(pos); break; + } + } + + public void clearStack(int stack) { + switch (stack) { + case 1: externResultStack.clear(); break; + case 2: searchResultStack.clear(); break; + case 3: transfResultStack.clear(); break; + case 4: proxyResultStack.clear(); break; + case 5: lcrawlResultStack.clear(); break; + case 6: gcrawlResultStack.clear(); break; + } + } + + public void remove(String urlHash) { + super.remove(urlHash); + for (int stack = 1; 
stack <= 6; stack++) + for (int i = getStackSize(stack) - 1; i >= 0; i--) + if (getUrlHash(stack,i).equals(urlHash)) { removeStack(stack,i); return; } + + } + + private static SimpleDateFormat dayFormatter = new SimpleDateFormat("yyyy/MM/dd", Locale.US); + private static String daydate(Date date) { + if (date == null) return ""; else return dayFormatter.format(date); + } + + public serverObjects genTableProps(int tabletype, int lines, boolean showInit, boolean showExec, String dfltInit, String dfltExec, String feedbackpage, boolean makeLink) { + serverObjects prop = new serverObjects(); + if (getStackSize(tabletype) == 0) { + prop.put("table", 0); + return prop; + } + prop.put("table", 1); + if (lines > getStackSize(tabletype)) lines = getStackSize(tabletype); + if (lines == getStackSize(tabletype)) { + prop.put("table_size", 0); + } else { + prop.put("table_size", 1); + prop.put("table_size_count", lines); + } + prop.put("table_size_all", getStackSize(tabletype)); + prop.put("table_feedbackpage", feedbackpage); + prop.put("table_tabletype", tabletype); + prop.put("table_showInit", (showInit) ? 1 : 0); + prop.put("table_showExec", (showExec) ? 1 : 0); + + boolean dark = true; + String urlHash, initiatorHash, executorHash; + plasmaCrawlLURL.entry urle; + yacySeed initiatorSeed, executorSeed; + String cachepath; + int c = 0; + for (int i = getStackSize(tabletype) - 1; i >= (getStackSize(tabletype) - lines); i--) { + initiatorHash = getInitiatorHash(tabletype, i); + executorHash = getExecutorHash(tabletype, i); + urlHash = getUrlHash(tabletype, i); + urle = getEntry(urlHash); + if (urle != null) try { + initiatorSeed = yacyCore.seedDB.getConnected(initiatorHash); + executorSeed = yacyCore.seedDB.getConnected(executorHash); + cachepath = urle.url().toString().substring(7); + if (cachepath.endsWith("/")) cachepath = cachepath + "ndx"; + prop.put("table_indexed_" + c + "_dark", (dark) ? 
1 : 0); + prop.put("table_indexed_" + c + "_feedbackpage", feedbackpage); + prop.put("table_indexed_" + c + "_tabletype", tabletype); + prop.put("table_indexed_" + c + "_urlhash", urlHash); + prop.put("table_indexed_" + c + "_showInit", (showInit) ? 1 : 0); + prop.put("table_indexed_" + c + "_showInit_initiatorSeed", (initiatorSeed == null) ? dfltInit : initiatorSeed.getName()); + prop.put("table_indexed_" + c + "_showExec", (showExec) ? 1 : 0); + prop.put("table_indexed_" + c + "_showExec_executorSeed", (initiatorSeed == null) ? dfltExec : executorSeed.getName()); + prop.put("table_indexed_" + c + "_moddate", daydate(urle.moddate())); + prop.put("table_indexed_" + c + "_wordcount", urle.wordCount()); + prop.put("table_indexed_" + c + "_urldescr", urle.descr()); + prop.put("table_indexed_" + c + "_url", (makeLink) ? ("" + urle.url().toString() + "") : urle.url().toString()); + dark = !dark; + c++; + } catch (Exception e) { + e.printStackTrace(); + } + } + prop.put("table_indexed", c); + return prop; + } + + public class entry { + + private URL url; + private String descr; + private Date moddate; + private Date loaddate; + private String urlHash; + private String referrerHash; + private int copyCount; + private String flags; + private int quality; + private String language; + private char doctype; + private long size; + private int wordCount; + + public entry(URL url, String descr, Date moddate, Date loaddate, + String referrerHash, int copyCount, boolean localNeed, + int quality, String language, char doctype, long size, int wordCount) { + // create new entry and store it into database + this.urlHash = urlHash(url); + this.url = url; + this.descr = descr; + this.moddate = moddate; + this.loaddate = loaddate; + this.referrerHash = (referrerHash == null) ? dummyHash : referrerHash; + this.copyCount = copyCount; // the number of remote (global) copies of this object without this one + this.flags = (localNeed) ? 
"L " : " "; + this.quality = quality; + this.language = language; + this.doctype = doctype; + this.size = size; + this.wordCount = wordCount; + store(); + }

        /**
         * Loads the entry that is stored under the given url hash.
         * The hash cache is the only place that is searched. If no row
         * exists for the hash (or the row cannot be decoded), only
         * <code>urlHash</code> is set and all other fields keep their
         * default values; callers must check e.g. <code>url() == null</code>.
         *
         * @param urlHash the url hash used as key in the hash cache
         */
        public entry(String urlHash) {
            this.urlHash = urlHash;
            try {
                byte[][] row = urlHashCache.get(urlHash.getBytes());
                if (row != null) {
                    this.url = new URL(new String(row[1]).trim());
                    this.descr = new String(row[2]).trim();
                    // dates are stored as base64-coded day counts (ms / 86400000)
                    this.moddate = new Date(86400000 * serverCodings.enhancedCoder.decodeBase64Long(new String(row[3])));
                    this.loaddate = new Date(86400000 * serverCodings.enhancedCoder.decodeBase64Long(new String(row[4])));
                    this.referrerHash = new String(row[5]);
                    this.copyCount = (int) serverCodings.enhancedCoder.decodeBase64Long(new String(row[6]));
                    this.flags = new String(row[7]);
                    this.quality = (int) serverCodings.enhancedCoder.decodeBase64Long(new String(row[8]));
                    this.language = new String(row[9]);
                    this.doctype = (char) row[10][0];
                    this.size = (long) serverCodings.enhancedCoder.decodeBase64Long(new String(row[11]));
                    this.wordCount = (int) serverCodings.enhancedCoder.decodeBase64Long(new String(row[12]));
                    return;
                }
            } catch (Exception e) {
                System.out.println("INTERNAL ERROR in plasmaLURL.entry/1: " + e.toString());
                e.printStackTrace();
            }
        }

        /**
         * Creates an entry from a property list and stores it.
         * The property names must correspond to the ones written by
         * <code>toString</code>. Missing properties fall back to defaults.
         *
         * @param prop      the parsed property list
         * @param setGlobal if true, the flags are forced to "global"
         */
        public entry(Properties prop, boolean setGlobal) {
            //System.out.println("DEBUG-ENTRY: prop=" + prop.toString());
            this.urlHash = prop.getProperty("hash", dummyHash);
            try {
                // The result of this lookup is not used (the guarding 'if' below is
                // commented out); the call is kept so that a failing cache access
                // still aborts the construction as before.
                byte[][] existing = urlHashCache.get(urlHash.getBytes());
                //if (entry == null) {
                this.referrerHash = prop.getProperty("referrer", dummyHash);
                this.moddate = shortDayFormatter.parse(prop.getProperty("mod", "20000101"));
                //System.out.println("DEBUG: moddate = " + moddate + ", prop=" + prop.getProperty("mod"));
                this.loaddate = shortDayFormatter.parse(prop.getProperty("load", "20000101"));
                this.copyCount = Integer.parseInt(prop.getProperty("cc", "0"));
                this.flags = ((prop.getProperty("local", "true").equals("true")) ? "L " : " ");
                if (setGlobal) this.flags = "G ";
                this.url = new URL(crypt.simpleDecode(prop.getProperty("url", ""), null));
                this.descr = crypt.simpleDecode(prop.getProperty("descr", ""), null);
                if (this.descr == null) this.descr = this.url.toString();
                this.quality = (int) serverCodings.enhancedCoder.decodeBase64Long(prop.getProperty("q", ""));
                this.language = prop.getProperty("lang", "uk");
                this.doctype = prop.getProperty("dt", "t").charAt(0);
                this.size = Long.parseLong(prop.getProperty("size", "0"));
                this.wordCount = Integer.parseInt(prop.getProperty("wc", "0"));
                store();
                //}
            } catch (Exception e) {
                System.out.println("INTERNAL ERROR in plasmaLURL.entry/2: " + e.toString());
                e.printStackTrace();
            }
        }

        /**
         * Writes the values of the object variables into the hash cache.
         * An existing row with the same hash is overwritten.
         */
        private void store() {
            String moddatestr = serverCodings.enhancedCoder.encodeBase64Long(moddate.getTime() / 86400000, urlDateLength);
            String loaddatestr = serverCodings.enhancedCoder.encodeBase64Long(loaddate.getTime() / 86400000, urlDateLength);

            try {
                // a missing description falls back to the url, as entry(Properties, boolean) does
                String description = (descr == null) ? url.toString() : descr;
                byte[][] row = new byte[][] {
                    urlHash.getBytes(),
                    url.toString().getBytes(),
                    description.getBytes(),
                    moddatestr.getBytes(),
                    loaddatestr.getBytes(),
                    referrerHash.getBytes(),
                    serverCodings.enhancedCoder.encodeBase64Long(copyCount, urlCopyCountLength).getBytes(),
                    flags.getBytes(),
                    serverCodings.enhancedCoder.encodeBase64Long(quality, urlQualityLength).getBytes(),
                    language.getBytes(),
                    new byte[] {(byte) doctype},
                    serverCodings.enhancedCoder.encodeBase64Long(size, urlSizeLength).getBytes(),
                    serverCodings.enhancedCoder.encodeBase64Long(wordCount, urlWordCountLength).getBytes(),
                };
                urlHashCache.put(row);
            } catch (Exception e) {
                System.out.println("INTERNAL ERROR AT plasmaStore:url2hash:" + e.toString());
                e.printStackTrace();
            }
        }

        public String hash() {
            // return a url-hash, based on the md5 algorithm
            // the result is a String of 12 bytes within a 72-bit space
            // (each byte has an 6-bit range)
            // that should be enough for all web pages on the world
            return this.urlHash;
        }

        public URL url() {
            return url;
        }

        public String descr() {
            return descr;
        }

        public Date moddate() {
            return moddate;
        }

        public Date loaddate() {
            return loaddate;
        }

        public String referrerHash() {
            // return the creator's hash
            return referrerHash;
        }

        public char doctype() {
            return doctype;
        }

        public int copyCount() {
            // return number of copies of this object in the global index
            return copyCount;
        }

        public boolean local() {
            // returns true if the url was created locally and is needed for own word index
            // (an unset or empty flag string counts as "not local")
            if ((flags == null) || (flags.length() == 0)) return false;
            return flags.charAt(0) == 'L';
        }

        public int quality() {
            return quality;
        }

        public String language() {
            return language;
        }

        public long size() {
            return size;
        }

        public int wordCount() {
            return wordCount;
        }

        /**
         * Generates a parseable, comma-separated property list of this entry.
         *
         * @return the property list, or null if it could not be generated
         */
        private String corePropList() {
            try {
                return
                    "hash=" + urlHash +
                    ",referrer=" + referrerHash +
                    ",mod=" + shortDayFormatter.format(moddate) +
                    ",load=" + shortDayFormatter.format(loaddate) +
                    ",size=" + size +
                    ",wc=" + wordCount +
                    ",cc=" + copyCount +
                    ",local=" + ((local()) ? "true" : "false") +
                    ",q=" + serverCodings.enhancedCoder.encodeBase64Long(quality, urlQualityLength) +
                    ",dt=" + doctype +
                    ",lang=" + language +
                    ",url=" + crypt.simpleEncode(url.toString()) +
                    ",descr=" + crypt.simpleEncode(descr);
            } catch (Exception e) {
                serverLog.logFailure("plasmaLURL.corePropList", e.getMessage());
                //e.printStackTrace();
                return null;
            }
        }

        /**
         * Property list extended by the position information that is needed
         * for remote transport.
         *
         * @return "{...}" string, or null if the core list could not be generated
         */
        public String toString(int posintext, int posinphrase, int posofphrase) {
            String core = corePropList();
            if (core == null) return null;
            return
                "{" + core +
                ",posintext=" + posintext +
                ",posinphrase=" + posinphrase +
                ",posofphraseint=" + posofphrase +
                "}";
        }

        /**
         * @return "{...}" property list, or null if it could not be generated
         */
        public String toString() {
            String core = corePropList();
            if (core == null) return null;
            return "{" + core + "}";
        }

        /** Prints a human-readable dump of this entry to stdout. */
        public void print() {
            System.out.println("URL : " + url);
            System.out.println("Description : " + descr);
            System.out.println("Modified : " + httpc.dateString(moddate));
            System.out.println("Loaded : " + httpc.dateString(loaddate));
            System.out.println("Size : " + size + " bytes, " + wordCount + " words");
            System.out.println("Referrer Hash : " + referrerHash);
            System.out.println("Quality : " + quality);
            System.out.println("Language : " + language);
            System.out.println("DocType : " + doctype);
            System.out.println();
        }
    }

    public class kenum implements Enumeration {
        // enumerates entry elements
        kelondroTree.rowIterator i;
        public kenum(boolean up, boolean rotating) throws IOException {
            i = urlHashCache.rows(up, rotating);
        }
        public boolean hasMoreElements() {
            return i.hasNext();
        }
        public Object nextElement() {
            // NOTE(review): plasmaCrawlNURL.kenum treats the element of the same
            // iterator type as a complete row (byte[][]) whose first column is
            // the hash; this is aligned with that usage -- confirm against
            // kelondroTree.rowIterator.
            return new entry(new String(((byte[][]) i.next())[0]));
        }
    }

    public Enumeration elements(boolean up, boolean rotating) throws IOException {
        // enumerates entry elements
        return new kenum(up, rotating);
    }

    /**
     * Debugging entry point.
     * <pre>
     *   -h &lt;url&gt;   prints the url hash of the given url
     *   -l &lt;path&gt;  prints all entries of the URL cache at the given path
     * </pre>
     */
    public static void main(String[] args) {
        if (args.length < 2) {
            System.out.println("usage: plasmaCrawlLURL -h <url> | -l <path-to-URLCache>");
            return;
        }
        if (args[0].equals("-h")) try {
            // arg 1 is url
            System.out.println("HASH: " + urlHash(new URL(args[1])));
        } catch (MalformedURLException e) {
            System.out.println("malformed url '" + args[1] + "': " + e.getMessage());
        }
        if (args[0].equals("-l")) try {
            // arg 1 is path to URLCache
            plasmaCrawlLURL urls = new plasmaCrawlLURL(new File(args[1]), 1);
            Enumeration enu = urls.elements(true, false);
            while (enu.hasMoreElements()) {
                ((entry) enu.nextElement()).print();
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

}
diff --git a/source/de/anomic/plasma/plasmaCrawlLoader.java b/source/de/anomic/plasma/plasmaCrawlLoader.java new file mode 100644 index 000000000..52aa1f1ff --- /dev/null +++ b/source/de/anomic/plasma/plasmaCrawlLoader.java @@ -0,0 +1,262 @@ +// plasmaCrawlerLoader.java +// ------------------------ +// part of YaCy +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2004 +// last major change: 25.02.2004 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. 
+// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. +// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. 
+ +package de.anomic.plasma; + +import java.io.*; +import java.util.*; +import java.net.*; +import de.anomic.net.*; +import de.anomic.http.*; +import de.anomic.server.*; +import de.anomic.tools.*; +import de.anomic.htmlFilter.*; + +public class plasmaCrawlLoader { + + private plasmaHTCache cacheManager; + private int socketTimeout; + private int loadTimeout; + private boolean remoteProxyUse; + private String remoteProxyHost; + private int remoteProxyPort; + private int maxSlots; + private List slots; + private serverLog log; + private HashSet acceptMimeTypes; + + public plasmaCrawlLoader(plasmaHTCache cacheManager, serverLog log, int socketTimeout, int loadTimeout, int mslots, boolean proxyUse, String proxyHost, int proxyPort, + HashSet acceptMimeTypes) { + this.cacheManager = cacheManager; + this.log = log; + this.socketTimeout = socketTimeout; + this.loadTimeout = loadTimeout; + this.remoteProxyUse = proxyUse; + this.remoteProxyHost = proxyHost; + this.remoteProxyPort = proxyPort; + this.maxSlots = mslots; + this.slots = new LinkedList(); + this.acceptMimeTypes = acceptMimeTypes; + } + + private void killTimeouts() { + Exec thread; + for (int i = slots.size() - 1; i >= 0; i--) { + // check if thread is alive + thread = (Exec) slots.get(i); + if (thread.isAlive()) { + // check the age of the thread + if (System.currentTimeMillis() - thread.startdate > loadTimeout) { + // we kill that thread + thread.interrupt(); // hopefully this wakes him up. 
+ slots.remove(i); + System.out.println("CRAWLER: IGNORING SLEEPING DOWNLOAD SLOT " + thread.url.toString()); + } + } else { + // thread i is dead, remove it + slots.remove(i); + } + } + } + + public synchronized void loadParallel(URL url, String referer, String initiator, int depth, plasmaCrawlProfile.entry profile) { + + // wait until there is space in the download slots + Exec thread; + while (slots.size() >= maxSlots) { + killTimeouts(); + + // wait a while + try { + Thread.currentThread().sleep(1000); + } catch (InterruptedException e) { + break; + } + } + + // we found space in the download slots + thread = new Exec(url, referer, initiator, depth, profile); + thread.start(); + slots.add(thread); + } + + public int size() { + killTimeouts(); + return slots.size(); + } + + public Exec[] threadStatus() { + killTimeouts(); + Exec[] result = new Exec[slots.size()]; + for (int i = 0; i < slots.size(); i++) result[i] = (Exec) slots.get(i); + return result; + } + + public class Exec extends Thread { + + public URL url; + public String referer; + public String initiator; + public int depth; + public long startdate; + public plasmaCrawlProfile.entry profile; + public String error; + + public Exec(URL url, String referer, String initiator, int depth, plasmaCrawlProfile.entry profile) { + this.url = url; // the url to crawl + this.referer = referer; // the url that contained this url as link + this.initiator = initiator; + this.depth = depth; // distance from start-url + this.startdate = System.currentTimeMillis(); + this.profile = profile; + this.error = null; + } + + public void run() { + try { + load(url, referer, initiator, depth, profile); + } catch (IOException e) { + } + } + + private httpc newhttpc(String server, int port, boolean ssl) throws IOException { + // a new httpc connection, combined with possible remote proxy + if (remoteProxyUse) + return new httpc(server, port, socketTimeout, ssl, remoteProxyHost, remoteProxyPort); + else + return new httpc(server, 
port, socketTimeout, ssl); + } + + private void load(URL url, String referer, String initiator, int depth, plasmaCrawlProfile.entry profile) throws IOException { + if (url == null) return; + Date requestDate = new Date(); // remember the time... + String host = url.getHost(); + String path = url.getPath(); + int port = url.getPort(); + boolean ssl = url.getProtocol().equals("https"); + if (port < 0) port = (ssl) ? 443 : 80; + + // set referrer; in some case advertise a little bit: + referer = referer.trim(); + if (referer.length() == 0) referer = "http://www.yacy.net/yacy/"; + + // take a file from the net + try { + // create a request header + httpHeader requestHeader = new httpHeader(); + requestHeader.put("User-Agent", httpdProxyHandler.userAgent); + requestHeader.put("Referer", referer); + requestHeader.put("Accept-Encoding", "gzip,deflate"); + + //System.out.println("CRAWLER_REQUEST_HEADER=" + requestHeader.toString()); // DEBUG + + // open the connection + httpc remote = newhttpc(host, port, ssl); + + // send request + httpc.response res = remote.GET(path, requestHeader); + + if (res.status.startsWith("200")) { + // the transfer is ok + long contentLength = res.responseHeader.contentLength(); + + // make a scraper and transformer + htmlFilterContentScraper scraper = new htmlFilterContentScraper(url); + OutputStream hfos = new htmlFilterOutputStream(null, scraper, null, false); + + // reserve cache entry + plasmaHTCache.Entry htCache = cacheManager.newEntry(requestDate, depth, url, requestHeader, res.status, res.responseHeader, scraper, initiator, profile); + + // request has been placed and result has been returned. 
work off response + File cacheFile = cacheManager.getCachePath(url); + try { + if (!(httpd.isTextMime(res.responseHeader.mime().toLowerCase(), acceptMimeTypes))) { + // if the response has not the right file type then reject file + hfos.close(); + remote.close(); + System.out.println("REJECTED WRONG MIME TYPE " + res.responseHeader.mime() + " for url " + url.toString()); + htCache.status = plasmaHTCache.CACHE_UNFILLED; + } else if ((profile.storeHTCache()) && ((error = htCache.shallStoreCache()) == null)) { + // we write the new cache entry to file system directly + cacheFile.getParentFile().mkdirs(); + res.writeContent(hfos, cacheFile); // writes in content scraper and cache file + htCache.status = plasmaHTCache.CACHE_FILL; + } else { + if (error != null) log.logDebug("CRAWLER NOT STORED RESOURCE " + url.toString() + ": " + error); + // anyway, the content still lives in the content scraper + res.writeContent(hfos, null); // writes only into content scraper + htCache.status = plasmaHTCache.CACHE_PASSING; + } + // enQueue new entry with response header + if ((initiator == null) || (initiator.length() == 0)) { + // enqueued for proxy writings + cacheManager.stackProcess(htCache); + } else { + // direct processing for crawling + cacheManager.process(htCache); + } + } catch (SocketException e) { + // this may happen if the client suddenly closes its connection + // maybe the user has stopped loading + // in that case, we are not responsible and just forget it + // but we clean the cache also, since it may be only partial + // and most possible corrupted + if (cacheFile.exists()) cacheFile.delete(); + System.out.println("CRAWLER LOADER ERROR1: with url=" + url.toString() + ": " + e.toString()); + } + } else { + // if the response has not the right response type then reject file + System.out.println("REJECTED WRONG STATUS TYPE '" + res.status + "' for url " + url.toString()); + // not processed any further + } + remote.close(); + } catch (Exception e) { + // this may 
happen if the targeted host does not exist or anything with the + // remote server was wrong. + System.out.println("CRAWLER LOADER ERROR2 with url=" + url.toString() + ": " + e.toString()); + e.printStackTrace(); + } + } + + } + + +} diff --git a/source/de/anomic/plasma/plasmaCrawlNURL.java b/source/de/anomic/plasma/plasmaCrawlNURL.java new file mode 100644 index 000000000..61f5463de --- /dev/null +++ b/source/de/anomic/plasma/plasmaCrawlNURL.java @@ -0,0 +1,357 @@ +// plasmaNURL.java +// ----------------------- +// part of YaCy +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2004 +// last major change: 09.08.2004 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. 
The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. +// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. + +// NURL - noticed (known but not loaded) URL's + +package de.anomic.plasma; + +import java.io.*; +import java.net.*; +import java.util.*; +import de.anomic.kelondro.*; +import de.anomic.server.*; +import de.anomic.tools.*; +import de.anomic.http.*; +import de.anomic.yacy.*; + +public class plasmaCrawlNURL extends plasmaURL { + + + private kelondroStack localStack; // links found by crawling to depth-1 + private kelondroStack limitStack; // links found by crawling at target depth + private kelondroStack overhangStack; // links found by crawling at depth+1 + private kelondroStack remoteStack; // links from remote crawl orders + + private HashSet stackIndex; // to find out if a specific link is already on any stack + + public plasmaCrawlNURL(File cacheStacksPath, int bufferkb) throws IOException { + super(); + int[] ce = { + urlHashLength, // the url hash + urlHashLength, // initiator + urlStringLength, // the url as string + urlHashLength, // the url's referrer hash + urlNameLength, // the name of the url, from anchor tag name + urlDateLength, // the time when the url was first time appeared + 
urlCrawlProfileHandleLength, // the name of the prefetch profile handle + urlCrawlDepthLength, // the prefetch depth so far, starts at 0 + urlParentBranchesLength, // number of anchors of the parent + urlForkFactorLength, // sum of anchors of all ancestors + urlFlagLength, // extra space + urlHandleLength // extra handle + }; + + // create a stack for newly entered entries + if (!(cacheStacksPath.exists())) cacheStacksPath.mkdir(); // make the path + + File cacheFile = new File(cacheStacksPath, "urlNotice1.db"); + if (cacheFile.exists()) { + // open existing cache + urlHashCache = new kelondroTree(cacheFile, bufferkb * 0x400); + } else { + // create new cache + cacheFile.getParentFile().mkdirs(); + urlHashCache = new kelondroTree(cacheFile, bufferkb * 0x400, ce); + } + + File localCrawlStack = new File(cacheStacksPath, "urlNoticeLocal0.stack"); + if (localCrawlStack.exists()) { + localStack = new kelondroStack(localCrawlStack, 0); + } else { + localStack = new kelondroStack(localCrawlStack, 0, new int[] {plasmaURL.urlHashLength}); + } + File globalCrawlStack = new File(cacheStacksPath, "urlNoticeRemote0.stack"); + if (globalCrawlStack.exists()) { + remoteStack = new kelondroStack(globalCrawlStack, 0); + } else { + remoteStack = new kelondroStack(globalCrawlStack, 0, new int[] {plasmaURL.urlHashLength}); + } + + // init stack Index + stackIndex = new HashSet(); + Iterator i = localStack.iterator(); + while (i.hasNext()) stackIndex.add(new String(((kelondroRecords.Node) i.next()).getKey())); + i = remoteStack.iterator(); + while (i.hasNext()) stackIndex.add(new String(((kelondroRecords.Node) i.next()).getKey())); + } + + private static String normalizeHost(String host) { + if (host.length() > urlHostLength) host = host.substring(0, urlHostLength); + host = host.toLowerCase(); + while (host.length() < urlHostLength) host = host + " "; + return host; + } + + private static String normalizeHandle(int h) { + String d = Integer.toHexString(h); + while (d.length() < 
urlHandleLength) d = "0" + d; + return d; + } + + public int stackSize() { + return localStack.size() + remoteStack.size(); + } + public int localStackSize() { + return localStack.size(); + } + public int remoteStackSize() { + return remoteStack.size(); + } + + public boolean existsInStack(String urlhash) { + return stackIndex.contains(urlhash); + } + + public synchronized entry newEntry(String initiator, URL url, Date loaddate, String referrer, String name, + String profile, int depth, int anchors, int forkfactor, int stackMode) { + entry e = new entry(initiator, url, referrer, name, loaddate, profile, + depth, anchors, forkfactor); + + // stackMode can have 3 cases: + // 0 = do not stack + // 1 = on local stack + // 2 = on global stack + // 3 = on overhang stack + // 4 = on remote stack + try { + if (stackMode == 1) { + localStack.push(new byte[][] {e.hash.getBytes()}); + stackIndex.add(new String(e.hash.getBytes())); + } + if (stackMode == 4) { + remoteStack.push(new byte[][] {e.hash.getBytes()}); + stackIndex.add(new String(e.hash.getBytes())); + } + } catch (IOException er) { + } + return e; + } + + public entry localPop() { return pop(localStack); } + public entry[] localTop(int count) { return top(localStack, count); } + + public entry remotePop() { return pop(remoteStack); } + public entry[] remoteTop(int count) { return top(remoteStack, count); } + + private entry pop(kelondroStack stack) { + // this is a filo - pop + try { + if (stack.size() > 0) { + entry e = new entry(new String(stack.pop()[0])); + stackIndex.remove(e.hash); + return e; + } else { + return null; + } + } catch (IOException e) { + return null; + } + } + + private entry[] top(kelondroStack stack, int count) { + // this is a filo - top + if (count > stack.size()) count = stack.size(); + entry[] list = new entry[count]; + try { + for (int i = 0; i < count; i++) { + list[i] = new entry(new String(stack.top(i)[0])); + } + return list; + } catch (IOException e) { + return null; + } + } + + 
public synchronized entry getEntry(String hash) { + return new entry(hash); + } + + public synchronized void remove(String hash) { + try { + urlHashCache.remove(hash.getBytes()); + } catch (IOException e) {} + } + + public class entry { + + private String initiator; // the initiator hash, is NULL or "" if it is the own proxy + private String hash; // the url's hash + private String referrer; // the url's referrer hash + private URL url; // the url as string + private String name; // the name of the url, from anchor tag name + private Date loaddate; // the time when the url was first time appeared + private String profileHandle; // the name of the prefetch profile + private int depth; // the prefetch depth so far, starts at 0 + private int anchors; // number of anchors of the parent + private int forkfactor; // sum of anchors of all ancestors + private bitfield flags; + private int handle; + + public entry(String initiator, URL url, String referrer, String name, Date loaddate, String profileHandle, + int depth, int anchors, int forkfactor) { + // create new entry and store it into database + this.hash = urlHash(url); + this.initiator = initiator; + this.url = url; + this.referrer = (referrer == null) ? "------------" : referrer; + this.name = name; + this.loaddate = loaddate; + this.profileHandle = profileHandle; + this.depth = depth; + this.anchors = anchors; + this.forkfactor = forkfactor; + this.flags = new bitfield(urlFlagLength); + this.handle = 0; + store(); + } + + public entry(String hash) { + // generates an plasmaNURLEntry using the url hash + // to speed up the access, the url-hashes are buffered + // in the hash cache. 
+ // we have two options to find the url: + // - look into the hash cache + // - look into the filed properties + // if the url cannot be found, this returns null + this.hash = hash; + try { + byte[][] entry = urlHashCache.get(hash.getBytes()); + if (entry != null) { + this.initiator = new String(entry[1]); + this.url = new URL(new String(entry[2]).trim()); + this.referrer = new String(entry[3]); + this.name = new String(entry[4]).trim(); + this.loaddate = new Date(86400000 * serverCodings.enhancedCoder.decodeBase64Long(new String(entry[5]))); + this.profileHandle = new String(entry[6]).trim(); + this.depth = (int) serverCodings.enhancedCoder.decodeBase64Long(new String(entry[7])); + this.anchors = (int) serverCodings.enhancedCoder.decodeBase64Long(new String(entry[8])); + this.forkfactor = (int) serverCodings.enhancedCoder.decodeBase64Long(new String(entry[9])); + this.flags = new bitfield(entry[10]); + this.handle = Integer.parseInt(new String(entry[11])); + return; + } + } catch (Exception e) { + } + } + + private void store() { + // stores the values from the object variables into the database + String loaddatestr = serverCodings.enhancedCoder.encodeBase64Long(loaddate.getTime() / 86400000, urlDateLength); + + // store the hash in the hash cache + try { + // even if the entry exists, we simply overwrite it + byte[][] entry = new byte[][] { + this.hash.getBytes(), + (initiator == null) ? 
"".getBytes() : this.initiator.getBytes(), + this.url.toString().getBytes(), + this.referrer.getBytes(), + this.name.getBytes(), + loaddatestr.getBytes(), + this.profileHandle.getBytes(), + serverCodings.enhancedCoder.encodeBase64Long(this.depth, urlCrawlDepthLength).getBytes(), + serverCodings.enhancedCoder.encodeBase64Long(this.anchors, urlParentBranchesLength).getBytes(), + serverCodings.enhancedCoder.encodeBase64Long(this.forkfactor, urlForkFactorLength).getBytes(), + this.flags.getBytes(), + normalizeHandle(this.handle).getBytes() + }; + urlHashCache.put(entry); + } catch (IOException e) { + System.out.println("INTERNAL ERROR AT plasmaNURL:url2hash:" + e.toString()); + } + } + + public String hash() { + // return a url-hash, based on the md5 algorithm + // the result is a String of 12 bytes within a 72-bit space + // (each byte has an 6-bit range) + // that should be enough for all web pages on the world + return this.hash; + } + public String initiator() { + if (initiator == null) return null; + if (initiator.length() == 0) return null; + return initiator; + } + public boolean proxy() { + return (initiator() == null); + } + public String referrerHash() { + return this.referrer; + } + public URL url() { + return url; + } + public Date loaddate() { + return loaddate; + } + public String name() { + // return the creator's hash + return name; + } + public int depth() { + return depth; + } + public String profileHandle() { + return profileHandle; + } + } + + public class kenum implements Enumeration { + // enumerates entry elements + kelondroTree.rowIterator i; + public kenum(boolean up, boolean rotating) throws IOException { + i = urlHashCache.rows(up, rotating); + } + public boolean hasMoreElements() { + return i.hasNext(); + } + public Object nextElement() { + return new entry(new String(((byte[][]) i.next())[0])); + } + } + + public Enumeration elements(boolean up, boolean rotating) throws IOException { + // enumerates entry elements + return new kenum(up, 
rotating); + } + +} diff --git a/source/de/anomic/plasma/plasmaCrawlProfile.java b/source/de/anomic/plasma/plasmaCrawlProfile.java new file mode 100644 index 000000000..85262aeec --- /dev/null +++ b/source/de/anomic/plasma/plasmaCrawlProfile.java @@ -0,0 +1,235 @@ +// plasmaCrawlProfile.java +// ------------------------ +// part of YaCy +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2004 +// last major change: 25.02.2004 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. 
The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. +// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. + +package de.anomic.plasma; + +import java.io.*; +import java.util.*; +import de.anomic.kelondro.*; +import de.anomic.server.*; + +public class plasmaCrawlProfile { + + private kelondroMap profileTable; + + public plasmaCrawlProfile(File profileTableFile) throws IOException { + if (profileTableFile.exists()) { + profileTable = new kelondroMap(new kelondroDyn(profileTableFile, 32000)); + } else { + profileTableFile.getParentFile().mkdirs(); + profileTable = new kelondroMap(new kelondroDyn(profileTableFile, 32000, plasmaURL.urlCrawlProfileHandleLength, 2000)); + } + } + + public int size() { + return profileTable.size(); + } + + public Iterator profiles(boolean up) { + // enumerates profile entries + try { + return new profileIterator(up); + } catch (IOException e) { + return new HashSet().iterator(); + } + } + + public class profileIterator implements Iterator { + // the iterator iterates all keys, which are byte[] objects + kelondroDyn.dynKeyIterator handleIterator; + entry next; + public profileIterator(boolean up) throws IOException { + handleIterator = profileTable.keys(up, false); + next = null; + } 
+ public boolean hasNext() { + return handleIterator.hasNext(); + } + public Object next() { + next = getEntry((String) handleIterator.next()); + return next; + } + public void remove() { + removeEntry(next.handle()); + } + } + + public void removeEntry(String handle) { + try { + profileTable.remove(handle); + } catch (IOException e) {} + } + + public entry newEntry(String name, String startURL, String generalFilter, String specificFilter, + int generalDepth, int specificDepth, + boolean crawlingQ, + boolean storeHTCache, boolean storeTXCache, + boolean localIndexing, boolean remoteIndexing, + boolean xsstopw, boolean xdstopw, boolean xpstopw) throws IOException { + entry ne = new entry(name, startURL, generalFilter, specificFilter, + generalDepth, specificDepth, + crawlingQ, storeHTCache, storeTXCache, localIndexing, remoteIndexing, + xsstopw, xdstopw, xpstopw); + profileTable.set(ne.handle(), ne.map()); + return ne; + } + + public entry getEntry(String handle) { + try { + Map m = profileTable.get(handle); + if (m == null) return null; + return new entry(m); + } catch (IOException e) { + return null; + } + } + + + + public class entry { + // this is a simple record structure that hold all properties of a single crawl start + + private Map mem; + public entry(String name, String startURL, String generalFilter, String specificFilter, + int generalDepth, int specificDepth, + boolean crawlingQ, + boolean storeHTCache, boolean storeTXCache, + boolean localIndexing, boolean remoteIndexing, + boolean xsstopw, boolean xdstopw, boolean xpstopw) { + String handle = serverCodings.encodeMD5B64("" + System.currentTimeMillis(), true).substring(0, plasmaURL.urlCrawlProfileHandleLength); + mem = new HashMap(); + mem.put("handle", handle); + mem.put("name", name); + mem.put("startURL", startURL); + mem.put("generalFilter", generalFilter); + mem.put("specificFilter", specificFilter); + mem.put("generalDepth", "" + generalDepth); + mem.put("specificDepth", "" + specificDepth); + 
mem.put("crawlingQ", (crawlingQ) ? "true" : "false"); // crawling of urls with '?' + mem.put("storeHTCache", (storeHTCache) ? "true" : "false"); + mem.put("storeTXCache", (storeTXCache) ? "true" : "false"); + mem.put("localIndexing", (localIndexing) ? "true" : "false"); + mem.put("remoteIndexing", (remoteIndexing) ? "true" : "false"); + mem.put("xsstopw", (xsstopw) ? "true" : "false"); // exclude static stop-words + mem.put("xdstopw", (xdstopw) ? "true" : "false"); // exclude dynamic stop-word + mem.put("xpstopw", (xpstopw) ? "true" : "false"); // exclude parent stop-words + } + public entry(Map mem) { + this.mem = mem; + } + + public Map map() { + return mem; + } + public String handle() { + String r = (String) mem.get("handle"); + if (r == null) return null; else return r; + } + public String name() { + String r = (String) mem.get("name"); + if (r == null) return ""; else return r; + } + public String startURL() { + String r = (String) mem.get("startURL"); + if (r == null) return null; else return r; + } + public String generalFilter() { + String r = (String) mem.get("generalFilter"); + if (r == null) return ".*"; else return r; + } + public String specificFilter() { + String r = (String) mem.get("specificFilter"); + if (r == null) return ".*"; else return r; + } + public int generalDepth() { + String r = (String) mem.get("generalDepth"); + if (r == null) return 0; else try { + return Integer.parseInt(r); + } catch (NumberFormatException e) { + return 0; + } + } + public int specificDepth() { + String r = (String) mem.get("specificDepth"); + if (r == null) return 0; else try { + return Integer.parseInt(r); + } catch (NumberFormatException e) { + return 0; + } + } + public boolean crawlingQ() { + String r = (String) mem.get("crawlingQ"); + if (r == null) return false; else return (r.equals("true")); + } + public boolean storeHTCache() { + String r = (String) mem.get("storeHTCache"); + if (r == null) return false; else return (r.equals("true")); + } + public 
boolean storeTXCache() { + String r = (String) mem.get("storeTXCache"); + if (r == null) return false; else return (r.equals("true")); + } + public boolean localIndexing() { + String r = (String) mem.get("localIndexing"); + if (r == null) return false; else return (r.equals("true")); + } + public boolean remoteIndexing() { + String r = (String) mem.get("remoteIndexing"); + if (r == null) return false; else return (r.equals("true")); + } + public boolean excludeStaticStopwords() { + String r = (String) mem.get("xsstopw"); + if (r == null) return false; else return (r.equals("true")); + } + public boolean excludeDynamicStopwords() { + String r = (String) mem.get("xdstopw"); + if (r == null) return false; else return (r.equals("true")); + } + public boolean excludeParentStopwords() { + String r = (String) mem.get("xpstopw"); + if (r == null) return false; else return (r.equals("true")); + } + public void changeEntry(String propName, String newValue) throws IOException { + mem.put(propName, newValue); + profileTable.set(handle(), mem); + } + } +} diff --git a/source/de/anomic/plasma/plasmaHTCache.java b/source/de/anomic/plasma/plasmaHTCache.java new file mode 100644 index 000000000..f6c809696 --- /dev/null +++ b/source/de/anomic/plasma/plasmaHTCache.java @@ -0,0 +1,857 @@ +// plasmaHTCache.java +// ----------------------- +// part of YaCy +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2004 +// last major change: 12.02.2004 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. +// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. + +/* + Class documentation: + This class has two purposes: + 1. provide a object that carries path and header information + that shall be used as objects within a scheduler's stack + 2. 
static methods for a cache control and cache aging + the class shall also be used to do a cache-cleaning and index creation +*/ + +package de.anomic.plasma; + +import java.io.*; +import java.net.*; +import java.util.*; +import de.anomic.kelondro.*; +import de.anomic.tools.*; +import de.anomic.htmlFilter.*; +import de.anomic.net.*; +import de.anomic.http.*; +import de.anomic.plasma.*; +import de.anomic.server.*; +
+// Review changes (marked as required by the notice above):
+// - getCachePath(): the results of substring()/replace() are now assigned
+// - cacheScan(): no firstKey() on an empty map, no null domain is scored
+// - job(): the clean-up loop stops when the age index is empty
+// - Entry.shallUseCache(): requestHeader is null-checked consistently
+// - Entry.shallStoreCache(): short or missing response status is tolerated
+// - Entry.shallIndexCache(): unused local removed
+public class plasmaHTCache {
+
+    private static final int stackLimit = 150; // if we exceed that limit, we do not check idle
+    private static final long idleDelay = 2000; // 2 seconds no hits until we think that we idle
+    private static final long oneday = 1000 * 60 * 60 * 24; // milliseconds of a day
+
+    private plasmaSwitchboard switchboard;
+    private kelondroMap responseHeaderDB = null;
+    private LinkedList cacheStack;
+    private TreeMap cacheAge; // maps ageString(lastModified, file) to the File; the first key belongs to the oldest file
+    public long currCacheSize;
+    public long maxCacheSize;
+    private long lastAcc;
+    private File cachePath;
+    public static serverLog log;
+
+    public static final int CACHE_UNFILLED = 0; // default case without assignment
+    public static final int CACHE_FILL = 1; // this means: update == true
+    public static final int CACHE_HIT = 2; // the best case: reading from Cache
+    public static final int CACHE_STALE_NO_RELOAD = 3; // this shall be treated as a rare case that should not appear
+    public static final int CACHE_STALE_RELOAD_GOOD = 4; // this means: update == true
+    public static final int CACHE_STALE_RELOAD_BAD = 5; // this updates only the responseHeader, not the content
+    public static final int CACHE_PASSING = 6; // does not touch cache, just passing
+
+    /**
+     * Sets up the cache directory, opens (or creates) the response header
+     * database inside it and schedules a one-time cacheScan().
+     * The process is terminated if the cache directory or the database
+     * cannot be used.
+     *
+     * @param switchboard source of the configuration and target of enQueue()
+     * @param bufferkb    buffer size for the header database in kilobytes
+     */
+    public plasmaHTCache(plasmaSwitchboard switchboard, int bufferkb) {
+        this.switchboard = switchboard;
+
+        int loglevel = Integer.parseInt(switchboard.getConfig("plasmaLoglevel", "2"));
+        plasmaHTCache.log = new serverLog("HTCACHE", loglevel); // 'log' is static
+
+        // set cache path
+        cachePath = new File(switchboard.getRootPath(),switchboard.getConfig("proxyCache","HTCACHE"));
+        if (!(cachePath.exists())) {
+            // make the cache path
+            cachePath.mkdir();
+        }
+        if (!(cachePath.isDirectory())) {
+            // if the cache does not exists or is a file and not a directory, panic
+            System.out.println("the cache path " + cachePath.toString() + " is not a directory or does not exists and cannot be created");
+            System.exit(0);
+        }
+
+        // open the response header database
+        File dbfile = new File(cachePath, "responseHeader.db");
+        try {
+            if (dbfile.exists())
+                responseHeaderDB = new kelondroMap(new kelondroDyn(dbfile, bufferkb * 0x400));
+            else
+                responseHeaderDB = new kelondroMap(new kelondroDyn(dbfile, bufferkb * 0x400, plasmaCrawlLURL.urlHashLength, 150));
+        } catch (IOException e) {
+            System.out.println("the request header database could not be opened: " + e.getMessage());
+            System.exit(0);
+        }
+
+        // init stack
+        cacheStack = new LinkedList();
+
+        // init idle check
+        lastAcc = System.currentTimeMillis();
+
+        // init cache age and size management
+        cacheAge = new TreeMap();
+        currCacheSize = 0;
+        maxCacheSize = Long.parseLong(switchboard.getConfig("proxyCacheSize", "2")); // this is megabyte
+        maxCacheSize = maxCacheSize * 1024 * 1024; // now it's the number of bytes
+
+        // start the cache startup thread
+        // this will collect information about the current cache size and elements
+        serverInstantThread.oneTimeJob(this, "cacheScan", log, 5000);
+    }
+
+    // builds the 24 character key for cacheAge: 16 hex digits of the date
+    // followed by 8 hex digits of the file's hash code, both zero-padded
+    private String ageString(long date, File f) {
+        String s = Integer.toHexString(f.hashCode());
+        while (s.length() < 8) s = "0" + s;
+        s = Long.toHexString(date) + s;
+        while (s.length() < 24) s = "0" + s;
+        return s;
+    }
+
+    /**
+     * Walks all files below the cache path, sums up their sizes in
+     * currCacheSize and registers them in cacheAge. Afterwards ip addresses
+     * are resolved for at most 50 of the found domains and for at most
+     * one minute.
+     */
+    public void cacheScan() {
+        //log.logSystem("STARTING CACHE SCANNING");
+        kelondroMScoreCluster doms = new kelondroMScoreCluster();
+        int c = 0;
+        enumerateFiles ef = new enumerateFiles(cachePath, true, false, true);
+        File f;
+        String domain;
+        while (ef.hasMoreElements()) {
+            c++;
+            f = (File) ef.nextElement();
+            long d = f.lastModified();
+            //System.out.println("Cache: " + dom(f));
+            // dom() returns null for files that are stored directly in the cache path
+            domain = dom(f);
+            if (domain != null) doms.incScore(domain);
+            currCacheSize += f.length();
+            cacheAge.put(ageString(d, f), f);
+        }
+        //System.out.println("%" + (String) cacheAge.firstKey() + "=" + cacheAge.get(cacheAge.firstKey()));
+        if (cacheAge.isEmpty()) {
+            // firstKey() would throw a NoSuchElementException here
+            log.logSystem("CACHE SCANNED, CONTAINS " + c +
+                          " FILES = " + currCacheSize/1048576 + "MB");
+        } else {
+            long ageHours = (System.currentTimeMillis() -
+                             Long.parseLong(((String) cacheAge.firstKey()).substring(0, 16), 16)) / 3600000;
+            log.logSystem("CACHE SCANNED, CONTAINS " + c +
+                          " FILES = " + currCacheSize/1048576 + "MB, OLDEST IS " +
+                          ((ageHours < 24) ? (ageHours + " HOURS") : ((ageHours / 24) + " DAYS")) +
+                          " OLD");
+        }
+
+        // start to prefetch ip's from dns
+        String dom;
+        long start = System.currentTimeMillis();
+        String ip;
+        StringBuffer result = new StringBuffer();
+        c = 0;
+        while ((doms.size() > 0) && (c < 50) && ((System.currentTimeMillis() - start) < 60000)) {
+            dom = (String) doms.getMaxObject();
+            ip = httpc.dnsResolve(dom);
+            if (ip == null) break;
+            result.append(", ").append(dom).append("=").append(ip);
+            log.logSystem("PRE-FILLED " + dom + "=" + ip);
+            c++;
+            doms.deleteScore(dom);
+            // wait a short while to prevent that this looks like a DoS
+            try {Thread.sleep(100);} catch (InterruptedException e) {}
+        }
+        if (result.length() > 2) log.logSystem("PRE-FILLED DNS CACHE, FETCHED " + c +
+                                               " ADDRESSES: " + result.substring(2));
+    }
+
+    // returns the first path element of f below the cache path,
+    // or null if f is located directly in the cache path
+    private String dom(File f) {
+        String s = f.toString().substring(cachePath.toString().length() + 1);
+        int p = s.indexOf("/");
+        if (p < 0) p = s.indexOf("\\");
+        if (p < 0) return null;
+        return s.substring(0, p);
+    }
+
+    /**
+     * @return a httpHeader built from the map stored under urlHash in the
+     *         response header database
+     * @throws IOException if the database cannot be read
+     */
+    public httpHeader getCachedResponse(String urlHash) throws IOException {
+        httpHeader header = new httpHeader(null, responseHeaderDB.get(urlHash));
+        //System.out.println("DEBUG: getCachedResponse hash=" + urlHash + ", header=" + header.toString());
+        return header;
+    }
+
+    // true if the last stackProcess() call is more than idleDelay milliseconds ago
+    boolean idle() {
+        return (System.currentTimeMillis() > (idleDelay + lastAcc));
+    }
+
+    // true if more than stackLimit entries are waiting
+    boolean full() {
+        return (cacheStack.size() > stackLimit);
+    }
+
+    boolean empty() {
+        return (cacheStack.size() == 0);
+    }
+
+    /**
+     * Hands an entry over to the cache manager. The entry is queued for
+     * job(); if the queue is full it is processed at once.
+     */
+    synchronized public void stackProcess(Entry entry) throws IOException {
+        lastAcc = System.currentTimeMillis();
+        if (full())
+            process(entry);
+        else
+            cacheStack.add(entry);
+    }
+
+    /**
+     * Like stackProcess(Entry), but attaches the content that shall be
+     * written to the cache file to the entry first.
+     */
+    synchronized public void stackProcess(Entry entry, byte[] cacheArray) throws IOException {
+        lastAcc = System.currentTimeMillis();
+        entry.cacheArray = cacheArray;
+        if (full())
+            process(entry);
+        else
+            cacheStack.add(entry);
+    }
+
+    /** @return the number of entries that wait for processing */
+    public int size() {
+        return cacheStack.size();
+    }
+
+    /**
+     * Processes one entry: stores its response header, writes its content
+     * to the cache file if that is wanted and allowed, passes the entry to
+     * the switchboard and writes a log line for the entry's status.
+     * A null entry is ignored.
+     *
+     * @throws IOException from the header database or the file writer
+     */
+    synchronized public void process(Entry entry) throws IOException {
+
+        if (entry == null) return;
+
+        // store response header
+        if ((entry.status == CACHE_FILL) ||
+            (entry.status == CACHE_STALE_RELOAD_GOOD) ||
+            (entry.status == CACHE_STALE_RELOAD_BAD)) {
+            responseHeaderDB.set(entry.urlHash, entry.responseHeader);
+        }
+
+        // work off unwritten files and undone parsing
+        String storeError = null;
+        if (((entry.status == CACHE_FILL) || (entry.status == CACHE_STALE_RELOAD_GOOD)) &&
+            ((storeError = entry.shallStoreCache()) == null)) {
+
+            // write file if not written yet
+            if (entry.cacheArray != null) try {
+                if (entry.cacheFile.exists()) {
+                    currCacheSize -= entry.cacheFile.length();
+                    entry.cacheFile.delete();
+                }
+                entry.cacheFile.getParentFile().mkdirs();
+                serverFileUtils.write(entry.cacheArray, entry.cacheFile);
+                entry.cacheArray = null;
+            } catch (FileNotFoundException e) {
+                // this is the case of a "(Not a directory)" error, which should be prohibited
+                // by the shallStoreCache() property. However, sometimes the error still occurs
+                // In this case do nothing.
+                log.logError("File storage failed: " + e.getMessage());
+            }
+
+            // update statistics
+            currCacheSize += entry.cacheFile.length();
+            cacheAge.put(ageString(entry.cacheFile.lastModified(), entry.cacheFile), entry.cacheFile);
+
+            // enqueue in switchboard
+            switchboard.enQueue(entry);
+        } else if (entry.status == CACHE_PASSING) {
+            // even if the file should not be stored in the cache, it can be used to be indexed
+            // NOTE(review): storeError is only assigned for CACHE_FILL and
+            // CACHE_STALE_RELOAD_GOOD, so it is always null in this branch and
+            // the debug line below is never written; confirm the intended condition
+            if (storeError != null) log.logDebug("NOT STORED " + entry.cacheFile + ":" + storeError);
+
+            // enqueue in switchboard
+            switchboard.enQueue(entry);
+        }
+
+        // write log
+
+        switch (entry.status) {
+        case CACHE_UNFILLED:
+            log.logInfo("CACHE UNFILLED: " + entry.cacheFile); break;
+        case CACHE_FILL:
+            log.logInfo("CACHE FILL: " + entry.cacheFile); break;
+        case CACHE_HIT:
+            log.logInfo("CACHE HIT: " + entry.cacheFile); break;
+        case CACHE_STALE_NO_RELOAD:
+            log.logInfo("CACHE STALE, NO RELOAD: " + entry.cacheFile); break;
+        case CACHE_STALE_RELOAD_GOOD:
+            log.logInfo("CACHE STALE, NECESSARY RELOAD: " + entry.cacheFile); break;
+        case CACHE_STALE_RELOAD_BAD:
+            log.logInfo("CACHE STALE, SUPERFLUOUS RELOAD: " + entry.cacheFile); break;
+        case CACHE_PASSING:
+            log.logInfo("PASSING: " + entry.cacheFile); break;
+        default:
+            log.logInfo("CACHE STATE UNKNOWN: " + entry.cacheFile); break;
+        }
+    }
+
+
+    /**
+     * Works off waiting entries: always about 10 % of the queue, and more
+     * as long as no new entries arrive. While idle, the oldest cache files
+     * are deleted until the cache fits into maxCacheSize.
+     * An IOException terminates the process.
+     *
+     * @return false if there was nothing to do, true otherwise
+     */
+    public boolean job() {
+        // NOTE(review): this method takes entries from cacheStack without the lock
+        // that stackProcess() holds while adding to it; confirm that is intended
+        if (empty()) return false;
+        try {
+            File f;
+            int workoff;
+            workoff = cacheStack.size() / 10;
+            // we want to work off always 10 % to prevent that we collaps
+            while ((workoff-- > 0) && (!(empty()))) {
+                process((Entry) cacheStack.removeFirst());
+            }
+
+            // loop until we are not idle or nothing more to do
+            while ((!empty()) && (idle())) {
+                // work off stack and store entries to file system
+                process((Entry) cacheStack.removeFirst());
+
+                // clean up cache to have enough space for next entries
+                // (stop if the age index is exhausted; firstKey() would throw then)
+                while ((currCacheSize > maxCacheSize) && (!(cacheAge.isEmpty()))) {
+                    f = (File) cacheAge.remove(cacheAge.firstKey());
+                    if (f.exists()) {
+                        currCacheSize -= f.length();
+                        f.delete();
+                        log.logInfo("DELETED OLD CACHE : " + f.toString());
+                    }
+                }
+            }
+        } catch (IOException e) {
+            System.out.println("The proxy cache manager has died because of an IO-problem: " + e.getMessage());
+            e.printStackTrace(System.out);
+            System.exit(-1);
+        }
+        return true;
+    }
+
+    /** @return true if the Content-Type of the response starts with "image" (any case) */
+    public static boolean isPicture(httpHeader response) {
+        String ct = (String) response.get("Content-Type");
+        if (ct == null) return false;
+        return (ct).toUpperCase().startsWith("IMAGE");
+    }
+
+    /** @return true if the Content-Type of the response starts with "text" (any case) */
+    public static boolean isText(httpHeader response) {
+        String ct = (String) response.get("Content-Type");
+        if (ct == null) return false;
+        return (ct).toUpperCase().startsWith("TEXT");
+    }
+
+    /** @return true if the url ends with one of the file extensions that shall not be indexed */
+    public static boolean noIndexingURL(String urlString) {
+        if (urlString == null) return false;
+        urlString = urlString.toLowerCase();
+        return (
+                (urlString.endsWith(".gz")) ||
+                (urlString.endsWith(".msi")) ||
+                (urlString.endsWith(".doc")) ||
+                (urlString.endsWith(".zip")) ||
+                (urlString.endsWith(".tgz")) ||
+                (urlString.endsWith(".rar")) ||
+                (urlString.endsWith(".pdf")) ||
+                (urlString.endsWith(".ppt")) ||
+                (urlString.endsWith(".xls")) ||
+                (urlString.endsWith(".log")) ||
+                (urlString.endsWith(".java")) ||
+                (urlString.endsWith(".c")) ||
+                (urlString.endsWith(".p"))
+               );
+    }
+
+    // this method creates from a given host and path a cache path
+    public File getCachePath(URL url) {
+        // from a given host (which may also be an IPv4 - number, but not IPv6 or
+        // a domain; all without leading 'http://') and a path (which must start
+        // with a leading '/', and may also end in an '/') a path to a file
+        // in the file system with root as given in cachePath is constructed
+        // NOTE(review): nothing in this method creates directories; process()
+        // calls mkdirs() on the parent before writing
+        //System.out.println("DEBUG: getCachedPath=" + url.toString());
+        String remotePath = url.getPath();
+        if (!(remotePath.startsWith("/"))) remotePath = "/" + remotePath;
+        if (remotePath.endsWith("/")) remotePath = remotePath + "ndx";
+        // Strings are immutable: the results of substring() and replace() must be assigned
+        if (remotePath.indexOf('#') > 0) remotePath = remotePath.substring(0, remotePath.indexOf('#'));
+        remotePath = remotePath.replace('?', '_');
+        remotePath = remotePath.replace('&', '_'); // yes this is not reversible, but that is not needed
+        int port = url.getPort();
+        if (port < 0) port = 80;
+        return new File(this.cachePath, url.getHost() + ((port == 80) ? "" : ("+" + port)) + remotePath);
+    }
+
+    /**
+     * Reverse function to getCachePath: constructs the url from a given
+     * storage path.
+     *
+     * @return the url, or null if f does not contain cachePath or the
+     *         result is not a valid url
+     */
+    public static URL getURL(File cachePath, File f) {
+        // this is the reverse function to getCachePath: it constructs the url as string
+        // from a given storage path
+        String s = f.toString().replace('\\', '/');
+        String c = cachePath.toString().replace('\\', '/');
+        //System.out.println("DEBUG: getURL for c=" + c + ", s=" + s);
+        int p = s.lastIndexOf(c);
+        if (p >= 0) {
+            s = s.substring(p + c.length());
+            while (s.startsWith("/")) s = s.substring(1);
+            if ((p = s.indexOf("+")) >= 0) {
+                s = s.substring(0, p) + ":" + s.substring(p + 1);
+            } else {
+                p = s.indexOf("/");
+                if (p < 0)
+                    s = s + ":80/";
+                else
+                    s = s.substring(0, p) + ":80" + s.substring(p);
+            }
+            if (s.endsWith("ndx")) s = s.substring(0, s.length() - 3);
+            //System.out.println("DEBUG: getURL url=" + s);
+            try {
+                return new URL("http://" + s);
+            } catch (Exception e) {
+                return null;
+            }
+        }
+        return null;
+    }
+
+    /** @return true if the url contains a '?' or a '&' */
+    public static boolean isPOST(String urlString) {
+        return ((urlString.indexOf("?") >= 0) ||
+                (urlString.indexOf("&") >= 0));
+    }
+
+    /** @return true if the url contains "cgi" or "exe" (any case) anywhere */
+    public static boolean isCGI(String urlString) {
+        return ((urlString.toLowerCase().indexOf("cgi") >= 0) ||
+                (urlString.toLowerCase().indexOf("exe") >= 0));
+    }
+
+    /** Factory for Entry objects that belong to this cache; see the Entry constructor. */
+    public Entry newEntry(Date initDate, int depth, URL url,
+                          httpHeader requestHeader,
+                          String responseStatus, httpHeader responseHeader,
+                          htmlFilterContentScraper scraper,
+                          String initiator,
+                          plasmaCrawlProfile.entry profile) {
+        //System.out.println("NEW ENTRY: " + url.toString()); // DEBUG
+        return new Entry(initDate, depth, url, requestHeader, responseStatus, responseHeader, scraper, initiator, profile);
+    }
+
+    public class Entry {
+
+        // the class objects
+        public Date initDate; // the date when the request happened; will be used as a key
+        public int depth; // the depth of prefetching
+        public httpHeader requestHeader; // we carry also the header to prevent too many file system access
+        public String responseStatus;
+        public httpHeader responseHeader; // we carry also the header to prevent too many file system access
+        public File cacheFile; // the cache file
+        public byte[] cacheArray; // or the cache as byte-array
+        public URL url;
+        public String urlHash;
+        public String urlString;
+        public int status; // cache load/hit/stale etc status
+        public Date lastModified;
+        public htmlFilterContentScraper scraper;
+        public char doctype;
+        public String language;
+        public plasmaCrawlProfile.entry profile;
+        private String initiator;
+
+        /**
+         * Creates an entry for the normalized form of the given url.
+         * The process is terminated if the normalized url is malformed or
+         * if responseHeader is null. An empty initiator is stored as null.
+         */
+        public Entry(Date initDate, int depth, URL url,
+                     httpHeader requestHeader,
+                     String responseStatus, httpHeader responseHeader,
+                     htmlFilterContentScraper scraper,
+                     String initiator,
+                     plasmaCrawlProfile.entry profile) {
+
+            // normalize url
+            this.urlString = htmlFilterContentScraper.urlNormalform(url);
+            try {
+                this.url = new URL(urlString);
+            } catch (MalformedURLException e) {
+                System.out.println("internal error at httpdProxyCache.Entry: " + e);
+                System.exit(-1);
+            }
+            this.cacheFile = getCachePath(this.url);
+            this.urlHash = plasmaCrawlLURL.urlHash(urlString);
+
+            // assigned:
+            this.initDate = initDate;
+            this.depth = depth;
+            this.requestHeader = requestHeader;
+            this.responseStatus = responseStatus;
+            this.responseHeader = responseHeader;
+            this.scraper = scraper;
+            this.profile = profile;
+            this.initiator = (initiator == null) ? null : ((initiator.length() == 0) ? null: initiator);
+
+            // calculated:
+            if (responseHeader == null) {
+                // the exception is only thrown to get a stack trace of the caller
+                try {
+                    throw new RuntimeException("RESPONSE HEADER = NULL");
+                } catch (Exception e) {
+                    System.out.println("RESPONSE HEADER = NULL in " + url);
+                    e.printStackTrace();
+                    System.exit(0);
+                }
+
+                lastModified = new Date();
+            } else {
+                lastModified = responseHeader.lastModified();
+                if (lastModified == null) lastModified = new Date(); // does not exist in header
+            }
+            this.doctype = plasmaWordIndexEntry.docType(urlString);
+            this.language = plasmaWordIndexEntry.language(url);
+
+            // to be defined later:
+            this.cacheArray = null;
+            this.status = CACHE_UNFILLED;
+        }
+
+        /** @return the initiator, or null if none (or an empty one) was given */
+        public String initiator() {
+            return initiator;
+        }
+        /** @return true if this entry has no initiator */
+        public boolean proxy() {
+            return initiator() == null;
+        }
+        /** @return the length of the attached content array, 0 if there is none */
+        public long size() {
+            if (cacheArray == null) return 0; else return cacheArray.length;
+        }
+
+        /** @return the url from the "Referer" request header, or null if it is missing or invalid */
+        public URL referrerURL() {
+            if (requestHeader == null) return null;
+            try {
+                return new URL((String) requestHeader.get("Referer", ""));
+            } catch (Exception e) {
+                return null;
+            }
+        }
+
+        /** @return true if the status is CACHE_FILL or CACHE_STALE_RELOAD_GOOD */
+        public boolean update() {
+            return ((status == CACHE_FILL) || (status == CACHE_STALE_RELOAD_GOOD));
+        }
+
+
+        // the following three methods for cache read/write granting shall be as loose as possible
+        // but also as strict as necessary to enable caching of most items
+
+        public String shallStoreCache() {
+            // returns NULL if the answer is TRUE
+            // in case of FALSE, the reason as String is returned
+
+            // check profile
+            if (!(profile.storeHTCache())) return "storage_not_wanted";
+
+            // decide upon header information if a specific file should be stored to the cache or not
+            // if the storage was requested by prefetching, the request map is null
+
+            // check status code
+            // (a missing status or one with less than three characters is reported, not thrown)
+            if (responseStatus == null) return "bad_status_" + responseStatus;
+            if (!((responseStatus.startsWith("200")) || (responseStatus.startsWith("203")))) return "bad_status_" + responseStatus.substring(0, Math.min(3, responseStatus.length()));
+
+            // check storage location
+            // sometimes a file name is equal to a path name in the same directory;
+            // or sometimes a file name is equal a directory name created earlier;
+            // we cannot match that here in the cache file path and therefore omit writing into the cache
+            if ((cacheFile.getParentFile().isFile()) || (cacheFile.isDirectory())) return "path_ambiguous";
+            if (cacheFile.toString().indexOf("..") >= 0) return "path_dangerous";
+
+            // -CGI access in request
+            // CGI access makes the page very individual, and therefore not usable in caches
+            if ((isPOST(urlString)) && (!(profile.crawlingQ()))) return "dynamic_post";
+            if (isCGI(urlString)) return "dynamic_cgi";
+
+            // -authorization cases in request
+            // authorization makes pages very individual, and therefore we cannot use the
+            // content in the cache
+            if ((requestHeader != null) && (requestHeader.containsKey("AUTHORIZATION"))) return "personalized";
+
+            // -ranges in request and response
+            // we do not cache partial content
+            if ((requestHeader != null) && (requestHeader.containsKey("RANGE"))) return "partial";
+            if ((responseHeader != null) && (responseHeader.containsKey("CONTENT-RANGE"))) return "partial";
+
+            // -if-modified-since in request
+            // we do not care about if-modified-since, because this case only occurres if the
+            // cache file does not exist, and we need as much info as possible for the indexing
+
+            // -cookies in request
+            // we do not care about cookies, because that would prevent loading more pages
+            // from one domain once a request resulted in a client-side stored cookie
+
+            // -set-cookie in response
+            // we do not care about cookies in responses, because that info comes along
+            // any/many pages from a server and does not express the validity of the page
+            // in modes of life-time/expiration or individuality
+
+            // -pragma in response
+            // if we have a pragma non-cache, we don't cache. usually if this is wanted from
+            // the server, it makes sense
+            if ((responseHeader.containsKey("PRAGMA")) &&
+                (((String) responseHeader.get("Pragma")).toUpperCase().equals("NO-CACHE"))) return "controlled_no_cache";
+
+            // -expires in response
+            // we do not care about expires, because at the time this is called the data is
+            // obvious valid and that header info is used in the indexing later on
+
+            // -cache-control in response
+            // the cache-control has many value options.
+            String cacheControl = (String) responseHeader.get("Cache-Control");
+            if (cacheControl != null) {
+                cacheControl = cacheControl.trim().toUpperCase();
+                if (cacheControl.startsWith("MAX-AGE=")) {
+                    // we need also the load date
+                    Date date = responseHeader.date();
+                    if (date == null) return "stale_no_date_given_in_response";
+                    try {
+                        long ttl = 1000 * Long.parseLong(cacheControl.substring(8)); // milliseconds to live
+                        if ((new Date()).getTime() - date.getTime() > ttl) {
+                            //System.out.println("***not indexed because cache-control");
+                            return "stale_expired";
+                        }
+                    } catch (Exception e) {
+                        return "stale_error_" + e.getMessage() + ")";
+                    }
+                }
+            }
+
+
+            return null;
+        }
+
+        public String shallIndexCache() {
+            // decide upon header information if a specific file should be indexed
+            // this method returns null if the answer is 'YES'!
+            // if the answer is 'NO' (do not index), it returns a string with the reason
+            // to reject the crawling demand in clear text
+
+            // check profile
+            if (!(profile.localIndexing())) return "Indexing_Not_Allowed";
+
+            // -CGI access in request
+            // CGI access makes the page very individual, and therefore not usable in caches
+            if ((isPOST(urlString)) && (!(profile.crawlingQ()))) return "Dynamic_(POST)";
+            if ((isCGI(urlString)) && (!(profile.crawlingQ()))) return "Dynamic_(CGI)";
+
+            // -authorization cases in request
+            // we checked that in shallStoreCache
+
+            // -ranges in request
+            // we checked that in shallStoreCache
+
+            // a picture cannot be indexed
+            if (isPicture(responseHeader)) return "Media_Content_(Picture)";
+            if (!(isText(responseHeader))) return "Media_Content_(not_text)";
+            if (noIndexingURL(urlString)) return "Media_Content_(forbidden)";
+
+
+            // -if-modified-since in request
+            // if the page is fresh at the very moment we can index it
+            if ((requestHeader != null) &&
+                (requestHeader.containsKey("IF-MODIFIED-SINCE")) &&
+                (responseHeader.containsKey("Last-Modified"))) {
+                // parse date
+                Date d1, d2;
+                d2 = responseHeader.lastModified(); if (d2 == null) d2 = new Date();
+                d1 = requestHeader.ifModifiedSince(); if (d1 == null) d1 = new Date();
+                // finally, we shall treat the cache as stale if the modification time is after the if-.. time
+                if (d2.after(d1)) {
+                    //System.out.println("***not indexed because if-modified-since");
+                    return "Stale_(Last-Modified>Modified-Since)";
+                }
+            }
+
+            // -cookies in request
+            // unfortunately, we cannot index pages which have been requested with a cookie
+            // because the returned content may be special for the client
+            if ((requestHeader != null) && (requestHeader.containsKey("COOKIE"))) {
+                //System.out.println("***not indexed because cookie");
+                return "Dynamic_(Requested_With_Cookie)";
+            }
+
+            // -set-cookie in response
+            // the set-cookie from the server does not indicate that the content is special
+            // thus we do not care about it here for indexing
+
+            // -pragma in cached response
+            /*
+            if ((responseHeader.containsKey("PRAGMA")) &&
+                (((String) responseHeader.get("Pragma")).toUpperCase().equals("NO-CACHE"))) return "Denied_(pragma_no_cache)";
+            */
+
+            // see for documentation also:
+            // http://www.web-caching.com/cacheability.html
+
+            // calculate often needed values for freshness attributes
+            Date date = responseHeader.date();
+            Date expires = responseHeader.expires();
+            String cacheControl = (String) responseHeader.get("Cache-Control");
+
+            // look for freshnes information
+
+            // -expires in cached response
+            // the expires value gives us a very easy hint when the cache is stale
+            // sometimes, the expires date is set to the past to prevent that a page is cached
+            // we use that information to see if we should index it
+            if (expires != null) {
+                Date yesterday = new Date((new Date()).getTime() - oneday);
+                if (expires.before(yesterday)) return "Stale_(Expired)";
+            }
+
+            // -lastModified in cached response
+            // this information is too weak to use it to prevent indexing
+            // even if we can apply a TTL heuristic for cache usage
+
+            // -cache-control in cached response
+            // the cache-control has many value options.
+            if (cacheControl != null) {
+                cacheControl = cacheControl.trim().toUpperCase();
+                /* we have the following cases for cache-control:
+                   "public" -- can be indexed
+                   "private", "no-cache", "no-store" -- cannot be indexed
+                   "max-age=" -- stale/fresh dependent on date
+                 */
+                if (cacheControl.startsWith("PUBLIC")) {
+                    // ok, do nothing
+                } else if ((cacheControl.startsWith("PRIVATE")) ||
+                           (cacheControl.startsWith("NO-CACHE")) ||
+                           (cacheControl.startsWith("NO-STORE"))) {
+                    // easy case
+                    return "Stale_(denied_by_cache-control=" + cacheControl+ ")";
+                } else if (cacheControl.startsWith("MAX-AGE=")) {
+                    // we need also the load date
+                    if (date == null) return "Stale_(no_date_given_in_response)";
+                    try {
+                        long ttl = 1000 * Long.parseLong(cacheControl.substring(8)); // milliseconds to live
+                        if ((new Date()).getTime() - date.getTime() > ttl) {
+                            //System.out.println("***not indexed because cache-control");
+                            return "Stale_(expired_by_cache-control)";
+                        }
+                    } catch (Exception e) {
+                        return "Error_(" + e.getMessage() + ")";
+                    }
+                }
+            }
+
+            return null;
+        }
+
+        public boolean shallUseCache() {
+            // decide upon header information if a specific file should be taken from the cache or not
+            // a missing request header (requestHeader == null) skips all request-based checks
+
+            //System.out.println("SHALL READ CACHE: requestHeader = " + requestHeader.toString() + ", responseHeader = " + responseHeader.toString());
+
+            // -CGI access in request
+            // CGI access makes the page very individual, and therefore not usable in caches
+            if (isPOST(urlString)) return false;
+            if (isCGI(urlString)) return false;
+
+            // -authorization cases in request
+            if ((requestHeader != null) && (requestHeader.containsKey("AUTHORIZATION"))) return false;
+
+            // -ranges in request
+            // we do not cache partial content
+            if ((requestHeader != null) && (requestHeader.containsKey("RANGE"))) return false;
+
+            //Date d1, d2;
+
+            // -if-modified-since in request
+            // The entity has to be transferred only if it has
+            // been modified since the date given by the If-Modified-Since header.
+            if ((requestHeader != null) && (requestHeader.containsKey("IF-MODIFIED-SINCE"))) {
+                // checking this makes only sense if the cached response contains
+                // a Last-Modified field. If the field does not exist, we go the safe way
+                if (!(responseHeader.containsKey("Last-Modified"))) return false;
+                // parse date
+                Date d1, d2;
+                d2 = responseHeader.lastModified(); if (d2 == null) d2 = new Date();
+                d1 = requestHeader.ifModifiedSince(); if (d1 == null) d1 = new Date();
+                // finally, we shall treat the cache as stale if the modification time is after the if-.. time
+                if (d2.after(d1)) return false;
+            }
+
+            boolean isNotPicture = !isPicture(responseHeader);
+
+            // -cookies in request
+            // unfortunately, we should reload in case of a cookie
+            // but we think that pictures can still be considered as fresh
+            if ((requestHeader != null) && (requestHeader.containsKey("COOKIE")) && (isNotPicture)) return false;
+
+            // -set-cookie in cached response
+            // this is a similar case as for COOKIE.
+            if ((responseHeader.containsKey("SET-COOKIE")) && (isNotPicture)) return false; // too strong
+            if ((responseHeader.containsKey("SET-COOKIE2")) && (isNotPicture)) return false; // too strong
+
+            // -pragma in cached response
+            // logically, we would not need to care about no-cache pragmas in cached response headers,
+            // because they cannot exist since they are not written to the cache.
+            // So this IF should always fail..
+            if ((responseHeader.containsKey("PRAGMA")) &&
+                (((String) responseHeader.get("Pragma")).toUpperCase().equals("NO-CACHE"))) return false;
+
+            // calculate often needed values for freshness attributes
+            Date date = responseHeader.date();
+            Date expires = responseHeader.expires();
+            Date lastModified = responseHeader.lastModified();
+            String cacheControl = (String) responseHeader.get("Cache-Control");
+
+
+            // see for documentation also:
+            // http://www.web-caching.com/cacheability.html
+            // http://vancouver-webpages.com/CacheNow/
+
+            // look for freshnes information
+            // if we don't have any freshnes indication, we treat the file as stale.
+            // no handle for freshness control:
+            if ((expires == null) && (cacheControl == null) && (lastModified == null)) return false;
+
+            // -expires in cached response
+            // the expires value gives us a very easy hint when the cache is stale
+            if (expires != null) {
+                Date yesterday = new Date((new Date()).getTime() - oneday);
+                if (expires.before(yesterday)) return false;
+            }
+
+            // -lastModified in cached response
+            // we can apply a TTL (Time To Live) heuristic here. We call the time delta between the last read
+            // of the file and the last modified date as the age of the file. If we consider the file as
+            // middel-aged then, the maximum TTL would be cache-creation plus age.
+            // This would be a TTL factor of 100% we want no more than 10% TTL, so that a 10 month old cache
+            // file may only be treated as fresh for one more month, not more.
+            if (lastModified != null) {
+                if (date == null) date = new Date();
+                long age = date.getTime() - lastModified.getTime();
+                if (age < 0) return false;
+                // TTL (Time-To-Live) is age/10 = (d2.getTime() - d1.getTime()) / 10
+                // the actual living-time is new Date().getTime() - d2.getTime()
+                // therefore the cache is stale, if Date().getTime() - d2.getTime() > age/10
+                if ((new Date()).getTime() - date.getTime() > age / 10) return false;
+            }
+
+            // -cache-control in cached response
+            // the cache-control has many value options.
+            if (cacheControl != null) {
+                cacheControl = cacheControl.trim().toUpperCase();
+                if (cacheControl.startsWith("PUBLIC")) {
+                    // ok, do nothing
+                } else if ((cacheControl.startsWith("PRIVATE")) ||
+                           (cacheControl.startsWith("NO-CACHE")) ||
+                           (cacheControl.startsWith("NO-STORE"))) {
+                    // easy case
+                    return false;
+                } else if (cacheControl.startsWith("MAX-AGE=")) {
+                    // we need also the load date
+                    if (date == null) return false;
+                    try {
+                        long ttl = 1000 * Long.parseLong(cacheControl.substring(8)); // milliseconds to live
+                        if ((new Date()).getTime() - date.getTime() > ttl) {
+                            return false;
+                        }
+                    } catch (Exception e) {
+                        return false;
+                    }
+                }
+            }
+
+            return true;
+        }
+    }
+
+}
diff --git a/source/de/anomic/plasma/plasmaSearch.java b/source/de/anomic/plasma/plasmaSearch.java new file mode 100644 index 000000000..3019a6efe --- /dev/null +++ b/source/de/anomic/plasma/plasmaSearch.java @@ -0,0 +1,446 @@ +// plasmaSearch.java +// ----------------------- +// part of YACY +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2004 +// last major change: 11.06.2004 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version.
+// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. +// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. 
+ + +package de.anomic.plasma; + +import java.io.*; +import java.net.*; +import java.util.*; +import de.anomic.server.*; +import de.anomic.kelondro.*; + +public class plasmaSearch { + + public static final char O_QUALITY = 'q'; + public static final char O_AGE = 'a'; + public static final String splitrex = " |/|\\(|\\)|-|\\:|_|\\.|,|\\?|!|'|" + '"'; + + private plasmaCrawlLURL urlStore; + private plasmaWordIndex wordIndex; + + public plasmaSearch(plasmaCrawlLURL urlStore, plasmaWordIndex wordIndex) { + this.urlStore = urlStore; + this.wordIndex = wordIndex; + } + + public static int calcVirtualAge(Date modified) { + // this calculates a virtual age from a given date + // the purpose is to have an age in days of a given modified date + // from a fixed standpoint in the past + //if (modified == null) return 0; + // this is milliseconds. we need days + // one day has 60*60*24 seconds = 86400 seconds + // we take mod 64**3 = 262144, this is the mask of the storage + return (int) ((modified.getTime() / 86400000) % 262144); + } + + public synchronized void addWordIndex(URL url, String urlHash, Date urlModified, int quality, String wordHash, int wordCount, int posintext, int posinphrase, int posofphraseint, String language, char doctype, boolean local) { + // this is called by the remote search procedure when a new index arrives from remote + plasmaWordIndexEntry entry = new plasmaWordIndexEntry(urlHash, wordCount, posintext, posinphrase, posofphraseint, + calcVirtualAge(urlModified), quality, language, doctype, local); + try { + wordIndex.addEntry(wordHash, entry); + } catch (IOException e) {} + // System.out.println("* received one index entry for URL: " + url); // debug + } + + public synchronized int addPageIndex(URL url, String urlHash, Date urlModified, plasmaCondenser condenser, + String language, char doctype) { + // this is called by the switchboard to put in a new page into the index + // use all the words in one condenser object to simultanous create index 
entries + int age = calcVirtualAge(urlModified); + int quality = 0; + try { + quality = Integer.parseInt(condenser.getAnalysis().getProperty("INFORMATION_VALUE","0"), 16); + } catch (NumberFormatException e) { + System.out.println("INTERNAL ERROR WITH CONDENSER.INFORMATION_VALUE: " + e.toString() + ": in URL " + url.toString()); + } + + // iterate over all words + Iterator i = condenser.getWords().iterator(); + String word; + int count; + plasmaWordIndexEntry entry; + String wordHash; + int c = 0; + int p = 0; + while (i.hasNext()) { + word = (String) i.next(); + count = condenser.wordCount(word); + //if ((s.length() > 4) && (c > 1)) System.out.println("# " + s + ": " + c); + wordHash = plasmaWordIndexEntry.word2hash(word); + entry = new plasmaWordIndexEntry(urlHash, count, p++, 0, 0, + age, quality, language, doctype, true); + try { + c += wordIndex.addEntry(wordHash, entry); + } catch (IOException e) {} + } + //System.out.println("DEBUG: plasmaSearch.addPageIndex: added " + condenser.getWords().size() + " words, flushed " + c + " entries"); + return condenser.getWords().size(); + } + + + public static Set words2hashes(String[] words) { + HashSet hashes = new HashSet(); + for (int i = 0; i < words.length; i++) hashes.add(plasmaWordIndexEntry.word2hash(words[i])); + return hashes; + } + + public static Set words2hashes(Set words) { + Iterator i = words.iterator(); + HashSet hashes = new HashSet(); + while (i.hasNext()) hashes.add(plasmaWordIndexEntry.word2hash((String) i.next())); + return hashes; + } + + public synchronized plasmaWordIndexEntity searchWords(Set words, long time) throws IOException { + // search for the set of words and return an array of urlEntry elements + return searchHashes(words2hashes(words), time); + } + + public synchronized plasmaWordIndexEntity searchHashes(Set hashes, long time) throws IOException { + // search for the set of hashes and return an array of urlEntry elements + + long stamp = System.currentTimeMillis(); + TreeMap map = new 
TreeMap(); + String singleHash; + plasmaWordIndexEntity singleResult; + Iterator i = hashes.iterator(); + while (i.hasNext()) { + // get next hash: + singleHash = (String) i.next(); + + // retrieve index + singleResult = wordIndex.getEntity(singleHash, true); + + // check result + if (singleResult.size() == 0) return new plasmaWordIndexEntity(null); // as this is a cunjunction of searches, we have no result if any word is not known + + // store result in order of result size + map.put(serverCodings.enhancedCoder.encodeHex(singleResult.size(), 8) + singleHash, singleResult); + } + + // check if there is any result + if (map.size() == 0) return new plasmaWordIndexEntity(null); // no result, nothing found + + // the map now holds the search results in order of number of hits per word + // we now must pairwise build up a conjunction of these sets + String k = (String) map.firstKey(); // the smallest, which means, the one with the least entries + plasmaWordIndexEntity searchResult = (plasmaWordIndexEntity) map.remove(k); + while ((map.size() > 0) && (searchResult.size() > 0) && (time > 0)) { + // take the first element of map which is a result and combine it with result + k = (String) map.firstKey(); // the next smallest... 
+ time -= (System.currentTimeMillis() - stamp); stamp = System.currentTimeMillis(); + searchResult = joinConstructive(searchResult, (plasmaWordIndexEntity) map.remove(k), 2 * time / (map.size() + 1)); + } + + // in 'searchResult' is now the combined search result + if (searchResult.size() == 0) return new plasmaWordIndexEntity(null); + return searchResult; + } + + private static int log2(int x) { + int l = 0; + while (x > 0) {x = x >> 1; l++;} + return l; + } + + private synchronized plasmaWordIndexEntity joinConstructive(plasmaWordIndexEntity i1, plasmaWordIndexEntity i2, long time) throws IOException { + if ((i1 == null) || (i2 == null)) return null; + if ((i1.size() == 0) || (i2.size() == 0)) return new plasmaWordIndexEntity(null); + + // decide which method to use + int high = ((i1.size() > i2.size()) ? i1.size() : i2.size()); + int low = ((i1.size() > i2.size()) ? i2.size() : i1.size()); + int stepsEnum = 10 * (high + low - 1); + int stepsTest = 12 * log2(high) * low; + + // start most efficient method + if (stepsEnum > stepsTest) { + if (i1.size() < i2.size()) + return joinConstructiveByTest(i1, i2, time); + else + return joinConstructiveByTest(i2, i1, time); + } else { + return joinConstructiveByEnumeration(i1, i2, time); + } + } + + private synchronized plasmaWordIndexEntity joinConstructiveByTest(plasmaWordIndexEntity small, plasmaWordIndexEntity large, long time) throws IOException { + System.out.println("DEBUG: JOIN METHOD BY TEST"); + plasmaWordIndexEntity conj = new plasmaWordIndexEntity(null); // start with empty search result + Enumeration se = small.elements(true); + plasmaWordIndexEntry ie; + long stamp = System.currentTimeMillis(); + while ((se.hasMoreElements()) && ((System.currentTimeMillis() - stamp) < time)) { + ie = (plasmaWordIndexEntry) se.nextElement(); + if (large.contains(ie)) conj.addEntry(ie); + } + return conj; + } + + private synchronized plasmaWordIndexEntity joinConstructiveByEnumeration(plasmaWordIndexEntity i1, 
plasmaWordIndexEntity i2, long time) throws IOException { + System.out.println("DEBUG: JOIN METHOD BY ENUMERATION"); + plasmaWordIndexEntity conj = new plasmaWordIndexEntity(null); // start with empty search result + Enumeration e1 = i1.elements(true); + Enumeration e2 = i2.elements(true); + int c; + if ((e1.hasMoreElements()) && (e2.hasMoreElements())) { + plasmaWordIndexEntry ie1 = (plasmaWordIndexEntry) e1.nextElement(); + plasmaWordIndexEntry ie2 = (plasmaWordIndexEntry) e2.nextElement(); + long stamp = System.currentTimeMillis(); + while ((System.currentTimeMillis() - stamp) < time) { + c = ie1.getUrlHash().compareTo(ie2.getUrlHash()); + if (c < 0) { + if (e1.hasMoreElements()) ie1 = (plasmaWordIndexEntry) e1.nextElement(); else break; + } else if (c > 0) { + if (e2.hasMoreElements()) ie2 = (plasmaWordIndexEntry) e2.nextElement(); else break; + } else { + // we have found the same urls in different searches! + conj.addEntry(ie1); + if (e1.hasMoreElements()) ie1 = (plasmaWordIndexEntry) e1.nextElement(); else break; + if (e2.hasMoreElements()) ie2 = (plasmaWordIndexEntry) e2.nextElement(); else break; + } + } + } + return conj; + } + + public synchronized plasmaSearch.result order(plasmaWordIndexEntity searchResult, Set searchhashes, Set stopwords, char[] priority, long maxTime, int minEntries) throws IOException { + // we collect the urlhashes from it and construct a List with urlEntry objects + plasmaSearch.result acc = new result(searchhashes, stopwords, priority); + + if (searchResult == null) return acc; // strange case where searchResult is not proper: acc is then empty + + Enumeration e = searchResult.elements(true); + plasmaWordIndexEntry entry; + long startTime = System.currentTimeMillis(); + //String headline; + while ((e.hasMoreElements()) && ((acc.sizeFetched() < minEntries) || (System.currentTimeMillis() - startTime < maxTime))) { + entry = (plasmaWordIndexEntry) e.nextElement(); + //headline = entry. 
+ acc.addResult(entry); + } + acc.sortResults(); + System.out.println("plasmaSearch.order: minEntries = " + minEntries + ", effectiveEntries = " + acc.sizeOrdered() + ", demanded Time = " + maxTime + ", effectiveTime = " + (System.currentTimeMillis() - startTime)); + return acc; + } + + public class result /*implements Enumeration*/ { + + TreeMap pageAcc; // key = order hash; value = plasmaLURL.entry + kelondroMScoreCluster ref; + Set searchhashes; + Set stopwords; + char[] order; + ArrayList results; + + public result(Set searchhashes, Set stopwords, char[] order) { + this.pageAcc = new TreeMap(); + ref = new kelondroMScoreCluster(); + this.searchhashes = searchhashes; + this.stopwords = stopwords; + this.order = order; + this.results = new ArrayList(); + } + + public int sizeOrdered() { + return pageAcc.size(); + } + + public int sizeFetched() { + return results.size(); + } + + public boolean hasMoreElements() { + return pageAcc.size() > 0; + } + + public plasmaCrawlLURL.entry nextElement() { + Object top = pageAcc.lastKey(); + return (plasmaCrawlLURL.entry) pageAcc.remove(top); + } + /* + protected void putElement(plasmaWordIndexEntry indexEntry) { + // find the url entry + plasmaCrawlLURL.entry page = urlStore.getEntry(indexEntry.getUrlHash()); + // check if the url exists; the url may not exist in case it was deleted + // somewhere else (i.e. manually through interface etc.) 
+ if (page == null) return; + URL url = page.url(); + String descr = page.descr(); + if ((url == null) || (descr == null)) return; + + // apply pre-calculated order attributes + long ranking = 0; + if (order[0] == O_QUALITY) ranking = 4096 * indexEntry.getQuality(); + else if (order[0] == O_AGE) ranking = 4096 * indexEntry.getVirtualAge(); + if (order[1] == O_QUALITY) ranking += indexEntry.getQuality(); + else if (order[1] == O_AGE) ranking += indexEntry.getVirtualAge(); + + // apply query-in-result matching + long inc = 4096 * 4096; + + String[] urlcomps = url.toString().split(splitrex); + //printSplitLog(url.toString(), urlcomps); + Set urlcomph = words2hashes(urlcomps); + String[] descrcomps = descr.split(splitrex); + //printSplitLog(descr, descrcomps); + Set descrcomph = words2hashes(descrcomps); + Iterator i = searchhashes.iterator(); + String queryhash; + while (i.hasNext()) { + queryhash = (String) i.next(); + if (urlcomph.contains(queryhash)) ranking += inc; + if (descrcomph.contains(queryhash)) ranking += 10 * inc; + } + + // insert value + //System.out.println("Ranking " + ranking + " for url " + url.toString()); + pageAcc.put(serverCodings.encodeHex(ranking, 16) + indexEntry.getUrlHash(), page); + + addScoreFiltered(urlcomps); + addScoreFiltered(descrcomps); + } + */ + protected void addResult(plasmaWordIndexEntry indexEntry) { + // this does 3 things: + // 1. simply store indexEntry and page to a cache + // 2. calculate references and store them to cache + // 2. 
add reference to reference sorting table + + // find the url entry + plasmaCrawlLURL.entry page = urlStore.getEntry(indexEntry.getUrlHash()); + + // take out relevant information for reference computation + URL url = page.url(); + String descr = page.descr(); + if ((url == null) || (descr == null)) return; + + String[] urlcomps = url.toString().split(splitrex); + //printSplitLog(url.toString(), urlcomps); + + String[] descrcomps = descr.split(splitrex); + //printSplitLog(descr, descrcomps); + + // store everything + Object[] resultVector = new Object[] {indexEntry, page, urlcomps, descrcomps}; + results.add(resultVector); + + // add references + addScoreFiltered(urlcomps); + addScoreFiltered(descrcomps); + } + + protected void sortResults() { + Object[] references = getReferences(16); + Set commonSense = new HashSet(); + for (int i = 0; i < references.length; i++) commonSense.add((String) references[i]); + + Object[] resultVector; + plasmaWordIndexEntry indexEntry; + plasmaCrawlLURL.entry page; + String[] urlcomps; + String[] descrcomps; + long ranking; + long inc = 4096 * 4096; + String queryhash; + for (int i = 0; i < results.size(); i++) { + // take out values from result array + resultVector = (Object[]) results.get(i); + indexEntry = (plasmaWordIndexEntry) resultVector[0]; + page = (plasmaCrawlLURL.entry) resultVector[1]; + urlcomps = (String[]) resultVector[2]; + descrcomps = (String[]) resultVector[3]; + + // apply pre-calculated order attributes + ranking = 0; + if (order[0] == O_QUALITY) ranking = 4096 * indexEntry.getQuality(); + else if (order[0] == O_AGE) ranking = 4096 * indexEntry.getVirtualAge(); + if (order[1] == O_QUALITY) ranking += indexEntry.getQuality(); + else if (order[1] == O_AGE) ranking += indexEntry.getVirtualAge(); + + // apply 'common-sense' heuristic using references + for (int j = 0; j < urlcomps.length; j++) if (commonSense.contains(urlcomps[j])) ranking += inc; + for (int j = 0; j < descrcomps.length; j++) if 
(commonSense.contains(descrcomps[j])) ranking += inc; + + // apply query-in-result matching + Set urlcomph = words2hashes(urlcomps); + Set descrcomph = words2hashes(descrcomps); + Iterator shi = searchhashes.iterator(); + while (shi.hasNext()) { + queryhash = (String) shi.next(); + if (urlcomph.contains(queryhash)) ranking += 10 * inc; + if (descrcomph.contains(queryhash)) ranking += 100 * inc; + } + + // insert value + //System.out.println("Ranking " + ranking + " for url " + url.toString()); + pageAcc.put(serverCodings.encodeHex(ranking, 16) + indexEntry.getUrlHash(), page); + } + } + + public Object[] getReferences(int count) { + return ref.getScores(count, false, 2, Integer.MAX_VALUE); + } + + private void addScoreFiltered(String[] words) { + String word; + for (int i = 0; i < words.length; i++) { + word = words[i].toLowerCase(); + if ((word.length() > 2) && + (!(stopwords.contains(word))) && + ("http_html_php_ftp_www_com_org_net_gov_edu_index_home_page_for_usage_the_and_".indexOf(word) < 0) && + (!(searchhashes.contains(plasmaWordIndexEntry.word2hash(word))))) + ref.incScore(word); + } + } + + private void printSplitLog(String x, String[] y) { + String s = ""; + for (int i = 0; i < y.length; i++) s = s + ", " + y[i]; + if (s.length() > 0) s = s.substring(2); + System.out.println("Split '" + x + "' = {" + s + "}"); + } + } + +} diff --git a/source/de/anomic/plasma/plasmaStore.java b/source/de/anomic/plasma/plasmaStore.java new file mode 100644 index 000000000..5c673822f --- /dev/null +++ b/source/de/anomic/plasma/plasmaStore.java @@ -0,0 +1,131 @@ +// plasmaStore.java +// ----------------------- +// part of YACY +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2004 +// last major change: 20.01.2004 +// +// You agree that the Author(s) is (are) not responsible for cost, +// loss of data or any harm that may be caused by usage of this softare or +// this documentation. 
The usage of this software is on your own risk. The +// installation and usage (starting/running) of this software may allow other +// people or application to access your computer and any attached devices and +// is highly dependent on the configuration of the software which must be +// done by the user of the software;the author(s) is (are) also +// not responsible for proper configuration and usage of the software, even +// if provoked by documentation provided together with the software. +// +// THE SOFTWARE THAT FOLLOWS AS ART OF PROGRAMMING BELOW THIS SECTION +// IS PUBLISHED UNDER THE GPL AS DOCUMENTED IN THE FILE gpl.txt ASIDE THIS +// FILE AND AS IN http://www.gnu.org/licenses/gpl.txt +// ANY CHANGES TO THIS FILE ACCORDING TO THE GPL CAN BE DONE TO THE +// LINES THAT FOLLOWS THIS COPYRIGHT NOTICE HERE, BUT CHANGES MUST NOT +// BE DONE ABOVE OR INSIDE THE COPYRIGHT NOTICE. A RE-DISTRIBUTION +// MUST CONTAIN THE INTACT AND UNCHANGED COPYRIGHT NOTICE. +// CONTRIBUTIONS AND CHANGES TO THE PROGRAM CODE SHOULD BE MARKED AS SUCH. + +/* + This class provides storage functions for the plasma search engine. + Unlike the plasmaSwitchboard, which holds run-time information, + this class holds general index information that is in run-time + specific. 
+*/ + +package de.anomic.plasma; + +import java.io.*; +import java.util.*; + +public class plasmaStore { + + + // some static helper methods + public static void saveGzip(File f, byte[] content) throws IOException { + f.getParentFile().mkdirs(); + java.util.zip.GZIPOutputStream gzipout = new java.util.zip.GZIPOutputStream(new FileOutputStream(f)); + gzipout.write(content, 0, content.length); + gzipout.close(); + } + + public static byte[] loadGzip(File f) throws IOException { + java.util.zip.GZIPInputStream gzipin = new java.util.zip.GZIPInputStream(new FileInputStream(f)); + byte[] result = new byte[1024]; + byte[] buffer = new byte[512]; + byte[] b; + int len = 0; + int last; + while ((last = gzipin.read(buffer, 0, buffer.length)) > 0) { + // assert the buffer to the result + while (result.length - len < last) { + // the result array is too small, increase space + b = new byte[result.length * 2]; + System.arraycopy(result, 0, b, 0, len); + result = b; b = null; + } + // copy the last read + System.arraycopy(buffer, 0, result, len, last); + len += last; + } + gzipin.close(); + // finished with reading. 
now cut the result to the right size + b = new byte[len]; + System.arraycopy(result, 0, b, 0, len); + result = null; + return b; + } + + /* public static void saveProperties(File f, Properties props, String comment) throws IOException { + File fp = f.getParentFile(); + if (fp != null) fp.mkdirs(); + FileOutputStream fos = new FileOutputStream(f); + props.store(fos, comment); + fos.close(); + } + + public static Properties loadProperties(File f) throws IOException { + Properties p = new Properties(); + FileInputStream fis = new FileInputStream(f); + p.load(fis); + fis.close(); + return p; + } + */ + + private static long[] appendFileToStack(File fragment, File dest) throws IOException { + // returns a long[2] with + // long[0] = startOfFileFragemt in dest + // long[1] = lengthOfFileFragment in dest + long l = fragment.length(); + long p = dest.length(); + RandomAccessFile fo = new RandomAccessFile(dest, "rw"); + FileInputStream fi = new FileInputStream(fragment); + byte[] buffer = new byte[1024]; + int c; + fo.seek(p); + while ((c = fi.read(buffer)) >= 0) fo.write(buffer, 0, c); + fi.close(); + fo.close(); + long[] r = new long[2]; + r[0] = p; + r[1] = l; + return r; + } + + + /* + public static void main(String[] args) { + try { + HashSet set = new HashSet(); + for (int i = 0; i < args.length; i++) set.add(args[i]); + plasmaStore store = new plasmaStore(new File("DATABASE")); + List result = plasmaSearch.search(set); + for (int i = 0; i < result.size(); i++) { + ((plasmaLURL.entry) result.get(i)).print(); + } + } catch (Exception e) { + e.printStackTrace(); + } + } + */ +} diff --git a/source/de/anomic/plasma/plasmaSwitchboard.java b/source/de/anomic/plasma/plasmaSwitchboard.java new file mode 100644 index 000000000..bb09b4736 --- /dev/null +++ b/source/de/anomic/plasma/plasmaSwitchboard.java @@ -0,0 +1,1368 @@ +// plasmaSwitchboard.java +// ----------------------- +// part of YaCy +// (C) by Michael Peter Christen; mc@anomic.de +// first published on 
http://www.anomic.de +// Frankfurt, Germany, 2004, 2005 +// last major change: 24.03.2005 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. +// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. 
+// Contributions and changes to the program code must be marked as such. + +/* + This class holds the run-time environment of the plasma + Search Engine. It's data forms a blackboard which can be used + to organize running jobs around the indexing algorithm. + The blackboard consist of the following entities: + - storage: one plasmaStore object with the url-based database + - configuration: initialized by properties once, then by external functions + - job queues: for parsing, condensing, indexing + - black/blue/whitelists: controls input and output to the index + + this class is also the core of the http crawling. + There are some items that need to be respected when crawling the web: + 1) respect robots.txt + 2) do not access one domain too frequently, wait between accesses + 3) remember crawled URL's and do not access again too early + 4) priorization of specific links should be possible (hot-lists) + 5) attributes for crawling (depth, filters, hot/black-lists, priority) + 6) different crawling jobs with different attributes ('Orders') simultanoulsy + + We implement some specific tasks and use different database to archieve these goals: + - a database 'crawlerDisallow.db' contains all url's that shall not be crawled + - a database 'crawlerDomain.db' holds all domains and access times, where we loaded the disallow tables + this table contains the following entities: + + - four databases for scheduled access: crawlerScheduledHotText.db, crawlerScheduledColdText.db, + crawlerScheduledHotMedia.db and crawlerScheduledColdMedia.db + - two stacks for new URLS: newText.stack and newMedia.stack + - two databases for URL double-check: knownText.db and knownMedia.db + - one database with crawling orders: crawlerOrders.db + + The Information flow of a single URL that is crawled is as follows: + - a html file is loaded from a specific URL within the module httpdProxyServlet as + a process of the proxy. + - the file is passed to httpdProxyCache. 
Here it's processing is delayed until the proxy is idle. + - The cache entry is passed on to the plasmaSwitchboard. There the URL is stored into plasmaLURL where + the URL is stored under a specific hash. The URL's from the content are stripped off, stored in plasmaLURL + with a 'wrong' date (the date of the URL's are not known at this time, only after fetching) and stacked with + plasmaCrawlerTextStack. The content is read and splitted into rated words in plasmaCondenser. + The splitted words are then integrated into the index with plasmaSearch. + - In plasmaSearch the words are indexed by reversing the relation between URL and words: one URL points + to many words, the words within the document at the URL. After reversing, one word points + to many URL's, all the URL's where the word occurrs. One single word->URL-hash relation is stored in + plasmaIndexEntry. A set of plasmaIndexEntries is a reverse word index. + This reverse word index is stored temporarly in plasmaIndexCache. + - In plasmaIndexCache the single plasmaIndexEntry'ies are collected and stored into a plasmaIndex - entry + These plasmaIndex - Objects are the true reverse words indexes. + - in plasmaIndex the plasmaIndexEntry - objects are stored in a kelondroTree; an indexed file in the file system. + + The information flow of a search request is as follows: + - in httpdFileServlet the user enters a search query, which is passed to plasmaSwitchboard + - in plasmaSwitchboard, the query is passed to plasmaSearch. 
+ - in plasmaSearch, the plasmaSearch.result object is generated by simultanous enumeration of + URL hases in the reverse word indexes plasmaIndex + - (future: the plasmaSearch.result - object is used to identify more key words for a new search) + + + +*/ + +package de.anomic.plasma; + +import java.io.*; +import java.net.*; +import java.util.*; +import java.text.*; +import de.anomic.server.*; +import de.anomic.tools.*; +import de.anomic.htmlFilter.*; +import de.anomic.yacy.*; +import de.anomic.data.*; +import de.anomic.http.*; +import de.anomic.kelondro.*; + +public class plasmaSwitchboard extends serverAbstractSwitch implements serverSwitch { + + + // load slots + private static final int crawlSlots = 6; + + // couloured list management + public static TreeSet blueList = null; + public static TreeSet stopwords = null; + + // storage management + private File cachePath; + private File plasmaPath; + public File listsPath; + public plasmaCrawlLURL loadedURL; + public plasmaCrawlNURL noticeURL; + public plasmaCrawlEURL errorURL; + public plasmaWordIndex wordIndex; + public plasmaSearch searchManager; + public plasmaHTCache cacheManager; + public plasmaCrawlLoader cacheLoader; + public LinkedList processStack = new LinkedList(); + public serverLog log; + public messageBoard messageDB; + public wikiBoard wikiDB; + public String remoteProxyHost; + public int remoteProxyPort; + public plasmaCrawlProfile profiles; + public plasmaCrawlProfile.entry defaultProxyProfile; + public plasmaCrawlProfile.entry defaultRemoteProfile; + public distributeIndex indexDistribution; + public HashSet mimeWhite; + public HashSet extensionBlack; + public HashMap outgoingCookies, incomingCookies; + public kelondroTables facilityDB; + public int serverJobs; + public boolean terminate = false; + + public plasmaSwitchboard(String rootPath, String initPath, String configPath) throws IOException { + super(rootPath, initPath, configPath); + serverJobs = 0; + + // set loglevel + int loglevel = 
Integer.parseInt(getConfig("plasmaLoglevel", "2")); + log = new serverLog("PLASMA", loglevel); + + // load values from configs + plasmaPath = new File(rootPath, getConfig("dbPath", "DATABASE")); + listsPath = new File(rootPath, getConfig("listsPath", "LISTS")); + remoteProxyHost = getConfig("remoteProxyHost", ""); + try { + remoteProxyPort = Integer.parseInt(getConfig("remoteProxyPort", "3128")); + } catch (NumberFormatException e) { + remoteProxyPort = 3128; + } + if (!(getConfig("remoteProxyUse", "false").equals("true"))) { + remoteProxyHost = null; + remoteProxyPort = 0; + } + + if (!(listsPath.exists())) listsPath.mkdirs(); + + // load coloured lists + if (blueList == null) { + // read only once upon first instantiation of this class + String f = getConfig("plasmaBlueList", null); + if (f != null) blueList = loadList(new File(f)); else blueList= new TreeSet(); + } + + // load stopwords + if (stopwords == null) { + stopwords = loadList(new File(rootPath, "yacy.stopwords")); + } + + // read memory amount + int ramCacheKB = Integer.parseInt(getConfig("ramCacheSize", "1")) * 0x400; + int ramLURL = ramCacheKB * Integer.parseInt(getConfig("ramCachePercentLURL", "1")) / 100; + int ramPURL = ramLURL / 2; + int ramEURL = ramLURL / 2; + int ramRWI = ramCacheKB * Integer.parseInt(getConfig("ramCachePercentRWI", "1")) / 100; + int ramHTTP = ramCacheKB * Integer.parseInt(getConfig("ramCachePercentHTTP", "1")) / 100; + int ramMessage = ramCacheKB * Integer.parseInt(getConfig("ramCachePercentMessage", "1")) / 100; + int ramWiki = ramCacheKB * Integer.parseInt(getConfig("ramCachePercentWiki", "1")) / 100; + log.logSystem("LURL Cache memory = " + ramLURL + " KB"); + log.logSystem("RWI Cache memory = " + ramRWI + " KB"); + log.logSystem("HTTP Cache memory = " + ramHTTP + " KB"); + log.logSystem("Message Cache memory = " + ramMessage + " KB"); + log.logSystem("Wiki Cache memory = " + ramWiki + " KB"); + + // make crawl profiles database and default profiles + profiles = new 
plasmaCrawlProfile(new File(plasmaPath, "crawlProfiles0.db")); + + if ((profiles.size() == 0) || + (getConfig("defaultProxyProfile", "").length() == 0) || + (profiles.getEntry(getConfig("defaultProxyProfile", "")) == null)) { + // generate new default entry for proxy crawling + defaultProxyProfile = profiles.newEntry("proxy", "", ".*", ".*", Integer.parseInt(getConfig("proxyPrefetchDepth", "0")), Integer.parseInt(getConfig("proxyPrefetchDepth", "0")), false, true, true, true, false, true, true, true); + setConfig("defaultProxyProfile", defaultProxyProfile.handle()); + } else { + defaultProxyProfile = profiles.getEntry(getConfig("defaultProxyProfile", "")); + } + if ((profiles.size() == 1) || + (getConfig("defaultRemoteProfile", "").length() == 0) || + (profiles.getEntry(getConfig("defaultRemoteProfile", "")) == null)) { + // generate new default entry for proxy crawling + defaultRemoteProfile = profiles.newEntry("remote", "", ".*", ".*", 0, 0, false, false, true, true, false, true, true, false); + setConfig("defaultRemoteProfile", defaultRemoteProfile.handle()); + } else { + defaultRemoteProfile = profiles.getEntry(getConfig("defaultRemoteProfile", "")); + } + + // start indexing management + loadedURL = new plasmaCrawlLURL(new File(plasmaPath, "urlHash.db"), ramLURL); + noticeURL = new plasmaCrawlNURL(plasmaPath, ramPURL); + errorURL = new plasmaCrawlEURL(new File(plasmaPath, "urlErr0.db"), ramEURL); + //indexCache = new plasmaWordIndexRAMCache(plasmaPath, 2000, ramRWI); + wordIndex = new plasmaWordIndex(plasmaPath, ramRWI); + searchManager = new plasmaSearch(loadedURL, wordIndex); + + // start a cache manager + cacheManager = new plasmaHTCache(this, ramHTTP); + + // define an extension-blacklist + String[] extensionBlackArray = getConfig("mediaExt","").split(","); + extensionBlack = new HashSet(); + for (int i = 0; i < extensionBlackArray.length; i++) extensionBlack.add(extensionBlackArray[i].toLowerCase()); + + // define mime-type-whitelist + String[] 
mimeWhiteArray = getConfig("parseableMime","").split(","); + mimeWhite = new HashSet(); + for (int i = 0; i < mimeWhiteArray.length; i++) mimeWhite.add(mimeWhiteArray[i].toLowerCase()); + + // start a loader + int remoteport; + try { remoteport = Integer.parseInt(getConfig("remoteProxyPort","3128")); } + catch (NumberFormatException e) { remoteport = 3128; } + cacheLoader = new plasmaCrawlLoader(cacheManager, log, + Integer.parseInt(getConfig("clientTimeout", "10000")), + 5000, crawlSlots, + getConfig("remoteProxyUse","false").equals("true"), + getConfig("remoteProxyHost",""), + remoteport, + mimeWhite); + + // init boards + messageDB = new messageBoard(new File(getRootPath(), "DATA/SETTINGS/message.db"), ramMessage); + wikiDB = new wikiBoard(new File(getRootPath(), "DATA/SETTINGS/wiki.db"), + new File(getRootPath(), "DATA/SETTINGS/wiki-bkp.db"), ramWiki); + + // init cookie-Monitor + outgoingCookies = new HashMap(); + incomingCookies = new HashMap(); + + // clean up profiles + cleanProfiles(); + + // init facility DB + File facilityDBpath = new File(getRootPath(), "DATA/SETTINGS/"); + facilityDB = new kelondroTables(facilityDBpath); + facilityDB.declareMaps("backlinks", 250, 500, new String[] {"date"}, null); + facilityDB.declareMaps("zeitgeist", 40, 500); + facilityDB.declareTree("statistik", new int[]{11, 8, 8, 8, 8, 8, 8}, 0x400); + facilityDB.update("statistik", (new serverDate()).toShortString(false).substring(0, 11), new long[]{1,2,3,4,5,6}); + long[] testresult = facilityDB.selectLong("statistik", "yyyyMMddHHm"); + testresult = facilityDB.selectLong("statistik", (new serverDate()).toShortString(false).substring(0, 11)); + + // deploy threads + deployThread("70_cachemanager", "Proxy Cache Enqueue", "job takes new proxy files from RAM stack, stores them, and hands over to the Indexing Stack", + new serverInstantThread(cacheManager, "job", "size"), log, 10000); + deployThread("50_localcrawl", "Local Crawl", "thread that performes a single crawl step from the 
local crawl queue", + new serverInstantThread(this, "localCrawlJob", "localCrawlJobSize"), log, 20000); + deployThread("60_globalcrawl", "Global Crawl", "thread that performes a single crawl/indexing step of a web page for global crawling", + new serverInstantThread(this, "globalCrawlJob", "globalCrawlJobSize"), log, 30000); + deployThread("90_cleanup", "Cleanup", "simple cleaning process for monitoring information" , + new serverInstantThread(this, "cleanupJob", "cleanupJobSize"), log, 10000); // all 5 Minutes + deployThread("80_dequeue", "Indexing Dequeue", "thread that creates database entries from scraped web content and performes indexing" , + new serverInstantThread(this, "deQueue", "queueSize"), log, 10000); + // start yacy core + yacyCore yc = new yacyCore(this); + serverInstantThread.oneTimeJob(yc, "loadSeeds", yc.log, 3000); + deployThread("30_peerping", "YaCy Core", "this is the p2p-control and peer-ping task", + new serverInstantThread(yc, "peerPing", null), yc.log, 6000); + deployThread("40_peerseedcycle", "Seed-List Upload", "task that a principal peer performes to generate and upload a seed-list to a ftp account", + new serverInstantThread(yc, "publishSeedList", null), yc.log, 180000); + indexDistribution = new distributeIndex(100 /*indexCount*/, 8000, 1 /*peerCount*/); + deployThread("20_dhtdistribution", "DHT Distribution (currently by juniors only)", "selection, transfer and deletion of index entries that are not searched on your peer, but on others", + new serverInstantThread(indexDistribution, "job", null), log, 120000); + } + + public void handleBusyState(int jobs) { + this.serverJobs = jobs; + } + + private void cleanProfiles() { + if (totalSize() > 0) return; + Iterator i = profiles.profiles(true); + plasmaCrawlProfile.entry entry; + while (i.hasNext()) { + entry = (plasmaCrawlProfile.entry) i.next(); + if (!((entry.name().equals("proxy")) || (entry.name().equals("remote")))) i.remove(); + } + } + + public plasmaHTCache getCacheManager() { + 
return cacheManager; + } + + private static TreeSet loadList(File file) { + TreeSet list = new TreeSet(kelondroMSetTools.fastStringComparator); + if (!(file.exists())) return list; + try { + BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(file))); + String line; + while ((line = br.readLine()) != null) { + line = line.trim(); + if ((line.length() > 0) && (!(line.startsWith("#")))) list.add(line.trim().toLowerCase()); + } + br.close(); + } catch (IOException e) {} + return list; + } + + public void close() { + log.logSystem("SWITCHBOARD SHUTDOWN STEP 1: sending termination signal to managed threads:"); + terminateAllThreads(true); + log.logSystem("SWITCHBOARD SHUTDOWN STEP 2: sending termination signal to threaded indexing (stand by..)"); + int waitingBoundSeconds = Integer.parseInt(getConfig("shutdownWaiting", "120")); + wordIndex.terminate(waitingBoundSeconds); + log.logSystem("SWITCHBOARD SHUTDOWN STEP 3: sending termination signal to database manager"); + try { + wikiDB.close(); + messageDB.close(); + facilityDB.close(); + loadedURL.close(); + } catch (IOException e) {} + log.logSystem("SWITCHBOARD SHUTDOWN TERMINATED"); + } + + public int totalSize() { + return processStack.size() + cacheLoader.size() + noticeURL.stackSize(); + } + + public int queueSize() { + return processStack.size(); + } + + public int lUrlSize() { + return loadedURL.size(); + } + + public int cacheSizeMin() { + return wordIndex.sizeMin(); + } + + public void enQueue(Object job) { + plasmaHTCache.Entry entry = (plasmaHTCache.Entry) job; + processStack.addLast(entry); + } + + public synchronized void deQueue() { + if (serverJobs < 5) { + if (processStack.size() > 0) { + log.logDebug("DEQUEUE: dequeueing one step (processStack=" + processStack.size() + ", localStackSize=" + noticeURL.localStackSize() + ", remoteStackSize=" + noticeURL.remoteStackSize() + ")"); + processResourceStack((plasmaHTCache.Entry) processStack.removeFirst()); + } + } else { + //if 
(processStack.size() > 0) { + log.logDebug("DEQUEUE: serverJobs=" + serverJobs + " 'busy' - no dequeueing (processStack=" + processStack.size() + ", localStackSize=" + noticeURL.localStackSize() + ", remoteStackSize=" + noticeURL.remoteStackSize() + ")"); + //} + } + } + + public int cleanupJobSize() { + int c = 0; + if ((errorURL.stackSize() > 1000)) c++; + for (int i = 1; i <= 6; i++) { + if (loadedURL.getStackSize(i) > 1000) c++; + } + return c; + } + + public boolean cleanupJob() { + + boolean hasDoneSomething = false; + + // clean up error stack + if ((errorURL.stackSize() > 1000)) { + errorURL.clearStack(); + hasDoneSomething = true; + } + // clean up loadedURL stack + for (int i = 1; i <= 6; i++) { + if (loadedURL.getStackSize(i) > 1000) { + loadedURL.clearStack(i); + hasDoneSomething = true; + } + } + // clean up profiles + cleanProfiles(); + return hasDoneSomething; + } + + public int localCrawlJobSize() { + return noticeURL.localStackSize(); + } + + public boolean localCrawlJob() { + if ((serverJobs < 2) && + (processStack.size() < crawlSlots) && + (noticeURL.localStackSize() > 0) && + (cacheLoader.size() < crawlSlots)) { + // local crawl (may start a global crawl) + plasmaCrawlNURL.entry nex = noticeURL.localPop(); + processCrawling(nex, nex.initiator()); + return true; + } + return false; + } + + public int globalCrawlJobSize() { + return noticeURL.remoteStackSize(); + } + + public boolean globalCrawlJob() { + if ((serverJobs < 2) && + (processStack.size() == 0) && + (noticeURL.localStackSize() == 0) && + (noticeURL.remoteStackSize() > 0)) { + // we don't want to crawl a global URL globally, since WE are the global part. 
(from this point of view) + plasmaCrawlNURL.entry nex = noticeURL.remotePop(); + processCrawling(nex, nex.initiator()); + return true; + } + return false; + } + + private synchronized void processResourceStack(plasmaHTCache.Entry entry) { + // work off one stack entry with a fresh resource (scraped web page) + if (entry.scraper != null) try { + // we must distinguish the following cases: resource-load was initiated by + // 1) global crawling: the index is extern, not here (not possible here) + // 2) result of search queries, some indexes are here (not possible here) + // 3) result of index transfer, some of them are here (not possible here) + // 4) proxy-load (initiator is "------------") + // 5) local prefetch/crawling (initiator is own seedHash) + // 6) local fetching for global crawling (other known or unknwon initiator) + int processCase = 0; + yacySeed initiator = null; + String initiatorHash = (entry.proxy()) ? plasmaURL.dummyHash : entry.initiator(); + if (initiatorHash.equals(plasmaURL.dummyHash)) { + // proxy-load + processCase = 4; + } else if (initiatorHash.equals(yacyCore.seedDB.mySeed.hash)) { + // normal crawling + processCase = 5; + } else { + // this was done for remote peer (a global crawl) + initiator = yacyCore.seedDB.getConnected(initiatorHash); + processCase = 6; + } + + log.logDebug("processResourceStack: processCase=" + processCase + ", depth=" + entry.depth + ", maxDepth=" + entry.profile.generalDepth() + ", filter=" + entry.profile.generalFilter() + ", initiatorHash=" + initiatorHash + ", status=" + entry.status + ", url=" + entry.url); // DEBUG + + // put anchors on crawl stack + if (((processCase == 4) || (processCase == 5)) && + (entry.depth < entry.profile.generalDepth())) { + Properties hl = entry.scraper.getHyperlinks(); + Enumeration e = hl.propertyNames(); + String nexturlstring; + String rejectReason; + int c = 0; + while (e.hasMoreElements()) { + nexturlstring = (String) e.nextElement(); + rejectReason = stackCrawl(nexturlstring, 
entry.urlString, initiatorHash, hl.getProperty(nexturlstring), entry.lastModified, entry.depth + 1, entry.profile); + if (rejectReason == null) { + c++; + } else { + errorURL.newEntry(new URL(nexturlstring), entry.urlString, entry.initiator(), yacyCore.seedDB.mySeed.hash, + hl.getProperty(nexturlstring), rejectReason, new bitfield(plasmaURL.urlFlagLength), false); + } + } + log.logInfo("CRAWL: ADDED " + c + " LINKS FROM " + entry.url.toString() + + ", NEW CRAWL STACK SIZE IS " + noticeURL.localStackSize()); + } + + // create index + String noIndexReason; + String descr = entry.scraper.getHeadline(); + URL referrerURL = entry.referrerURL(); + String referrerHash = (referrerURL == null) ? plasmaURL.dummyHash : plasmaURL.urlHash(referrerURL); + if ((noIndexReason = entry.shallIndexCache()) == null ) { + // strip out words + log.logDebug("(Profile) Condensing for '" + entry.urlString + "'"); + plasmaCondenser condenser = new plasmaCondenser(new ByteArrayInputStream(entry.scraper.getText())); + + //log.logInfo("INDEXING HEADLINE:" + descr); + try { + log.logDebug("(Profile) Create LURL-Entry for '" + entry.urlString + "'"); + plasmaCrawlLURL.entry newEntry = loadedURL.newEntry( + entry.url, descr, entry.lastModified, new Date(), + initiatorHash, + yacyCore.seedDB.mySeed.hash, + referrerHash, + 0, true, + Integer.parseInt(condenser.getAnalysis().getProperty("INFORMATION_VALUE","0"), 16), + entry.language, entry.doctype, + entry.size(), + (int) Long.parseLong(condenser.getAnalysis().getProperty("NUMB_WORDS","0"), 16), + processCase + ); + + String urlHash = newEntry.hash(); + log.logDebug("(Profile) Remove NURL for '" + entry.urlString + "'"); + noticeURL.remove(urlHash); // worked-off + + if (((processCase == 4) || (processCase == 5) || (processCase == 6)) && + (entry.profile.localIndexing())) { + // remove stopwords + log.logDebug("(Profile) Exclude Stopwords for '" + entry.urlString + "'"); + log.logInfo("Excluded " + condenser.excludeWords(stopwords) + " words in URL 
" + entry.url); + //System.out.println("DEBUG: words left to be indexed: " + condenser.getWords()); + + // do indexing + log.logDebug("(Profile) Create Index for '" + entry.urlString + "'"); + int words = searchManager.addPageIndex(entry.url, urlHash, entry.lastModified, condenser, entry.language, entry.doctype); + log.logInfo("Indexed " + words + " words in URL " + entry.url + " (" + descr + ")"); + + // if this was performed for a remote crawl request, notify requester + if ((processCase == 6) && (initiator != null)) { + log.logInfo("Sending crawl receipt for '" + entry.urlString + "' to " + initiator.getName()); + yacyClient.crawlReceipt(initiator, "crawl", "fill", "indexed", newEntry, ""); + } + } else { + log.logDebug("Resource '" + entry.urlString + "' not indexed (indexing is off)"); + } + } catch (Exception ee) { + log.logError("Could not index URL " + entry.url + ": " + ee.getMessage()); + ee.printStackTrace(); + if ((processCase == 6) && (initiator != null)) { + yacyClient.crawlReceipt(initiator, "crawl", "exception", ee.getMessage(), null, ""); + } + } + + } else { + log.logInfo("Not indexed any word in URL " + entry.url + "; cause: " + noIndexReason); + errorURL.newEntry(entry.url, referrerHash, + ((entry.proxy()) ? plasmaURL.dummyHash : entry.initiator()), + yacyCore.seedDB.mySeed.hash, + descr, noIndexReason, new bitfield(plasmaURL.urlFlagLength), true); + if ((processCase == 6) && (initiator != null)) { + yacyClient.crawlReceipt(initiator, "crawl", "rejected", noIndexReason, null, ""); + } + } + + // explicit delete/free resources + entry.scraper = null; entry = null; + } catch (IOException e) { + log.logError("ERROR in plasmaSwitchboard.process(): " + e.toString()); + } + } + + public String stackCrawl(String nexturlString, String referrerString, String initiatorHash, String name, Date loadDate, int currentdepth, plasmaCrawlProfile.entry profile) { + // stacks a crawl item. 
The position can also be remote + // returns null if successful, a reason string if not successful + + String reason = null; // failure reason + + // strange error + if (nexturlString == null) { + reason = "denied_(url_null)"; + log.logError("Wrong URL in stackCrawl: url=null"); + return reason; + } + + URL nexturl = null; + if ((initiatorHash == null) || (initiatorHash.length() == 0)) initiatorHash = plasmaURL.dummyHash; + String referrerHash = plasmaURL.urlHash(referrerString); + try { + nexturl = new URL(nexturlString); + } catch (MalformedURLException e) { + reason = "denied_(url_'" + nexturlString + "'_wrong)"; + log.logError("Wrong URL in stackCrawl: " + nexturlString); + return reason; + } + + // filter deny + if (!(nexturlString.matches(profile.generalFilter()))) { + reason = "denied_(does_not_match_filter)"; + errorURL.newEntry(nexturl, referrerHash, initiatorHash, yacyCore.seedDB.mySeed.hash, + name, reason, new bitfield(plasmaURL.urlFlagLength), false); + return reason; + } + + // deny cgi + if (plasmaHTCache.isCGI(nexturlString)) { + reason = "denied_(cgi_url)"; + errorURL.newEntry(nexturl, referrerHash, initiatorHash, yacyCore.seedDB.mySeed.hash, + name, reason, new bitfield(plasmaURL.urlFlagLength), false); + return reason; + } + + // deny post properties + if ((plasmaHTCache.isPOST(nexturlString)) && (!(profile.crawlingQ()))) { + reason = "denied_(post_url)"; + errorURL.newEntry(nexturl, referrerHash, initiatorHash, yacyCore.seedDB.mySeed.hash, + name, reason, new bitfield(plasmaURL.urlFlagLength), false); + return reason; + } + + String nexturlhash = plasmaURL.urlHash(nexturl); + if (loadedURL.exists(nexturlhash)) { + // DISTIGUISH OLD/RE-SEARCH CASES HERE! 
+ reason = "double_(already_loaded)"; + errorURL.newEntry(nexturl, referrerHash, initiatorHash, yacyCore.seedDB.mySeed.hash, + name, reason, new bitfield(plasmaURL.urlFlagLength), false); + return reason; + } + if (noticeURL.existsInStack(nexturlhash)) { + reason = "double_(noticed_in_crawler)"; + errorURL.newEntry(nexturl, referrerHash, initiatorHash, yacyCore.seedDB.mySeed.hash, + name, reason, new bitfield(plasmaURL.urlFlagLength), false); + return reason; + } + + // store information + boolean local = ((initiatorHash.equals(plasmaURL.dummyHash)) || (initiatorHash.equals(yacyCore.seedDB.mySeed.hash))); + noticeURL.newEntry(initiatorHash, /* initiator, needed for p2p-feedback */ + nexturl, /* url clear text string */ + loadDate, /* load date */ + referrerHash, /* last url in crawling queue */ + name, /* the anchor name */ + profile.handle(), + currentdepth, /*depth so far*/ + 0, /*anchors, default value */ + 0, /*forkfactor, default value */ + ((local) ? 1 : 4) /*local/remote stack*/ + ); + + return null; + } + + private URL hash2url(String urlhash) { + if (urlhash.equals(plasmaURL.dummyHash)) return null; + plasmaCrawlNURL.entry ne = noticeURL.getEntry(urlhash); + if (ne != null) return ne.url(); + plasmaCrawlLURL.entry le = loadedURL.getEntry(urlhash); + if (le != null) return le.url(); + plasmaCrawlEURL.entry ee = errorURL.getEntry(urlhash); + if (ee != null) return ee.url(); + return null; + } + + private String hash2urlstring(String urlhash) { + URL u = hash2url(urlhash); + if (u == null) return plasmaURL.dummyHash; else return u.toString(); + } + + private synchronized void processCrawling(plasmaCrawlNURL.entry urlEntry, String initiator) { + if (urlEntry.url() == null) return; + String profileHandle = urlEntry.profileHandle(); + //System.out.println("DEBUG plasmaSwitchboard.processCrawling: profileHandle = " + profileHandle + ", urlEntry.url = " + urlEntry.url()); + plasmaCrawlProfile.entry profile = profiles.getEntry(profileHandle); + if (profile == null) 
{ + log.logError("CRAWL[" + noticeURL.localStackSize() + ", " + noticeURL.remoteStackSize() + "]: LOST PROFILE HANDLE '" + urlEntry.profileHandle() + "' (must be internal error) for URL " + urlEntry.url()); + return; + } + log.logDebug("plasmaSwitchboard.processCrawling: url=" + urlEntry.url() + ", initiator=" + urlEntry.initiator() + + ", crawlOrder=" + ((profile.remoteIndexing()) ? "true" : "false") + ", depth=" + urlEntry.depth() + ", crawlDepth=" + profile.generalDepth() + ", filter=" + profile.generalFilter() + + ", permission=" + (((yacyCore.seedDB.mySeed.isSenior()) || (yacyCore.seedDB.mySeed.isPrincipal())) ? "true" : "false")); + + boolean tryRemote = + (profile.remoteIndexing()) /* granted */ && + (urlEntry.depth() == profile.generalDepth()) /* leaf node */ && + (urlEntry.initiator() != null) && (!(urlEntry.initiator().equals(plasmaURL.dummyHash))) /* not proxy */ && + ((yacyCore.seedDB.mySeed.isSenior()) || + (yacyCore.seedDB.mySeed.isPrincipal())) /* qualified */; + + if (tryRemote) { + boolean success = processGlobalCrawling(urlEntry); + if (!(success)) processLocalCrawling(urlEntry, profile, initiator); + } else { + processLocalCrawling(urlEntry, profile, initiator); + } + } + + private synchronized void processLocalCrawling(plasmaCrawlNURL.entry urlEntry, plasmaCrawlProfile.entry profile, String initiator) { + // work off one Crawl stack entry + if ((urlEntry == null) && (urlEntry.url() == null)) { + log.logInfo("LOCALCRAWL[" + noticeURL.localStackSize() + ", " + noticeURL.remoteStackSize() + "]: urlEntry=null"); + return; + } + cacheLoader.loadParallel(urlEntry.url(), urlEntry.referrerHash(), initiator, urlEntry.depth(), profile); + log.logInfo("LOCALCRAWL[" + noticeURL.localStackSize() + ", " + noticeURL.remoteStackSize() + "]: enqueed for load " + urlEntry.url()); + } + + private synchronized boolean processGlobalCrawling(plasmaCrawlNURL.entry urlEntry) { + if (urlEntry == null) { + log.logInfo("GLOBALCRAWL[" + noticeURL.localStackSize() + ", " + 
noticeURL.remoteStackSize() + "]: urlEntry=null"); + return false; + } + + // are we qualified? + if ((yacyCore.seedDB.mySeed == null) || + (yacyCore.seedDB.mySeed.isJunior())) { + log.logDebug("plasmaSwitchboard.processGlobalCrawling: no permission"); + return false; + } + + // check url + if (urlEntry.url() == null) { + log.logDebug("ERROR: plasmaSwitchboard.processGlobalCrawling - url is null. name=" + urlEntry.name()); + return false; + } + String nexturlString = urlEntry.url().toString(); + String urlhash = plasmaURL.urlHash(urlEntry.url()); + + // check remote crawl + yacySeed remoteSeed = yacyCore.dhtAgent.getCrawlSeed(urlhash); + if (remoteSeed == null) { + log.logDebug("plasmaSwitchboard.processGlobalCrawling: no remote crawl seed available"); + return false; + } + + HashMap page = yacyClient.crawlOrder(remoteSeed, nexturlString, hash2urlstring(urlEntry.referrerHash()), 0); + + // check success + /* + the result of the 'response' value can have one of the following values: + negative cases, no retry + denied - the peer does not want to crawl that + exception - an exception occurred + + negative case, retry possible + rejected - the peer has rejected to process, but a re-try should be possible + + positive case with crawling + stacked - the resource is processed asap + + positive case without crawling + double - the resource is already in database, believed to be fresh and not reloaded + the resource is also returned in lurl + */ + if ((page == null) || (page.get("delay") == null)) { + log.logInfo("CRAWL: REMOTE CRAWL TO PEER " + remoteSeed.getName() + " FAILED. 
CAUSE: unknown (URL=" + nexturlString + ")"); + yacyCore.peerActions.peerDeparture(remoteSeed); + return false; + } else try { + log.logDebug("plasmaSwitchboard.processGlobalCrawling: remoteSeed=" + remoteSeed.getName() + ", url=" + nexturlString + ", response=" + page.toString()); // DEBUG + + int newdelay = Integer.parseInt((String) page.get("delay")); + yacyCore.dhtAgent.setCrawlDelay(remoteSeed.hash, newdelay); + String response = (String) page.get("response"); + if (response.equals("stacked")) { + log.logInfo("GLOBALCRAWL: REMOTE CRAWL TO PEER " + remoteSeed.getName() + " PLACED URL=" + nexturlString + "; NEW DELAY=" + newdelay); + return true; + } else if (response.equals("double")) { + String lurl = (String) page.get("lurl"); + if ((lurl != null) && (lurl.length() != 0)) { + String propStr = crypt.simpleDecode(lurl, (String) page.get("key")); + plasmaCrawlLURL.entry entry = loadedURL.newEntry(propStr, true, yacyCore.seedDB.mySeed.hash, remoteSeed.hash, 1); + noticeURL.remove(entry.hash()); + log.logInfo("GLOBALCRAWL: REMOTE CRAWL TO PEER " + remoteSeed.getName() + " SUPERFLUOUS. CAUSE: " + page.get("reason") + " (URL=" + nexturlString + "). URL IS CONSIDERED AS 'LOADED!'"); + return true; + } else { + log.logInfo("GLOBALCRAWL: REMOTE CRAWL TO PEER " + remoteSeed.getName() + " REJECTED. CAUSE: " + page.get("reason") + " (URL=" + nexturlString + ")"); + return false; + } + } else { + log.logInfo("GLOBALCRAWL: REMOTE CRAWL TO PEER " + remoteSeed.getName() + " DENIED. RESPONSE=" + response + ", CAUSE=" + page.get("reason") + ", URL=" + nexturlString); + return false; + } + } catch (Exception e) { + // wrong values + log.logError("GLOBALCRAWL: REMOTE CRAWL TO PEER " + remoteSeed.getName() + " FAILED. 
CLIENT RETURNED: " + page.toString()); + e.printStackTrace(); + return false; + } + + } + + private static SimpleDateFormat DateFormatter = new SimpleDateFormat("EEE, dd MMM yyyy"); + public static String dateString(Date date) { + if (date == null) return ""; else return DateFormatter.format(date); + } + + + public serverObjects searchFromLocal(Set querywords, String order1, String order2, int count, boolean global, long time /*milliseconds*/, String urlmask) { + + serverObjects prop = new serverObjects(); + try { + char[] order = new char[2]; + if (order1.equals("quality")) order[0] = plasmaSearch.O_QUALITY; else order[0] = plasmaSearch.O_AGE; + if (order2.equals("quality")) order[1] = plasmaSearch.O_QUALITY; else order[1] = plasmaSearch.O_AGE; + + // filter out words that appear in bluelist + Set queryhashes = plasmaSearch.words2hashes(querywords); + Iterator it = querywords.iterator(); + String word, gs = ""; + while (it.hasNext()) { + word = (String) it.next(); + if (blueList.contains(word)) it.remove(); else gs += "+" + word; + } + if (gs.length() > 0) gs = gs.substring(1); + + // log + log.logInfo("INIT WORD SEARCH: " + gs + " - " + count + " links, " + (time / 1000) + " seconds"); + long timestamp = System.currentTimeMillis(); + + // do global fetching + int globalresults = 0; + if (global) { + int fetchcount = ((int) time / 1000) * 4; // number of wanted results until break in search + int fetchpeers = ((int) time / 1000) * 3; // number of target peers; means 30 peers in 10 seconds + long fetchtime = time * 7 / 10; // time to waste + if (fetchcount > count) fetchcount = count; + globalresults = yacySearch.search(querywords, loadedURL, searchManager, fetchcount, fetchpeers, fetchtime); + log.logDebug("SEARCH TIME AFTER GLOBAL-TRIGGER TO " + fetchpeers + " PEERS: " + ((System.currentTimeMillis() - timestamp) / 1000) + " seconds"); + } + prop.put("globalresults", globalresults); // the result are written to the local DB + + + // now search locally (the global 
results should be now in the local db) + long remainingTime = time - (System.currentTimeMillis() - timestamp); + plasmaWordIndexEntity idx = searchManager.searchWords(querywords, remainingTime * 8 / 10); // the search + log.logDebug("SEARCH TIME AFTER FINDING " + idx.size() + " ELEMENTS: " + ((System.currentTimeMillis() - timestamp) / 1000) + " seconds"); + + remainingTime = time - (System.currentTimeMillis() - timestamp); + if (remainingTime < 500) remainingTime = 500; + if (remainingTime > 3000) remainingTime = 3000; + plasmaSearch.result acc = searchManager.order(idx, queryhashes, stopwords, order, remainingTime, 100); + log.logDebug("SEARCH TIME AFTER ORDERING OF SEARCH RESULT: " + ((System.currentTimeMillis() - timestamp) / 1000) + " seconds"); + + // result is a List of urlEntry elements: prepare answer + if (acc == null) { + prop.put("totalcount", "0"); + prop.put("linkcount", "0"); + } else { + prop.put("totalcount", "" + acc.sizeOrdered()); + int i = 0; + int p; + URL url; + plasmaCrawlLURL.entry urlentry; + String urlstring, urlname, filename; + String host, hash; + String descr = ""; + yacySeed seed; + String address; + //kelondroMScoreCluster ref = new kelondroMScoreCluster(); + while ((acc.hasMoreElements()) && (i < count)) { + urlentry = acc.nextElement(); + url = urlentry.url(); + host = url.getHost(); + if (host.endsWith(".yacyh")) { + // translate host into current IP + p = host.indexOf("."); + hash = yacySeed.hexHash2b64Hash(host.substring(p + 1, host.length() - 6)); + seed = yacyCore.seedDB.getConnected(hash); + filename = url.getFile(); + if ((seed == null) || ((address = seed.getAddress()) == null)) { + // seed is not known from here + removeReferences(urlentry.hash(), getWords(("yacyshare " + filename.replace('?', ' ') + " " + urlentry.descr()).getBytes())); + loadedURL.remove(urlentry.hash()); // clean up + continue; // next result + } + url = new URL("http://" + address + "/" + host.substring(0, p) + filename); + urlname = "http://share." 
+ seed.getName() + ".yacy" + filename; + if ((p = urlname.indexOf("?")) > 0) urlname = urlname.substring(0, p); + urlstring = htmlFilterContentScraper.urlNormalform(url); + } else { + urlstring = htmlFilterContentScraper.urlNormalform(url); + urlname = urlstring; + } + descr = urlentry.descr(); + + // check bluelist again: filter out all links where any bluelisted word + // appear either in url, url's description or search word + // the search word was sorted out earlier + /* + String s = descr.toLowerCase() + url.toString().toLowerCase(); + for (int c = 0; c < blueList.length; c++) { + if (s.indexOf(blueList[c]) >= 0) return; + } + */ + //addScoreForked(ref, gs, descr.split(" ")); + //addScoreForked(ref, gs, urlstring.split("/")); + if (urlstring.matches(urlmask)) { //.* is default + prop.put("results_" + i + "_description", descr); + prop.put("results_" + i + "_url", urlstring); + prop.put("results_" + i + "_urlname", urlname); + prop.put("results_" + i + "_date", dateString(urlentry.moddate())); + i++; + } + } + log.logDebug("SEARCH TIME AFTER RESULT PREPARATION: " + ((System.currentTimeMillis() - timestamp) / 1000) + " seconds"); + + // calc some more cross-reference + remainingTime = time - (System.currentTimeMillis() - timestamp); + if (remainingTime < 0) remainingTime = 1000; + /* + while ((acc.hasMoreElements()) && (((time + timestamp) < System.currentTimeMillis()))) { + urlentry = acc.nextElement(); + urlstring = htmlFilterContentScraper.urlNormalform(urlentry.url()); + descr = urlentry.descr(); + + addScoreForked(ref, gs, descr.split(" ")); + addScoreForked(ref, gs, urlstring.split("/")); + } + **/ + //Object[] ws = ref.getScores(16, false, 2, Integer.MAX_VALUE); + Object[] ws = acc.getReferences(16); + log.logDebug("SEARCH TIME AFTER XREF PREPARATION: " + ((System.currentTimeMillis() - timestamp) / 1000) + " seconds"); + + /* + System.out.print("DEBUG WORD-SCORE: "); + for (int ii = 0; ii < ws.length; ii++) System.out.print(ws[ii] + ", "); + 
System.out.println(" all words = " + ref.getElementCount() + ", total count = " + ref.getTotalCount()); + */ + prop.put("references", ws); + prop.put("linkcount", "" + i); + prop.put("results", "" + i); + } + + // log + log.logInfo("EXIT WORD SEARCH: " + gs + " - " + + prop.get("totalcount", "0") + " links, " + + ((System.currentTimeMillis() - timestamp) / 1000) + " seconds"); + if (idx != null) idx.close(); + return prop; + } catch (IOException e) { + return null; + } + } + + public serverObjects searchFromRemote(Set hashes, int count, boolean global, long duetime) { + if (hashes == null) hashes = new HashSet(); + + serverObjects prop = new serverObjects(); + try { + log.logInfo("INIT HASH SEARCH: " + hashes + " - " + count + " links"); + long timestamp = System.currentTimeMillis(); + plasmaWordIndexEntity idx = searchManager.searchHashes(hashes, duetime * 8 / 10); // a nameless temporary index, not sorted by special order but by hash + long remainingTime = duetime - (System.currentTimeMillis() - timestamp); + plasmaSearch.result acc = searchManager.order(idx, hashes, stopwords, new char[]{plasmaSearch.O_QUALITY, plasmaSearch.O_AGE}, remainingTime, 100); + + // result is a List of urlEntry elements + if (acc == null) { + prop.put("totalcount", "0"); + prop.put("linkcount", "0"); + prop.put("references", ""); + } else { + prop.put("totalcount", "" + acc.sizeOrdered()); + int i = 0; + String links = ""; + String resource = ""; + //plasmaIndexEntry pie; + plasmaCrawlLURL.entry urlentry; + while ((acc.hasMoreElements()) && (i < count)) { + urlentry = acc.nextElement(); + resource = urlentry.toString(); + if (resource != null) { + links += "resource" + i + "=" + resource + serverCore.crlfString; + i++; + } + } + prop.put("links", links); + prop.put("linkcount", "" + i); + + // prepare reference hints + Object[] ws = acc.getReferences(16); + String refstr = ""; + for (int j = 0; j < ws.length; j++) refstr += "," + (String) ws[j]; + if (refstr.length() > 0) refstr = 
refstr.substring(1); + prop.put("references", refstr); + } + + // add information about forward peers + prop.put("fwhop", ""); // hops (depth) of forwards that had been performed to construct this result + prop.put("fwsrc", ""); // peers that helped to construct this result + prop.put("fwrec", ""); // peers that would have helped to construct this result (recommendations) + + // log + log.logInfo("EXIT HASH SEARCH: " + hashes + " - " + + ((idx == null) ? "0" : (""+idx.size())) + " links, " + + ((System.currentTimeMillis() - timestamp) / 1000) + " seconds"); + if (idx != null) idx.close(); + return prop; + } catch (IOException e) { + return null; + } + } + + + public serverObjects action(String actionName, serverObjects actionInput) { + // perform an action. + + if (actionName.equals("urlcount")) { + serverObjects result = new serverObjects(); + result.put("urls","" + loadedURL.size()); + return result; + } + + // not a correct query + return null; + } + + + public String toString() { + // it is possible to use this method in the cgi pages. 
+ // actually it is used there for testing purpose + return "PROPS: " + super.toString() + "; QUEUE: " + processStack.toString(); + } + + /* + private void addScoreForked(kelondroMScoreCluster ref, String no, String[] words) { + String s; + if (words != null) for (int i = 0; i < words.length; i++) { + s = words[i].trim().toLowerCase(); + if (s.indexOf(".") >= 0) addScoreForked(ref, no, s.split("\\.")); + else if (s.indexOf(",") >= 0) addScoreForked(ref, no, s.split(",")); + else if (s.indexOf(":") >= 0) addScoreForked(ref, no, s.split(":")); + else if (s.indexOf("-") >= 0) addScoreForked(ref, no, s.split("-")); + else if (s.indexOf("/") >= 0) addScoreForked(ref, no, s.split("/")); + else if (s.indexOf('"') >= 0) addScoreForked(ref, no, s.split(new String(new byte[] {(char)'"'}))); + else addScoreFiltered(ref, no, s); + } + } + private void addScoreFiltered(kelondroMScoreCluster ref, String no, String word) { + if ((word.length() > 2) && + ("http_html_ftp_www_com_org_net_gov_edu_index_home_page_for_usage_the_and_".indexOf(word) < 0) && + (no.indexOf(word) < 0)) + ref.incScore(word); + } + */ + + // method for index deletion + public int removeAllUrlReferences(URL url, boolean fetchOnline) { + return removeAllUrlReferences(plasmaURL.urlHash(url), fetchOnline); + } + + public int removeAllUrlReferences(String urlhash, boolean fetchOnline) { + // find all the words in a specific resource and remove the url reference from every word index + // finally, delete the url entry + + // determine the url string + plasmaCrawlLURL.entry entry = loadedURL.getEntry(urlhash); + URL url = entry.url(); + if (url == null) return 0; + // get set of words + Set words = getWords(getText(getResource(url, fetchOnline))); + // delete all word references + int count = removeReferences(urlhash, words); + // finally delete the url entry itself + loadedURL.remove(urlhash); + return count; + } + + public int removeReferences(URL url, Set words) { + return removeReferences(plasmaURL.urlHash(url), 
words); + } + + public int removeReferences(String urlhash, Set words) { + // sequentially delete all word references + // returns number of deletions + Iterator it = words.iterator(); + String word; + String[] urlEntries = new String[] {urlhash}; + int count = 0; + while (it.hasNext()) { + word = (String) it.next(); + // delete the URL reference in this word index + try { + count += wordIndex.removeEntries(plasmaWordIndexEntry.word2hash(word), urlEntries, true); + } catch (IOException e) {} + } + return count; + } + + private byte[] getResource(URL url, boolean fetchOnline) { + byte[] resource = null; + // first load from cache + resource = getResourceFromCache(url); + // if not succedded then load from web + if ((fetchOnline) && (resource == null)) resource = getResourceFromWeb(url); + // the result + return resource; + } + + private byte[] getResourceFromCache(URL url) { + // load the url as resource from the cache + String path = htmlFilterContentScraper.urlNormalform(url).substring(6); + File cache = new File(getRootPath(), getConfig("proxyCache", "DATA/HTCACHE")); + File f = new File(cache, path); + try { + return serverFileUtils.read(f); + } catch (IOException e) { + return null; + } + } + + private byte[] getResourceFromWeb(URL url) { + // load the url as resource from the web + try { + return httpc.singleGET(url, 5000, null, null, remoteProxyHost, remoteProxyPort); + } catch (IOException e) { + return null; + } + } + + private static byte[] getText(byte[] resource) { + if (resource == null) return null; + // generate word list from resource + htmlFilterContentScraper scraper = new htmlFilterContentScraper(null); + OutputStream os = new htmlFilterOutputStream(null, scraper, null, false); + try { + serverFileUtils.write(resource, os); + return scraper.getText(); + } catch (IOException e) { + return null; + } + } + + public static Set getWords(byte[] text) { + if (text == null) return null; + ByteArrayInputStream buffer = new ByteArrayInputStream(text); + try 
{
+            plasmaCondenser condenser = new plasmaCondenser(buffer);
+            return condenser.getWords();
+        } catch (IOException e) {
+            return null;
+        }
+    }
+
+    /**
+     * Job object that distributes parts of the index to other peers.
+     * It adapts the number of index entries per run to the time the last
+     * transfer needed.
+     */
+    public class distributeIndex {
+        // distributes parts of the index to other peers
+        // stops as soon as an error occurs
+
+        int indexCount; // number of index entries to transfer in the next run
+        int peerCount;  // number of peers that must receive the entries
+        long pause;     // only written by setCounts(); not read inside this class
+        long maxTime;   // time limit per transfer, compared against elapsed milliseconds
+
+        public distributeIndex(int indexCount, long maxTimePerTransfer, int peerCount) {
+            this.indexCount = indexCount;
+            this.peerCount = peerCount;
+            this.maxTime = maxTimePerTransfer;
+        }
+
+        /**
+         * Performs one distribution run.
+         *
+         * @return true if entries were transferred; false if the
+         *         preconditions are not met or the transfer failed
+         */
+        public boolean job() {
+            // preconditions: a seed database with a non-virgin junior seed of our own
+            // and a minimum of loaded urls and indexed words
+            if ((yacyCore.seedDB == null) ||
+                (yacyCore.seedDB.mySeed == null) ||
+                (yacyCore.seedDB.mySeed.isVirgin()) ||
+                (loadedURL.size() < 10) ||
+                (wordIndex.sizeMin() < 100) ||
+                (!(yacyCore.seedDB.mySeed.isJunior()))) return false;
+
+            int transferred;
+            long starttime = System.currentTimeMillis();
+            try {
+                // the transfer (with local deletion) is only attempted when totalSize() is 0
+                // and the configuration allows distribution
+                if ((totalSize() == 0) &&
+                    (getConfig("allowDistributeIndex", "false").equals("true")) &&
+                    ((transferred = performTransferIndex(indexCount, peerCount, true)) > 0)) {
+                    // continue with the number that was really transferred; take one less
+                    // next time if this run was slower than maxTime per peer, one more otherwise
+                    indexCount = transferred;
+                    if ((System.currentTimeMillis() - starttime) > (maxTime * peerCount)) indexCount--; else indexCount++;
+                    return true;
+                } else {
+                    // make a long pause
+                    return false;
+                }
+            } catch (IllegalArgumentException ee) {
+                // this is a bug that occurs if a non-fixable data inconsistency in the table structure was detected
+                // make a long pause
+                log.logError("very bad data inconsistency: " + ee.getMessage());
+                //ee.printStackTrace();
+                return false;
+            }
+        }
+
+        /** Replaces the transfer parameters of this job. */
+        public void setCounts(int indexCount, int peerCount, long pause) {
+            this.indexCount = indexCount;
+            this.peerCount = peerCount;
+            this.pause = pause;
+        }
+
+    }
+
+    /**
+     * Selects index entries, starting at a time-derived hash, and sends them to
+     * up to peerCount peers that are enumerated by the DHT agent.
+     *
+     * @param indexCount number of index entries to select
+     * @param peerCount  number of peers that must accept the entries
+     * @param delete     if true the transferred entries are deleted locally
+     *                   after all peers accepted them
+     * @return the number of selected entries on success, -1 if no peer is
+     *         connected, nothing could be selected, too few peers accepted the
+     *         entries or the local deletion failed
+     */
+    public int performTransferIndex(int indexCount, int peerCount, boolean delete) {
+        if ((yacyCore.seedDB == null) || (yacyCore.seedDB.sizeConnected() == 0)) return -1;
+
+        // collect index
+        //String startPointHash = yacyCore.seedCache.mySeed.hash;
+        // the start point is a hash of the current time, so it differs from call to call
+        String startPointHash = serverCodings.encodeMD5B64("" + System.currentTimeMillis(), true).substring(0, yacySeedDB.commonHashLength);
+        plasmaWordIndexEntity[] indexEntities = selectTransferIndexes(startPointHash, indexCount);
+        if ((indexEntities == null) || (indexEntities.length == 0)) {
+            log.logDebug("No Index available for Index Transfer, hash start-point " + startPointHash);
+            return -1;
+        }
+        // count the indexes again, can be smaller than expected
+        indexCount = 0; for (int i = 0; i < indexEntities.length; i++) indexCount += indexEntities[i].size();
+
+        // find start point for DHT-selection
+        String keyhash = indexEntities[indexEntities.length - 1].wordHash();
+
+        // iterate over DHT-peers and send away the indexes
+        yacySeed seed;
+        int hc = 0; // number of peers that accepted the transfer
+        Enumeration e = yacyCore.dhtAgent.getAcceptRemoteIndexSeeds(keyhash);
+        String error;
+        String peerNames = "";
+        while ((e.hasMoreElements()) && (hc < peerCount)) {
+            seed = (yacySeed) e.nextElement();
+            if (seed != null) {
+                // a null return value of transferIndex is treated as success
+                error = yacyClient.transferIndex(seed, indexEntities, loadedURL);
+                if (error == null) {
+                    log.logInfo("Index Transfer of " + indexCount + " words [" + indexEntities[0].wordHash() + " .. " + indexEntities[indexEntities.length-1].wordHash() + "] to peer " + seed.getName() + ":" + seed.hash + " successfull");
+                    peerNames += ", " + seed.getName();
+                    hc++;
+                } else {
+                    log.logWarning("Index Transfer to peer " + seed.getName() + ":" + seed.hash + " failed:'" + error + "', disconnecting peer");
+                    yacyCore.peerActions.peerDeparture(seed);
+                }
+            }
+        }
+        if (peerNames.length() > 0) peerNames = peerNames.substring(2); // remove comma
+
+        // clean up and finish with deletion of indexes
+        // NOTE(review): entities are only closed inside deleteTransferIndexes; on the
+        // paths that skip the deletion they appear to stay open - confirm
+        if (hc >= peerCount) {
+            // success
+            if (delete) {
+                try {
+                    if (deleteTransferIndexes(indexEntities)) {
+                        log.logDebug("Deleted all transferred whole-word indexes locally");
+                        return indexCount;
+                    } else {
+                        log.logError("Deleted not all transferred whole-word indexes");
+                        return -1;
+                    }
+                } catch (IOException ee) {
+                    log.logError("Deletion of Indexes not possible:" + ee.getMessage());
+                    ee.printStackTrace();
+                    return -1;
+                }
+            }
+            return indexCount;
+        } else {
+            log.logError("Index distribution failed. Too less peers (" + hc + ") received the index, not deleted locally.");
+            return -1;
+        }
+    }
+
+    /**
+     * Collects word index entities, starting at the given word hash, until
+     * about count url entries are selected. Entities that fit completely are
+     * taken as they are; the last one may be a temporary partial copy.
+     *
+     * @param hash  word hash where the iteration starts
+     * @param count number of url entries to select
+     * @return the selected entities
+     */
+    private plasmaWordIndexEntity[] selectTransferIndexes(String hash, int count) {
+        Vector tmpEntities = new Vector();
+        String nexthash = "";
+        try {
+            Iterator wordHashIterator = wordIndex.hashIterator(hash, true, true, true);
+            plasmaWordIndexEntity indexEntity, tmpEntity;
+            Enumeration urlEnum;
+            plasmaWordIndexEntry indexEntry;
+            // stop when enough entries are selected, the iterator is exhausted
+            // or it delivers a null or blank hash
+            while ((count > 0) && (wordHashIterator.hasNext()) &&
+                   ((nexthash = (String) wordHashIterator.next()) != null) && (nexthash.trim().length() > 0)) {
+                indexEntity = wordIndex.getEntity(nexthash, true);
+                if (indexEntity.size() == 0) {
+                    // nothing to transfer for this word; remove the empty entity
+                    indexEntity.deleteComplete();
+                } else if (indexEntity.size() <= count) {
+                    // take the whole entity
+                    tmpEntities.add(indexEntity);
+                    log.logDebug("Selected Whole Index (" + indexEntity.size() + " urls) for word " + indexEntity.wordHash());
+                    count -= indexEntity.size();
+                } else {
+                    // make an on-the-fly entity and insert values
+                    tmpEntity = new plasmaWordIndexEntity(indexEntity.wordHash());
+                    urlEnum = indexEntity.elements(true);
+                    while ((urlEnum.hasMoreElements()) && (count > 0)) {
+                        indexEntry = (plasmaWordIndexEntry) urlEnum.nextElement();
+                        tmpEntity.addEntry(indexEntry);
+                        count--;
+                    }
+                    urlEnum = null;
+                    log.logDebug("Selected Partial Index (" + tmpEntity.size() + " from " + indexEntity.size() +" urls) for word " + tmpEntity.wordHash());
+                    tmpEntities.add(tmpEntity);
+                    indexEntity.close(); // important: is not closed elsewhere and cannot be deleted afterwards
+                    indexEntity = null;
+                }
+
+            }
+            // transfer to array
+            plasmaWordIndexEntity[] indexEntities = new plasmaWordIndexEntity[tmpEntities.size()];
+            for (int i = 0; i < tmpEntities.size(); i++) indexEntities[i] = (plasmaWordIndexEntity) tmpEntities.elementAt(i);
+            return indexEntities;
+        } catch (IOException e) {
+            log.logError("selectTransferIndexes IO-Error (hash=" + nexthash + "): " + e.getMessage());
+            e.printStackTrace();
+            return new 
plasmaWordIndexEntity[0]; + } + } + + private boolean deleteTransferIndexes(plasmaWordIndexEntity[] indexEntities) throws IOException { + String wordhash; + Enumeration urlEnum; + plasmaWordIndexEntry indexEntry; + plasmaWordIndexEntity indexEntity; + String[] urlHashes; + int sz; + boolean success = true; + for (int i = 0; i < indexEntities.length; i++) { + if (indexEntities[i].isTMPEntity()) { + // delete entries separately + int c = 0; + urlHashes = new String[indexEntities[i].size()]; + urlEnum = indexEntities[i].elements(true); + while (urlEnum.hasMoreElements()) { + indexEntry = (plasmaWordIndexEntry) urlEnum.nextElement(); + urlHashes[c++] = indexEntry.getUrlHash(); + } + wordIndex.removeEntries(indexEntities[i].wordHash(), urlHashes, true); + indexEntity = wordIndex.getEntity(indexEntities[i].wordHash(), true); + sz = indexEntity.size(); + indexEntity.close(); + log.logDebug("Deleted Partinal Index (" + c + " urls) for word " + indexEntities[i].wordHash() + "; " + sz + " entries left"); + // DEBUG: now try to delete the remaining index. If this works, this routine is fine + /* + if (wordIndex.getEntity(indexEntities[i].wordHash()).deleteComplete()) + System.out.println("DEBUG: trial delete of partial word index " + indexEntities[i].wordHash() + " SUCCESSFULL"); + else + System.out.println("DEBUG: trial delete of partial word index " + indexEntities[i].wordHash() + " FAILED"); + */ + // end debug + indexEntities[i].close(); + } else { + // delete complete file + if (!(indexEntities[i].deleteComplete())) { + indexEntities[i].close(); + // have another try... 
+ if (!(plasmaWordIndexEntity.wordHash2path(plasmaPath, indexEntities[i].wordHash()).delete())) { + success = false; + log.logError("Could not delete whole Index for word " + indexEntities[i].wordHash()); + } + } else { + indexEntities[i].close(); + } + } + } + return success; + } + + public int adminAuthenticated(httpHeader header) { + String adminAccountBase64MD5 = getConfig("adminAccountBase64MD5", ""); + if (adminAccountBase64MD5.length() == 0) return 2; // not necessary + String authorization = ((String) header.get("Authorization", "xxxxxx")).trim().substring(6); + if (authorization.length() == 0) return 1; // no authentication information given + if ((((String) header.get("CLIENTIP", "")).equals("localhost")) && (adminAccountBase64MD5.equals(authorization))) return 3; // soft-authenticated for localhost + if (adminAccountBase64MD5.equals(serverCodings.standardCoder.encodeMD5Hex(authorization))) return 4; // hard-authenticated, all ok + return 0; // wrong password + } +} diff --git a/source/de/anomic/plasma/plasmaURL.java b/source/de/anomic/plasma/plasmaURL.java new file mode 100644 index 000000000..0d2155821 --- /dev/null +++ b/source/de/anomic/plasma/plasmaURL.java @@ -0,0 +1,153 @@ +// plasmaURL.java +// ----------------------- +// part of YaCy +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2004 +// last major change: 09.08.2004 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. 
+// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. +// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. 
+ +package de.anomic.plasma; + +import java.io.*; +import java.net.*; +import java.util.*; +import java.text.*; +import de.anomic.kelondro.*; +import de.anomic.server.*; +import de.anomic.yacy.*; +import de.anomic.htmlFilter.*; + +public class plasmaURL { + + // day formatter for entry export + protected static SimpleDateFormat shortDayFormatter = new SimpleDateFormat("yyyyMMdd"); + + // statics for value lengths + public static final int urlHashLength = yacySeedDB.commonHashLength; // 12 + public static final int urlStringLength = 256;// not too short for links without parameters + public static final int urlDescrLength = 80; // The headline of a web page (meta-tag or

    ) + public static final int urlNameLength = 40; // the tag content between and + public static final int urlErrorLength = 20; // a reason description for unavailable urls + public static final int urlDateLength = 4; // any date, shortened + public static final int urlCopyCountLength = 2; // counter for numbers of copies of this index + public static final int urlFlagLength = 2; // any stuff + public static final int urlQualityLength = 3; // taken from heuristic + public static final int urlLanguageLength = 2; // taken from TLD suffix as quick-hack + public static final int urlDoctypeLength = 1; // taken from extension + public static final int urlSizeLength = 6; // the source size, from cache + public static final int urlWordCountLength = 3; // the number of words, from condenser + public static final int urlCrawlProfileHandleLength = 4; // name of the prefetch profile + public static final int urlCrawlDepthLength = 2; // prefetch depth, first is '0' + public static final int urlParentBranchesLength = 3; // number of anchors of the parent + public static final int urlForkFactorLength = 4; // sum of anchors of all ancestors + public static final int urlRetryLength = 2; // number of load retries + public static final int urlHostLength = 8; // the host as struncated name + public static final int urlHandleLength = 4; // a handle + + public static String dummyHash; + static { + dummyHash = ""; + for (int i = 0; i < urlHashLength; i++) dummyHash += "-"; + } + + // the class object + protected kelondroTree urlHashCache; + private HashSet existsIndex; + + public plasmaURL() throws IOException { + urlHashCache = null; + existsIndex = new HashSet(); + } + + public int size() { + return urlHashCache.size(); + } + + public void close() throws IOException { + urlHashCache.close(); + } + + public boolean exists(String urlHash) { + if (existsIndex.contains(urlHash)) return true; + try { + if (urlHashCache.get(urlHash.getBytes()) != null) { + existsIndex.add(urlHash); + 
return true; + } else { + return false; + } + } catch (IOException e) { + return false; + } + } + + public void remove(String urlHash) { + try { + existsIndex.remove(urlHash); + urlHashCache.remove(urlHash.getBytes()); + } catch (IOException e) {} + } + + public static String urlHash(URL url) { + if (url == null) return null; + String hash = serverCodings.encodeMD5B64(htmlFilterContentScraper.urlNormalform(url), true).substring(0, urlHashLength); + return hash; + } + + public static String urlHash(String url) { + if ((url == null) || (url.length() < 10)) return null; + String hash = serverCodings.encodeMD5B64(htmlFilterContentScraper.urlNormalform(url), true).substring(0, urlHashLength); + return hash; + } + + public Iterator urlHashes(String urlHash, boolean up) throws IOException { + return urlHashCache.rows(up, false, urlHash.getBytes()); + } + + protected static Properties s2p(String s) { + Properties p = new Properties(); + int pos; + StringTokenizer st = new StringTokenizer(s, ","); + String token; + while (st.hasMoreTokens()) { + token = st.nextToken().trim(); + pos = token.indexOf("="); + if (pos > 0) p.setProperty(token.substring(0, pos).trim(), token.substring(pos + 1).trim()); + } + return p; + } + +} diff --git a/source/de/anomic/plasma/plasmaWordConnotation.java b/source/de/anomic/plasma/plasmaWordConnotation.java new file mode 100644 index 000000000..d2b15ce96 --- /dev/null +++ b/source/de/anomic/plasma/plasmaWordConnotation.java @@ -0,0 +1,97 @@ +// plasmaWordCon.java +// ----------------------- +// part of The Kelondro Database +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2004 +// last major change: 22.09.2004 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. 
+// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. +// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. 
+ +/* + */ + + +package de.anomic.plasma; + +import de.anomic.kelondro.*; +import de.anomic.server.*; +import java.io.*; +import java.util.*; + +public class plasmaWordConnotation { + + private static final int wordlength = 32; + private static final int countlength = 4; + private static final int nodesize = 4048; + private kelondroDynTree refDB; + + public plasmaWordConnotation(File refDBfile, int bufferkb) throws IOException { + if (refDBfile.exists()) + refDB = new kelondroDynTree(refDBfile, bufferkb * 0x400); + else + refDB = new kelondroDynTree(refDBfile, bufferkb * 0x400, wordlength, nodesize, new int[] {wordlength, countlength}); + + } + + private void addSingleRef(String word, String reference) throws IOException { + //word = word.toLowerCase(); + //reference = reference.toLowerCase(); + byte[][] record = refDB.get(word, reference.getBytes()); + long c; + if (record == null) c = 0; else c = serverCodings.enhancedCoder.decodeBase64Long(new String(record[1])); + record[1] = serverCodings.enhancedCoder.encodeBase64Long(c++, countlength).getBytes(); + refDB.put(word, record); + } + + public void addSentence(String[] words) throws IOException { + for (int i = 0; i < words.length; i++) words[i] = words[i].toLowerCase(); + for (int i = 0; i < words.length; i++) { + for (int j = 0; j < words.length; j++) { + if ((i != j) && (words[i].length() > 2) && (words[j].length() > 2)) + addSingleRef(words[i], words[j]); + } + } + } + + public void addSentence(String sentence) throws IOException { + addSentence(sentence.split(" ")); + } + + public String[] getConnotation(String word, int count) { + TreeMap map = new TreeMap(); + return null; + } + +} diff --git a/source/de/anomic/plasma/plasmaWordIndex.java b/source/de/anomic/plasma/plasmaWordIndex.java new file mode 100644 index 000000000..d5ee6fce6 --- /dev/null +++ b/source/de/anomic/plasma/plasmaWordIndex.java @@ -0,0 +1,287 @@ +// plasmaWordIndex.java +// ----------------------- +// part of YACY +// (C) by Michael Peter 
Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2005 +// last major change: 02.02.2005 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. +// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. 
A re-distribution must contain
+// the intact and unchanged copyright notice.
+// Contributions and changes to the program code must be marked as such.
+
+// compile with
+// javac -classpath classes -sourcepath source -d classes -g source/de/anomic/plasma/*.java
+
+
+package de.anomic.plasma;
+
+import java.io.*;
+import java.util.*;
+import de.anomic.yacy.*;
+import de.anomic.kelondro.*;
+
+/**
+ * Access point to the word index. The entry operations are delegated to a
+ * plasmaWordIndexRAMCache; in addition this class offers iterators over the
+ * word hashes that are known to the index files and to the cache.
+ */
+public class plasmaWordIndex {
+
+    File databaseRoot;                 // root folder; the index files are listed below its "WORDS" sub-folder
+    plasmaWordIndexRAMCache ramCache;  // all entry operations are delegated to this cache
+
+    /** Creates the RAM cache for the given root folder and starts it. */
+    public plasmaWordIndex(File databaseRoot, int bufferkb) throws IOException {
+        this.databaseRoot = databaseRoot;
+        this.ramCache = new plasmaWordIndexRAMCache(databaseRoot, 2000, bufferkb);
+        ramCache.start();
+    }
+
+    /** Delegates to ramCache.addEntryToIndexMem and returns its result. */
+    public synchronized int addEntry(String wordHash, plasmaWordIndexEntry entry) throws IOException {
+        return ramCache.addEntryToIndexMem(wordHash, entry);
+    }
+
+    /** Delegates to ramCache.getIndexMem and returns its result. */
+    public synchronized plasmaWordIndexEntity getEntity(String wordHash, boolean deleteIfEmpty) throws IOException {
+        return ramCache.getIndexMem(wordHash, deleteIfEmpty);
+    }
+
+    /** Delegates to ramCache.sizeMin and returns its result. */
+    public synchronized int sizeMin() {
+        return ramCache.sizeMin();
+    }
+
+    /** Delegates to ramCache.removeEntriesMem and returns its result. */
+    public synchronized int removeEntries(String wordHash, String[] urlHashes, boolean deleteComplete) throws IOException {
+        return ramCache.removeEntriesMem(wordHash, urlHashes, deleteComplete);
+    }
+
+    /** Delegates to ramCache.terminate; unlike the other delegates this method is not synchronized. */
+    public void terminate(int waitingBoundSeconds) {
+        ramCache.terminate(waitingBoundSeconds);
+    }
+
+    /** Delegates to ramCache.deleteComplete. */
+    public synchronized void deleteComplete(String wordHash) throws IOException {
+        ramCache.deleteComplete(wordHash);
+    }
+
+    /**
+     * Iterates over the word hashes of index files and cache, starting at startHash.
+     *
+     * @param rot if true and nothing is found from startHash on, the iteration
+     *            is started again without a start hash
+     */
+    public synchronized Iterator hashIterator(String startHash, boolean up, boolean rot, boolean deleteEmpty) {
+        Iterator i = new iterateCombined(startHash, up, deleteEmpty);
+        if ((rot) && (!(i.hasNext())) && (startHash != null)) {
+            return new iterateCombined(null, up, deleteEmpty);
+        } else {
+            return i;
+        }
+    }
+
+    /**
+     * Merges the word hashes of the file iterator and of the cache iterator.
+     * One element of each source is prefetched; next() hands out the element
+     * that the comparator orders first, and only once if both sources deliver
+     * the same hash.
+     */
+    public class iterateCombined implements Iterator {
+
+        Comparator comp;
+        Iterator filei;       // word hashes from the index files
+        Iterator cachei;      // cache rows; element 0 of a row holds the word hash bytes
+        String nextf, nextc;  // prefetched next hash of each source, null if the source is exhausted
+
+        public iterateCombined(String startHash, boolean up, boolean deleteEmpty) {
+            this.comp = kelondroMSetTools.fastStringComparator(up);
+            filei = fileIterator(startHash, up, deleteEmpty);
+            try {
+                cachei = ramCache.wordHashesMem(startHash, 100);
+            } catch (IOException e) {
+                // the cache cannot be read: continue with the file hashes only
+                cachei = new HashSet().iterator();
+            }
+            nextFile();
+            nextCache();
+        }
+
+        // prefetch the next hash from the file iterator
+        private void nextFile() {
+            if (filei.hasNext()) nextf = (String) filei.next(); else nextf = null;
+        }
+        // prefetch the next hash from the cache iterator
+        private void nextCache() {
+            if (cachei.hasNext()) nextc = new String(((byte[][]) cachei.next())[0]); else nextc = null;
+        }
+
+        public boolean hasNext() {
+            return (nextf != null) || (nextc != null);
+        }
+
+        // returns null (instead of throwing) when both sources are exhausted
+        public Object next() {
+            String s;
+            if (nextc == null) {
+                s = nextf;
+                //System.out.println("Iterate Hash: take " + s + " from file, cache is empty");
+                nextFile();
+                return s;}
+            if (nextf == null) {
+                s = nextc;
+                //System.out.println("Iterate Hash: take " + s + " from cache, file is empty");
+                nextCache();
+                return s;}
+            // compare the strings
+            int c = comp.compare(nextf, nextc);
+            if (c == 0) {
+                // same hash in both sources: deliver it once and advance both
+                s = nextf;
+                //System.out.println("Iterate Hash: take " + s + " from file&cache");
+                nextFile();
+                nextCache();
+                return s;
+            } else if (c < 0) {
+                s = nextf;
+                //System.out.println("Iterate Hash: take " + s + " from file");
+                nextFile();
+                return s;
+            } else {
+                s = nextc;
+                //System.out.println("Iterate Hash: take " + s + " from cache");
+                nextCache();
+                return s;
+            }
+        }
+
+        // removal is not supported; the call is silently ignored
+        public void remove() {
+
+        }
+    }
+
+    /** Creates an iterator over the word hashes of the index files. */
+    public Iterator fileIterator(String startHash, boolean up, boolean deleteEmpty) {
+        return new iterateFiles(startHash, up, deleteEmpty);
+    }
+
+    public class iterateFiles implements Iterator {
+
+        private ArrayList hierarchy; // contains TreeSet elements, each TreeSet contains File Entries
+        private Comparator comp;     // for string-compare
+        private String buffer;       // the prefetch-buffer
+        private boolean delete;
+
+
+        public iterateFiles(String startHash, boolean up, boolean deleteEmpty) {
+
this.hierarchy = new ArrayList(); + this.comp = kelondroMSetTools.fastStringComparator(up); + this.delete = deleteEmpty; + + // the we initially fill the hierarchy with the content of the root folder + String path = "WORDS"; + TreeSet list = list(new File(databaseRoot, path)); + + // if we have a start hash then we find the appropriate subdirectory to start + if ((startHash != null) && (startHash.length() == yacySeedDB.commonHashLength)) { + delete(startHash.substring(0, 1), list); + if (list.size() > 0) { + hierarchy.add(list); + String[] paths = new String[]{startHash.substring(0, 1), startHash.substring(1, 2), startHash.substring(2, 4), startHash.substring(4, 6)}; + int pathc = 0; + while ((pathc < paths.length) && + (comp.compare((String) list.first(), paths[pathc]) == 0)) { + path = path + "/" + paths[pathc]; + list = list(new File(databaseRoot, path)); + delete(paths[pathc], list); + if (list.size() == 0) break; + hierarchy.add(list); + pathc++; + } + } + while (((buffer = next0()) != null) && (comp.compare(buffer, startHash) < 0)) {}; + } else { + hierarchy.add(list); + buffer = next0(); + } + } + + private synchronized void delete(String pattern, TreeSet names) { + String name; + while ((names.size() > 0) && (comp.compare((new File(name = (String) names.first())).getName(), pattern) < 0)) names.remove(name); + } + + private TreeSet list(File path) { + //System.out.println("PATH: " + path); + TreeSet t = new TreeSet(comp); + String[] l = path.list(); + if (l != null) for (int i = 0; i < l.length; i++) t.add(path + "/" + l[i]); + //else System.out.println("DEBUG: wrong path " + path); + //System.out.println(t); + return t; + } + + private synchronized String next0() { + // the object is a File pointing to the corresponding file + File f; + String n; + TreeSet t; + do { + t = null; + while ((t == null) && (hierarchy.size() > 0)) { + t = (TreeSet) hierarchy.get(hierarchy.size() - 1); + if (t.size() == 0) { + hierarchy.remove(hierarchy.size() - 1); // we step up 
one hierarchy + t = null; + } + } + if ((hierarchy.size() == 0) || (t.size() == 0)) return null; // this is the end + // fetch value + f = new File(n = (String) t.first()); + t.remove(n); + // if the value represents another folder, we step into the next hierarchy + if (f.isDirectory()) { + t = list(f); + if (t.size() == 0) { + if (delete) f.delete(); + } else { + hierarchy.add(t); + } + f = null; + } + } while (f == null); + // thats it + if ((f == null) || ((n = f.getName()) == null) || (n.length() < yacySeedDB.commonHashLength)) { + return null; + } else { + return n.substring(0, yacySeedDB.commonHashLength); + } + } + + public boolean hasNext() { + return buffer != null; + } + + public Object next() { + String r = buffer; + while (((buffer = next0()) != null) && (comp.compare(buffer, r) < 0)) {}; + return r; + } + + public void remove() { + + } + } + + public static void main(String[] args) { + //System.out.println(kelondroMSetTools.fastStringComparator(true).compare("RwGeoUdyDQ0Y", "rwGeoUdyDQ0Y")); + try { + plasmaWordIndex index = new plasmaWordIndex(new File("D:\\dev\\proxy\\DATA\\PLASMADB"), 555); + Iterator i = index.hashIterator("5A8yhZMh_Kmv", true, true, true); + while (i.hasNext()) { + System.out.println("File: " + (String) i.next()); + } + } catch (IOException e) {} + } +} diff --git a/source/de/anomic/plasma/plasmaWordIndexEntity.java b/source/de/anomic/plasma/plasmaWordIndexEntity.java new file mode 100644 index 000000000..9d5e96c53 --- /dev/null +++ b/source/de/anomic/plasma/plasmaWordIndexEntity.java @@ -0,0 +1,245 @@ +// plasmaIndex.java +// ----------------------- +// part of YACY +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2004 +// last major change: 21.01.2004 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the 
License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. +// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. 
+ +package de.anomic.plasma; + +import java.io.*; +import java.util.*; +import de.anomic.kelondro.*; + +public class plasmaWordIndexEntity { + + private String theWordHash; + private kelondroTree theIndex; + private TreeMap theTmpMap; + private File theLocation; + private boolean delete; + + public plasmaWordIndexEntity(File databaseRoot, String wordHash, boolean deleteIfEmpty) throws IOException { + theWordHash = wordHash; + theIndex = indexFile(databaseRoot, wordHash); + theTmpMap = null; + delete = deleteIfEmpty; + } + + public static boolean removePlasmaIndex(File databaseRoot, String wordHash) throws IOException { + File f = wordHash2path(databaseRoot, wordHash); + boolean success = true; + if (f.exists()) success = f.delete(); + // clean up directory structure + f = f.getParentFile(); + while ((f.isDirectory()) && (f.list().length == 0)) { + if (!(f.delete())) break; + f = f.getParentFile(); + } + return success; + } + + private kelondroTree indexFile(File databaseRoot, String wordHash) throws IOException { + theLocation = wordHash2path(databaseRoot, wordHash); + File fp = theLocation.getParentFile(); + if (fp != null) fp.mkdirs(); + kelondroTree kt; + if (theLocation.exists()) { + // open existing index file + kt = new kelondroTree(theLocation, 0x400); + } else { + // create new index file + kt = new kelondroTree(theLocation, 0x400, plasmaURL.urlHashLength, plasmaWordIndexEntry.attrSpaceShort); + } + return kt; // everyone who get this should close it when finished! + } + + public static File wordHash2path(File databaseRoot, String hash) { + // creates a path that constructs hashing on a file system + return new File (databaseRoot, "WORDS/" + + hash.substring(0,1) + "/" + hash.substring(1,2) + "/" + hash.substring(2,4) + "/" + + hash.substring(4,6) + "/" + hash + ".db"); + } + + public plasmaWordIndexEntity(String wordHash) { + // this creates a nameless temporary index. 
It is needed for combined search + // and used to hold the intersection of two indexes + // if the nameless intity is suppose to hold indexes for a specific word, + // it can be given here; othervise set wordhash to null + theWordHash = wordHash; + theIndex = null; + theLocation = null; + theTmpMap = new TreeMap(); + } + + public boolean isTMPEntity() { + return theTmpMap != null; + } + + public String wordHash() { + return theWordHash; + } + + public int size() { + if (theTmpMap == null) { + int size = theIndex.size(); + if ((size == 0) && (delete)) { + try { + deleteComplete(); + } catch (IOException e) { + delete = false; + } + return 0; + } else { + return size; + } + } else { + return theTmpMap.size(); + } + } + + public void close() throws IOException { + if (theTmpMap == null) { + theIndex.close(); + theIndex = null; + } else theTmpMap = null; + } + + public boolean contains(String urlhash) throws IOException { + if (theTmpMap == null) return (theIndex.get(urlhash.getBytes()) != null); else return (theTmpMap.containsKey(urlhash)); + } + + public boolean contains(plasmaWordIndexEntry entry) throws IOException { + if (theTmpMap == null) return (theIndex.get(entry.getUrlHash().getBytes()) != null); else return (theTmpMap.containsKey(entry.getUrlHash())); + } + + public void addEntry(plasmaWordIndexEntry entry) throws IOException { + if (theTmpMap == null) { + theIndex.put(entry.getUrlHash().getBytes(), entry.toEncodedForm(false).getBytes()); + //System.out.println(theIndex.toString()); // debug + } else { + theTmpMap.put(entry.getUrlHash(), entry); + } + } + + public boolean deleteComplete() throws IOException { + if (theTmpMap == null) { + theIndex.close(); + // remove file + boolean success = theLocation.delete(); + // and also the paren directory if that is empty + if (success) { + File f = theLocation.getParentFile(); + while ((f.isDirectory()) && (f.list().length == 0)) { + if (!(f.delete())) break; + f = f.getParentFile(); + } + } + // reset all values + 
theIndex = null; + theLocation = null; + // switch to temporary more + theTmpMap = new TreeMap(); + //theIndex.removeAll(); + return success; + } else { + theTmpMap = new TreeMap(); + return true; + } + } + + public boolean removeEntry(String urlHash, boolean deleteComplete) throws IOException { + // returns true if there was an entry before, false if the key did not exist + // if after the removal the file is empty, then the file can be deleted if + // the flag deleteComplete is set. + if (theTmpMap == null) { + boolean wasEntry = (theIndex.remove(urlHash.getBytes()) != null); + if ((theIndex.size() == 0) && (deleteComplete)) deleteComplete(); + return wasEntry; + } else { + return (theTmpMap.remove(urlHash) != null); + } + } + + public Enumeration elements(boolean up) { + // returns an enumeration of plasmaWordIndexEntry objects + if (theTmpMap == null) return new dbenum(up); else return new tmpenum(up); + } + + public class dbenum implements Enumeration { + Iterator i; + public dbenum(boolean up) { + i = theIndex.nodeIterator(up, false); + } + public boolean hasMoreElements() { + return i.hasNext(); + } + public Object nextElement() { + try { + byte[][] n = ((kelondroRecords.Node) i.next()).getValues(); + return new plasmaWordIndexEntry(new String(n[0]), new String(n[1])); + } catch (IOException e) { + throw new RuntimeException("dbenum: " + e.getMessage()); + } + } + } + public class tmpenum implements Enumeration { + TreeMap searchTree; + boolean up; + public tmpenum(boolean up) { + this.up = up; + searchTree = (TreeMap) theTmpMap.clone(); // a shallow clone that is destroyed during search + } + public boolean hasMoreElements() { + return searchTree.size() > 0; + } + public Object nextElement() { + Object urlHash = (up) ? 
searchTree.firstKey() : searchTree.lastKey(); + plasmaWordIndexEntry entry = (plasmaWordIndexEntry) searchTree.remove(urlHash); + return entry; + } + } + + public String toString() { + if (theTmpMap == null) return "DB:" + theIndex.toString(); + else if (theTmpMap != null) return "MAP:" + theTmpMap.size() + " RECORDS IN " + theTmpMap.toString(); + else return "EMPTY"; + } + + + +} diff --git a/source/de/anomic/plasma/plasmaWordIndexEntry.java b/source/de/anomic/plasma/plasmaWordIndexEntry.java new file mode 100644 index 000000000..ff13479f5 --- /dev/null +++ b/source/de/anomic/plasma/plasmaWordIndexEntry.java @@ -0,0 +1,322 @@ +// plasmaIndexEntry.java +// ----------------------- +// part of YaCy +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2004 +// last major change: 01.06.2004 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. 
The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. +// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. + +/* + This class defines the structures of an index entry +*/ + +package de.anomic.plasma; + +import java.net.*; +import java.util.*; +import de.anomic.yacy.*; +import de.anomic.server.*; + +public class plasmaWordIndexEntry { + + // an wordEntry can be filled in either of two ways: + // by the discrete values of the entry + // or by the encoded entry-string + + // the size of a word hash + public static final int wordHashLength = yacySeedDB.commonHashLength; // 12 + + // the size of the index entry attributes + public static final int attrSpaceShort = 12; + public static final int attrSpaceLong = 18; + + // the associated hash + private String urlHash; + + // discrete values + private int count; // words in file + private int posintext; // first position of the word in text as number of word; 0=unknown or irrelevant position + private int posinphrase; // position within a phrase of the word + private int posofphrase; // position of the phrase in the text as count of sentences; 0=unknown; 1=path; 2=keywords; 3=headline; >4: in text + private int age; // calculated by using last-modified + private int 
quality; // result of a heuristic on the source file + private String language; // essentially the country code (the TLD as heuristic), two letters lowercase only + private char doctype; // type of source + private char localflag; // indicates if the index was created locally + + // some doctypes: + public static final char DT_PDFPS = 'p'; + public static final char DT_TEXT = 't'; + public static final char DT_HTML = 'h'; + public static final char DT_DOC = 'd'; + public static final char DT_IMAGE = 'i'; + public static final char DT_MOVIE = 'm'; + public static final char DT_FLASH = 'f'; + public static final char DT_SHARE = 's'; + public static final char DT_UNKNOWN = 'u'; + + // local flag attributes + public static final char LT_LOCAL = 'L'; + public static final char LT_GLOBAL = 'G'; + + // encoded discrete values + private String code; + + // create a word hash + public static String word2hash(String word) { + return serverCodings.encodeMD5B64(word.toLowerCase(), true).substring(0, wordHashLength); + } + + // doctype calculation + public static char docType(String path) { + char doctype = DT_UNKNOWN; + if (path.endsWith(".gif")) doctype = DT_IMAGE; + if (path.endsWith(".jpg")) doctype = DT_IMAGE; + if (path.endsWith(".jpeg")) doctype = DT_IMAGE; + if (path.endsWith(".png")) doctype = DT_IMAGE; + if (path.endsWith(".html")) doctype = DT_HTML; + if (path.endsWith(".txt")) doctype = DT_TEXT; + if (path.endsWith(".doc")) doctype = DT_DOC; + if (path.endsWith(".rtf")) doctype = DT_DOC; + if (path.endsWith(".pdf")) doctype = DT_PDFPS; + if (path.endsWith(".ps")) doctype = DT_PDFPS; + if (path.endsWith(".avi")) doctype = DT_MOVIE; + if (path.endsWith(".mov")) doctype = DT_MOVIE; + if (path.endsWith(".qt")) doctype = DT_MOVIE; + if (path.endsWith(".mpg")) doctype = DT_MOVIE; + if (path.endsWith(".md5")) doctype = DT_SHARE; + if (path.endsWith(".mpeg")) doctype = DT_MOVIE; + if (path.endsWith(".asf")) doctype = DT_FLASH; + return doctype; + } + + // language 
calculation + public static String language(URL url) { + String host = url.getHost(); + int pos = host.lastIndexOf("."); + String language = "uk"; + if ((pos > 0) && (host.length() - pos == 3)) language = host.substring(pos + 1).toLowerCase(); + return language; + } + + // the class instantiation can only be done by a plasmaStore method + // therefore they are all public + public plasmaWordIndexEntry(String urlHash, int count, int posintext, int posinphrase, int posofphraseint, int virtualage, int quality, String language, char doctype, boolean local) { + + // ** hier fehlt noch als Attribut: , damit 'nearby' getrackt werden kann ** + + if ((language == null) || (language.length() != plasmaCrawlLURL.urlLanguageLength)) language = "uk"; + this.urlHash = urlHash; + this.count = count; + this.posintext = posintext; + this.posinphrase = posinphrase; + this.posofphrase = posofphrase; + this.age = virtualage; + this.quality = quality; + this.language = language; + this.doctype = doctype; + this.code = null; + this.localflag = (local) ? 
LT_LOCAL : LT_GLOBAL; + } + + public plasmaWordIndexEntry(String urlHash, String code) { + // the code is not parsed but used later on + this.urlHash = urlHash; + this.count = 0; + this.posintext = 0; + this.posinphrase = 0; + this.posofphrase = 0; + this.age = 0; + this.quality = 0; + this.language = "uk"; + this.doctype = 'u'; + this.code = code; + this.localflag = LT_LOCAL; + } + + public plasmaWordIndexEntry(String external) { + // parse external form + String[] elts = external.substring(1, external.length() - 1).split(","); + Properties pr = new Properties(); + int p; + for (int i = 0; i < elts.length; i++) { + pr.put(elts[i].substring(0, (p = elts[i].indexOf("="))), elts[i].substring(p + 1)); + } + // set values + this.urlHash = pr.getProperty("h", ""); + this.count = (int) serverCodings.enhancedCoder.decodeBase64Long(pr.getProperty("c", "A")); + this.posintext = (int) serverCodings.enhancedCoder.decodeBase64Long(pr.getProperty("t", "__")); + this.posinphrase = (int) serverCodings.enhancedCoder.decodeBase64Long(pr.getProperty("r", "__")); + this.posofphrase = (int) serverCodings.enhancedCoder.decodeBase64Long(pr.getProperty("o", "__")); + this.age = (int) serverCodings.enhancedCoder.decodeBase64Long(pr.getProperty("a", "A")); + this.quality = (int) serverCodings.enhancedCoder.decodeBase64Long(pr.getProperty("q", "__")); + this.language = pr.getProperty("l", "uk"); + this.doctype = pr.getProperty("d", "u").charAt(0); + this.localflag = pr.getProperty("f", ""+LT_LOCAL).charAt(0); + // clear code + this.code = null; + } + + private String b64save(long x, int l) { + try { + return serverCodings.enhancedCoder.encodeBase64Long(x, l); + } catch (Exception e) { + // if x does not fit into l + return "________".substring(0, l); + } + } + + public String toEncodedForm(boolean longAttr) { + if (code == null) { + String shortAttr = + b64save(quality, plasmaCrawlLURL.urlQualityLength) + + b64save(age, 3) + + b64save(count, 2) + + language + + doctype + + localflag; // 3 + 
3 + 2 + 2 + 1 + 1 = 12 bytes + if (longAttr) + return + shortAttr + + b64save(posintext, 2) + + b64save(posinphrase, 2) + + b64save(posofphrase, 2); + // 12 + 3 + 2 + 2 + 1 + 1 = 12 bytes + else + return shortAttr; + + } else { + return code; + } + } + + public String toExternalForm() { + if (code == null) { + return "{" + + "h=" + urlHash + + ",q=" + b64save(quality, plasmaCrawlLURL.urlQualityLength) + + ",a=" + b64save(age, 3) + + ",c=" + b64save(count, 2) + + ",l=" + language + + ",d=" + doctype + + ",f=" + localflag + + ",t=" + b64save(posintext, 2) + + ",r=" + b64save(posinphrase, 2) + + ",o=" + b64save(posofphrase, 2) + + "}"; + } else { + return "{" + + "h=" + urlHash + + ",q=" + code.substring(0, 3) + + ",a=" + code.substring(3, 6) + + ",c=" + code.substring(6, 8) + + ",l=" + code.substring(8, 10) + + ",d=" + code.charAt(10) + + ",f=" + code.charAt(11) + + ((code.length() > 12) ? ( + ",t=" + code.substring(12, 14) + + ",r=" + code.substring(14, 16) + + ",o=" + code.substring(16, 18) + ) : "") + + "}"; + } + } + + public String getUrlHash() { + return urlHash; + } + + public int getQuality() { + if (code == null) return quality; + else return (int) serverCodings.enhancedCoder.decodeBase64Long(code.substring(0, 3)); + } + + public int getVirtualAge() { + if (code == null) return age; + else return (int) serverCodings.enhancedCoder.decodeBase64Long(code.substring(3, 6)); + } + + public int getCount() { + if (code == null) return count; + else return (int) serverCodings.enhancedCoder.decodeBase64Long(code.substring(6, 8)); + } + + public int posintext() { + if (code == null) return posintext; + if (code.length() >= 14) + return (int) serverCodings.enhancedCoder.decodeBase64Long(code.substring(12, 14)); + else + return 0; + } + + public int posinphrase() { + if (code == null) return posinphrase; + if (code.length() >= 15) + return (int) serverCodings.enhancedCoder.decodeBase64Long(code.substring(14, 16)); + else + return 0; + } + + public int posofphrase() { + 
if (code == null) return posofphrase; + if (code.length() >= 16) + return (int) serverCodings.enhancedCoder.decodeBase64Long(code.substring(16, 18)); + else + return 0; + } + + public String getLanguage() { + if (code == null) return language; + else return code.substring(8, 10); + } + + public char getType() { + if (code == null) return doctype; + else return code.charAt(10); + } + + public boolean isLocal() { + if (code == null) return localflag == LT_LOCAL; + else return code.charAt(11) == LT_LOCAL; + } + + public static void main(String[] args) { + // outputs the word hash to a given word + if (args.length != 1) System.exit(0); + System.out.println("WORDHASH: " + word2hash(args[0])); + } + +} diff --git a/source/de/anomic/plasma/plasmaWordIndexFileCache.java b/source/de/anomic/plasma/plasmaWordIndexFileCache.java new file mode 100644 index 000000000..c90e43bfe --- /dev/null +++ b/source/de/anomic/plasma/plasmaWordIndexFileCache.java @@ -0,0 +1,239 @@ +// plasmaWordIndexFileCache.java +// ----------------------------- +// part of YACY +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2004 +// last major change: 22.01.2004 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. 
+// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. +// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. + + +/* + The plasmaIndexCache manages a database table with a list of + indexEntries in it. This is done in a completely different fashion + as organized by the plasmaIndex tables. The entries are not + sorted and just stored in a buffer. + Whenever during a seach an index is retrieved, first it's buffer + is flushed into the corresponding index table, so that it can be + sorted into the remaining index entry elements. 
+ The cache database consist of + - the word hash as primary key + - one column with a one-byte counter + - a number of more columns with indexEntry elements +*/ + + +// compile with +// javac -classpath classes -sourcepath source -d classes -g source/de/anomic/plasma/*.java + +package de.anomic.plasma; + +import java.io.*; +import java.util.*; +import de.anomic.kelondro.*; + +public class plasmaWordIndexFileCache { + + private static final String indexCacheFileName = "indexCache.db"; + private static final int buffers = 50; // number of buffered entries per word + + // class variables + private File databaseRoot; + private kelondroTree indexCache; + + public plasmaWordIndexFileCache(File databaseRoot, int bufferkb) throws IOException { + this.databaseRoot = databaseRoot; + File indexCacheFile = new File(databaseRoot, indexCacheFileName); + if (indexCacheFile.exists()) { + // simply open the file + indexCache = new kelondroTree(indexCacheFile, bufferkb * 0x400); + } else { + // create a new file + int[] columns = new int[buffers + 2]; + columns[0] = plasmaWordIndexEntry.wordHashLength; + columns[1] = 1; + for (int i = 0; i < buffers; i++) columns[i + 2] = plasmaCrawlLURL.urlHashLength + plasmaWordIndexEntry.attrSpaceShort; + indexCache = new kelondroTree(indexCacheFile, bufferkb * 0x400, columns); + } + } + + + protected void close() throws IOException { + indexCache.close(); + indexCache = null; + } + + private byte[][] getCache(String wordHash) throws IOException { + // read one line from the cache; if none exists: construct one + byte[][] row; + try { + row = indexCache.get(wordHash.getBytes()); + } catch (Exception e) { + // we had some negativeSeekOffsetExceptions here; in that case the indexCache is corrupt + System.out.println("Error in plasmaWordINdexFileCache.getCache: index for hash " + wordHash + " is corrupt:" + e.toString()); + e.printStackTrace(); + row = null; + } + if (row == null) { + row = new byte[indexCache.columns()][]; + row[0] = 
wordHash.getBytes(); + row[1] = new byte[1]; + row[1][0] = (byte) 0; + } + return row; + } + + + protected Iterator wordHashes(String wordHash, boolean up) throws IOException { + return indexCache.rows(up, false, (wordHash == null) ? null : wordHash.getBytes()); + } + + protected plasmaWordIndexEntity getIndex(String wordHash, boolean deleteIfEmpty) throws IOException { + // first flush the index cache, if there is any for that word hash + byte[][] row = indexCache.get(wordHash.getBytes()); + if (row != null) { + int entries = (int) row[1][0]; + if (entries != 0) flushCache(row, null); // if the cache has entries, flush it + indexCache.remove(wordHash.getBytes()); // delete the cache index row; suppose to be empty now + } + // then return the index from the uncached file (with new entries) + return new plasmaWordIndexEntity(databaseRoot, wordHash, deleteIfEmpty); + } + + protected void addEntriesToIndex(String wordHash, Vector /* of plasmaIndexEntry */ newEntries) throws IOException { + //System.out.println("* adding cached word index: " + wordHash + "=" + word + ":" + entry.toEncodedForm()); // debug + // fetch the index cache + if (newEntries.size() == 0) return; + byte[][] row = getCache(wordHash); + int entries = (int) row[1][0]; + // check if the index cache is full + if (entries + 2 + newEntries.size() >= indexCache.columns()) { + flushCache(row, newEntries); // and put in new values + entries = 0; + row[1][0] = (byte) 0; // set number of entries to zero + } else { + // put in the new values + String newEntry; + for (int i = 0; i < newEntries.size(); i++) { + newEntry = ((plasmaWordIndexEntry) newEntries.elementAt(i)).getUrlHash() + ((plasmaWordIndexEntry) newEntries.elementAt(i)).toEncodedForm(false); + row[entries + 2] = newEntry.getBytes(); + entries++; + } + row[1][0] = (byte) entries; + try { + indexCache.put(row); + } catch (IllegalArgumentException e) { + // this is a very bad case; a database inconsistency occurred + deleteComplete(wordHash); + 
System.out.println("fatal error in plasmaWordIndexFileCacle.addEntriesToIndex: write to word hash file " + wordHash + " failed - " + e.getMessage()); + } + } + // finished! + } + + protected void deleteComplete(String wordHash) throws IOException { + plasmaWordIndexEntity.removePlasmaIndex(databaseRoot, wordHash); + indexCache.remove(wordHash.getBytes()); + } + + protected int removeEntries(String wordHash, String[] urlHashes, boolean deleteComplete) throws IOException { + // removes all given url hashes from a single word index. Returns number of deletions. + plasmaWordIndexEntity pi = getIndex(wordHash, true); + int count = 0; + for (int i = 0; i < urlHashes.length; i++) if (pi.removeEntry(urlHashes[i], deleteComplete)) count++; + int size = pi.size(); + pi.close(); pi = null; + // check if we can remove the index completely + if ((deleteComplete) && (size == 0)) { + // remove index + if (!(plasmaWordIndexEntity.removePlasmaIndex(databaseRoot, wordHash))) + System.out.println("DEBUG: cannot remove index file for word hash " + wordHash); + // remove cache + indexCache.remove(wordHash.getBytes()); + } + return count; + } + + private synchronized void flushCache(byte[][] row, Vector indexEntries) throws IOException { + String wordHash = new String(row[0]); + int entries = (int) row[1][0]; + if ((entries == 0) && ((indexEntries == null) || (indexEntries.size() == 0))) return; + + // open file + plasmaWordIndexEntity pi = new plasmaWordIndexEntity(databaseRoot, wordHash, false); + + // write from array + plasmaWordIndexEntry entry; + for (int i = 0; i < entries; i++) { + entry = new plasmaWordIndexEntry(new String(row[i + 2], 0, plasmaCrawlLURL.urlHashLength), + new String(row[i + 2], plasmaCrawlLURL.urlHashLength, plasmaWordIndexEntry.attrSpaceShort)); + pi.addEntry(entry); + } + + // write from vector + if (indexEntries != null) { + for (int i = 0; i < indexEntries.size(); i++) + pi.addEntry((plasmaWordIndexEntry) indexEntries.elementAt(i)); + } + + // close and 
return + pi.close(); + pi = null; + } + + private int size(String wordHash) throws IOException { + // return number of entries in specific cache + byte[][] row = indexCache.get(wordHash.getBytes()); + if (row == null) return 0; + return (int) row[1][0]; + } + + protected int size() { + if (indexCache == null) return 0; else return indexCache.size(); + } + + /* + private plasmaIndex getIndexF(String wordHash) throws IOException { + return new plasmaIndex(databaseRoot, wordHash); + } + + private void addEntryToIndexF(String wordHash, plasmaIndexEntry entry) throws IOException { + plasmaIndex pi = new plasmaIndex(databaseRoot, wordHash); + pi.addEntry(entry); + pi.close(); + } + */ + +} diff --git a/source/de/anomic/plasma/plasmaWordIndexRAMCache.java b/source/de/anomic/plasma/plasmaWordIndexRAMCache.java new file mode 100644 index 000000000..fe9e198f9 --- /dev/null +++ b/source/de/anomic/plasma/plasmaWordIndexRAMCache.java @@ -0,0 +1,224 @@ +// plasmaIndexRAMCache.java +// ----------------------- +// part of YACY +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2004 +// last major change: 22.12.2004 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. 
+// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. +// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. 
+ +// compile with +// javac -classpath classes -sourcepath source -d classes -g source/de/anomic/plasma/*.java + + +package de.anomic.plasma; + +import java.io.*; +import java.util.*; +import de.anomic.yacy.*; +import de.anomic.server.*; +import de.anomic.kelondro.*; + +public class plasmaWordIndexRAMCache extends Thread { + + static String minKey, maxKey; + + // class variables + TreeMap cache; + kelondroMScoreCluster hashScore; + plasmaWordIndexFileCache pic; + boolean terminate; + int maxWords; + + static { + maxKey = ""; + for (int i = 0; i < yacySeedDB.commonHashLength; i++) maxKey += 'z'; + minKey = ""; + for (int i = 0; i < yacySeedDB.commonHashLength; i++) maxKey += '-'; + } + + public plasmaWordIndexRAMCache(File databaseRoot, int maxWords, int bufferkb) throws IOException { + this.pic = new plasmaWordIndexFileCache(databaseRoot, bufferkb); + this.cache = new TreeMap(); + this.hashScore = new kelondroMScoreCluster(); + this.maxWords = maxWords; + this.terminate = false; + } + + public void run() { + serverLog.logSystem("PLASMA INDEXING", "started word cache management"); + int check; + // permanently flush cache elements + while (!(terminate)) { + if (hashScore.size() < 100) try {Thread.currentThread().sleep(10000);} catch (InterruptedException e) {} + while ((!(terminate)) && (cache != null) && (hashScore.size() > 0)) try { + //check = hashScore.size(); + flushSpecific(true); + //serverLog.logDebug("PLASMA INDEXING", "single flush. bevore=" + check + "; after=" + hashScore.size()); + try {Thread.currentThread().sleep(200 + (maxWords / (1 + hashScore.size())));} catch (InterruptedException e) {} + } catch (IOException e) { + serverLog.logError("PLASMA INDEXING", "PANIK! 
exception in main cache loop: " + e.getMessage()); + e.printStackTrace(); + terminate = true; + cache = null; + } + } + + serverLog.logSystem("PLASMA INDEXING", "CATCHED TERMINATION SIGNAL: start final flush"); + + // close all; + try { + // first flush everything + while (hashScore.size() > 0) flushSpecific(false); + + // then close file cache: + pic.close(); + } catch (IOException e) { + serverLog.logDebug("PLASMA INDEXING", "interrupted final flush: " + e.toString()); + } + // delete data + cache = null; + hashScore = null; + serverLog.logSystem("PLASMA INDEXING", "finished final flush"); + } + + public void terminate(int waitingBoundSeconds) { + terminate = true; + // wait until terination is done + // we can do at least 6 flushes/second + int waitingtime = 10 + (((cache == null) ? 0 : cache.size()) / 5); // seconds + if (waitingtime > waitingBoundSeconds) waitingtime = waitingBoundSeconds; // upper bound + while ((cache != null) && (waitingtime > 0)) { + serverLog.logDebug("PLASMA INDEXING", "final word flush; cache.size=" + cache.size() + "; time-out in " + waitingtime + " seconds"); + try {Thread.currentThread().sleep(5000);} catch (InterruptedException e) {} + waitingtime -= 5; + } + if (cache != null) serverLog.logError("PLASMA INDEXING", "Cache was not flushed completely; " + hashScore.size() + " words lost"); + + } + + private synchronized int flushSpecific(boolean greatest) throws IOException { + //System.out.println("DEBUG: plasmaIndexRAMCache.flushSpecific(" + ((greatest) ? 
"greatest" : "smallest") + "); cache.size() = " + cache.size()); + if ((hashScore.size() == 0) && (cache.size() == 0)) { + serverLog.logDebug("PLASMA INDEXING", "flushSpecific: called but cache is empty"); + return 0; + } + if ((hashScore.size() == 0) && (cache.size() != 0)) { + serverLog.logError("PLASMA INDEXING", "flushSpecific: hashScore.size=0 but cache.size=" + cache.size()); + return 0; + } + if ((hashScore.size() != 0) && (cache.size() == 0)) { + serverLog.logError("PLASMA INDEXING", "flushSpecific: hashScore.size=" + hashScore.size() + " but cache.size=0"); + return 0; + } + + //serverLog.logDebug("PLASMA INDEXING", "flushSpecific: hashScore.size=" + hashScore.size() + ", cache.size=" + cache.size()); + + String key = (String) ((greatest) ? hashScore.getMaxObject() : hashScore.getMinObject()); + return flushKey(key, "flushSpecific"); + } + + private synchronized int flushKey(String key, String caller) throws IOException { + Vector v = (Vector) cache.get(key); + if (v == null) { + //serverLog.logDebug("PLASMA INDEXING", "flushKey: '" + caller + "' forced to flush non-existing key " + key); + return 0; + } + pic.addEntriesToIndex(key, v); + cache.remove(key); + hashScore.deleteScore(key); + return v.size(); + } + + public synchronized Iterator wordHashesMem(String wordHash, int count) throws IOException { + // returns a list of hashes from a specific start point + // we need to flush some of the elements in the cache first + // maybe we flush too much, but this is not easy to find out and it does not matter + TreeMap subMap = new TreeMap(cache.subMap((wordHash == null) ? 
minKey : wordHash, maxKey)); + int flushcount = subMap.size(); + if (flushcount > count) flushcount = count; + String key; + for (int i = 0; i < flushcount ; i++) { + key = (String) subMap.firstKey(); + flushKey(key, "getSequentialWordHashesMem"); + subMap.remove(key); + } + // finally return the result from the underlying hash list: + return pic.wordHashes(wordHash, true); + } + + public plasmaWordIndexEntity getIndexMem(String wordHash, boolean deleteIfEmpty) throws IOException { + flushKey(wordHash, "getIndexMem"); + return pic.getIndex(wordHash, deleteIfEmpty); + } + + public synchronized int addEntryToIndexMem(String wordHash, plasmaWordIndexEntry entry) throws IOException { + // make space for new words + int flushc = 0; + //serverLog.logDebug("PLASMA INDEXING", "addEntryToIndexMem: cache.size=" + cache.size() + "; hashScore.size=" + hashScore.size()); + while (hashScore.size() > maxWords) flushc += flushSpecific(false); + //if (flushc > 0) serverLog.logDebug("PLASMA INDEXING", "addEntryToIndexMem - flushed " + flushc + " entries"); + + // put new words into cache + Vector v = (Vector) cache.get(wordHash); // null pointer exception? wordhash != null! 
must be cache==null + if (v == null) v = new Vector(); + v.add(entry); + cache.put(wordHash, v); + hashScore.incScore(wordHash); + return flushc; + } + + public synchronized void deleteComplete(String wordHash) throws IOException { + cache.remove(wordHash); + hashScore.deleteScore(wordHash); + pic.deleteComplete(wordHash); + } + + public int removeEntriesMem(String wordHash, String[] urlHashes, boolean deleteComplete) throws IOException { + flushKey(wordHash, "removeEntriesMem"); + return pic.removeEntries(wordHash, urlHashes, deleteComplete); + } + + public int sizeMin() { + // it is not easy to find out the correct size of the cache + // to make the result correct, it would be necessary to flush the complete ram cache + // instead, we return the minimum size of the cache, which is the maximun of either the + // ram or table cache + if ((hashScore == null) || (pic == null)) return 0; + return (hashScore.size() < pic.size()) ? pic.size() : hashScore.size(); + } + + +} diff --git a/source/de/anomic/server/serverAbstractSwitch.java b/source/de/anomic/server/serverAbstractSwitch.java new file mode 100644 index 000000000..2deedd86a --- /dev/null +++ b/source/de/anomic/server/serverAbstractSwitch.java @@ -0,0 +1,292 @@ +// serverAbstractSwitch.java +// ------------------------------------- +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2004, 2005 +// last major change: 24.03.2005 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. 
+// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. +// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. 
+ +package de.anomic.server; + +import java.io.*; +import java.net.*; +import java.util.*; + +public abstract class serverAbstractSwitch implements serverSwitch { + + // configuration management + private File configFile; + private Hashtable configProps; + private String configComment; + private Hashtable authorization; + private String rootPath; + private TreeMap workerThreads; + + public serverAbstractSwitch(String rootPath, String initPath, String configPath) throws IOException { + // we initialize the switchboard with a property file, + // but maintain these properties then later in a new 'config' file + // to reset all changed configs, the config file must + // be deleted, but not the init file + // the only attribute that will always be read from the init is the + // file name of the config file + this.rootPath = rootPath; + configComment = "this is an automaticaly generated file, updated by serverAbstractSwitch and initialized by " + initPath; + File initFile = new File(rootPath, initPath); + configFile = new File(rootPath, configPath); //propertiesFile(config); + new File(configFile.getParent()).mkdir(); + + // predefine init's + Hashtable initProps; + if (initFile.exists()) initProps = loadHashtable(initFile); else initProps = new Hashtable(); + + // load config's from last save + if (configFile.exists()) configProps = loadHashtable(configFile); else configProps = new Hashtable(); + + // remove all values from config that do not appear in init (out-dated settings) + Enumeration e = configProps.keys(); + String key; + while (e.hasMoreElements()) { + key = (String) e.nextElement(); + //System.out.println("TESTING " + key); + if (!(initProps.containsKey(key))) { + //System.out.println("MIGRATE: removing out-dated property '" + key + "'"); + configProps.remove(key); + } + } + + // merge new props from init to config + // this is necessary for migration, when new properties are attached + initProps.putAll(configProps); + configProps = initProps; + + // save 
result; this may initially create a config file after initialization + saveConfig(); + + // other settings + authorization = new Hashtable(); + + // init thread control + workerThreads = new TreeMap(); + } + + public static Hashtable loadHashtable(File f) { + // load props + Properties prop = new Properties(); + try { + prop.load(new FileInputStream(f)); + } catch (IOException e1) { + System.err.println("ERROR: " + f.toString() + " not found in settings path"); + prop = null; + } + return (Hashtable) prop; + } + + public static void saveHashtable(File f, Hashtable props, String comment) throws IOException { + PrintWriter pw = new PrintWriter(new FileOutputStream(f)); + pw.println("# " + comment); + Enumeration e = props.keys(); + String key, value; + while (e.hasMoreElements()) { + key = (String) e.nextElement(); + //value = (String) props.get(key); + value = ((String) props.get(key)).replaceAll("\n", "\\\\n"); + pw.println(key + "=" + value); + } + pw.println("# EOF"); + pw.close(); + } + + public void setConfig(String key, long value) { + setConfig(key, "" + value); + } + + public void setConfig(String key, String value) { + configProps.put(key, value); + saveConfig(); + } + + public String getConfig(String key, String dflt) { + String s = (String) configProps.get(key); + if (s == null) return dflt; else return s; + } + + public Enumeration configKeys() { + return configProps.keys(); + } + + private void saveConfig() { + try { + saveHashtable(configFile, configProps, configComment); + } catch (IOException e) { + System.out.println("ERROR: cannot write config file " + configFile.toString() + ": " + e.getMessage()); + } + } + + public void deployThread(String threadName, String threadShortDescription, String threadLongDescription, serverThread newThread, serverLog log, long startupDelay) { + deployThread(threadName, threadShortDescription, threadLongDescription, + newThread, log, startupDelay, + Long.parseLong(getConfig(threadName + "_idlesleep" , "novalue")), + 
Long.parseLong(getConfig(threadName + "_busysleep" , "novalue"))); + } + + public void deployThread(String threadName, String threadShortDescription, String threadLongDescription, serverThread newThread, serverLog log, long startupDelay, long initialIdleSleep, long initialBusySleep) { + if (newThread.isAlive()) throw new RuntimeException("undeployed threads must not live; they are started as part of the deployment"); + newThread.setStartupSleep(startupDelay); + long sleep; + try { + sleep = Long.parseLong(getConfig(threadName + "_idlesleep" , "novalue")); + newThread.setIdleSleep(sleep); + } catch (NumberFormatException e) { + newThread.setIdleSleep(initialIdleSleep); + setConfig(threadName + "_idlesleep", initialIdleSleep); + } + try { + sleep = Long.parseLong(getConfig(threadName + "_busysleep" , "novalue")); + newThread.setBusySleep(sleep); + } catch (NumberFormatException e) { + newThread.setBusySleep(initialBusySleep); + setConfig(threadName + "_busysleep", initialBusySleep); + } + newThread.setLog(log); + newThread.setDescription(threadShortDescription, threadLongDescription); + workerThreads.put(threadName, newThread); + // start the thread + if (workerThreads.containsKey(threadName)) newThread.start(); + } + + public serverThread getThread(String threadName) { + return (serverThread) workerThreads.get(threadName); + } + + public void setThreadSleep(String threadName, long idleMillis, long busyMillis) { + serverThread thread = (serverThread) workerThreads.get(threadName); + if (thread != null) { + thread.setIdleSleep(idleMillis); + thread.setBusySleep(busyMillis); + } + } + + public synchronized void terminateThread(String threadName, boolean waitFor) { + if (workerThreads.containsKey(threadName)) { + ((serverThread) workerThreads.get(threadName)).terminate(waitFor); + workerThreads.remove(threadName); + } + } + + public synchronized void terminateAllThreads(boolean waitFor) { + Iterator e = workerThreads.keySet().iterator(); + while (e.hasNext()) { + 
((serverThread) workerThreads.get((String) e.next())).terminate(false); + } + if (waitFor) { + e = workerThreads.keySet().iterator(); + while (e.hasNext()) { + ((serverThread) workerThreads.get((String) e.next())).terminate(true); + e.remove(); + } + } + } + + public Iterator /*of serverThread-Names (String)*/ threadNames() { + return workerThreads.keySet().iterator(); + } + + abstract public int queueSize(); + abstract public void enQueue(Object job); + abstract public void deQueue(); + + + // authentification routines: + + public void setAuthentify(InetAddress host, String user, String rights) { + // sets access attributes according to host addresses + authorization.put(host, user + "@" + rights); + } + + public void removeAuthentify(InetAddress host) { + // remove access attributes according to host addresses + authorization.remove(host); + } + + public String getAuthentifyUser(InetAddress host) { + // read user name according to host addresses + String a = (String) authorization.get(host); + if (a == null) return null; + int p = a.indexOf("@"); + if (p < 0) return null; + return a.substring(0, p); + } + + public String getAuthentifyRights(InetAddress host) { + // read access rigths according to host addresses + String a = (String) authorization.get(host); + if (a == null) return null; + int p = a.indexOf("@"); + if (p < 0) return null; + return a.substring(p + 1); + } + + public void addAuthentifyRight(InetAddress host, String right) { + String rights = getAuthentifyRights(host); + if (rights == null) { + // create new authentification + setAuthentify(host, "unknown", right); + } else { + // add more authentification + String user = getAuthentifyUser(host); + setAuthentify(host, user, rights + right); + } + } + + public boolean hasAuthentifyRight(InetAddress host, String right) { + String rights = getAuthentifyRights(host); + if (rights == null) return false; + return rights.indexOf(right) >= 0; + } + + public abstract serverObjects action(String actionName, 
serverObjects actionInput); + + public String getRootPath() { + return rootPath; + } + + public String toString() { + return configProps.toString(); + } + + public void handleBusyState(int jobs) { + // do nothing here; should be overridden + } +} diff --git a/source/de/anomic/server/serverAbstractThread.java b/source/de/anomic/server/serverAbstractThread.java new file mode 100644 index 000000000..88d45e3e0 --- /dev/null +++ b/source/de/anomic/server/serverAbstractThread.java @@ -0,0 +1,227 @@ +// serverAbstractThread.java +// ----------------------- +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.yacy.net +// Frankfurt, Germany, 2005 +// last major change: 14.03.2005 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. 
The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. +// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. + +/* + an Implementation of a serverRunnable must only extend this class and impement + the methods: + open(), + job() and + close() + */ + +package de.anomic.server; + +public abstract class serverAbstractThread extends Thread implements serverThread { + + private long startup = 0, idlePause = 0, busyPause = 0, blockPause = 0; + private boolean running = true; + private serverLog log = null; + private long idletime = 0, busytime = 0; + private String shortDescr = "", longDescr = ""; + private long threadBlockTimestamp = System.currentTimeMillis(); + private long idleCycles = 0, busyCycles = 0; + + protected void announceThreadBlockApply() { + // shall only be used, if a thread blocks for an important reason + // like a socket connect and must renew the timestamp to correct + // statistics + this.threadBlockTimestamp = System.currentTimeMillis(); + } + + protected void announceThreadBlockRelease() { + // shall only be used, if a thread blocks for an important reason + // like a socket connect and must renew the timestamp to correct + // statistics + long thisBlockTime = (System.currentTimeMillis() - 
this.threadBlockTimestamp); + this.blockPause += thisBlockTime; + this.busytime -= thisBlockTime; + } + + protected void announceMoreExecTime(long millis) { + this.busytime += millis; + } + + protected void announceMoreSleepTime(long millis) { + this.idletime += millis; + } + + public void setDescription(String shortText, String longText) { + // sets a visible description string + this.shortDescr = shortText; + this.longDescr = longText; + } + + public void setStartupSleep(long milliseconds) { + // sets a sleep time before execution of the job-loop + startup = milliseconds; + } + + public void setIdleSleep(long milliseconds) { + // sets a sleep time for pauses between two jobs + idlePause = milliseconds; + } + + public void setBusySleep(long milliseconds) { + // sets a sleep time for pauses between two jobs + busyPause = milliseconds; + } + + public String getShortDescription() { + return this.shortDescr; + } + + public String getLongDescription() { + return this.longDescr; + } + + public long getIdleCycles() { + // returns the total number of cycles of job execution with idle-result + return this.idleCycles; + } + + public long getBusyCycles() { + // returns the total number of cycles of job execution with busy-result + return this.busyCycles; + } + + public long getBlockTime() { + // returns the total time that this thread has been blocked so far + return this.blockPause; + } + + public long getSleepTime() { + // returns the total time that this thread has slept so far + return this.idletime; + } + + public long getExecTime() { + // returns the total time that this thread has worked so far + return this.busytime; + } + + public void setLog(serverLog log) { + // defines a log where process states can be written to + this.log = log; + } + + public void terminate(boolean waitFor) { + // after calling this method, the thread shall terminate + this.running = false; + // wait for termination + if (waitFor) while (this.isAlive()) + try {this.sleep(100);} catch 
(InterruptedException e) {break;} + // If we reach this point, the process is closed + } + + private void logError(String text) { + if (log == null) + serverLog.logError("THREAD-CONTROL", text); + else + log.logError(text); + } + private void logSystem(String text) { + if (log == null) + serverLog.logSystem("THREAD-CONTROL", text); + else + log.logSystem(text); + } + + public void jobExceptionHandler(Exception e) { + // default handler for job exceptions. shall be overridden for own handler + logError("thread '" + this.getName() + "': " + e.toString()); + e.printStackTrace(); + } + + public void run() { + if (startup > 0) { + // do a startup-delay + logSystem("thread '" + this.getName() + "' deployed, delaying start-up."); + ratz(startup); + if (!(running)) return; + } + this.open(); + if (log != null) { + if (startup > 0) + logSystem("thread '" + this.getName() + "' delayed, " + ((this.busyPause < 0) ? "starting now job." : "starting now loop.")); + else + logSystem("thread '" + this.getName() + "' deployed, " + ((this.busyPause < 0) ? "starting job." : "starting loop.")); + } + int outerloop; + long innerpause; + long timestamp; + boolean isBusy; + while (running) { + try { + // do job + timestamp = System.currentTimeMillis(); + isBusy = this.job(); + busytime += System.currentTimeMillis() - timestamp; + // interrupt loop if this is supposed to be a one-time job + if ((this.idlePause < 0) || (this.busyPause < 0)) break; // for one-time jobs + // process scheduled pause + timestamp = System.currentTimeMillis(); + ratz((isBusy) ? 
this.busyPause : this.idlePause); + idletime += System.currentTimeMillis() - timestamp; + if (isBusy) busyCycles++; else idleCycles++; + } catch (Exception e) { + // handle exceptions: thread must not die on any unexpected exceptions + // if the exception is too bad it should call terminate() + this.jobExceptionHandler(e); + busyCycles++; + } + } + this.close(); + logSystem("thread '" + this.getName() + "' terminated."); + } + + private void ratz(long millis) { + int loop = 1; + while (millis > 1000) { + loop = loop * 2; + millis = millis / 2; + } + while ((loop-- > 0) && (running)) { + try {this.sleep(millis);} catch (InterruptedException e) {} + } + } + + public void open() {} // dummy definition; should be overriden + public void close() {} // dummy definition; should be overriden +} \ No newline at end of file diff --git a/source/de/anomic/server/serverByteBuffer.java b/source/de/anomic/server/serverByteBuffer.java new file mode 100644 index 000000000..047026ff2 --- /dev/null +++ b/source/de/anomic/server/serverByteBuffer.java @@ -0,0 +1,277 @@ +// serverByteBuffer.java +// --------------------------- +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2004 +// last major change: 11.03.2004 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. 
+// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. +// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. 
+
+package de.anomic.server;
+
+import java.io.*;
+import java.util.*;
+
+/**
+ * A growable byte sequence, comparable to a StringBuffer for bytes.
+ *
+ * The content is the window <code>buffer[offset] .. buffer[offset + length - 1]</code>.
+ * Constructors that take a byte array (or another serverByteBuffer) may share
+ * that array instead of copying it; the sharing ends as soon as the buffer
+ * has to be re-allocated by an append.
+ *
+ * Changes to the original code (marked as required by the notice above):
+ * - trim() and propParser() now honour <code>offset</code>
+ * - byteAt() no longer reads the position directly behind the content
+ * - bytes >= 0x80 are no longer treated as whitespace (signed byte compare)
+ * - the File constructor closes its stream on every path
+ * - getBytes() rejects negative or reversed ranges
+ * - append(Object) accepts a serverByteBuffer
+ */
+public class serverByteBuffer {
+
+    public static final byte singlequote = (byte) 39;
+    public static final byte doublequote = (byte) 34;
+    public static final byte equal = (byte) '=';
+
+    private byte[] buffer; // backing array, may be larger than the content
+    private int offset;    // index of the first content byte in buffer
+    private int length;    // number of content bytes
+
+    /** Creates an empty buffer with an initial capacity of 80 bytes. */
+    public serverByteBuffer() {
+        buffer = new byte[80];
+        length = 0;
+        offset = 0;
+    }
+
+    /** Wraps the given array without copying it; the whole array is the content. */
+    public serverByteBuffer(byte[] bb) {
+        buffer = bb;
+        length = bb.length;
+        offset = 0;
+    }
+
+    /**
+     * Creates a buffer over <code>le</code> bytes of <code>bb</code> starting at <code>of</code>.
+     * If the window starts in the second half of the array the bytes are
+     * copied, otherwise the array is shared and only the window is recorded.
+     */
+    public serverByteBuffer(byte[] bb, int of, int le) {
+        if (of * 2 > bb.length) {
+            buffer = new byte[le];
+            System.arraycopy(bb, of, buffer, 0, le);
+            length = le;
+            offset = 0;
+        } else {
+            buffer = bb;
+            length = le;
+            offset = of;
+        }
+    }
+
+    /** Creates a second view on the same backing array as <code>bb</code> (no copy). */
+    public serverByteBuffer(serverByteBuffer bb) {
+        buffer = bb.buffer;
+        length = bb.length;
+        offset = bb.offset;
+    }
+
+    /**
+     * Fills the buffer with the content of a file.
+     *
+     * @param f the file to read
+     * @throws IOException if the file is larger than Integer.MAX_VALUE bytes,
+     *         cannot be found, or cannot be read
+     */
+    public serverByteBuffer(File f) throws IOException {
+        if (f.length() > (long) Integer.MAX_VALUE) throw new IOException("file is too large for buffering");
+
+        length = (int) f.length();
+        buffer = new byte[length];
+        offset = 0;
+
+        FileInputStream fis;
+        try {
+            fis = new FileInputStream(f);
+        } catch (FileNotFoundException e) {
+            throw new IOException("File not found: " + f.toString() + "; " + e.getMessage());
+        }
+        try {
+            // read straight into the target array; never beyond the size that
+            // was measured above, even if the file grows in the meantime
+            int p = 0;
+            int l;
+            while ((p < length) && ((l = fis.read(buffer, p, length - p)) > 0)) p += l;
+            // if the file turned out to be shorter, only the bytes read are content
+            length = p;
+        } finally {
+            fis.close();
+        }
+    }
+
+    /** @return the number of content bytes */
+    public int length() {
+        return length;
+    }
+
+    /**
+     * Makes sure that <code>additional</code> more bytes fit behind the content.
+     * The capacity follows the sequence n -> 2n + 1; on re-allocation the
+     * content is moved to the start of a new, unshared array.
+     */
+    private void ensureCapacity(int additional) {
+        if (offset + length + additional <= buffer.length) return;
+        int capacity = buffer.length * 2 + 1;
+        while (capacity < length + additional) capacity = capacity * 2 + 1;
+        byte[] tmp = new byte[capacity];
+        System.arraycopy(buffer, offset, tmp, 0, length);
+        buffer = tmp;
+        offset = 0;
+    }
+
+    /** Whitespace in the sense of this class: byte values 0..32 (high-bit bytes are not whitespace). */
+    private static boolean isWhitespace(byte b) {
+        return (b >= 0) && (b <= 32);
+    }
+
+    /** Appends a single byte. @return this buffer */
+    public serverByteBuffer append(byte b) {
+        ensureCapacity(1);
+        buffer[offset + length++] = b;
+        return this;
+    }
+
+    /** Appends the whole array. @return this buffer */
+    public serverByteBuffer append(byte[] bb) {
+        return append(bb, 0, bb.length);
+    }
+
+    /** Appends <code>le</code> bytes of <code>bb</code> starting at <code>of</code>. @return this buffer */
+    public serverByteBuffer append(byte[] bb, int of, int le) {
+        ensureCapacity(le);
+        System.arraycopy(bb, of, buffer, offset + length, le);
+        length += le;
+        return this;
+    }
+
+    /** Appends the bytes of the string in the platform default encoding. @return this buffer */
+    public serverByteBuffer append(String s) {
+        return append(s.getBytes());
+    }
+
+    /** Appends the content of another buffer. @return this buffer */
+    public serverByteBuffer append(serverByteBuffer bb) {
+        return append(bb.buffer, bb.offset, bb.length);
+    }
+
+    /**
+     * Appends a String, byte[] or serverByteBuffer.
+     *
+     * @return this buffer, or null if the object is of any other type
+     */
+    public serverByteBuffer append(Object o) {
+        if (o instanceof String) return append((String) o);
+        if (o instanceof byte[]) return append((byte[]) o);
+        if (o instanceof serverByteBuffer) return append((serverByteBuffer) o);
+        return null;
+    }
+
+    /** @return the byte at the given content position, or -1 if the position is outside the content */
+    public byte byteAt(int pos) {
+        if ((pos < 0) || (pos >= length)) return -1;
+        return buffer[offset + pos];
+    }
+
+    /** @return the first position of <code>b</code>, or -1 if it does not occur */
+    public int indexOf(byte b) {
+        return indexOf(b, 0);
+    }
+
+    /** @return the first position of <code>b</code> at or behind <code>start</code>, or -1 */
+    public int indexOf(byte b, int start) {
+        if (start >= length) return -1;
+        // a negative start must not reach bytes in front of the window
+        for (int i = (start < 0) ? 0 : start; i < length; i++) if (buffer[offset + i] == b) return i;
+        return -1;
+    }
+
+    /** @return the last position of <code>b</code>, or -1 if it does not occur */
+    public int lastIndexOf(byte b) {
+        for (int i = length - 1; i >= 0; i--) if (buffer[offset + i] == b) return i;
+        return -1;
+    }
+
+    /** @return a copy of the whole content */
+    public byte[] getBytes() {
+        return getBytes(0);
+    }
+
+    /** @return a copy of the content from <code>start</code> to the end */
+    public byte[] getBytes(int start) {
+        return getBytes(start, length);
+    }
+
+    /**
+     * @param start first position, inclusive
+     * @param end   last position, exclusive
+     * @return a copy of the requested part of the content
+     * @throws IndexOutOfBoundsException if the range is not inside the content
+     */
+    public byte[] getBytes(int start, int end) {
+        if (end > length) throw new IndexOutOfBoundsException("getBytes: end > length");
+        if (start > length) throw new IndexOutOfBoundsException("getBytes: start > length");
+        if (start < 0) throw new IndexOutOfBoundsException("getBytes: start < 0");
+        if (start > end) throw new IndexOutOfBoundsException("getBytes: start > end");
+        byte[] tmp = new byte[end - start];
+        System.arraycopy(buffer, offset + start, tmp, 0, end - start);
+        return tmp;
+    }
+
+    /** Drops the first <code>start</code> content bytes by moving the window. */
+    private serverByteBuffer trim(int start) {
+        if (start > length) throw new IndexOutOfBoundsException("trim: start > length");
+        offset = offset + start;
+        length = length - start;
+        return this;
+    }
+
+    /** Reduces the content to the positions start (inclusive) .. end (exclusive). */
+    private serverByteBuffer trim(int start, int end) {
+        if (end > length) throw new IndexOutOfBoundsException("trim: end > length");
+        trim(start);
+        length = end - start;
+        return this;
+    }
+
+    /**
+     * Removes leading and trailing whitespace (byte values 0..32).
+     * As in the original implementation, a buffer that consists of
+     * whitespace only is left unchanged.
+     *
+     * @return this buffer
+     */
+    public serverByteBuffer trim() {
+        int l = 0;
+        while ((l < length) && isWhitespace(buffer[offset + l])) l++;
+        int r = length;
+        while ((r > 0) && isWhitespace(buffer[offset + r - 1])) r--;
+        if ((l <= r) && (l < length)) return trim(l, r);
+        return this;
+    }
+
+    /** @return the content decoded with the platform default encoding */
+    public String toString() {
+        return new String(getBytes());
+    }
+
+    /**
+     * Extracts a=b, a="b" and a='b' relations from the content.
+     * Keys are trimmed and lower-cased, values are trimmed. Parsing stops
+     * silently at the first incomplete relation (no '=', no value, or no
+     * closing quote); everything found up to there is returned.
+     */
+    public Properties propParser() {
+        int pos = offset;                // absolute index into buffer
+        final int end = offset + length; // absolute index behind the content
+        int start;
+        String key;
+        Properties p = new Properties();
+        // eat up spaces at beginning
+        while ((pos < end) && isWhitespace(buffer[pos])) pos++;
+        while (pos < end) {
+            // pos is at start of next key
+            start = pos;
+            while ((pos < end) && (buffer[pos] != equal)) pos++;
+            if (pos >= end) break; // no equal sign found
+            key = new String(buffer, start, pos - start).trim().toLowerCase();
+            // we have a key; step over the equal sign and find the start of the value
+            pos++;
+            while ((pos < end) && isWhitespace(buffer[pos])) pos++;
+            // quotes are obligatory, but we want to be fuzzy if they are omitted
+            if (pos >= end) {
+                // error case: input ended too early
+                break;
+            } else if ((buffer[pos] == doublequote) || (buffer[pos] == singlequote)) {
+                // search the matching closing quote
+                final byte quote = buffer[pos];
+                pos++;
+                start = pos;
+                while ((pos < end) && (buffer[pos] != quote)) pos++;
+                if (pos >= end) break; // no closing quote found
+                p.setProperty(key, new String(buffer, start, pos - start).trim());
+                pos++;
+            } else {
+                // unquoted value: it ends at the next whitespace
+                start = pos;
+                while ((pos < end) && !isWhitespace(buffer[pos])) pos++;
+                p.setProperty(key, new String(buffer, start, pos - start).trim());
+            }
+            // pos should point now to a whitespace: eat up spaces
+            while ((pos < end) && isWhitespace(buffer[pos])) pos++;
+        }
+        return p;
+    }
+
+    /** @return true if <code>pattern</code> appears completely at the start of <code>buffer</code> */
+    public static boolean equals(byte[] buffer, byte[] pattern) {
+        return equals(buffer, 0, pattern);
+    }
+
+    /** @return true if <code>pattern</code> appears completely at position <code>offset</code> of <code>buffer</code> */
+    public static boolean equals(byte[] buffer, int offset, byte[] pattern) {
+        if ((offset < 0) || (buffer.length < offset + pattern.length)) return false;
+        for (int i = 0; i < pattern.length; i++) if (buffer[offset + i] != pattern[i]) return false;
+        return true;
+    }
+
+}
\ No newline at end of file
diff --git a/source/de/anomic/server/serverClassLoader.java b/source/de/anomic/server/serverClassLoader.java
new file mode 100644
index 000000000..c1de26f13
--- /dev/null
+++ b/source/de/anomic/server/serverClassLoader.java
@@ -0,0 +1,120 @@
+// serverClassLoader.java
+// -----------------------
+// (C) by Michael Peter Christen; mc@anomic.de
+// first published on http://www.anomic.de
+// Frankfurt, Germany, 2004
+// last major change: 11.07.2004
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//
+// Using this software in any meaning (reading, learning, copying, compiling,
+// running) means that you agree that the Author(s) is (are) not responsible
+// for cost, loss of data or any harm that may be caused directly or indirectly
+// by usage of this softare or this documentation. 
The usage of this software
+// is on your own risk. The installation and usage (starting/running) of this
+// software may allow other people or application to access your computer and
+// any attached devices and is highly dependent on the configuration of the
+// software which must be done by the user of the software; the author(s) is
+// (are) also not responsible for proper configuration and usage of the
+// software, even if provoked by documentation provided together with
+// the software.
+//
+// Any changes to this file according to the GPL as documented in the file
+// gpl.txt aside this file in the shipment you received can be done to the
+// lines that follows this copyright notice here, but changes must not be
+// done inside the copyright notive above. A re-distribution must contain
+// the intact and unchanged copyright notice.
+// Contributions and changes to the program code must be marked as such.
+
+
+package de.anomic.server;
+
+import java.io.*;
+import java.util.*;
+import java.lang.reflect.*;
+
+/**
+ * A class loader that can define classes directly from class files and
+ * remembers the classes it has defined, keyed by their File.
+ *
+ * Changes to the original code (marked as required by the notice above):
+ * - a file name without a '.' no longer causes a StringIndexOutOfBoundsException
+ * - the IOException of a failed file read is kept as cause of the
+ *   ClassNotFoundException
+ */
+public class serverClassLoader extends ClassLoader {
+
+    // File -> Class, filled by loadClass(File) for classes defined from a file
+    Hashtable classes;
+
+    /** Creates a loader whose parent is the system class loader. */
+    public serverClassLoader() {
+        super(ClassLoader.getSystemClassLoader());
+        classes = new Hashtable();
+    }
+
+    /** Creates a loader with the given parent class loader. */
+    public serverClassLoader(ClassLoader parent) {
+        super(parent);
+        classes = new Hashtable();
+    }
+
+    /** @return the result of ClassLoader.getPackages(), which is protected there */
+    public Package[] packages() {
+        return super.getPackages();
+    }
+
+    /**
+     * Returns the class that belongs to a class file.
+     *
+     * The class name is the file name up to its first '.', or the whole
+     * file name if it contains no '.'. The lookup order is:
+     * the File cache of this loader, the classes already loaded by this
+     * loader, the system classes, and finally the content of the file itself.
+     *
+     * @param classfile the class file
+     * @return the loaded class
+     * @throws ClassNotFoundException if the class is not known by name and the file cannot be read
+     */
+    public Class loadClass(File classfile) throws ClassNotFoundException {
+        // try to find the class in the hashtable
+        Class c = (Class) classes.get(classfile);
+        if (c != null) return c;
+
+        // get the class name out of the classfile: strip off everything from the first dot
+        String classname = classfile.getName();
+        int p = classname.indexOf(".");
+        if (p >= 0) classname = classname.substring(0, p);
+
+        try {
+            // first try: a class that this loader has already loaded under that name
+            c = findLoadedClass(classname);
+            // second try: ask the system
+            if (c == null) c = findSystemClass(classname);
+            // not found by name: fall through to the file
+            if (c == null) throw new ClassNotFoundException("internal trigger");
+        } catch (ClassNotFoundException e) {
+            // third try: load the file from the file system
+            byte[] b;
+            try {
+                b = serverFileUtils.read(classfile);
+                // now make a class out of the bytes
+                c = this.defineClass(classname, b, 0, b.length);
+                resolveClass(c);
+                classes.put(classfile, c);
+            } catch (IOException ee) {
+                throw new ClassNotFoundException(classfile.toString(), ee);
+            }
+        }
+        return c;
+    }
+
+}
\ No newline at end of file
diff --git a/source/de/anomic/server/serverCodings.java b/source/de/anomic/server/serverCodings.java
new file mode 100644
index 000000000..4bd3e49b4
--- /dev/null
+++ b/source/de/anomic/server/serverCodings.java
@@ -0,0 +1,281 @@
+// serverCodings.java
+// -----------------------
+// (C) by Michael Peter Christen; mc@anomic.de
+// first published on http://www.anomic.de
+// Frankfurt, Germany, 2004
+// last major change: 29.04.2004
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; 
either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. +// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. + +package de.anomic.server; + +import java.io.*; +import java.security.*; + + +public class serverCodings { + + // this provides encoding and decoding of long cardinals into a 6-bit - based number format + // expressed by a string. 
This is probably the most compact form to encode numbers as strings. + // the resulting string is filename-friendly, it contains no special character that is not + // suitable for file names. + + public static serverCodings standardCoder = new serverCodings(true); + public static serverCodings enhancedCoder = new serverCodings(false); + + boolean rfc1113compliant; + public char[] alpha; + public byte[] ahpla; + + public serverCodings(boolean rfc1113compliant) { + // if we choose not to be rfc1113compliant, + // then we get shorter base64 results which are also filename-compatible + this.rfc1113compliant = rfc1113compliant; + alpha = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/".toCharArray(); + if (!(rfc1113compliant)) { + alpha[62] = '-'; + alpha[63] = '_'; + } + ahpla = new byte[256]; + for (int i = 0; i < 256; i++) ahpla[i] = -1; + for (int i = 0; i < alpha.length; i++) ahpla[alpha[i]] = (byte) i; + } + + public String encodeBase64Long(long c, int length) { + if (length < 0) length = 0; + String s = ""; + if (c == 0) s = alpha[0] + s; + else while (c > 0) { + s = alpha[(byte) (c & 0x3F)] + s; + c >>= 6; + } + if ((length != 0) && (s.length() > length)) + throw new RuntimeException("encodeBase64 result '" + s + "' exceeds demanded length of " + length + " digits"); + if (length == 0) length = 1; // rare exception for the case that c == 0 + while (s.length() < length) s = alpha[0] + s; + return s; + } + + public long decodeBase64Long(String s) { + while (s.endsWith("=")) s = s.substring(0, s.length() - 1); + long c = 0; + for (int i = 0; i < s.length(); i++) { + c <<= 6; + c += ahpla[s.charAt(i)]; + } + return c; + } + + public static long maxBase64(int len) { + // computes the maximum number that can be coded with a base64-encoded String of base len + long c = 0; + for (int i = 0; i < len; i++) { + c <<= 6; + c += 63; + } + return c; + } + + public String encodeBase64String(String in) { + return encodeBase64(in.getBytes()); + } + + // we will 
use this encoding to encode strings with 2^8 values to b64-Strings + // we will do that by grouping each three input bytes to four output bytes. + public String encodeBase64(byte[] in) { + StringBuffer out = new StringBuffer(in.length / 3 * 4 + 3); + int pos = 0; + long l; + while (in.length - pos >= 3) { + l = ((((0XffL & (long) in[pos]) << 8) + (0XffL & (long) in[pos + 1])) << 8) + (0XffL & (long) in[pos + 2]); + pos += 3; + out = out.append(encodeBase64Long(l, 4)); + } + // now there may be remaining bytes + if (in.length % 3 != 0) + out = out.append( + (in.length % 3 == 2) ? + encodeBase64Long((((0XffL & (long) in[pos]) << 8) + (0XffL & (long) in[pos + 1])) << 8, 4).substring(0,3) : + encodeBase64Long((((0XffL & (long) in[pos])) << 8) << 8, 4).substring(0, 2)); + if (rfc1113compliant) while (out.length() % 4 > 0) out.append("="); + // return result + return out.toString(); + } + + public String decodeBase64String(String in) { + try { + return new String(decodeBase64(in), "ISO-8859-1"); + } catch (java.io.UnsupportedEncodingException e) { + System.out.println("internal error in base64: " + e.getMessage()); + return null; + } + } + + public byte[] decodeBase64(String in) { + try { + int posIn = 0; + int posOut = 0; + if (rfc1113compliant) while (in.charAt(in.length() - 1) == '=') in = in.substring(0, in.length() - 1); + byte[] out = new byte[in.length() / 4 * 3 + (((in.length() % 4) == 0) ? 
0 : in.length() % 4 - 1)]; + long l; + char c1, c2, c3; + while (posIn + 3 < in.length()) { + l = decodeBase64Long(in.substring(posIn, posIn + 4)); + out[posOut+2] = (byte) (l % 256); l = l / 256; + out[posOut+1] = (byte) (l % 256); l = l / 256; + out[posOut ] = (byte) (l % 256); l = l / 256; + posIn += 4; + posOut += 3; + } + if (posIn < in.length()) { + if (in.length() - posIn == 3) { + l = decodeBase64Long(in.substring(posIn) + "A"); + l = l / 256; + out[posOut+1] = (byte) (l % 256); l = l / 256; + out[posOut ] = (byte) (l % 256); l = l / 256; + } else { + l = decodeBase64Long(in.substring(posIn) + "AA"); + l = l / 256 / 256; + out[posOut ] = (byte) (l % 256); l = l / 256; + } + } + return out; + } catch (ArrayIndexOutOfBoundsException e) { + // maybe the input was not base64 + return null; + } + } + + public static String encodeHex(long in, int length) { + String s = Long.toHexString(in); + while (s.length() < length) s = "0" + s; + return s; + } + + public static String encodeHex(byte[] in) { + if (in == null) return ""; + String result = ""; + for (int i = 0; i < in.length; i++) + result = result + (((0Xff & (int) in[i]) < 16) ? 
"0" : "") + Integer.toHexString(0Xff & (int) in[i]); + return result; + } + + public static byte[] decodeHex(String hex) { + byte[] result = new byte[hex.length() / 2]; + for (int i = 0; i < result.length; i++) { + result[i] = (byte) (16 * Integer.parseInt(hex.charAt(i * 2) + "", 16) + Integer.parseInt(hex.charAt(i * 2 + 1) + "", 16)); + } + return result; + } + + public static String encodeMD5B64(String key, boolean enhanced) { + if (enhanced) + return enhancedCoder.encodeBase64(encodeMD5Raw(key)); + else + return standardCoder.encodeBase64(encodeMD5Raw(key)); + } + + public static String encodeMD5B64(File file, boolean enhanced) { + if (enhanced) + return enhancedCoder.encodeBase64(encodeMD5Raw(file)); + else + return standardCoder.encodeBase64(encodeMD5Raw(file)); + } + + public static String encodeMD5Hex(String key) { + // generate a hex representation from the md5 of a string + return encodeHex(encodeMD5Raw(key)); + } + + public static String encodeMD5Hex(File file) { + // generate a hex representation from the md5 of a file + return encodeHex(encodeMD5Raw(file)); + } + + + private static byte[] encodeMD5Raw(String key) { + try { + MessageDigest digest = MessageDigest.getInstance("MD5"); + digest.reset(); + digest.update(key.getBytes()); + return digest.digest(); + } catch (java.security.NoSuchAlgorithmException e) { + System.out.println("Internal Error at md5:" + e.getMessage()); + } + return null; + } + + private static byte[] encodeMD5Raw(File file) { + try { + MessageDigest digest = MessageDigest.getInstance("MD5"); + digest.reset(); + InputStream in = new BufferedInputStream(new FileInputStream(file), 2048); + byte buf [] = new byte[2048]; + int n; + while ((n = in.read(buf)) > 0) digest.update(buf, 0, n); + in.close(); + // now compute the hex-representation of the md5 digest + return digest.digest(); + } catch (java.security.NoSuchAlgorithmException e) { + System.out.println("Internal Error at md5:" + e.getMessage()); + } catch 
(java.io.FileNotFoundException e) { + System.out.println("file not found:" + file.toString()); + } catch (java.io.IOException e) { + System.out.println("file error with " + file.toString() + ": " + e.getMessage()); + } + return null; + } + + public static void main(String[] s) { + serverCodings b64 = new serverCodings(true); + if (s.length == 0) {System.out.println("usage: -[ec|dc|es|ds] "); System.exit(0);} + if (s[0].equals("-ec")) { + // generate a b64 encoding from a given cardinal + System.out.println(b64.encodeBase64Long(Long.parseLong(s[1]), 4)); + } + if (s[0].equals("-dc")) { + // generate a b64 decoding from a given cardinal + System.out.println(b64.decodeBase64Long(s[1])); + } + if (s[0].equals("-es")) { + // generate a b64 encoding from a given string + System.out.println(b64.encodeBase64String(s[1])); + } + if (s[0].equals("-ds")) { + // generate a b64 decoding from a given string + System.out.println(b64.decodeBase64String(s[1])); + } + } + +} diff --git a/source/de/anomic/server/serverCore.java b/source/de/anomic/server/serverCore.java new file mode 100644 index 000000000..769d79a10 --- /dev/null +++ b/source/de/anomic/server/serverCore.java @@ -0,0 +1,654 @@ +// serverCore.java +// ------------------------------------------- +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2002-2004 +// last major change: 09.03.2004 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. 
+// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. +// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. 
+ +package de.anomic.server; + +// standard server +import java.io.*; +import java.net.*; +import java.lang.*; +import java.util.*; +import java.lang.reflect.*; + +// needed for ssl +import javax.net.*; +import javax.net.ssl.*; +import java.security.KeyStore; +import javax.security.cert.X509Certificate; + +public class serverCore extends serverAbstractThread implements serverThread { + + // generic input/output static methods + public static final byte cr = 13; + public static final byte lf = 10; + public static final byte[] crlf = {cr, lf}; + public static final String crlfString = new String(crlf); + + // static variables + public static final Boolean TERMINATE_CONNECTION = Boolean.FALSE; + public static final Boolean RESUME_CONNECTION = Boolean.TRUE; + + // class variables + private int port; // the listening port + private ServerSocket socket; // listener + private int maxSessions = 0; // max. number of sessions; 0=unlimited + private serverLog log; // log object + //private serverSwitch switchboard; // external values + private int timeout; // connection time-out of the socket + private Hashtable activeThreads; // contains the active threads + private Hashtable sleepingThreads; // contains the threads that are alive since the sleepthreashold + private boolean termSleepingThreads; // if true then threads over sleepthreashold are killed + private int thresholdActive = 5000; // after that time a thread should have got a command line + private int thresholdSleep = 30000; // after that time a thread is considered as beeing sleeping (30 seconds) + private int thresholdDead = 3600000; // after that time a thread is considered as beeing dead-locked (1 hour) + private serverHandler handlerPrototype;// the command class (a serverHandler) + private Class[] initHandlerClasses; // the init's methods arguments + private Class[] initSessionClasses; // the init's methods arguments + private serverSwitch switchboard; // the command class switchboard + private Hashtable 
denyHost; + private int commandMaxLength; + + private static ServerSocketFactory getServerSocketFactory(boolean dflt, File keyfile, String passphrase) { + // see doc's at + // http://java.sun.com/developer/technicalArticles/Security/secureinternet/ + if (dflt) { + return ServerSocketFactory.getDefault(); + } else { + SSLServerSocketFactory ssf = null; + try { + // set up key manager to do server authentication + SSLContext ctx; + KeyManagerFactory kmf; + KeyStore ks; + char[] pp = passphrase.toCharArray(); + + // open keystore + ks = KeyStore.getInstance("JKS"); + ks.load(new FileInputStream(keyfile), pp); + + // get a KeyManager Factory + String algorithm = KeyManagerFactory.getDefaultAlgorithm(); // should be "SunX509" + kmf = KeyManagerFactory.getInstance(algorithm); + kmf.init(ks, pp); + + // create a ssl context with the keyManager Factory + //ctx = SSLContext.getInstance("TLS"); + ctx = SSLContext.getInstance("SSLv3"); + + ctx.init(kmf.getKeyManagers(), null, null); + + ssf = ctx.getServerSocketFactory(); + return ssf; + } catch (Exception e) { + e.printStackTrace(); + return null; + } + } + } + + // class initializer + public serverCore(int port, int maxSessions, int timeout, + boolean termSleepingThreads, boolean blockAttack, + serverHandler handlerPrototype, serverSwitch switchboard, + int commandMaxLength, int logl) throws IOException { + this.port = port; + this.commandMaxLength = commandMaxLength; + this.denyHost = (blockAttack) ? new Hashtable() : null; + + /* + try { + ServerSocketFactory ssf = getServerSocketFactory(false, new File("D:\\dev\\proxy\\addon\\testkeys"), "passphrase"); + this.socket = ssf.createServerSocket(port); + //((SSLServerSocket) this.socket ).setNeedClientAuth(true); + } catch (java.net.BindException e) { + System.out.println("FATAL ERROR: " + e.getMessage() + " - probably root access rights needed. 
check port number"); System.exit(0); + } + */ + + try { + this.socket = new ServerSocket(port); + } catch (java.net.BindException e) { + System.out.println("FATAL ERROR: " + e.getMessage() + " - probably root access rights needed. check port number"); System.exit(0); + } + + try { + this.handlerPrototype = handlerPrototype; + this.switchboard = switchboard; + this.initHandlerClasses = new Class[] {Class.forName("de.anomic.server.serverSwitch")}; + this.initSessionClasses = new Class[] {Class.forName("de.anomic.server.serverCore$Session")}; + this.maxSessions = maxSessions; + this.socket.setSoTimeout(0); // unlimited + this.timeout = timeout; + this.termSleepingThreads = termSleepingThreads; + this.log = new serverLog("SERVER", logl); + activeThreads = new Hashtable(); + sleepingThreads = new Hashtable(); + } catch (java.lang.ClassNotFoundException e) { + System.out.println("FATAL ERROR: " + e.getMessage() + " - Class Not Found"); System.exit(0); + } + } + + public static boolean isNotLocal(URL url) { + return isNotLocal(url.getHost()); + } + + private static boolean isNotLocal(String ip) { + if ((ip.equals("localhost")) || + (ip.startsWith("127")) || + (ip.startsWith("192.168")) || + (ip.startsWith("10.")) + ) return false; + return true; + } + + public static InetAddress publicIP() { + try { + // list all addresses + //InetAddress[] ia = InetAddress.getAllByName("localhost"); + InetAddress[] ia = InetAddress.getAllByName(InetAddress.getLocalHost().getHostName()); + //for (int i = 0; i < ia.length; i++) System.out.println("IP: " + ia[i].getHostAddress()); // DEBUG + if (ia.length == 0) { + try { + return InetAddress.getLocalHost(); + } catch (UnknownHostException e) { + try { + return InetAddress.getByName("127.0.0.1"); + } catch (UnknownHostException ee) { + return null; + } + } + } + if (ia.length == 1) { + // only one network connection available + return ia[0]; + } + // we have more addresses, find an address that is not local + int b0, b1; + for (int i = 0; i 
< ia.length; i++) { + b0 = 0Xff & ia[i].getAddress()[0]; + b1 = 0Xff & ia[i].getAddress()[1]; + if ((b0 != 10) && // class A reserved + (b0 != 127) && // loopback + ((b0 != 172) || (b1 < 16) || (b1 > 31)) && // class B reserved + ((b0 != 192) || (b0 != 168)) && // class C reserved + (ia[i].getHostAddress().indexOf(":") < 0) + ) return ia[i]; + } + // there is only a local address, we filter out the possibly returned loopback address 127.0.0.1 + for (int i = 0; i < ia.length; i++) { + if (((0Xff & ia[i].getAddress()[0]) != 127) && + (ia[i].getHostAddress().indexOf(":") < 0)) return ia[i]; + } + // if all fails, give back whatever we have + for (int i = 0; i < ia.length; i++) { + if (ia[i].getHostAddress().indexOf(":") < 0) return ia[i]; + } + return ia[0]; + } catch (java.net.UnknownHostException e) { + System.err.println("ERROR: (internal) " + e.getMessage()); + return null; + } + } + + public void open() { + log.logSystem("* server started on " + publicIP() + ":" + port); + } + + // class body + public boolean job() throws Exception { + // prepare for new connection + idleThreadCheck(); + switchboard.handleBusyState(activeThreads.size()); + + log.logDebug( + "* waiting for connections, " + activeThreads.size() + " sessions running, " + + sleepingThreads.size() + " sleeping"); + + // list all connection (debug) + /* + if (activeThreads.size() > 0) { + Enumeration threadEnum = activeThreads.keys(); + Session se; + long time; + while (threadEnum.hasMoreElements()) { + se = (Session) threadEnum.nextElement(); + time = System.currentTimeMillis() - ((Long) activeThreads.get(se)).longValue(); + log.logDebug("* ACTIVE SESSION (" + ((se.isAlive()) ? 
"alive" : "dead") + ", " + time + "): " + se.request); + } + } + */ + + // wait for new connection + announceThreadBlockApply(); + Socket controlSocket = socket.accept(); + announceThreadBlockRelease(); + if ((denyHost == null) || (denyHost.get((""+controlSocket.getInetAddress().getHostAddress())) == null)) { + //log.logDebug("* catched request from " + controlSocket.getInetAddress().getHostAddress()); + controlSocket.setSoTimeout(timeout); + + Session connection = new Session(controlSocket); + // start the thread + connection.start(); + //try {Thread.currentThread().sleep(1000);} catch (InterruptedException e) {} // wait for debug + activeThreads.put(connection, new Long(System.currentTimeMillis())); + //log.logDebug("* NEW SESSION: " + connection.request); + + } else { + System.out.println("ACCESS FROM " + controlSocket.getInetAddress().getHostAddress() + " DENIED"); + } + // idle until number of maximal threads is (again) reached + //synchronized(this) { + while ((maxSessions > 0) && (activeThreads.size() >= maxSessions)) try { + log.logDebug("* Waiting for activeThreads=" + activeThreads.size() + " < maxSessions=" + maxSessions); + Thread.currentThread().sleep(2000); + idleThreadCheck(); + } catch (InterruptedException e) {} + return true; + } + + public void close() { + log.logSystem("* terminated"); + } + + public int getJobCount() { + return activeThreads.size(); + } + + // idle sensor: the thread is idle if there are no sessions running + public boolean idle() { + idleThreadCheck(); + return (activeThreads.size() == 0); + } + + public void idleThreadCheck() { + // a 'garbage collector' for session threads + Enumeration threadEnum; + Session session; + + // look for sleeping threads + threadEnum = activeThreads.keys(); + long time; + while (threadEnum.hasMoreElements()) { + session = (Session) (threadEnum.nextElement()); + //if (session.request == null) session.interrupt(); + if (session.isAlive()) { + // check if socket still exists + time = 
System.currentTimeMillis() - ((Long) activeThreads.get(session)).longValue(); + if (/*(session.controlSocket.isClosed()) || */ + (!(session.controlSocket.isBound())) || + (!(session.controlSocket.isConnected())) || + ((session.request == null) && (time > 1000))) { + // kick it + try { + session.out.close(); + session.in.close(); + session.controlSocket.close(); + } catch (IOException e) {} + session.interrupt(); // hopefully this wakes him up. + activeThreads.remove(session); + String reason = ""; + if (session.controlSocket.isClosed()) reason = "control socked closed"; + if (!(session.controlSocket.isBound())) reason = "control socked unbound"; + if (!(session.controlSocket.isConnected())) reason = "control socked not connected"; + if (session.request == null) reason = "no request placed"; + log.logDebug("* canceled disconnected connection (" + reason + ") '" + session.request + "'"); + } else if (time > thresholdSleep) { + // move thread from the active threads to the sleeping + sleepingThreads.put(session, activeThreads.remove(session)); + log.logDebug("* sleeping connection '" + session.request + "'"); + } else if ((time > thresholdActive) && (session.request == null)) { + // thread is not in use (or too late). kickk it. + try { + session.out.close(); + session.in.close(); + session.controlSocket.close(); + } catch (IOException e) {} + session.interrupt(); // hopefully this wakes him up. 
+ activeThreads.remove(session); + log.logDebug("* canceled inactivated connection"); + } + } else { + // the thread is dead, remove it + log.logDebug("* normal close of connection to '" + session.request + "', time=" + session.getTime()); + activeThreads.remove(session); + } + } + + // look for dead threads + threadEnum = sleepingThreads.keys(); + while (threadEnum.hasMoreElements()) { + session = (Session) (threadEnum.nextElement()); + if (session.isAlive()) { + // check the age of the thread + if (System.currentTimeMillis() - ((Long) sleepingThreads.get(session)).longValue() > thresholdDead) { + // kill the thread + if (termSleepingThreads) { + try { + session.out.close(); + session.in.close(); + session.controlSocket.close(); + } catch (IOException e) {} + session.interrupt(); // hopefully this wakes him up. + } + sleepingThreads.remove(session); + log.logDebug("* out-timed connection '" + session.request + "'"); + } + } else { + // the thread is dead, remove it + sleepingThreads.remove(session); + log.logDebug("* dead connection '" + session.request + "'"); + } + } + + } + + public class Session extends Thread { + + private long start; // startup time + private serverHandler commandObj; + private String request; // current command line + private int commandCounter; // for logging: number of commands in this session + private String identity; // a string that identifies the client (i.e. 
ftp: account name) + //private boolean promiscuous; // if true, no lines are read and streams are only passed + public Socket controlSocket; // dialog socket + public InetAddress userAddress; // the address of the client + public PushbackInputStream in; // on control input stream + public OutputStream out; // on control output stream, autoflush + + public Session(Socket controlSocket) throws IOException { + //this.promiscuous = false; + this.start = System.currentTimeMillis(); + //log.logDebug("* session " + handle + " allocated"); + this.identity = "-"; + this.userAddress = controlSocket.getInetAddress(); + String ipname = userAddress.getHostAddress(); + // check if we want to allow this socket to connect us + this.controlSocket = controlSocket; + this.in = new PushbackInputStream(controlSocket.getInputStream()); + this.out = controlSocket.getOutputStream(); + commandCounter = 0; + // initiate the command class + // we pass the input and output stream to the commands, + // so that they can take over communication, if needed + try { + // use the handler prototype to create a new command object class + commandObj = (serverHandler) handlerPrototype.clone(); + commandObj.initSession(this); + } catch (Exception e) { + e.printStackTrace(); + } + //log.logDebug("* session " + handle + " initialized. time = " + (System.currentTimeMillis() - handle)); + } + + public long getTime() { + return System.currentTimeMillis() - start; + } + + public void setIdentity(String id) { + this.identity = id; + } + + /* + public void setPromiscuous() { + this.promiscuous = true; + } + */ + + public void log(boolean outgoing, String request) { + log.logInfo(userAddress.getHostAddress() + "/" + this.identity + " " + + "[" + activeThreads.size() + ", " + commandCounter + + ((outgoing) ? 
"] > " : "] < ") + + request); + } + + public void writeLine(String messg) throws IOException { + send(out, messg); + log(true, messg); + } + + public byte[] readLine() { + return receive(in, timeout, commandMaxLength, false); + } + + public final void run() { + //log.logDebug("* session " + handle + " started. time = " + (System.currentTimeMillis() - handle)); + try { + listen(); + } finally { + try { + out.flush(); + // close everything + out.close(); + in.close(); + controlSocket.close(); + } catch (IOException e) { + System.err.println("ERROR: (internal) " + e); + } + synchronized (this) {this.notify();} + } + //log.logDebug("* session " + handle + " completed. time = " + (System.currentTimeMillis() - handle)); + announceMoreExecTime(System.currentTimeMillis() - start); + } + + private void listen() { + try { + // set up some reflection + Class[] stringType = {"".getClass()}; + Class[] exceptionType = {Class.forName("java.lang.Throwable")}; + + // send greeting + Object result = commandObj.greeting(); + if (result != null) { + if ((result instanceof String) && (((String) result).length() > 0)) writeLine((String) result); + } + + // start dialog + byte[] requestBytes = null; + boolean terminate = false; + int pos; + String cmd; + String tmp; + Object[] stringParameter = new String[1]; + while ((in != null) && ((requestBytes = readLine()) != null)) { + commandCounter++; + request = new String(requestBytes); + //log.logDebug("* session " + handle + " received command '" + request + "'. 
time = " + (System.currentTimeMillis() - handle)); + log(false, request); + try { + pos = request.indexOf(' '); + if (pos < 0) { + cmd = request.trim().toUpperCase(); + stringParameter[0] = ""; + } else { + cmd = request.substring(0, pos).trim().toUpperCase(); + stringParameter[0] = request.substring(pos).trim(); + } + + // exec command and return value + result = commandObj.getClass().getMethod(cmd, stringType).invoke(commandObj, stringParameter); + //log.logDebug("* session " + handle + " completed command '" + request + "'. time = " + (System.currentTimeMillis() - handle)); + this.out.flush(); + if (result == null) { + /* + log(2, true, "(NULL RETURNED/STREAM PASSED)"); + */ + } else if (result instanceof Boolean) { + if (((Boolean) result) == TERMINATE_CONNECTION) break; + } else if (result instanceof String) { + if (((String) result).startsWith("!")) { + result = ((String) result).substring(1); + terminate = true; + } + writeLine((String) result); + } else if (result instanceof InputStream) { + tmp = send(out, (InputStream) result); + if ((tmp.length() > 4) && (tmp.toUpperCase().startsWith("PASS"))) { + log(true, "PASS ********"); + } else { + log(true, tmp); + } + tmp = null; + } + if (terminate) break; + + } catch (InvocationTargetException ite) { + System.out.println("ERROR A " + userAddress.getHostAddress()); + // we extract a target exception and let the thread survive + writeLine((String) commandObj.error(ite.getTargetException())); + } catch (NoSuchMethodException nsme) { + System.out.println("ERROR B " + userAddress.getHostAddress()); + if (isNotLocal(userAddress.getHostAddress().toString())) { + if (denyHost != null) + denyHost.put((""+userAddress.getHostAddress()), "deny"); // block client: hacker attempt + } + break; + // the client requested a command that does not exist + //Object[] errorParameter = { nsme }; + //writeLine((String) error.invoke(this.cmdObject, errorParameter)); + } catch (IllegalAccessException iae) { + System.out.println("ERROR C 
" + userAddress.getHostAddress()); + // wrong parameters: this an only be an internal problem + writeLine((String) commandObj.error(iae)); + } catch (java.lang.ClassCastException e) { + System.out.println("ERROR D " + userAddress.getHostAddress()); + // ?? + writeLine((String) commandObj.error(e)); + } catch (Exception e) { + System.out.println("ERROR E " + userAddress.getHostAddress()); + // whatever happens: the thread has to survive! + writeLine("UNKNOWN REASON:" + (String) commandObj.error(e)); + } + } + } catch (java.lang.ClassNotFoundException e) { + System.out.println("Internal Error: wrapper class not found: " + e.getMessage()); + System.exit(0); + } catch (java.io.IOException e) { + // connection interruption: more or less normal + } + } + + } + + public static byte[] receive(PushbackInputStream pbis, long timeout, int maxSize, boolean logerr) { + // this is essentially a readln on a PushbackInputStream + int bufferSize = 0; + bufferSize = 10; + + try { + long t = timeout; + while (((bufferSize = pbis.available()) == 0) && (t > 0)) try { + Thread.currentThread().sleep(100); + t -= 100; + } catch (InterruptedException e) {} + if (t <= 0) { + if (logerr) serverLog.logError("SERVER", "receive interrupted - timeout"); + return null; + } + if (bufferSize == 0) { + if (logerr) serverLog.logError("SERVER", "receive interrupted - buffer empty"); + return null; + } + } catch (IOException e) { + if (logerr) serverLog.logError("SERVER", "receive interrupted - exception 1 = " + e.getMessage()); + return null; + } + + byte[] buffer = new byte[bufferSize]; + byte[] bufferBkp; + bufferSize = 0; + int b = 0; + + try { + while ((b = pbis.read()) > 31) { + // we have a valid byte in b, add it to the buffer + if (buffer.length == bufferSize) { + // the buffer is full, double its size + bufferBkp = buffer; + buffer = new byte[bufferSize * 2]; + java.lang.System.arraycopy(bufferBkp, 0, buffer, 0, bufferSize); + bufferBkp = null; + } + //if (bufferSize > 10000) 
{System.out.println("***ERRORDEBUG***:" + new String(buffer));} // debug + buffer[bufferSize++] = (byte) b; // error hier: ArrayIndexOutOfBoundsException: -2007395416 oder 0 + if (bufferSize > maxSize) break; + } + // we have catched a possible line end + if (b == cr) { + // maybe a lf follows, read it: + if ((b = pbis.read()) != lf) if (b >= 0) pbis.unread(b); // we push back the byte + } + + // finally shrink buffer + bufferBkp = buffer; + buffer = new byte[bufferSize]; + java.lang.System.arraycopy(bufferBkp, 0, buffer, 0, bufferSize); + bufferBkp = null; + + // return only the byte[] + return buffer; + } catch (IOException e) { + if (logerr) serverLog.logError("SERVER", "receive interrupted - exception 2 = " + e.getMessage()); + return null; + } + } + + public static void send(OutputStream os, String buf) throws IOException { + os.write(buf.getBytes()); + os.write(crlf); + os.flush(); + } + + public static void send(OutputStream os, byte[] buf) throws IOException { + os.write(buf); + os.write(crlf); + os.flush(); + } + + public static String send(OutputStream os, InputStream is) throws IOException { + int bufferSize = is.available(); + byte[] buffer = new byte[((bufferSize < 1) || (bufferSize > 4096)) ? 
4096 : bufferSize]; + int l; + while ((l = is.read(buffer)) > 0) {os.write(buffer, 0, l);} + os.write(crlf); + os.flush(); + if (bufferSize > 80) return ""; else return new String(buffer); + } + +} diff --git a/source/de/anomic/server/serverDate.java b/source/de/anomic/server/serverDate.java new file mode 100644 index 000000000..85de40f9e --- /dev/null +++ b/source/de/anomic/server/serverDate.java @@ -0,0 +1,256 @@ +// serverDate.java +// ------------------------------------------- +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2005 +// last major change: 14.03.2005 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. 
The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. +// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. + + +// this class is needed to replace the slow java built-in date method by a faster version + +package de.anomic.server; + +import java.lang.*; +import java.util.*; +import java.text.*; + +public class serverDate { + + + // statics + private final static long secondMillis = 1000; + private final static long minuteMillis = 60 * secondMillis; + private final static long hourMillis = 60 * minuteMillis; + private final static long dayMillis = 24 * hourMillis; + private final static long normalyearMillis = 365 * dayMillis; + private final static long leapyearMillis = 366 * dayMillis; + private final static int january = 31, normalfebruary = 28, leapfebruary = 29, march = 31, + april = 30, may = 31, june = 30, july = 31, august = 31, + september = 30, october = 31, november = 30, december = 31; + private final static int[] dimnormal = {january, normalfebruary, march, april, may, june, july, august, september, october, november, december}; + private final static int[] dimleap = {january, leapfebruary, march, april, may, june, july, august, september, october, november, december}; + private final static 
String[] wkday = {"Mon","Tue","Wed","Thu","Fri","Sat","Sun"}; + private final static String[] month = {"Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec"}; + + // pre-calculation of time tables + private final static long[] dimnormalacc, dimleapacc; + private static long[] utimeyearsacc; + static { + long millis = 0; + utimeyearsacc = new long[67]; + for (int i = 0; i < 67; i++) { + utimeyearsacc[i] = millis; + millis += ((i & 3) == 0) ? leapyearMillis : normalyearMillis; + } + millis = 0; + dimnormalacc = new long[12]; + for (int i = 0; i < 12; i++) { + dimnormalacc[i] = millis; + millis += (dayMillis * dimnormal[i]); + } + millis = 0; + dimleapacc = new long[12]; + for (int i = 0; i < 12; i++) { + dimleapacc[i] = millis; + millis += (dayMillis * dimleap[i]); + } + } + + // class variables + private int milliseconds, seconds, minutes, hours, days, months, years; // years since 1970 + private int dow; // day-of-week + private long utime; + + public serverDate() { + this(System.currentTimeMillis()); + } + + public serverDate(long utime) { + // set the time as the difference, measured in milliseconds, + // between the current time and midnight, January 1, 1970 UTC/GMT + this.utime = utime; + dow = (int) (((utime / dayMillis) + 3) % 7); + years = (int) (utime / normalyearMillis); // a guess + if (utime < utimeyearsacc[years]) years--; // the correction + long remain = utime - utimeyearsacc[years]; + months = (int) (remain / (29 * dayMillis)); // a guess + if ((years & 3) == 0) { + if (remain < dimleapacc[months]) months--; // correction + remain = remain - dimleapacc[months]; + } else { + if (remain < dimnormalacc[months]) months--; // correction + remain = remain - dimnormalacc[months]; + } + days = (int) (remain / dayMillis); remain = remain % dayMillis; + hours = (int) (remain / hourMillis); remain = remain % hourMillis; + minutes = (int) (remain / minuteMillis); remain = remain % minuteMillis; + seconds = (int) (remain / secondMillis); remain 
= remain % secondMillis; + milliseconds = (int) remain; + } + + private void calcUTime() { + this.utime = utimeyearsacc[years] + dimleapacc[months - 1] + dayMillis * (days - 1) + + hourMillis * hours + minuteMillis * minutes + secondMillis * seconds + milliseconds; + this.dow = (int) (((utime / dayMillis) + 3) % 7); + } + + public serverDate(String datestring) throws java.text.ParseException { + // parse a date string; othervise throw a java.text.ParseException + if ((datestring.length() == 14) || (datestring.length() == 17)) { + // parse a ShortString + try {years = Integer.parseInt(datestring.substring(0, 4)) - 1970;} catch (NumberFormatException e) { + throw new java.text.ParseException("serverDate '" + datestring + "' wrong year", 0); + } + if (years < 0) throw new java.text.ParseException("serverDate '" + datestring + "' wrong year", 0); + try {months = Integer.parseInt(datestring.substring(4, 6)) - 1;} catch (NumberFormatException e) { + throw new java.text.ParseException("serverDate '" + datestring + "' wrong month", 4); + } + if ((months < 0) || (months > 11)) throw new java.text.ParseException("serverDate '" + datestring + "' wrong month", 4); + try {days = Integer.parseInt(datestring.substring(6, 8)) - 1;} catch (NumberFormatException e) { + throw new java.text.ParseException("serverDate '" + datestring + "' wrong day", 6); + } + if ((days < 0) || (days > 30)) throw new java.text.ParseException("serverDate '" + datestring + "' wrong day", 6); + try {hours = Integer.parseInt(datestring.substring(8, 10));} catch (NumberFormatException e) { + throw new java.text.ParseException("serverDate '" + datestring + "' wrong hour", 8); + } + if ((hours < 0) || (hours > 23)) throw new java.text.ParseException("serverDate '" + datestring + "' wrong hour", 8); + try {minutes = Integer.parseInt(datestring.substring(10, 12));} catch (NumberFormatException e) { + throw new java.text.ParseException("serverDate '" + datestring + "' wrong minute", 10); + } + if ((minutes < 0) 
|| (minutes > 59)) throw new java.text.ParseException("serverDate '" + datestring + "' wrong minute", 10); + try {seconds = Integer.parseInt(datestring.substring(12, 14));} catch (NumberFormatException e) { + throw new java.text.ParseException("serverDate '" + datestring + "' wrong second", 12); + } + if ((seconds < 0) || (seconds > 59)) throw new java.text.ParseException("serverDate '" + datestring + "' wrong second", 12); + if (datestring.length() == 17) { + try {milliseconds = Integer.parseInt(datestring.substring(14, 17));} catch (NumberFormatException e) { + throw new java.text.ParseException("serverDate '" + datestring + "' wrong millisecond", 14); + } + } else { + milliseconds = 0; + } + if ((milliseconds < 0) || (milliseconds > 999)) throw new java.text.ParseException("serverDate '" + datestring + "' wrong millisecond", 14); + calcUTime(); + return; + } + throw new java.text.ParseException("serverDate '" + datestring + "' format unknown", 0); + } + + public String toString() { + return "utime=" + utime + ", year=" + (years + 1970) + + ", month=" + (months + 1) + ", day=" + (days + 1) + + ", hour=" + hours + ", minute=" + minutes + + ", second=" + seconds + ", millis=" + milliseconds + + ", day-of-week=" + wkday[dow]; + } + + public String toShortString(boolean millis) { + // returns a "yyyyMMddHHmmssSSS" + byte[] result = new byte[(millis) ? 
17 : 14]; + int x = 1970 + years; + result[ 0] = (byte) (48 + (x / 1000)); x = x % 1000; + result[ 1] = (byte) (48 + (x / 100)); x = x % 100; + result[ 2] = (byte) (48 + (x / 10)); x = x % 10; + result[ 3] = (byte) (48 + x); + x = months + 1; + result[ 4] = (byte) (48 + (x / 10)); + result[ 5] = (byte) (48 + (x % 10)); + x = days + 1; + result[ 6] = (byte) (48 + (x / 10)); + result[ 7] = (byte) (48 + (x % 10)); + result[ 8] = (byte) (48 + (hours / 10)); + result[ 9] = (byte) (48 + (hours % 10)); + result[10] = (byte) (48 + (minutes / 10)); + result[11] = (byte) (48 + (minutes % 10)); + result[12] = (byte) (48 + (seconds / 10)); + result[13] = (byte) (48 + (seconds % 10)); + if (millis) { + x = milliseconds; + result[14] = (byte) (48 + (x / 100)); x = x % 100; + result[15] = (byte) (48 + (x / 10)); x = x % 10; + result[16] = (byte) (48 + x); + } + return new String(result); + } + + /* + private static String format(int c, int len) { + String s = "" + c; + while (s.length() < len) s = "0" + s; + return s; + } + */ + + // the following is only here to compare the kelondroDate with java-Date: + private static TimeZone GMTTimeZone = TimeZone.getTimeZone("GMT"); + private static Calendar gregorian = new GregorianCalendar(GMTTimeZone); + private static SimpleDateFormat testSFormatter = new SimpleDateFormat("yyyyMMddHHmmss"); + private static SimpleDateFormat testLFormatter = new SimpleDateFormat("EEE, dd MMM yyyy HH:mm:ss 'GMT'", Locale.US); + + public static String testSDateShortString() { + return testSFormatter.format(gregorian.getTime()); + } + + public static void main(String[] args) { + //System.out.println("kelondroDate is (" + new kelondroDate().toString() + ")"); + System.out.println("serverDate : " + new serverDate().toShortString(false)); + System.out.println(" javaDate : " + testSDateShortString()); + System.out.println("serverDate : " + new serverDate().toString()); + System.out.println(" JavaDate : " + testLFormatter.format(new Date())); + 
System.out.println("serverDate0: " + new serverDate(0).toShortString(false)); + System.out.println(" JavaDate0: " + testSFormatter.format(new Date(0))); + System.out.println("serverDate0: " + new serverDate(0).toString()); + System.out.println(" JavaDate0: " + testLFormatter.format(new Date(0))); + // parse test + try { + System.out.println("serverDate re-parse short: " + new serverDate(new serverDate().toShortString(false)).toShortString(true)); + System.out.println("serverDate re-parse long : " + new serverDate(new serverDate().toShortString(true)).toShortString(true)); + } catch (java.text.ParseException e) { + System.out.println("Parse Exception: " + e.getMessage() + ", pos " + e.getErrorOffset()); + } + String testresult; + int cycles = 10000; + long start; + + start = System.currentTimeMillis(); + for (int i = 0; i < cycles; i++) testresult = new serverDate().toShortString(false); + System.out.println("time for " + cycles + " calls to serverDate:" + (System.currentTimeMillis() - start) + " milliseconds"); + + start = System.currentTimeMillis(); + for (int i = 0; i < cycles; i++) testresult = testSDateShortString(); + System.out.println("time for " + cycles + " calls to javaDate:" + (System.currentTimeMillis() - start) + " milliseconds"); + } +} diff --git a/source/de/anomic/server/serverFileUtils.java b/source/de/anomic/server/serverFileUtils.java new file mode 100644 index 000000000..9e96ec3b6 --- /dev/null +++ b/source/de/anomic/server/serverFileUtils.java @@ -0,0 +1,99 @@ +// serverFileUtils.java +// ------------------------------------------- +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2004 +// last major change: 05.08.2004 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. 
+// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. +// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. 
+ +package de.anomic.server; + +import java.io.*; + +public class serverFileUtils { + + public static void copy(InputStream source, OutputStream dest) throws IOException { + byte[] buffer = new byte[4096]; + int c; + while ((c = source.read(buffer)) > 0) dest.write(buffer, 0, c); + dest.flush(); + } + + public static void copy(InputStream source, File dest) throws IOException { + FileOutputStream fos = new FileOutputStream(dest); + copy(source, fos); + fos.close(); + } + + public static void copy(File source, OutputStream dest) throws IOException { + InputStream fis = new FileInputStream(source); + copy(fis, dest); + fis.close(); + } + + public static void copy(File source, File dest) throws IOException { + FileInputStream fis = new FileInputStream(source); + FileOutputStream fos = new FileOutputStream(dest); + copy(fis, fos); + fis.close(); + fos.close(); + } + + public static byte[] read(InputStream source) throws IOException { + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + copy(source, baos); + baos.close(); + return baos.toByteArray(); + } + + public static byte[] read(File source) throws IOException { + byte[] buffer = new byte[(int) source.length()]; + InputStream fis = new FileInputStream(source); + int p = 0; + int c; + while ((c = fis.read(buffer, p, buffer.length - p)) > 0) p += c; + fis.close(); + return buffer; + } + + public static void write(byte[] source, OutputStream dest) throws IOException { + copy(new ByteArrayInputStream(source), dest); + } + + public static void write(byte[] source, File dest) throws IOException { + copy(new ByteArrayInputStream(source), dest); + } + +} diff --git a/source/de/anomic/server/serverHandler.java b/source/de/anomic/server/serverHandler.java new file mode 100644 index 000000000..aa94508e5 --- /dev/null +++ b/source/de/anomic/server/serverHandler.java @@ -0,0 +1,113 @@ +// serverHandler.java +// ------------------------------------------- +// (C) by Michael Peter Christen; mc@anomic.de +// first 
published on http://www.anomic.de +// Frankfurt, Germany, 2004 +// last major change: 05.04.2004 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. +// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. 
+// Contributions and changes to the program code must be marked as such. + +/* + serverHandler: + + A Generic Server becomes a server for s specific protocol by impementation of + a corresponding handler class. The handler class provides methods for each + command of the protocol that is implemented. + The Handler class is assigned to the serverCore by passing the handlers + name to the serverCore upon initialization. + Example: + serverCore server = new serverCore(port, 1000, 0, false, "ftpdProtocol", null, 0); + In this example the protocol handler "ftpdProtocol" is assigned. There a class + named ftpdProtocol.java must be implemented, that implements this interface, + a serverHandler. + Any protocol command can be implemented in either way: + + public String COMMAND(String arg) throws IOException; + public InputStream COMMAND(String arg) throws IOException; + public void COMMAND(String arg) throws IOException; + + ..where COMMAND is the command that had been passed to the server + on the terminal connection. The 'arg' argument is the remaining part of + the command on the terminal connection. + If the handler method returns a NULL value, which is especially + the case if the method implements a 'void' return-value method, + then the server disconnects the connection. + Any other return value (String or an InputStream) is returned to + the client on it's own line through the terminal connection. + If it is wanted that the server terminates right after submitting + a last line, then this can be indicated by prefixing the return + value by a '!'-character. + + If one of the command methods throws a IOException, then the + server asks the error - method for a return value on the terminal + connection. + + The greeting-method is used to request a string that is transmitted + to the client as terminal output at the beginning of a connection + session. 
+*/ + +package de.anomic.server; + +import java.io.*; + +public interface serverHandler { + + // init method for static variables of the handler + // this method shall be called only once + // information that is passed here is cloned for every new instance + //public void initHandler(serverSwitch switchboard) throws java.io.IOException; + + // an init method that the server calls to provide hooks and + // information to the session's sockets and information + // the Switchboard allowes to trigger events through all sessions + // and an supervision process. + // this method shall be called only once + public void initSession(serverCore.Session session) throws java.io.IOException; + + // a response line upon connection is send to client + // if no response line is wanted, return "" or null + public String greeting(); + + // return string in case of any error that occurs during communication + // is always (but not only) called if an IO-dependent exception occurs. + public String error(Throwable e); + + // clone method for the handler prototype + // each time a server makes a new connection it clones the hanlder prototype + // the clone method does not need to clone every detail of a handler connection, + // but only the necessary one for a newly initialized instance + public Object clone(); + +} diff --git a/source/de/anomic/server/serverInstantThread.java b/source/de/anomic/server/serverInstantThread.java new file mode 100644 index 000000000..e4430d581 --- /dev/null +++ b/source/de/anomic/server/serverInstantThread.java @@ -0,0 +1,77 @@ + +package de.anomic.server; + +import java.lang.reflect.*; + +public class serverInstantThread extends serverAbstractThread implements serverThread { + + private Method jobExecMethod, jobCountMethod; + private Object environment; + + public serverInstantThread(Object env, String jobExec, String jobCount) { + // job is the name of a method of the object 'env' + try { + this.jobExecMethod = env.getClass().getMethod(jobExec, new 
Class[0]); + if (jobCount == null) + this.jobCountMethod = null; + else + this.jobCountMethod = env.getClass().getMethod(jobCount, new Class[0]); + this.environment = env; + this.setName(env.getClass().getName() + "." + jobExec); + } catch (NoSuchMethodException e) { + throw new RuntimeException("Internal Error in serverInstantThread, wrong declaration: " + e.getMessage()); + } + } + + public int getJobCount() { + if (this.jobCountMethod == null) return Integer.MAX_VALUE; + try { + Object result = jobCountMethod.invoke(environment, new Object[0]); + if (result instanceof Integer) + return ((Integer) result).intValue(); + else + return -1; + } catch (IllegalAccessException e) { + return -1; + } catch (IllegalArgumentException e) { + return -1; + } catch (InvocationTargetException e) { + System.out.println("Runtime Error in serverInstantThread, thread '" + this.getName() + "': " + e.getMessage()); + e.printStackTrace(); + return -1; + } + } + + public boolean job() throws Exception { + boolean jobHasDoneSomething = false; + try { + Object result = jobExecMethod.invoke(environment, new Object[0]); + if (result == null) jobHasDoneSomething = true; + else if (result instanceof Boolean) jobHasDoneSomething = ((Boolean) result).booleanValue(); + } catch (IllegalAccessException e) { + System.out.println("Internal Error in serverInstantThread: " + e.getMessage()); + System.out.println("shutting down thread '" + this.getName() + "'"); + this.terminate(false); + } catch (IllegalArgumentException e) { + System.out.println("Internal Error in serverInstantThread: " + e.getMessage()); + System.out.println("shutting down thread '" + this.getName() + "'"); + this.terminate(false); + } catch (InvocationTargetException e) { + System.out.println("Runtime Error in serverInstantThread, thread '" + this.getName() + "': " + e.getMessage()); + e.printStackTrace(); + } + return jobHasDoneSomething; + } + + public static serverThread oneTimeJob(Object env, String jobExec, serverLog log, long 
startupDelay) { + // start the job and execute it once as background process + serverThread thread = new serverInstantThread(env, jobExec, null); + thread.setStartupSleep(startupDelay); + thread.setIdleSleep(-1); + thread.setBusySleep(-1); + thread.setLog(log); + thread.start(); + return thread; + } + +} diff --git a/source/de/anomic/server/serverLog.java b/source/de/anomic/server/serverLog.java new file mode 100644 index 000000000..c7289f668 --- /dev/null +++ b/source/de/anomic/server/serverLog.java @@ -0,0 +1,158 @@ +// serverLog.java +// ------------------------------------- +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2004 +// last major change: 04.08.2004 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. 
The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. +// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. + +package de.anomic.server; + +import java.text.*; +import java.util.*; + +public class serverLog { + + // statics + private static TimeZone GMTTimeZone = TimeZone.getTimeZone("PST"); + private static SimpleDateFormat longFormatter = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss"); + private static SimpleDateFormat shortFormatter = new SimpleDateFormat("yyyyMMddHHmmss"); + + // log-level categories + public static final int LOGLEVEL_ZERO = 0; // no output at all + public static final int LOGLEVEL_FAILURE = 1; // system-level error, internal cause, critical and not fixeable (i.e. inconsistency) + public static final int LOGLEVEL_ERROR = 2; // exceptional error, catcheable and non-critical (i.e. file error) + public static final int LOGLEVEL_WARNING = 3; // uncritical service failure, may require user activity (i.e. input required, wrong authorization) + public static final int LOGLEVEL_SYSTEM = 4; // regular system status information (i.e. start-up messages) + public static final int LOGLEVEL_INFO = 5; // regular action information (i.e. 
any httpd request URL) + public static final int LOGLEVEL_DEBUG = 6; // in-function status debug output + + // these categories are also present as character tokens + public static final char LOGTOKEN_ZERO = 'Z'; + public static final char LOGTOKEN_FAILURE = 'F'; + public static final char LOGTOKEN_ERROR = 'E'; + public static final char LOGTOKEN_WARNING = 'W'; + public static final char LOGTOKEN_SYSTEM = 'S'; + public static final char LOGTOKEN_INFO = 'I'; + public static final char LOGTOKEN_DEBUG = 'D'; + + // an array-wrapped function + private static final char[] l2t = new char[] { + LOGTOKEN_ZERO, LOGTOKEN_FAILURE, LOGTOKEN_ERROR, LOGTOKEN_WARNING, + LOGTOKEN_SYSTEM, LOGTOKEN_INFO, LOGTOKEN_DEBUG + }; + + // statics + private static serverLog genericLog = new serverLog("GENERIC", LOGLEVEL_DEBUG); // generic log + private static LinkedList lastLog = new LinkedList(); // for Web-Interface + private static int lastlogMaxSize = 400; // for Web-Interface + + // class variables + private String appName; + private int logLevel; + + public serverLog(String appName) { + this(appName, LOGLEVEL_DEBUG); + } + + public serverLog(String appName, int logLevel) { + this.logLevel = logLevel; + this.appName = appName; + } + + public serverLog(String appName, char logToken) { + this(appName, t2l(logToken)); + } + + public void setLoglevel(int newLevel) { + this.logLevel = newLevel; + } + + private static int t2l(char token) { + switch (token) { + case LOGTOKEN_ZERO: return LOGLEVEL_ZERO; + case LOGTOKEN_FAILURE: return LOGLEVEL_FAILURE; + case LOGTOKEN_ERROR: return LOGLEVEL_ERROR; + case LOGTOKEN_WARNING: return LOGLEVEL_WARNING; + case LOGTOKEN_SYSTEM: return LOGLEVEL_SYSTEM; + case LOGTOKEN_INFO: return LOGLEVEL_INFO; + case LOGTOKEN_DEBUG: return LOGLEVEL_DEBUG; + } + return LOGLEVEL_DEBUG; + } + + private static String dateLongString() { + return longFormatter.format(new GregorianCalendar(GMTTimeZone).getTime()); + } + + private static String dateShortString() { + return 
shortFormatter.format(new GregorianCalendar(GMTTimeZone).getTime()); + } + + private void log(int messageLevel, String message) { + if (messageLevel <= logLevel) { + System.out.println(l2t[messageLevel] + " " + dateLongString() + " " + appName + " " + message); + lastLog.add(l2t[messageLevel] + " " + dateLongString() + " " + appName + " " + message); + while (lastLog.size() > lastlogMaxSize) lastLog.removeFirst(); + } + } + + public static LinkedList getLastLog(){ + return lastLog; + } + + // class log messages + public void logFailure(String message) {log(LOGLEVEL_FAILURE, message);} + public void logError(String message) {log(LOGLEVEL_ERROR, message);} + public void logWarning(String message) {log(LOGLEVEL_WARNING, message);} + public void logSystem(String message) {log(LOGLEVEL_SYSTEM, message);} + public void logInfo(String message) {log(LOGLEVEL_INFO, message);} + public void logDebug(String message) {log(LOGLEVEL_DEBUG, message);} + + + // static log messages: log everything + private static void log(String appName, int messageLevel, String message) { + genericLog.appName = appName; + genericLog.log(messageLevel, message); + } + + public static void logFailure(String appName, String message) {log(appName, LOGLEVEL_FAILURE, message);} + public static void logError(String appName, String message) {log(appName, LOGLEVEL_ERROR, message);} + public static void logWarning(String appName, String message) {log(appName, LOGLEVEL_WARNING, message);} + public static void logSystem(String appName, String message) {log(appName, LOGLEVEL_SYSTEM, message);} + public static void logInfo(String appName, String message) {log(appName, LOGLEVEL_INFO, message);} + public static void logDebug(String appName, String message) {log(appName, LOGLEVEL_DEBUG, message);} + +} diff --git a/source/de/anomic/server/serverObjects.java b/source/de/anomic/server/serverObjects.java new file mode 100644 index 000000000..d6a87026d --- /dev/null +++ b/source/de/anomic/server/serverObjects.java @@ 
-0,0 +1,171 @@ +// serverObjects.java +// ----------------------- +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2004 +// last major change: 05.06.2004 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. 
+// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. + +/* + Why do we need this Class? + The purpose of this class is to provide a hashtable object to the server + and implementing interfaces. Values to and from cgi pages are encapsulated in + this object. The server shall be executable in a Java 1.0 environment, + so the following other options did not comply: + + Properties - setProperty would be needed, but only available in 1.2 + HashMap, TreeMap - only in 1.2 + Hashtable - available in 1.0, but 'put' does not accept null values + + So this class was created as a convenience. + It will also contain special methods that read data from internet-resources + in the background, while data can already be read out of the object. 
+ This shall speed up usage when a slow internet connection is used (dial-up) +*/ + +package de.anomic.server; + +import java.io.*; +import java.util.*; + +public class serverObjects extends Hashtable implements Cloneable { + + public serverObjects() { + super(); + } + + public serverObjects(Map input) { + super(input); + } + + // new put takes also null values + public Object put(Object key, Object value) { + if (key == null) { + // this does nothing + return null; + } else if (value == null) { + // assigning the null value creates the same effect like removing the element + return super.remove(key); + } else { + return super.put(key, value); + } + } + + // byte[] variant + public byte[] put(String key, byte[] value) { + return (byte[]) this.put((Object) key, (Object) value); + } + + // string variant + public String put(String key, String value) { + return (String) this.put((Object) key, (Object) value); + } + + // long variant + public long put(String key, long value) { + String result = this.put(key, "" + value); + if (result == null) return 0; else try { + return Long.parseLong(result); + } catch (NumberFormatException e) { + return 0; + } + } + + // inc variant: for counters + public long inc(String key) { + String c = (String) super.get(key); + if (c == null) c = "0"; + long l = Long.parseLong(c) + 1; + super.put(key, "" + l); + return l; + } + + // new get with default objects + public Object get(Object key, Object dflt) { + Object result = super.get(key); + if (result == null) return dflt; else return result; + } + + // string variant + public String get(String key, String dflt) { + return (String) this.get((Object) key, (Object) dflt); + } + + // returns a set of all values where their key mappes the keyMapper + public String[] getAll(String keyMapper) { + // the keyMapper may contain regular expressions as defined in String.matches + // this method is particulary useful when parsing the result of checkbox forms + Vector v = new Vector(); + Enumeration e 
= keys(); + String key; + while (e.hasMoreElements()) { + key = (String) e.nextElement(); + if (key.matches(keyMapper)) v.add(get(key)); + } + // make a String[] + String[] result = new String[v.size()]; + for (int i = 0; i < v.size(); i++) result[i] = (String) v.elementAt(i); + return result; + } + + // put all elements of another hastable into the own table + public void putAll(serverObjects add) { + Enumeration e = add.keys(); + Object k; + while (e.hasMoreElements()) { + k = e.nextElement(); + put(k, add.get(k)); + } + } + + // convenience methods for storing and loading to a file system + public void store(File f) throws IOException { + FileOutputStream fos = new FileOutputStream(f); + Enumeration e = keys(); + String key, value; + while (e.hasMoreElements()) { + key = (String) e.nextElement(); + value = ((String) get(key)).replaceAll("\n", "\\\\n"); + fos.write((key + "=" + value + "\r\n").getBytes()); + } + fos.flush(); + fos.close(); + } + + public Object clone() { + return super.clone(); + } + +} \ No newline at end of file diff --git a/source/de/anomic/server/serverSwitch.java b/source/de/anomic/server/serverSwitch.java new file mode 100644 index 000000000..e926371fb --- /dev/null +++ b/source/de/anomic/server/serverSwitch.java @@ -0,0 +1,96 @@ +// serverSwitch.java +// ------------------------------------------- +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2004 +// last major change: 04.02.2004 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. +// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. 
+
+/*
+  This is an interface for possible switchboard implementations.
+  Its purpose is to provide a mechanism which cgi pages can use
+  to influence the behavior of a concurrently running application.
+*/
+
+package de.anomic.server;
+
+import java.net.*;
+import java.util.*;
+
+public interface serverSwitch {
+    // the root path for the application
+    public String getRootPath();
+
+    // the switchboard can be used to set and read properties;
+    // dflt is presumably the value returned when the key is not set (confirm in implementations)
+    public void setConfig(String key, String value);
+    public String getConfig(String key, String dflt);
+    public Enumeration configKeys();
+
+    // the switchboard can manage worker threads;
+    // a deployed thread is addressed by its threadName in the methods below
+    public void deployThread(String threadName, String threadShortDescription, String threadLongDescription,
+                             serverThread newThread, serverLog log,
+                             long startupDelay, long initialIdleSleep, long initialBusySleep);
+    public serverThread getThread(String threadName);
+    public void setThreadSleep(String threadName, long idleMillis, long busyMillis);
+    public void terminateThread(String threadName, boolean waitFor);
+    public void terminateAllThreads(boolean waitFor);
+    public Iterator /*of serverThread-Names (String)*/ threadNames();
+
+    // the switchboard also shall maintain a job list.
+    // jobs can be queued by submitting a job object.
+    // to work off a queued job, use deQueue, which is meant to
+    // work off exactly one job, not all
+    public int queueSize();
+    public void enQueue(Object job);
+    public void deQueue();
+
+    // authentication routines: set and read access attributes according to host addresses
+    // NOTE(review): the parameter name 'rigth' of setAuthentify looks like a typo for 'right'
+    public void setAuthentify(InetAddress host, String user, String rigth);
+    public void removeAuthentify(InetAddress host);
+    public String getAuthentifyUser(InetAddress host);
+    public String getAuthentifyRights(InetAddress host);
+    public void addAuthentifyRight(InetAddress host, String right);
+    public boolean hasAuthentifyRight(InetAddress host, String right);
+
+    // ask the switchboard to perform an action.
+    // the result is a properties structure with the result of the action.
+    // the actionName selects an action,
+    // the actionInput is an input for the selected action
+    public serverObjects action(String actionName, serverObjects actionInput);
+
+    // performance control: the server can announce busy and idle status to the switchboard.
+    // these announcements can be used to trigger events or interrupts
+    public void handleBusyState(int jobs);
+}
diff --git a/source/de/anomic/server/serverSystem.java b/source/de/anomic/server/serverSystem.java
new file mode 100644
index 000000000..d086ac79f
--- /dev/null
+++ b/source/de/anomic/server/serverSystem.java
@@ -0,0 +1,383 @@
+// serverSystem.java
+// -------------------------------------------
+// (C) by Michael Peter Christen; mc@anomic.de
+// first published on http://www.anomic.de
+// Frankfurt, Germany, 2004
+// last major change: 11.03.2004
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//
+// Using this software in any meaning (reading, learning, copying, compiling,
+// running) means that you agree that the Author(s) is (are) not responsible
+// for cost, loss of data or any harm that may be caused directly or indirectly
+// by usage of this softare or this documentation. The usage of this software
+// is on your own risk.
The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. +// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. + +package de.anomic.server; + +import java.io.*; +import java.net.*; +import java.util.*; +import java.lang.reflect.*; + +public class serverSystem { + + // constants for system identification + public static final int systemMacOSC = 0; // 'classic' Mac OS 7.6.1/8.*/9.* + public static final int systemMacOSX = 1; // all Mac OS X + public static final int systemUnix = 2; // all Unix/Linux type systems + public static final int systemWindows = 3; // all Windows 95/98/NT/2K/XP + public static final int systemUnknown = -1; // any other system + + // constants for file type identification (Mac only) + public static final String blankTypeString = "____"; + + // system-identification statics + public static int systemOS = systemUnknown; + public static boolean isMacArchitecture = false; + public static boolean isUnixFS = false; + public static boolean canExecUnix = false; + + // Macintosh-specific statics + private static Class macMRJFileUtils = null; + private static Class macMRJOSType = null; + private static Constructor macMRJOSTypeConstructor = null; + private static Object macMRJOSNullObj = null; + private 
static Method macGetFileCreator = null; + private static Method macGetFileType = null; + private static Method macSetFileCreator = null; + private static Method macSetFileType = null; + private static Method macOpenURL = null; + public static Hashtable macFSTypeCache = null; + public static Hashtable macFSCreatorCache = null; + + + + // static initialization + static { + // check operation system type + Properties sysprop = System.getProperties(); + String sysname = sysprop.getProperty("os.name","").toLowerCase(); + if (sysname.startsWith("mac os x")) systemOS = systemMacOSX; + else if (sysname.startsWith("mac os")) systemOS = systemMacOSC; + else if (sysname.startsWith("windows")) systemOS = systemWindows; + else if ((sysname.startsWith("linux")) || (sysname.startsWith("unix"))) systemOS = systemUnix; + else systemOS = systemUnknown; + + isMacArchitecture = ((systemOS == systemMacOSC) || (systemOS == systemMacOSX)); + isUnixFS = ((systemOS == systemMacOSX) || (systemOS == systemUnix)); + canExecUnix = ((isUnixFS) || (!((systemOS == systemMacOSC) || (systemOS == systemWindows)))); + + // set up the MRJ Methods through reflection + if (isMacArchitecture) try { + macMRJFileUtils = Class.forName("com.apple.mrj.MRJFileUtils"); + macMRJOSType = Class.forName("com.apple.mrj.MRJOSType"); + macGetFileType = macMRJFileUtils.getMethod("getFileType", new Class[] {Class.forName("java.io.File")}); + macGetFileCreator = macMRJFileUtils.getMethod("getFileCreator", new Class[] {Class.forName("java.io.File")}); + macSetFileType = macMRJFileUtils.getMethod("setFileType", new Class[] {Class.forName("java.io.File"), macMRJOSType}); + macSetFileCreator = macMRJFileUtils.getMethod("setFileCreator", new Class[] {Class.forName("java.io.File"), macMRJOSType}); + macMRJOSTypeConstructor = macMRJOSType.getConstructor(new Class[] {Class.forName("java.lang.String")}); + macOpenURL = macMRJFileUtils.getMethod("openURL", new Class[] {Class.forName("java.lang.String")}); + byte[] nullb = new 
byte[4]; + for (int i = 0; i < 4; i++) nullb[i] = 0; + macMRJOSNullObj = macMRJOSTypeConstructor.newInstance(new Object[] {new String(nullb)}); + macFSTypeCache = new Hashtable(); + macFSCreatorCache = new Hashtable(); + } catch (Exception e) { + //e.printStackTrace(); + macMRJFileUtils = null; macMRJOSType = null; + } + } + + public static Object getMacOSTS(String s) { + if ((isMacArchitecture) && (macMRJFileUtils != null)) try { + if ((s == null) || (s.equals(blankTypeString))) return macMRJOSNullObj; + else return macMRJOSTypeConstructor.newInstance(new Object[] {s}); + } catch (Exception e) { + return macMRJOSNullObj; + } else return null; + } + + public static String getMacFSType(File f) { + if ((isMacArchitecture) && (macMRJFileUtils != null)) try { + String s = macGetFileType.invoke(null, new Object[] {f}).toString(); + if ((s == null) || (s.charAt(0) == 0)) return blankTypeString; else return s; + } catch (Exception e) { + return null; + } else return null; + } + + public static String getMacFSCreator(File f) { + if ((isMacArchitecture) && (macMRJFileUtils != null)) try { + String s = macGetFileCreator.invoke(null, new Object[] {f}).toString(); + if ((s == null) || (s.charAt(0) == 0)) return blankTypeString; else return s; + } catch (Exception e) { + return null; + } else return null; + } + + public static void setMacFSType(File f, String t) { + if ((isMacArchitecture) && (macMRJFileUtils != null)) try { + macSetFileType.invoke(null, new Object[] {f, getMacOSTS(t)}); + } catch (Exception e) {/*System.out.println(e.getMessage()); e.printStackTrace();*/} + } + + public static void setMacFSCreator(File f, String t) { + if ((isMacArchitecture) && (macMRJFileUtils != null)) try { + macSetFileCreator.invoke(null, new Object[] {f, getMacOSTS(t)}); + } catch (Exception e) {/*System.out.println(e.getMessage()); e.printStackTrace();*/} + } + + public static boolean aquireMacFSType(File f) { + if ((!(isMacArchitecture)) || (macMRJFileUtils == null)) return false; + 
String name = f.toString(); + + // check file type + int dot = name.lastIndexOf("."); + if ((dot < 0) || (dot + 1 >= name.length())) return false; + String type = getMacFSType(f); + if ((type == null) || (type.equals(blankTypeString))) return false; + String ext = name.substring(dot + 1).toLowerCase(); + String oldType = (String) macFSTypeCache.get(ext); + if ((oldType != null) && (oldType.equals(type))) return false; + macFSTypeCache.put(ext, type); + return true; + } + + public static boolean aquireMacFSCreator(File f) { + if ((!(isMacArchitecture)) || (macMRJFileUtils == null)) return false; + String name = f.toString(); + + // check creator + String creator = getMacFSCreator(f); + if ((creator == null) || (creator.equals(blankTypeString))) return false; + String oldCreator = (String) macFSCreatorCache.get(name); + if ((oldCreator != null) && (oldCreator.equals(creator))) return false; + macFSCreatorCache.put(name, creator); + return true; + } + + public static boolean applyMacFSType(File f) { + if ((!(isMacArchitecture)) || (macMRJFileUtils == null)) return false; + String name = f.toString(); + + // reconstruct file type + int dot = name.lastIndexOf("."); + if ((dot < 0) || (dot + 1 >= name.length())) return false; + String type = (String) macFSTypeCache.get(name.substring(dot + 1).toLowerCase()); + if (type == null) return false; + String oldType = getMacFSType(f); + if ((oldType != null) && (oldType.equals(type))) return false; + setMacFSType(f, type); + return getMacFSType(f).equals(type); + } + + public static boolean applyMacFSCreator(File f) { + if ((!(isMacArchitecture)) || (macMRJFileUtils == null)) return false; + String name = f.toString(); + + // reconstruct file creator + String creator = (String) macFSCreatorCache.get(name); + if (creator == null) return false; + String oldCreator = getMacFSCreator(f); + if ((oldCreator != null) && (oldCreator.equals(creator))) return false; + //System.out.println("***Setting creator for " + f.toString() + " to " 
+ creator); + setMacFSCreator(f, creator); + return getMacFSCreator(f).equals(creator); // this is not always true! I guess it's caused by deprecation of the interface in 1.4er Apple Extensions + } + + public static String infoString() { + String s = "System="; + if (systemOS == systemUnknown) s += "unknown"; + else if (systemOS == systemMacOSC) s += "Mac OS Classic"; + else if (systemOS == systemMacOSX) s += "Mac OS X"; + else if (systemOS == systemUnix) s += "Unix/Linux"; + else if (systemOS == systemWindows) s += "Windows"; + else s += "unknown"; + if (isMacArchitecture) s += ", Mac System Architecture"; + if (isUnixFS) s += ", has Unix-like File System"; + if (canExecUnix) s += ", can execute Unix-Shell Commands"; + return s; + } + + public static String infoKey() { + String s = ""; + if (systemOS == systemUnknown) s += "o"; + else if (systemOS == systemMacOSC) s += "c"; + else if (systemOS == systemMacOSX) s += "x"; + else if (systemOS == systemUnix) s += "u"; + else if (systemOS == systemWindows) s += "w"; + else s += "o"; + if (isMacArchitecture) s += "m"; + if (isUnixFS) s += "f"; + if (canExecUnix) s += "e"; + return s; + } + + private static String errorResponse(Process p) { + BufferedReader err = new BufferedReader(new InputStreamReader(p.getErrorStream())); + String line, error = ""; + try { + while ((line = err.readLine()) != null) { + error = line + "\n"; + } + return error; + } catch (IOException e) { + return null; + } + } + + /* + public static void openBrowser(URL url) { + if (openBrowserJNLP(url)) return; + openBrowserExec(url.toString(), "firefox"); + } + + private static boolean openBrowserJNLP(URL url) { + try { + // Lookup the javax.jnlp.BasicService object + javax.jnlp.BasicService bs = (javax.jnlp.BasicService) javax.jnlp.ServiceManager.lookup("javax.jnlp.BasicService"); + // Invoke the showDocument method + return bs.showDocument(url); + } catch (Exception ue) { + // Service is not supported + return false; + } + } + */ + + public static 
void openBrowser(String url) { + openBrowser(url, "firefox"); + } + + public static void openBrowser(String url, String app) { + try { + String cmd; + Process p; + if (systemOS == systemUnknown) { + } else if (systemOS == systemMacOSC) { + if ((isMacArchitecture) && (macMRJFileUtils != null)) { + macOpenURL.invoke(null, new Object[] {url}); + } + } else if (systemOS == systemMacOSX) { + p = Runtime.getRuntime().exec(new String[] {"/usr/bin/osascript", "-e", "open location \"" + url + "\""}); + p.waitFor(); + if (p.exitValue() != 0) throw new RuntimeException("EXEC ERROR: " + errorResponse(p)); + } else if (systemOS == systemUnix) { + cmd = app + " -remote openURL(" + url + ") &"; + p = Runtime.getRuntime().exec(cmd); + p.waitFor(); + if (p.exitValue() != 0) { + cmd = app + " " + url + " &"; + p = Runtime.getRuntime().exec(cmd); + p.waitFor(); + } + if (p.exitValue() != 0) throw new RuntimeException("EXEC ERROR: " + errorResponse(p)); + } else if (systemOS == systemWindows) { + // see forum at http://forum.java.sun.com/thread.jsp?forum=57&thread=233364&message=838441 + cmd = "rundll32 url.dll,FileProtocolHandler " + url; + //cmd = "cmd.exe /c start javascript:document.location='" + url + "'"; + p = Runtime.getRuntime().exec(cmd); + p.waitFor(); + if (p.exitValue() != 0) throw new RuntimeException("EXEC ERROR: " + errorResponse(p)); + } + } catch (Exception e) { + System.out.println("please start your browser and open the following location: " + url); + } + } + + public static void main(String[] args) { + //try{System.getProperties().list(new PrintStream(new FileOutputStream(new File("system.properties.txt"))));} catch (FileNotFoundException e) {} + //System.out.println("nullstr=" + macMRJOSNullObj.toString()); + if (args[0].equals("-f")) { + File f = new File(args[1]); + System.out.println("File " + f.toString() + ": creator = " + getMacFSCreator(f) + "; type = " + getMacFSType(f)); + } + if (args[0].equals("-u")) { + openBrowser(args[1]); + } + } + +} + +/* +table 
of common system properties
+comparison between different operating systems
+
+property           |Mac OS 9.22           |Mac OSX 10.1.5        |Windows 98            |Linux Kernel 2.4.22   |
+-------------------+----------------------+----------------------+----------------------+----------------------+
+file.encoding      |MacTEC                |MacRoman              |Cp1252                |ANSI_X3.4-1968        |
+file.separator     |/                     |/                     |\                     |/                     |
+java.class.path    |/hdisc/...            |.                     |.                     |/usr/lib/j2se/ext     |
+java.class.version |45.3                  |47.0                  |48.0                  |47.0                  |
+java.home          |/hdisc/...            |/System/Library/...   |C:\PROGRAM\...        |/usr/lib/j2se/1.3/jre |
+java.vendor        |Apple Computer, Inc.  |Apple Computer, Inc.  |Sun Microsystems Inc. |Blackdown Java-Linux  |
+java.version       |1.1.8                 |1.3.1                 |1.4.0_02              |1.3.1                 |
+os.arch            |PowerPC               |ppc                   |x86                   |i386                  |
+os.name            |Mac OS                |Mac OS X              |Windows 98            |Linux                 |
+os.version         |9.2.2                 |10.1.5                |4.10                  |2.4.22                |
+path.separator     |:                     |:                     |;                     |:                     |
+user.dir           |/hdisc/...            |/mydir/...            |C:\mydir\...          |/home/public          |
+user.home          |/hdisc/...            |/Users/myself         |C:\WINDOWS            |/home/public          |
+user.language      |de                    |de                    |de                    |en                    |
+user.name          |Bob                   |myself                |User                  |public                |
+user.timezone      |ECT                   |Europe/Berlin         |Europe/Berlin         |                      |
+-------------------+----------------------+----------------------+----------------------+----------------------+
+*/
+
+/*
+  static struct browser possible_browsers[] = {
+    {N_("Opera"), "opera"},
+    {N_("Netscape"), "netscape"},
+    {N_("Mozilla"), "mozilla"},
+    {N_("Konqueror"), "kfmclient"},
+    {N_("Galeon"), "galeon"},
+    {N_("Firebird"), "mozilla-firebird"},
+    {N_("Firefox"), "firefox"},
+    {N_("Gnome Default"), "gnome-open"}
+  };
+
+  new:
+  command = exec("netscape -remote " "\" openURL(\"%s\",new-window) "", uri);
+  command = exec("opera -newwindow \"%s\"", uri);
+  command = exec("opera -newpage \"%s\"", uri);
+  command = exec("galeon -w \"%s\"", uri);
+  command = exec("galeon -n \"%s\"", uri);
+  command = exec("%s -remote \"openURL(\"%s\"," "new-window)\"", web_browser, uri);
+  command = exec("%s -remote \"openURL(\"%s\"," "new-tab)\"", web_browser, uri);
+
+  current:
+
command = exec("netscape -remote " "\"openURL(\"%s\")\"", uri); + command = exec("opera -remote " "\"openURL(\"%s\")\"", uri); + command = exec("galeon \"%s\"", uri); + command = exec("%s -remote \"openURL(\"%s\")\"", web_browser, uri); + + no option: + command = exec("kfmclient openURL \"%s\"", uri); + command = exec("gnome-open \"%s\"", uri); +*/ diff --git a/source/de/anomic/server/serverThread.java b/source/de/anomic/server/serverThread.java new file mode 100644 index 000000000..88c7e5ed6 --- /dev/null +++ b/source/de/anomic/server/serverThread.java @@ -0,0 +1,116 @@ +// serverThread.java +// ----------------------- +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.yacy.net +// Frankfurt, Germany, 2005 +// last major change: 14.03.2005 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. 
The installation and usage (starting/running) of this
+// software may allow other people or application to access your computer and
+// any attached devices and is highly dependent on the configuration of the
+// software which must be done by the user of the software; the author(s) is
+// (are) also not responsible for proper configuration and usage of the
+// software, even if provoked by documentation provided together with
+// the software.
+//
+// Any changes to this file according to the GPL as documented in the file
+// gpl.txt aside this file in the shipment you received can be done to the
+// lines that follows this copyright notice here, but changes must not be
+// done inside the copyright notive above. A re-distribution must contain
+// the intact and unchanged copyright notice.
+// Contributions and changes to the program code must be marked as such.
+
+package de.anomic.server;
+
+// Contract of a worker thread that repeatedly executes job() until
+// terminate() is called and that keeps statistics about its work.
+public interface serverThread {
+
+    // -------------------------------------------------------
+    // methods inherited from Thread; needed for compatibility
+    public void start();
+    public boolean isAlive();
+
+    // --------------------------------------------------------------------------
+    // these methods are implemented by serverThread and do not need to be altered;
+    // this also includes the run()-method
+
+    public void setDescription(String shortText, String longText);
+    // sets the visible description strings (short and long form)
+
+    public void setStartupSleep(long milliseconds);
+    // sets a sleep time before execution of the job-loop
+
+    public void setIdleSleep(long milliseconds);
+    // sets a sleep time for pauses between two jobs if the job returns false (idle)
+
+    public void setBusySleep(long milliseconds);
+    // sets a sleep time for pauses between two jobs if the job returns true (busy)
+
+    public String getShortDescription();
+    // returns the short description string for online display
+
+    public String getLongDescription();
+    // returns the long description string for online display
+
+    public long getIdleCycles();
+    // returns the total number of cycles of job execution with idle-result
+
+    public long getBusyCycles();
+    // returns the total number of cycles of job execution with busy-result
+
+    public long getBlockTime();
+    // returns the total time that this thread has been blocked so far
+
+    public long getSleepTime();
+    // returns the total time that this thread has slept so far
+
+    public long getExecTime();
+    // returns the total time that this thread has worked so far
+
+    public void setLog(serverLog log);
+    // defines a log where process states can be written to
+
+    public void jobExceptionHandler(Exception e);
+    // handles any action necessary during job execution
+    // (presumably called with an exception thrown by job(); confirm in the implementation)
+
+    public void terminate(boolean waitFor);
+    // after calling this method, the thread shall terminate;
+    // if waitFor is true, the method waits until the process has died
+
+    // ---------------------------------------------------------------------
+    // the following methods are supposed to be implemented by customization
+
+    public void open();
+    // this is called right before the job queue is started
+
+    public boolean job() throws Exception;
+    // performs one job procedure; this loops until terminate() is called.
+    // job returns true if it has done something;
+    // it returns false if it is idle and does not expect to work on more for a longer time
+
+    public int getJobCount();
+    // returns how many jobs are in the queue;
+    // can be used to calculate a busy-state
+
+    public void close();
+    // jobs that need to be done after termination;
+    // terminate must be called before
+
+}
diff --git a/source/de/anomic/tools/bitfield.java b/source/de/anomic/tools/bitfield.java
new file mode 100644
index 000000000..eb4c85b23
--- /dev/null
+++ b/source/de/anomic/tools/bitfield.java
@@ -0,0 +1,106 @@
+// bitfield.java
+// -------------------------------------
+// (C) by Michael Peter Christen; mc@anomic.de
+// first published on http://www.anomic.de
+// Frankfurt, Germany, 2004
+// last
major change: 11.08.2004 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. +// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. 
+
+package de.anomic.tools;
+
+/**
+ * Fixed-size bit field that keeps six bits per byte.
+ *
+ * Every byte ("atom") holds a 6-bit value plus an offset of 48, so an
+ * all-clear atom is the byte 48 (the character '0') and the byte array can
+ * be turned directly into a String (see toString()). Bit position p lives in
+ * byte p / 6 at bit p % 6.
+ */
+public class bitfield {
+
+    private byte[] bb;
+
+    /**
+     * Creates a field of bytelength atoms (bytelength * 6 bits), all cleared.
+     */
+    public bitfield(int bytelength) {
+        this.bb = new byte[bytelength];
+        for (int i = 0; i < bytelength; i++) bb[i] = (byte) 48; // 48 == empty atom
+    }
+
+    /**
+     * Wraps an existing atom array. The array is not copied, so later
+     * set() calls modify the caller's array.
+     */
+    public bitfield(byte[] field) {
+        bb = field;
+    }
+
+    /**
+     * Returns atom a with bit pos (0..5) switched on.
+     * Adding 16 moves the 48-based atom to a 64-based value whose low six
+     * bits are the payload; subtracting 16 restores the stored form.
+     */
+    private static byte setAtom(byte a, int pos) {
+        if ((pos > 5) || (pos < 0)) throw new RuntimeException("atom position out of bounds: " + pos);
+        return (byte) ((64 | ((a + 16) | (1 << pos))) - 16);
+    }
+
+    /**
+     * Returns atom a with bit pos (0..5) switched off; same offset handling
+     * as setAtom.
+     */
+    private static byte unsetAtom(byte a, int pos) {
+        if ((pos > 5) || (pos < 0)) throw new RuntimeException("atom position out of bounds: " + pos);
+        return (byte) (((a + 16) & (0xff ^ (1 << pos))) - 16);
+    }
+
+    /**
+     * Sets or clears the bit at position pos.
+     *
+     * @throws RuntimeException if pos is negative or not below length()
+     */
+    public void set(int pos, boolean value) {
+        int slot = pos / 6;
+        // slot == bb.length is already past the last atom, hence >=
+        if ((pos < 0) || (slot >= bb.length)) throw new RuntimeException("position out of bounds: " + pos);
+        bb[slot] = (value) ? setAtom(bb[slot], pos % 6) : unsetAtom(bb[slot], pos % 6);
+    }
+
+    /**
+     * Returns true if the bit at position pos is set.
+     *
+     * @throws RuntimeException if pos is negative or not below length()
+     */
+    public boolean get(int pos) {
+        int slot = pos / 6;
+        if ((pos < 0) || (slot >= bb.length)) throw new RuntimeException("position out of bounds: " + pos);
+        // undo the storage offset exactly like setAtom/unsetAtom do (+16)
+        // before testing the bit; testing the raw byte would see the bits
+        // of the offset 48 instead of the payload
+        return ((bb[slot] + 16) & (1 << (pos % 6))) != 0;
+    }
+
+    /** Returns the number of addressable bits (six per byte). */
+    public int length() {
+        return bb.length * 6;
+    }
+
+    /** Returns the internal atom array itself, not a copy. */
+    public byte[] getBytes() {
+        return bb;
+    }
+
+    /** Returns the atoms as a String, decoded with the platform default charset. */
+    public String toString() {
+        return new String(bb);
+    }
+
+    /** Small demonstration: sets the first half of a 24-bit field, then clears it again. */
+    public static void main(String[] args) {
+        bitfield test = new bitfield(4);
+        int l = test.length();
+        System.out.println("available: " + l);
+        System.out.println("bevore:    " + test.toString());
+        for (int i = 0; i < l/2; i++) {
+            test.set(i, true);
+            System.out.println(i + ":" + test.toString());
+        }
+        for (int i = l/2 - 1; i >= 0; i--) {
+            test.set(i, false);
+            System.out.println(i + ":" + test.toString());
+        }
+        System.out.println("after:     " + test.toString());
+    }
+
+}
diff --git a/source/de/anomic/tools/crypt.java b/source/de/anomic/tools/crypt.java
new file mode 100644
index 000000000..28a018134
--- /dev/null
+++ b/source/de/anomic/tools/crypt.java
@@ -0,0 +1,127 @@
+// crypt.java
+// -------------------------------------
+// (C) by Michael Peter Christen; mc@anomic.de
+// first published on http://www.anomic.de
+// Frankfurt, Germany, 2004
+// last major change: 13.05.2004
+//
+// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. +// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. 
+
+package de.anomic.tools;
+
+import java.util.*;
+import java.text.*;
+import de.anomic.server.*;
+
+/**
+ * Salt generation and the simple, prefix-tagged string codings
+ * ("p|", "b|", "z|"). No cipher is created in this class; the constructors
+ * only record the method name.
+ */
+public class crypt {
+
+    // --------------------------------------------------------
+    // Section: random salt generation
+    // --------------------------------------------------------
+
+    private static long saltcounter = 0;
+    private static Random saltrandom = new Random(System.currentTimeMillis());
+
+    /**
+     * Produces a salt from three 48-bit-masked parts (random number, current
+     * time, a counter multiplied by 1001). Their sum is masked to 48 bits
+     * again and encoded by serverCodings.standardCoder.encodeBase64Long
+     * with length argument 8.
+     */
+    public static String randomSalt() {
+        final long mask48 = 0xffffffffffffL;
+        final long randomPart = saltrandom.nextLong() & mask48;
+        final long clockPart = System.currentTimeMillis() & mask48;
+        final long counterPart = (1001 * saltcounter) & mask48;
+        saltcounter++;
+        final long salt = randomPart + clockPart + counterPart;
+        // per the original note: 48-bit salt values are represented as
+        // 8-character b64-encoded strings
+        return serverCodings.standardCoder.encodeBase64Long(salt & mask48, 8);
+    }
+
+    // --------------------------------------------------------
+    // Section: PBE + PublicKey based on passwords encryption
+    // --------------------------------------------------------
+
+    public static final String vDATE = "20030925";
+    public static final String copyright = "[ 'crypt' v" + vDATE + " by Michael Christen / www.anomic.de ]";
+    // magic identifier inside every '.crypt' - file
+    public static final String magicString = "crypt|anomic.de|0";
+    public static final SimpleDateFormat dateFormatter = new SimpleDateFormat("yyyyMMddHHmmssSSS", Locale.ENGLISH);
+
+    // one of ["TripleDES", "Blowfish", "DESede", "DES"]
+    String cryptMethod;
+    private static final String defaultMethod = "PBEWithMD5AndDES"; //"DES";
+
+    /**
+     * Uses the first 8 characters of the password (padded with 'X') as salt.
+     * This is possible, but not recommended.
+     */
+    public crypt(String pbe) {
+        this(pbe, (pbe + "XXXXXXXX").substring(0, 8));
+    }
+
+    /** Uses the given salt together with the default method. */
+    public crypt(String pbe, String salt) {
+        this(pbe, salt, defaultMethod);
+    }
+
+    /**
+     * Brings the salt to exactly 8 characters (cut, or padded with 'X') and
+     * records the method name. The normalised salt is not used any further
+     * in this class.
+     */
+    private crypt(String pbe, String salt, String method) {
+        final int saltLength = salt.length();
+        if (saltLength > 8) {
+            salt = salt.substring(0, 8);
+        } else if (saltLength < 8) {
+            salt = (salt + "XXXXXXXX").substring(0, 8);
+        }
+        cryptMethod = method;
+    }
+
+    // --------------------------------------------------------
+    // Section: simple Codings
+    // --------------------------------------------------------
+
+    /** Same as simpleEncode(content, null, 'b'). */
+    public static String simpleEncode(String content) {
+        return simpleEncode(content, null, 'b');
+    }
+
+    /** Same as simpleEncode(content, key, 'b'). */
+    public static String simpleEncode(String content, String key) {
+        return simpleEncode(content, key, 'b');
+    }
+
+    /**
+     * Encodes content and prefixes the result with the method character and '|'.
+     *
+     * 'p' keeps the content as it is, 'b' passes it through
+     * enhancedCoder.encodeBase64String, 'z' passes it through
+     * gzip.gzipString and then enhancedCoder.encodeBase64.
+     *
+     * @return the tagged string, or null for any other method character
+     */
+    public static String simpleEncode(String content, String key, char method) {
+        // kept from the original; none of the methods below reads the key
+        if (key == null) key = "NULL";
+        switch (method) {
+            case 'p':
+                return "p|" + content;
+            case 'b':
+                return "b|" + serverCodings.enhancedCoder.encodeBase64String(content);
+            case 'z':
+                return "z|" + serverCodings.enhancedCoder.encodeBase64(gzip.gzipString(content));
+            default:
+                return null;
+        }
+    }
+
+    /**
+     * Reverses simpleEncode.
+     *
+     * @return null if encoded is null, shorter than 3 characters or tagged
+     *         with an unknown method character; the unchanged input if its
+     *         second character is not '|' (not encoded); the decoded
+     *         content otherwise
+     */
+    public static String simpleDecode(String encoded, String key) {
+        if (encoded == null) return null;
+        if (encoded.length() < 3) return null;
+        if (encoded.charAt(1) != '|') return encoded; // not encoded
+        final char tag = encoded.charAt(0);
+        final String payload = encoded.substring(2);
+        switch (tag) {
+            case 'p':
+                return payload;
+            case 'b':
+                return serverCodings.enhancedCoder.decodeBase64String(payload);
+            case 'z':
+                return gzip.gunzipString(serverCodings.enhancedCoder.decodeBase64(payload));
+            default:
+                return null;
+        }
+    }
+
+}
diff --git a/source/de/anomic/tools/cryptbig.java b/source/de/anomic/tools/cryptbig.java
new file mode 100644
index 000000000..5edc9f478
--- /dev/null
+++ b/source/de/anomic/tools/cryptbig.java
@@ -0,0 +1,646 @@
+// crypt.java
+// -------------------------------------
+// (C) by Michael Peter Christen; mc@anomic.de
+// first published on http://www.anomic.de
+// Frankfurt, Germany, 2004
+// last major change: 13.05.2004
+//
+// This program is free software; you can redistribute it
and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. +// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. 
+ +package de.anomic.tools; + +import java.io.*; +import java.util.*; +import javax.crypto.*; +import javax.crypto.spec.*; +import java.security.*; +import java.text.*; +import de.anomic.server.*; + +public class cryptbig { + + // -------------------------------------------------------- + // Section: random salt generation + // -------------------------------------------------------- + + private static long saltcounter = 0; + private static Random saltrandom = new Random(System.currentTimeMillis()); + + public static String randomSalt() { + // generate robust 48-bit random number + long salt = + (saltrandom.nextLong() & 0XffffffffffffL) + + (System.currentTimeMillis() & 0XffffffffffffL) + + ((1001 * saltcounter) & 0XffffffffffffL); + saltcounter++; + // we generate 48-bit salt values, that are represented as 8-character b64-encoded strings + return serverCodings.standardCoder.encodeBase64Long(salt & 0XffffffffffffL, 8); + } + + // -------------------------------------------------------- + // Section: PBE + PublicKey based on passwords encryption + // -------------------------------------------------------- + + public static final String vDATE = "20030925"; + public static final String copyright = "[ 'crypt' v" + vDATE + " by Michael Christen / www.anomic.de ]"; + public static final String magicString = "crypt|anomic.de|0"; // magic identifier inside every '.crypt' - file + public static final SimpleDateFormat dateFormatter = new SimpleDateFormat("yyyyMMddHHmmssSSS", Locale.ENGLISH); + + String cryptMethod; // one of ["TripleDES", "Blowfish", "DESede", "DES"] + private static final String defaultMethod = "PBEWithMD5AndDES"; //"DES"; + + Cipher ecipher; + Cipher dcipher; + + public cryptbig(String pbe) { + // this is possible, but not recommended + this(pbe, (pbe + "XXXXXXXX").substring(0, 8)); + } + + public cryptbig(String pbe, String salt) { + this(pbe, salt, defaultMethod); + } + + + private cryptbig(String pbe, String salt, String method) { + // a 
Password-Based Encryption. The SecretKey is created on the fly + PBEKeySpec keySpec = new PBEKeySpec(pbe.toCharArray()); + try { + if (salt.length() > 8) salt = salt.substring(0,8); + if (salt.length() < 8) salt = (salt + "XXXXXXXX").substring(0,8); + + // create the PBE key + SecretKeyFactory keyFactory = SecretKeyFactory.getInstance(method); + SecretKey key = keyFactory.generateSecret(keySpec); + + // create parameter spec for PBE + PBEParameterSpec paramSpec = new PBEParameterSpec(salt.getBytes(), 1000 /*ITERATIONS*/); + + // Create a cipher and initialize it for encrypting end decrypting + cryptMethod = method; + ecipher = Cipher.getInstance(cryptMethod); + dcipher = Cipher.getInstance(cryptMethod); + ecipher.init(Cipher.ENCRYPT_MODE, key, paramSpec); // paramSpec only for PBE! + dcipher.init(Cipher.DECRYPT_MODE, key, paramSpec); + } catch (javax.crypto.NoSuchPaddingException e) { + } catch (java.security.InvalidKeyException e) { + } catch (java.security.NoSuchAlgorithmException e) { + } catch (java.security.spec.InvalidKeySpecException e) { + } catch (java.security.InvalidAlgorithmParameterException e) { + } + } + + // Encode a string into a new string using utf-8, crypt and b64 + public String encryptString(String str) { + try { + byte[] utf = str.getBytes("UTF8"); + byte[] enc = encryptArray(utf); + if (enc == null) return null; + return serverCodings.standardCoder.encodeBase64(enc); + } catch (UnsupportedEncodingException e) { + } catch (java.io.IOException e) { + } + return null; + } + + // Decode a string into a new string using b64, crypt and utf-8 + public String decryptString(String str) { + try { + byte[] b64dec = serverCodings.standardCoder.decodeBase64(str); + if (b64dec == null) return null; // error in input string (inconsistency) + byte[] dec = decryptArray(b64dec); + if (dec == null) return null; + return new String(dec, "UTF8"); + } catch (UnsupportedEncodingException e) { + } catch (java.io.IOException e) { + } + return null; + } + + // Encode 
a byte array into a new byte array + public byte[] encryptArray(byte[] b) { + if (b == null) return null; + try { + return ecipher.doFinal(b); + } catch (javax.crypto.BadPaddingException e) { + } catch (IllegalBlockSizeException e) { + } + return null; + } + + // Decode a string into a new string using b64, crypt and utf-8 + public byte[] decryptArray(byte[] b) { + if (b == null) return null; + try { + return dcipher.doFinal(b); + } catch (javax.crypto.BadPaddingException e) { + } catch (IllegalBlockSizeException e) { + } + return null; + } + + + // This method returns the available implementations for a service type + public static Set listCryptoMethods(String serviceType) { + Set result = new HashSet(); + + // All providers + Provider[] providers = Security.getProviders(); + for (int i = 0; i < providers.length; i++) { + // Get services provided by each provider + Set keys = providers[i].keySet(); + for (Iterator it = keys.iterator(); it.hasNext(); ) { + String key = (String) it.next(); + key = key.split(" ")[0]; + if (key.startsWith(serviceType + ".")) { + result.add(key.substring(serviceType.length() + 1)); + } else if (key.startsWith("Alg.Alias." 
+ serviceType + ".")) { + // This is an alias + result.add(key.substring(serviceType.length() + 11)); + } + } + } + return result; + } + + public static void testCryptMethods(Set methods) { + String method; + Iterator i = methods.iterator(); + while (i.hasNext()) { + method = (String) i.next(); + System.out.print(method + " : "); + try { + cryptbig crypter = new cryptbig("abrakadabra", method); + String encrypted = crypter.encryptString("nicht verraten abc 1234567890"); + System.out.print(encrypted + "/"); + String decrypted = crypter.decryptString(encrypted); + System.out.println(decrypted); + } catch (Exception e) { + System.out.println("Exception: " + e.getMessage()); + e.printStackTrace(); + } + } + } + + public void encryptFile(String inFileName, String outFileName, boolean compress) { + /* + File-Format of encrypted file: + Filename: b64-of-encryption-of- plus extension ".crypt" + File Content: + + + + + + + */ + try { + File inFile = new File(inFileName); + String inFileDate = dateFormatter.format(new Date(inFile.lastModified())); // 17 byte + String encryptionDate = dateFormatter.format(new Date()); // 17 byte + String inFileSize = serverCodings.standardCoder.encodeBase64Long(inFile.length(), 11); // 64 / 6 = 11; 11 byte + String flag = "1"; // 1 byte + int inFileNameLength = inFileName.length(); // 256 + String X = inFileDate + encryptionDate + inFileSize + flag + inFileName; + + System.out.println("TEST: preserving inFileDate : " + dateFormatter.parse(inFileDate, new ParsePosition(0))); + System.out.println("TEST: preserving encryptionDate: " + dateFormatter.parse(encryptionDate, new ParsePosition(0))); + System.out.println("TEST: preserving inFileLength : " + inFile.length()); + System.out.println("TEST: preserving flag : " + flag); + System.out.println("TEST: preserving inFileName : " + inFileName); + System.out.println("TEST: preserving X-String : " + X); + + // start encryption + InputStream fin = new CipherInputStream(new FileInputStream(inFile), 
ecipher); + OutputStream fout = new FileOutputStream(outFileName); + + // write magic and properties of original file + // - we encrypt the original date, the encryption date, the file size, the flag + // and file name together to the string A and calculate the length AL of that string + // - the length of the file name is therefore equal to AL-(17+17+11+1) = AL-46 + // - AL is then b64-ed and also encrypted which results into string B + // - the length of B is BL; BL is then b64-ed to a string C of fixed length 1 + // - after the magic String we write C, B and A + try { + String A = new String(ecipher.doFinal(X.getBytes("UTF8"))); + String B = new String(ecipher.doFinal(serverCodings.standardCoder.encodeBase64Long((long) A.length(), 2).getBytes("UTF8"))); // most probable not longer than 4 + String C = serverCodings.standardCoder.encodeBase64Long((long) B.length(), 1); // fixed length 1 (6 bits, that should be enough) + fout.write(magicString.getBytes()); // the magic string, used to identify a 'crypt'-file + fout.write(C.getBytes()); + fout.write(B.getBytes()); + fout.write(A.getBytes()); + + // write content of file + copy(fout, fin, 512); + } + catch (javax.crypto.IllegalBlockSizeException e) {System.err.println("ERROR:" + e.getMessage());} + catch (javax.crypto.BadPaddingException e) {System.err.println("ERROR:" + e.getMessage());} + // finished files + fin.close(); + fout.close(); + } catch (FileNotFoundException e) { + System.err.println("ERROR: file '" + inFileName + "' not found"); + } catch (IOException e) { + System.err.println("ERROR: IO trouble"); + } + } + + public void decryptFile(String inFileName, String outFileName) { + InputStream fin = null; + OutputStream fout = null; + try { + // Start opening the files + fin = new BufferedInputStream(new FileInputStream(inFileName), 4096); + + // read the file properties + byte[] thisMagic = new byte[magicString.length()]; fin.read(thisMagic); + + if (!((new String(thisMagic)).equals(magicString))) { + // 
this is not an crypt file, so dont do anything + fin.close(); + return; + } + byte[] C = new byte[1]; fin.read(C); // the length of the following String, encoded as b64 + byte[] B = new byte[(int) serverCodings.standardCoder.decodeBase64Long(new String(C))]; fin.read(B); // this is again the length of the following string, as encrypted b64-ed integer + byte[] A = new byte[(int) serverCodings.standardCoder.decodeBase64Long(new String(dcipher.doFinal(B), "UTF8"))]; fin.read(A); + String X = new String(dcipher.doFinal(A), "UTF8"); + + System.out.println("TEST: detecting X-String : " + X); + + // reconstruct the properties + Date inFileDate = dateFormatter.parse(X.substring(0, 17), new ParsePosition(0)); + Date encryptionDate = dateFormatter.parse(X.substring(17, 34), new ParsePosition(0)); + long inFileSize = serverCodings.standardCoder.decodeBase64Long(X.substring(34, 45)); + String flag = X.substring(45, 46); + String origFileName = X.substring(46); + + System.out.println("TEST: detecting inFileDate : " + inFileDate); + System.out.println("TEST: detecting encryptionDate: " + encryptionDate); + System.out.println("TEST: detecting inFileLength : " + inFileSize); + System.out.println("TEST: detecting flag : " + flag); + System.out.println("TEST: detecting inFileName : " + origFileName); + + // open the output file + fout = new BufferedOutputStream(new CipherOutputStream(new FileOutputStream(outFileName), dcipher), 4096); + + // read and decrypt the file + copy(fout, fin, 512); + + // close the files + fin.close(); + fout.close(); + + // do postprocessing + } catch (BadPaddingException e) { + System.err.println("ERROR: decryption of '" + inFileName + "' not possible: " + e.getMessage()); + } catch (IllegalBlockSizeException e) { + System.err.println("ERROR: decryption of '" + inFileName + "' not possible: " + e.getMessage()); + } catch (FileNotFoundException e) { + System.err.println("ERROR: file '" + inFileName + "' not found"); + } catch (IOException e) { + 
System.err.println("ERROR: IO trouble"); + try {fin.close(); fout.close();} catch (Exception ee) {} + } + } + + private static void copy(OutputStream out, InputStream in, int bufferSize) throws IOException { + InputStream bIn = new BufferedInputStream(in, bufferSize); + OutputStream bOut = new BufferedOutputStream(out, bufferSize); + byte buf [] = new byte[bufferSize]; + int n; + while ((n = bIn.read(buf)) > 0) bOut.write(buf, 0, n); + bIn.close(); + bOut.close(); + } + + + public static String scrambleString(String key, String s) { + // we perform several operations + // - generate salt + // - gzip string + // - crypt string with key and salt + // - base64-encode result + // - attach salt and return + String salt = randomSalt(); + //System.out.println("Salt=" + salt); + cryptbig c = new cryptbig(key, salt); + boolean gzFlag = true; + byte[] gz = gzip.gzipString(s); + if (gz.length > s.length()) { + // revert compression + try { + gz = s.getBytes("UTF8"); + gzFlag = false; + } catch (UnsupportedEncodingException e) { + return null; + } + } + //System.out.println("GZIP length=" + gz.length); + if (gz == null) return null; + byte[] enc = c.encryptArray(gz); + if (enc == null) return null; + return salt + ((gzFlag) ? 
"1" : "0") + serverCodings.enhancedCoder.encodeBase64(enc); + } + + public static String descrambleString(String key, String s) { + String salt = s.substring(0, 8); + boolean gzFlag = (s.charAt(8) == '1'); + s = s.substring(9); + cryptbig c = new cryptbig(key, salt); + byte[] b64dec = serverCodings.enhancedCoder.decodeBase64(s); + if (b64dec == null) return null; // error in input string (inconsistency) + byte[] dec = c.decryptArray(b64dec); + if (dec == null) return null; + if (gzFlag) + return gzip.gunzipString(dec); + else + try {return new String(dec,"UTF8");} catch (UnsupportedEncodingException e) {return null;} + + } + + + // -------------------------------------------------------- + // Section: simple Codings + // -------------------------------------------------------- + + + public static String simpleEncode(String content) { + return simpleEncode(content, null, 'b'); + } + + public static String simpleEncode(String content, String key) { + return simpleEncode(content, key, 'b'); + } + + public static String simpleEncode(String content, String key, char method) { + if (key == null) key = "NULL"; + if (method == 'p') return "p|" + content; + if (method == 'b') return "b|" + serverCodings.enhancedCoder.encodeBase64String(content); + if (method == 'z') return "z|" + serverCodings.enhancedCoder.encodeBase64(gzip.gzipString(content)); + if (method == 'c') return "c|" + scrambleString(key, content); + return null; + } + + public static String simpleDecode(String encoded, String key) { + if ((encoded == null) || (encoded.length() < 3)) return null; + if (encoded.charAt(1) != '|') return encoded; // not encoded + char method = encoded.charAt(0); + encoded = encoded.substring(2); + if (method == 'p') return encoded; + if (method == 'b') return serverCodings.enhancedCoder.decodeBase64String(encoded); + if (method == 'z') return gzip.gunzipString(serverCodings.enhancedCoder.decodeBase64(encoded)); + if (method == 'c') return descrambleString(key, encoded); + return 
null; + } + + + // -------------------------------------------------------- + // Section: one-way encryption + // -------------------------------------------------------- + + public static String oneWayEncryption(String key) { + cryptbig crypter = new cryptbig(key); + String e = crypter.encryptString(key); + if (e.length() == 0) e = "0XXXX"; + if (e.length() % 2 == 1) e += "X"; + while (e.length() < 32) e = e + e; + char[] r = new char[16]; + for (int i = 0; i < 16; i++) r[i] = e.charAt(2 * i + 1); + return new String(r); + } + + // -------------------------------------------------------- + // Section: command interface + // -------------------------------------------------------- + + private static void help() { + System.out.println("AnomicCrypt (2003) by Michael Christen"); + System.out.println("Password-based encryption using the " + defaultMethod + "-method in standard java"); + System.out.println("usage: crypt -h | -help"); + System.out.println(" crypt -1 "); + System.out.println(" crypt -md5 "); + System.out.println(" crypt ( -es64 | -ds64 | -ec64 | -dc64 ) "); + System.out.println(" crypt ( -e | -d ) "); + System.out.println(" crypt -enc \\"); + System.out.println(" [-o | -preserveFilename] \\"); + System.out.println(" [-d | -preserveDate] [-noZip]"); + System.out.println(" crypt -dec \\"); + System.out.println(" [-o | -preserveFilename] \\"); + System.out.println(" [-d | -preserveDate]"); + System.out.println(" crypt ( -info | -name | -size | -date | -edate ) \\"); + System.out.println(" "); + } + + private static void longhelp() { + // --line-help-- *--------------------------------------------------------------- + System.out.println("AnomicCrypt (2003) by Michael Christen"); + System.out.println(""); + System.out.println(""); + System.out.println("crypt -1 "); + System.out.println(""); + System.out.println(" One-way encryption of the given password."); + System.out.println(" The result is computed by encoding the word with the word as"); + 
System.out.println(" the password and repeating it until the length is greater"); + System.out.println(" than 32. Then every second character is taken to compose the"); + System.out.println(" result which has always the length of 16 characters."); + System.out.println(""); + System.out.println(""); + System.out.println("crypt -md5 "); + System.out.println(""); + System.out.println(" MD5 digest according to RFC 1321. The resulting bytes are"); + System.out.println(" encoded as two-digit hex and concatenated to a single string."); + System.out.println(""); + System.out.println(""); + System.out.println("crypt -ec64 "); + System.out.println(""); + System.out.println(" Encoding of a cardianal (a positive long integer) with the"); + System.out.println(" built-in non-standard base-64 algorithm."); + System.out.println(""); + System.out.println(""); + System.out.println("crypt -dc64 "); + System.out.println(""); + System.out.println(" Decoding of the given b64-coded string to a cardinal number."); + System.out.println(""); + System.out.println(""); + System.out.println("crypt -es64 "); + System.out.println(""); + System.out.println(" Encoding of a given String to a b64 string."); + System.out.println(""); + System.out.println(""); + System.out.println("crypt -ds64 "); + System.out.println(""); + System.out.println(" Decoding of a given b64-coded string to a normal string."); + System.out.println(""); + System.out.println(""); + System.out.println("crypt -e "); + System.out.println(""); + System.out.println(" Encryption of a given Unicode-String."); + System.out.println(" The given string is first encoded to an UTF-8 byte stream, then"); + System.out.println(" encoded using a password based encryption and then finaly"); + System.out.println(" encoded to b64 to generate a printable form."); + System.out.println(" The PBE method is " + defaultMethod + "."); + System.out.println(""); + System.out.println(""); + System.out.println("crypt -d "); + System.out.println(""); + 
System.out.println(" Decryption of a string."); + System.out.println(" The string is b64-decoded, " + defaultMethod + "-decrypted, "); + System.out.println(" and then transformed to an unicode string."); + System.out.println(""); + System.out.println(""); + System.out.println("crypt -enc \\"); + System.out.println(" [-o | -preserveFilename] \\"); + System.out.println(" [-d | -preserveDate] [-noZip]"); + System.out.println(""); + System.out.println(""); + System.out.println(""); + System.out.println(""); + System.out.println("crypt -dec \\"); + System.out.println(" [-o | -preserveFilename] \\"); + System.out.println(" [-d | -preserveDate]"); + System.out.println(""); + System.out.println(""); + System.out.println("crypt ( -info | -name | -size | -date | -edate ) "); + System.out.println(""); + System.out.println(""); + } + + public static void main(String[] s) { + if (s.length == 0) { + help(); + System.exit(0); + } + if ((s[0].equals("-h")) || (s[0].equals("-help"))) { + longhelp(); + System.exit(0); + } + if (s[0].equals("-tc")) { + // list all available crypt mehtods: + Set methods = listCryptoMethods("Cipher"); + System.out.println(methods.size() + " crypt methods:" + methods.toString()); + testCryptMethods(methods); + System.exit(0); + } + if (s[0].equals("-random")) { + int count = ((s.length == 2) ? 
(Integer.parseInt(s[1])) : 1); + for (int i = 0; i < count; i++) System.out.println(randomSalt()); + System.exit(0); + } + if (s[0].equals("-1")) { + if (s.length != 2) {help(); System.exit(-1);} + System.out.println(oneWayEncryption(s[1])); + System.exit(0); + } + if (s[0].equals("-ec64")) { + // generate a b64 encoding from a given cardinal + if (s.length != 2) {help(); System.exit(-1);} + System.out.println(serverCodings.standardCoder.encodeBase64Long(Long.parseLong(s[1]), 0)); + System.exit(0); + } + if (s[0].equals("-dc64")) { + // generate a b64 decoding from a given cardinal + if (s.length != 2) {help(); System.exit(-1);} + System.out.println(serverCodings.standardCoder.decodeBase64Long(s[1])); + System.exit(0); + } + if (s[0].equals("-es64")) { + // generate a b64 encoding from a given string + if (s.length != 2) {help(); System.exit(-1);} + System.out.println(serverCodings.standardCoder.encodeBase64String(s[1])); + System.exit(0); + } + if (s[0].equals("-ds64")) { + // generate a b64 decoding from a given string + if (s.length != 2) {help(); System.exit(-1);} + System.out.println(serverCodings.standardCoder.decodeBase64String(s[1])); + System.exit(0); + } + if (s[0].equals("-ess")) { + // 'scramble' string + if (s.length != 3) {help(); System.exit(-1);} + long t = System.currentTimeMillis(); + System.out.println(scrambleString(s[1], s[2])); + System.out.println("Calculation time: " + (System.currentTimeMillis() - t) + " milliseconds"); + System.exit(0); + } + if (s[0].equals("-dss")) { + // 'descramble' string + if (s.length != 3) {help(); System.exit(-1);} + long t = System.currentTimeMillis(); + System.out.println(descrambleString(s[1], s[2])); + System.out.println("Calculation time: " + (System.currentTimeMillis() - t) + " milliseconds"); + System.exit(0); + } + if (s[0].equals("-e")) { + if (s.length != 3) {help(); System.exit(-1);} + System.out.println((new cryptbig(s[1])).encryptString(s[2])); + System.exit(0); + } + if (s[0].equals("-d")) { + if 
(s.length != 3) {help(); System.exit(-1);} + System.out.println((new cryptbig(s[1])).decryptString(s[2])); + System.exit(0); + } + if (s[0].equals("-md5")) { + // generate a public key from a password that can be used for encryption + if (s.length != 2) {help(); System.exit(-1);} + String md5s = serverCodings.encodeMD5Hex(new File(s[1])); + System.out.println(md5s); + System.exit(0); + } + if (s[0].equals("-enc")) { + if ((s.length < 3) || (s.length > 4)) {help(); System.exit(-1);} + String target; + if (s.length == 3) target = s[2] + ".crypt"; else target = s[3]; + (new cryptbig(s[1])).encryptFile(s[2], target, true /*compress*/); + System.exit(0); + } + if (s[0].equals("-dec")) { + if ((s.length < 3) || (s.length > 4)) {help(); System.exit(-1);} + String target; + if (s.length == 3) { + if (s[2].endsWith(".crypt")) + target = s[2].substring(0, s[2].length() - 7); + else + target = s[2] + ".decoded"; + } else { + target = s[3]; + } + (new cryptbig(s[1])).decryptFile(s[2], target); + System.exit(0); + } + help(); System.exit(-1); + } + +} diff --git a/source/de/anomic/tools/disorderHeap.java b/source/de/anomic/tools/disorderHeap.java new file mode 100644 index 000000000..23cb5a2a6 --- /dev/null +++ b/source/de/anomic/tools/disorderHeap.java @@ -0,0 +1,95 @@ +// disorderHeap.java +// ----------------------- +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2004 +// last major change: 17.05.2004 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. 
+// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. +// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. 
+ + +package de.anomic.tools; + +import java.util.*; + +public class disorderHeap { + + LinkedList list; + + public disorderHeap() { + list = new LinkedList(); + } + + public disorderHeap(int numbers) { + // create a disorder heap with numbers in it + // the numbers are 0..numbers-1 + this(); + for (int i = 0; i < numbers; i++) add("" + i); + } + + public synchronized void add(Object element) { + // add one element into the list at an arbitrary position + int pos = (int) ((System.currentTimeMillis() / 7) % ((long) (list.size() + 1))); + list.add(pos, element); + } + + public synchronized Object remove() { + if (list.size() == 0) return null; + int pos = (int) ((System.currentTimeMillis() / 13) % ((long) list.size())); + return list.remove(pos); + } + + public synchronized int number() { + String n = (String) this.remove(); + if (n == null) return -1; + try { + return Integer.parseInt(n); + } catch (Exception e) { + return -1; + } + } + + public synchronized int size() { + return list.size(); + } + + + public static void main(String[] args) { + disorderHeap ul = new disorderHeap(); + for (int i = 0; i < args.length; i++) ul.add(args[i]); + for (int i = 0; i < args.length; i++) System.out.print((String) ul.remove() + " "); + System.out.println(); + } + +} \ No newline at end of file diff --git a/source/de/anomic/tools/disorderSet.java b/source/de/anomic/tools/disorderSet.java new file mode 100644 index 000000000..5ba0a4ac0 --- /dev/null +++ b/source/de/anomic/tools/disorderSet.java @@ -0,0 +1,78 @@ +// disorderSet.java +// ----------------------- +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2004 +// last major change: 17.05.2004 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. 
+// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. +// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. 
+ + +package de.anomic.tools; + +import java.util.*; + +public class disorderSet extends HashSet implements Set { + + disorderHeap dh; + + public disorderSet() { + super(); + dh = null; + } + + public boolean hasAny() { + return (this.size() > 0); + } + + public Object any() { + // return just any element + if ((dh == null) || (dh.size() == 0)) { + if (this.size() == 0) return null; + // fill up the queue + dh = new disorderHeap(); + Iterator elements = this.iterator(); + while (elements.hasNext()) dh.add(elements.next()); + } + return dh.remove(); + } + + public static void main(String[] args) { + disorderSet ds = new disorderSet(); + for (int i = 0; i < args.length; i++) ds.add(args[i]); + for (int i = 0; i < args.length * 3; i++) System.out.print((String) ds.any() + " "); + System.out.println(); + } + +} \ No newline at end of file diff --git a/source/de/anomic/tools/enumerateFiles.java b/source/de/anomic/tools/enumerateFiles.java new file mode 100644 index 000000000..81ba4d8c8 --- /dev/null +++ b/source/de/anomic/tools/enumerateFiles.java @@ -0,0 +1,116 @@ +// enumerateFiles.java +// ------------------------------------- +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2004 +// last major change: 26.12.2004 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. 
+// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. +// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. 
+ +package de.anomic.tools; + +import java.io.*; +import java.util.*; + +public class enumerateFiles implements Enumeration { + + // implements iterative search through recursively defined subdirectories + // and return all paths to the files + + private ArrayList hierarchy; // contains TreeSet elements, earch TreeSet contains File Entries + private boolean incOrder; // if true, the smallest value is returned first + private Object buffer; // the prefetch-buffer + private boolean return_files; + private boolean return_folders; + + public enumerateFiles(File root, boolean files, boolean folders, boolean increasing) { + // we define our data structures first + return_files = files; + return_folders = folders; + hierarchy = new ArrayList(); + incOrder = increasing; + // the we initially fill the hierarchy with the content of the root folder + TreeSet t = new TreeSet(); + String[] l = root.list(); + // System.out.println("D " + l.toString()); + for (int i = 0; i < l.length; i++) t.add(new File(root, l[i])); + hierarchy.add(t); + // start with search by filling the buffer + buffer = nextElement0(); + } + + private Object nextElement0() { + // the object is a File pointing to the corresponding file + File f; + TreeSet t; + do { + // System.out.println("D " + hierarchy.toString()); + t = null; + while ((t == null) && (hierarchy.size() > 0)) { + t = (TreeSet) hierarchy.get(hierarchy.size() - 1); + if (t.size() == 0) { + hierarchy.remove(hierarchy.size() - 1); // we step up one hierarchy + t = null; + } + } + if ((hierarchy.size() == 0) || (t.size() == 0)) return null; // this is the end + // fetch value + if (incOrder) f = (File) t.first(); else f = (File) t.last(); + t.remove(f); + // if the value represents another folder, we step into the next hierarchy + if (f.isDirectory()) { + t = new TreeSet(); + String[] l = f.list(); + for (int i = 0; i < l.length; i++) t.add(new File(f, l[i])); + hierarchy.add(t); + if (!(return_folders)) f = null; + } else { + if 
(!(return_files)) f = null; + } + } while (f == null); + // thats it + return f; + } + + public boolean hasMoreElements() { + return buffer != null; + } + + public Object nextElement() { + Object r = buffer; + buffer = nextElement0(); + return r; + } + +} diff --git a/source/de/anomic/tools/gzip.java b/source/de/anomic/tools/gzip.java new file mode 100644 index 000000000..854558432 --- /dev/null +++ b/source/de/anomic/tools/gzip.java @@ -0,0 +1,151 @@ +// gzip.java +// ------------------------------------- +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2004 +// last major change: 13.05.2004 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. 
The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. +// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such.

package de.anomic.tools;

import java.io.*;
import java.util.zip.*;

// Small gzip/gunzip helper for files and strings, usable from the command line.
// All public methods report failures on stderr instead of throwing.
public class gzip {

    // Compresses inFile into outFile.
    public static void gzipFile(String inFile, String outFile) {
        InputStream fin = null;
        OutputStream fout = null;
        try {
            fin = new FileInputStream(inFile);
            // keep a handle on the raw file stream first, so that it is
            // closed even if wrapping it fails
            fout = new FileOutputStream(outFile);
            fout = new GZIPOutputStream(fout, 128);
            copy(fout, fin, 128);
        } catch (FileNotFoundException e) {
            System.err.println("ERROR: file '" + inFile + "' not found");
        } catch (IOException e) {
            System.err.println("ERROR: IO trouble");
        } finally {
            // copy() closes both streams on success; this covers the error paths
            closeQuietly(fin);
            closeQuietly(fout);
        }
    }

    // Decompresses inFile into outFile.
    public static void gunzipFile(String inFile, String outFile) {
        InputStream fin = null;
        OutputStream fout = null;
        try {
            fin = new FileInputStream(inFile);
            fin = new GZIPInputStream(fin);
            fout = new FileOutputStream(outFile);
            copy(fout, fin, 128);
        } catch (FileNotFoundException e) {
            System.err.println("ERROR: file '" + inFile + "' not found");
        } catch (IOException e) {
            System.err.println("ERROR: IO trouble");
        } finally {
            closeQuietly(fin);
            closeQuietly(fout);
        }
    }

    // Returns the gzip-compressed UTF-8 bytes of the given string,
    // or null if compression fails.
    public static byte[] gzipString(String in) {
        OutputStream fout = null;
        try {
            InputStream fin = new ByteArrayInputStream(in.getBytes("UTF8"));
            ByteArrayOutputStream baos = new ByteArrayOutputStream();
            fout = new GZIPOutputStream(baos, 128);
            copy(fout, fin, 128);
            return baos.toByteArray();
        } catch (IOException e) {
            System.err.println("ERROR: IO trouble");
            return null;
        } finally {
            // releases the deflater if copy() did not get as far as closing it
            closeQuietly(fout);
        }
    }

    // Decompresses the given gzip data and decodes it as UTF-8,
    // or returns null if the data cannot be decompressed.
    public static String gunzipString(byte[] in) {
        InputStream fin = null;
        try {
            fin = new GZIPInputStream(new ByteArrayInputStream(in));
            ByteArrayOutputStream fout = new ByteArrayOutputStream();
            copy(fout, fin, 128);
            return new String(fout.toByteArray(), "UTF8");
        } catch (IOException e) {
            System.err.println("ERROR: IO trouble");
            return null;
        } finally {
            // releases the inflater if copy() did not get as far as closing it
            closeQuietly(fin);
        }
    }

    // Copies everything from in to out through buffers of the given size
    // and closes both streams afterwards.
    private static void copy(OutputStream out, InputStream in, int bufferSize) throws IOException {
        InputStream bIn = new BufferedInputStream(in, bufferSize);
        OutputStream bOut = new BufferedOutputStream(out, bufferSize);
        byte buf [] = new byte[bufferSize];
        int n;
        while ((n = bIn.read(buf)) > 0) bOut.write(buf, 0, n);
        bIn.close();
        bOut.close();
    }

    // Closes a stream on a cleanup path; a failure here is ignored because
    // the primary outcome has already been reported by the caller.
    private static void closeQuietly(InputStream stream) {
        if (stream == null) return;
        try {
            stream.close();
        } catch (IOException e) {
            // nothing useful can be done at this point
        }
    }

    // see closeQuietly(InputStream)
    private static void closeQuietly(OutputStream stream) {
        if (stream == null) return;
        try {
            stream.close();
        } catch (IOException e) {
            // nothing useful can be done at this point
        }
    }

    private static void help() {
        System.out.println("AnomicGzip (2004) by Michael Christen");
        System.out.println("usage: gzip [-u] []");
    }


    // Command line entry point: "-u" decompresses, anything else compresses.
    // The target name is derived from the source name when it is not given.
    public static void main(String[] s) {
        if (s.length == 0) {
            help();
            System.exit(0);
        }
        if ((s[0].equals("-h")) || (s[0].equals("-help"))) {
            help();
            System.exit(0);
        }
        if (s[0].equals("-u")) {
            if ((s.length < 2) || (s.length > 3)) {help(); System.exit(-1);}
            String target;
            if (s.length == 2) {
                if (s[1].endsWith(".gz"))
                    target = s[1].substring(0, s[1].length() - 3);
                else
                    target = s[1] + ".gunzip";
            } else {
                target = s[2];
            }
            gzip.gunzipFile((s[1]), target);
            System.exit(0);
        }
        if ((s.length < 1) || (s.length > 2)) {help(); System.exit(-1);}
        String target;
        if (s.length == 1) target = s[0] + ".gz"; else target = s[1];
        gzip.gzipFile((s[0]), target);
        System.exit(0);
    }

}
diff --git a/source/de/anomic/tools/htmlPlotter.java b/source/de/anomic/tools/htmlPlotter.java new file
mode 100644 index 000000000..7f3e7972c --- /dev/null +++ b/source/de/anomic/tools/htmlPlotter.java @@ -0,0 +1,265 @@ +// htmlPlotter.java +// --------------------------- +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2004 +// last major change: 16.09.2004 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. 
+// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. + +package de.anomic.tools; + +import java.io.*; +import de.anomic.server.*; + +public class htmlPlotter { + + private byte[][][] grid; + int width, height; + byte[] defaultCol; + + public htmlPlotter(int width, int height, String defaultCol) { + this.width = width; + this.height = height; + this.defaultCol = parseCol(defaultCol); + grid = new byte[width][height][3]; + for (int x = 0; x < width; x++) + for (int y = 0; y < height; y++) grid[x][y] = null; + } + + private byte[] parseCol(String col) { + byte[] c = new byte[3]; + c[0] = (byte) Integer.parseInt(col.substring(0,2), 16); + c[1] = (byte) Integer.parseInt(col.substring(2,4), 16); + c[2] = (byte) Integer.parseInt(col.substring(4,6), 16); + return c; + } + + private String genCol(byte[] c) { + if (c == null) c = defaultCol; + return hex2(c[0]) + hex2(c[1]) + hex2(c[2]); + } + + private String hex2(byte b) { + String h = Integer.toHexString(((int) b) & 0xff); + if (h.length() == 1) return "0" + h; else return h; + } + + public void plot(int x, int y, String col) { + plot(x, y, parseCol(col)); + } + + public void plot(int x, int y, byte[] c) { + if ((x < 0) || (x >= width)) return; + if ((y < 0) || (y >= height)) return; + grid[x][y] = c; + } + + private boolean equalCol(byte[] a, byte[] b) { + if ((a == null) && (b == null)) return true; + if ((a == null) || (b == null)) return false; + return ((a[0] == b[0]) && (a[1] == b[1]) && (a[2] == b[2])); + } + + public String toHTML() { + String s = "

    diff --git a/htroot/env/templates/metas.template b/htroot/env/templates/metas.template new file mode 100644 index 000000000..622c8d2a3 --- /dev/null +++ b/htroot/env/templates/metas.template @@ -0,0 +1,7 @@ + + + + + + + diff --git a/htroot/env/templates/submenuCookie.template b/htroot/env/templates/submenuCookie.template new file mode 100644 index 000000000..e575f28c4 --- /dev/null +++ b/htroot/env/templates/submenuCookie.template @@ -0,0 +1,9 @@ + + + + + + + + + \ No newline at end of file diff --git a/htroot/htdocsdefault/dir.html b/htroot/htdocsdefault/dir.html new file mode 100644 index 000000000..0172b88dd --- /dev/null +++ b/htroot/htdocsdefault/dir.html @@ -0,0 +1,102 @@ + + + +YACY: Public Files + + + + + + + + + + + + + +
    + + + + + + +
    + + + + + + + +
    +
    + + Public File Directory + +
    +
    +
    + + + +
    +
    + +
    +
      
    +
    +
    + + + +
      + + + + + + + + +
    Welcome! You are identified and authorized as "#[ident]#".#[logout]#
    + + + + + + + +
    + + + +
    Service
    #[service]#
    +
    + + + +
    Account
    #[account]#
    +
    + + + +
    Info
    #[info]#
    +
    + +
    + + +
    +

    #[dir]#

    +
    +

    +Access of this page with the URL's: +http://#[peeraddress]#/share/, or +http://share.#[peerdomain]#.yacy from within the YACY network. +

    + +
     
    + + diff --git a/htroot/htdocsdefault/dir.java b/htroot/htdocsdefault/dir.java new file mode 100644 index 000000000..a1126738a --- /dev/null +++ b/htroot/htdocsdefault/dir.java @@ -0,0 +1,467 @@ +// dir.java +// ----------------------- +// part of the AnomicHTTPD caching proxy +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2004, 2005 +// last major change: 15.02.2005 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. 
+// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. + +// you must compile this file with +// javac -classpath /classes /htroot/htdocsdefault/dir.java +// which most probably means to compile this with +// javac -classpath ../../classes dir.java + +import java.util.*; +import java.text.*; +import java.io.*; +import java.net.*; +import de.anomic.tools.*; +import de.anomic.server.*; +import de.anomic.yacy.*; +import de.anomic.data.*; +import de.anomic.plasma.*; +import de.anomic.http.*; +import de.anomic.htmlFilter.*; + +public class dir { + + private static SimpleDateFormat SimpleFormatter = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss"); + public static String dateString(Date date) { + return SimpleFormatter.format(date); + } + + public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) { + plasmaSwitchboard switchboard = (plasmaSwitchboard) env; + serverObjects prop = new serverObjects(); + + //System.out.println("###Header="+ header); + //System.out.println("###post=" + post); + String action = ((post == null) ? 
"info" : post.get("action", "info")); + String tree = ""; + // variables for this path + //File htroot = new File(switchboard.getRootPath(), switchboard.getConfig("htRootPath", "htroot")); + File htroot = new File(switchboard.getRootPath(), switchboard.getConfig("htDocsPath", "DATA/HTDOCS")); + String path = (String) header.get("PATH", "/"); + int p = path.lastIndexOf("/"); + if (p >= 0) path = path.substring(0, p + 1); + File dir = new File(htroot, path); + + // general settings + prop.put("peername", env.getConfig("peerName", "")); + prop.put("peerdomain", env.getConfig("peerName", "").toLowerCase()); + prop.put("peeraddress", yacyCore.seedDB.mySeed.getAddress()); + prop.put("hostname", serverCore.publicIP().getHostName()); + prop.put("hostip", serverCore.publicIP().getHostAddress()); + prop.put("port", env.getConfig("port", "8080")); + + // generate upload/download authorizations + String adminAccountBase64MD5 = switchboard.getConfig("adminAccountBase64MD5", ""); + String uploadAccountBase64MD5 = switchboard.getConfig("uploadAccountBase64MD5", ""); + String downloadAccountBase64MD5 = switchboard.getConfig("downloadAccountBase64MD5", ""); + String logoutAccountBase64MD5 = serverCodings.standardCoder.encodeMD5Hex(serverCodings.standardCoder.encodeBase64String(":")); + String authorizationMD5 = serverCodings.standardCoder.encodeMD5Hex(((String) header.get("Authorization", "xxxxxx")).trim().substring(6)); + //if (logoutAccountBase64.equals(authorization)) + boolean adminAuthorization = + ((adminAccountBase64MD5.length() != 0) && + (adminAccountBase64MD5.equals(authorizationMD5))); + boolean uploadAuthorization = + ((adminAuthorization) || + ((uploadAccountBase64MD5.length() != 0) && + (uploadAccountBase64MD5.equals(authorizationMD5)))); + boolean downloadAuthorization = + ((adminAuthorization) || (uploadAuthorization) || + (downloadAccountBase64MD5.length() == 0) || + (downloadAccountBase64MD5.equals(authorizationMD5))); + + // do authentitcate processes by 
triggering the http authenticate method + if ((action.equals("authenticateAdmin")) && (!(adminAuthorization))) { + prop.put("AUTHENTICATE", "admin log-in"); + return prop; + } + if ((action.equals("authenticateUpload")) && (!(uploadAuthorization))) { + prop.put("AUTHENTICATE", "upload log-in"); + return prop; + } + if ((action.equals("authenticateDownload")) && (!(downloadAuthorization))) { + prop.put("AUTHENTICATE", "download log-in"); + return prop; + } + + // work off actions + if (action.equals("logout")) { + if (adminAuthorization) { + prop.put("AUTHENTICATE", "admin log-in"); + return prop; + } else if (uploadAuthorization) { + prop.put("AUTHENTICATE", "upload log-in"); + return prop; + } else if (downloadAuthorization) { + prop.put("AUTHENTICATE", "download log-in"); + return prop; + } else { + action = ""; + } + } + if ((action.equals("downloadPassword")) && (adminAuthorization)) { + switchboard.setConfig("downloadAccountBase64MD5", (post.get("password", "").length() == 0) ? "" : serverCodings.standardCoder.encodeMD5Hex(serverCodings.standardCoder.encodeBase64String("download:" + post.get("password", "")))); + } + if ((action.equals("uploadPassword")) && (adminAuthorization)) { + switchboard.setConfig("uploadAccountBase64MD5", (post.get("password", "").length() == 0) ? 
"" : serverCodings.standardCoder.encodeMD5Hex(serverCodings.standardCoder.encodeBase64String("upload:" + post.get("password", "")))); + } + if ((action.equals("upload")) && + ((uploadAuthorization) || (adminAuthorization))) { + String filename = new File(post.get("file", "dummy")).getName(); + String description = post.get("description", ""); + p = filename.lastIndexOf("\\"); + if (p >= 0) filename = filename.substring(p + 1); + File newfile = new File(dir, filename); + File newfilemd5 = new File(dir, filename + ".md5"); + byte[] binary = (byte[]) post.get((Object) "file$file", (Object) new byte[0]); + try { + serverFileUtils.write(binary, newfile); + String md5s = serverCodings.encodeMD5Hex(newfile); + serverFileUtils.write((md5s + "\n" + description).getBytes(), newfilemd5); // generate md5 + + // index file info + if (post.get("indexing", "").equals("on")) { + String urlstring = yacyhURL(yacyCore.seedDB.mySeed, filename, md5s); + String phrase = filename.replace('.', ' ').replace('_', ' ').replace('-', ' '); + indexPhrase(switchboard, urlstring, phrase, description); + } + } catch (IOException e) {} + + + } + if ((action.equals("newdir")) && + ((uploadAuthorization) || (adminAuthorization))) { + String newdirname = post.get("directory", "EmptyDir"); + if ((newdirname != null) && (newdirname.length() > 0)) { + File newdir = new File(dir, newdirname); + newdir.mkdir(); + try { + serverFileUtils.copy(new File(dir,"dir.html"), new File(newdir, "dir.html")); + serverFileUtils.copy(new File(dir,"dir.class"), new File(newdir, "dir.class")); + } catch (IOException e) {} + } + } + if ((action.equals("delete")) && (adminAuthorization)) { + String filename = post.get("file", "foo"); + File file = new File(dir, filename); + if (file.exists()) { + File filemd5 = new File(dir, post.get("file", "foo") + ".md5"); + // read md5 and phrase + String md5s = ""; + String description = ""; + if (filemd5.exists()) try { + md5s = new String(serverFileUtils.read(filemd5)); + p = 
md5s.indexOf('\n'); + if (p >= 0) { + description = md5s.substring(p + 1); + md5s = md5s.substring(0, p); + } + } catch (IOException e) {} + // delete file(s) + if (file.isDirectory()) { + String[] content = file.list(); + for (int i = 0; i < content.length; i++) (new File(file, content[i])).delete(); + file.delete(); + } else if (file.isFile()) { + file.delete(); + if (filemd5.exists()) filemd5.delete(); + } + // delete index + String urlstring = yacyhURL(yacyCore.seedDB.mySeed, filename, md5s); + String phrase = filename.replace('.', ' ').replace('_', ' ').replace('-', ' '); + deletePhrase(switchboard, urlstring, phrase, description); + } + } + + // if authorized, generate directory tree listing + if ((adminAuthorization) || (uploadAuthorization) || (downloadAuthorization)) { + // generate dir listing + String[] list = dir.list(); + File f, fmd5; + String md5s, description; + Date d; + //tree += "path = " + path + "

    "; + if (list == null) + tree += "This directory is empty.
    "; + else { + int filecount = 0; + tree += "" + + ""; + boolean dark = false; + for (int i = 0; i < list.length; i++) if (!((list[i].startsWith("dir.")) || (list[i].endsWith(".md5")))) { + tree += ""; dark = !dark; + filecount++; + f = new File(dir, list[i]); + fmd5 = new File(dir, list[i] + ".md5"); + try { + if (fmd5.exists()) { + md5s = new String(serverFileUtils.read(fmd5)); + p = md5s.indexOf('\n'); + if (p >= 0) { + description = md5s.substring(p + 1); + md5s = md5s.substring(0, p); + } else { + description = ""; + } + } else { + // generate md5 on-the-fly + md5s = serverCodings.encodeMD5Hex(f); + description = ""; + serverFileUtils.write((md5s + "\n" + description).getBytes(), fmd5); + } + } catch (IOException e) { + md5s = ""; + description = ""; + } + d = new Date(f.lastModified()); + if (f.isDirectory()) { + tree += ""; + tree += ""; + tree += ""; + tree += ""; + tree += ""; + } else { + tree += ""; + tree += ""; + tree += ""; + tree += ""; + //if (adminAuthorization) tree += " "; else tree += "
    "; + tree += "" + serverCore.crlfString; + } + tree += "
    " + path + "
    " + dateString(d) + "" + list[i] + "" + formatLong(0, 10) + "Directory"; + tree += "" + dateString(d) + " "; + tree += "" + list[i] + "" + formatLong(f.length(), 10) + "" + md5s + "" + (((description.length() == 0) && ((list[i].endsWith(".jpg")) || (list[i].endsWith(".gif")) || (list[i].endsWith(".png")))) ? (""; + } + if (adminAuthorization) tree += + "" + + "
    " + + "" + + "" + + "
    "; + if (filecount == 0) { + tree += "EMPTY
    "; + } + } + } + + String ident = ""; + String account = ""; + String service = ""; + String info = ""; + String logout = ""; + if (adminAuthorization) { + ident = "Administrator"; + account = "" + + "" + + "" + + "
    upload:
    " + + "" + + " " + + "" + + "
    download:
    " + + "" + + " " + + "" + + "
    "; + logout = "
    " + + "" + + " (enter empty account)" + + "
    "; + service = "" + + "" + + "" + + "" + + "" + + "" + + "" + + "
    New Directory:
    " + + "" + + " " + + "" + + "
    File Upload:
    " + + "Resource = " + + "
    " + + "Description = 
    " + + "Indexing : 
    " + + "" + + "
    "; + info = "Admin and download accounts are necessary to grant their services to clients; " + + "no password is required for the download-account unless you set one. " + + "Files uploaded and indexed here have a special index entry 'yacyshare'; " + + "if you want to find files that are indexed in any share zone, add the word 'yacyshare' to the search words."; + } else if (uploadAuthorization) { + ident = "Uploader"; + account = "
    " + + "" + + "" + + "
    "; + if (uploadAccountBase64MD5.length() == 0) + logout = ""; + else + logout = "
    " + + "" + + " (enter empty account)" + + "
    "; + service = "" + + "" + + "" + + "" + + "" + + "" + + "" + + "
    New Directory:
    " + + "" + + " " + + "" + + "
    File Upload:
    " + + "Resource = " + + "
    " + + "Description = 
    " + + "Indexing : 
    " + + "" + + "
    "; + info = "Uploaders are not granted to delete files or directories. If you want to do this, log-in as admin."; + } else if (downloadAuthorization) { + ident = "Downloader"; + account = "
    " + + "" + + "" + + "
    " + + "
    " + + "" + + "" + + "
    "; + if (downloadAccountBase64MD5.length() == 0) + logout = ""; + else + logout = "
    " + + "" + + " (enter empty account)" + + "
    "; + service = "You are granted to view directory listings and do downloads in this directory.
    " + + "If you want to upload, please log in as user 'upload'"; + info = "Download is granted even if no download account has been defined. " + + "If you are an administrator and you wish to block non-authorized downloades, please log in as user 'admin' " + + "and set a download password."; + } else { + ident = "not authorized"; + tree = "To inspect this directory you need either an admin, upload or download account. Please log in."; + account = "
    " + + "" + + "" + + "
    " + + "
    " + + "" + + "" + + "
    " + + "
    " + + "" + + "" + + "
    "; + logout = ""; + service = "No service available."; + info = "You must log-in to upload or download."; + } + + prop.put("dir", tree); + prop.put("ident", ident); + prop.put("account", account); + prop.put("service", service); + prop.put("info", info); + prop.put("logout", logout); + // return rewrite properties + return prop; + } + + private static String formatLong(long l, int length) { + String r = "" + l; + int rl = r.length(); + for (int i = rl; i < length; i++) r = " " + r; + return r; + } + + // rDNS services: + // http://www.xdr2.net/reverse_DNS_lookup.asp + // http://remote.12dt.com/rns/ + // http://bl.reynolds.net.au/search/ + // http://www.declude.com/Articles.asp?ID=97 + // http://www.dnsstuff.com/ + + // listlist: http://www.aspnetimap.com/help/welcome/dnsbl.html + + public static String yacyhURL(yacySeed seed, String filename, String md5) { + return "http://share." + seed.getHexHash() + ".yacyh/" + filename + "?md5=" + md5; + } + + public static void indexPhrase(plasmaSwitchboard switchboard, String urlstring, String phrase, String descr) { + try { + URL url = new URL(urlstring); + plasmaCondenser condenser = new plasmaCondenser(new ByteArrayInputStream(("yacyshare. " + phrase + ". 
" + descr).getBytes())); + plasmaCrawlLURL.entry newEntry = switchboard.loadedURL.newEntry( + url, "YaCyShare: " + descr, new Date(), new Date(), + "____________", /*initiator*/ + yacyCore.seedDB.mySeed.hash, /*executor*/ + "AAAAAAAAAAAA", /*referrer*/ + 0, /*copycount*/ + false, /*localneed*/ + Integer.parseInt(condenser.getAnalysis().getProperty("INFORMATION_VALUE","0"), 16), + "**", /*language*/ + plasmaWordIndexEntry.DT_SHARE, /*doctype*/ + phrase.length(), /*size*/ + (int) Long.parseLong(condenser.getAnalysis().getProperty("NUMB_WORDS","0"), 16), + 5 /*process case*/ + ); + + String urlHash = newEntry.hash(); + int words = switchboard.searchManager.addPageIndex(url, urlHash, new Date(), condenser, "**", plasmaWordIndexEntry.DT_SHARE); + } catch (IOException e) {} + } + + public static void deletePhrase(plasmaSwitchboard switchboard, String urlstring, String phrase, String descr) { + try { + String urlhash = plasmaURL.urlHash(new URL(urlstring)); + Set words = plasmaSwitchboard.getWords(("yacyshare " + phrase + " " + descr).getBytes()); + switchboard.removeReferences(urlhash, words); + switchboard.loadedURL.remove(urlhash); + } catch (Exception e) { + System.out.println("INTERNAL ERROR in dir.deletePhrase:"); + e.printStackTrace(); + } + } +} diff --git a/htroot/htdocsdefault/welcome.html b/htroot/htdocsdefault/welcome.html new file mode 100644 index 000000000..0b639178c --- /dev/null +++ b/htroot/htdocsdefault/welcome.html @@ -0,0 +1,69 @@ + + + +YACY: Default Page for Individual Peer Content + + + + + + + + + + + + +
    + + + + + + +
    + + + + +
    Individual Web Page
    +
    +
    + +
    +
    +
    +

    + + + + + + +
    +

    Welcome to your own web page
    in the YACY Network!


    + +

    +THIS IS A DEMONSTRATION PAGE FOR YOUR OWN INDIVIDUAL WEB SERVER! +PLEASE REPLACE THIS PAGE BY PUTTING A FILE index.html INTO THE PATH +<your-application-home>#[wwwpath]# +

    + +

    +This is peer '#[peername]#', running on host #[hostname]#.
    +You are accessing this page from the host '#[clientip]#'.
    + +Every user of YaCy #[couldcan]# access this page +using the URL http://#[peeraddress]#/www/ +or http://www.#[peerdomain]#.yacy from within the YACY network.

    + +

    #[seniorinfo]#

    + +

    We integrated an easy mechanism for web page authoring +which can also be used for simple file-sharing. +Please open the sample page http://share.#[peerdomain]#.yacy +and set upload/download accounts to author and access content on this peer.

    + +
    + +
diff --git a/htroot/htdocsdefault/welcome.java b/htroot/htdocsdefault/welcome.java
new file mode 100644
index 000000000..d80562369
--- /dev/null
+++ b/htroot/htdocsdefault/welcome.java
@@ -0,0 +1,83 @@
+// welcome.java
+// -----------------------
+// part of the AnomicHTTPD caching proxy
+// (C) by Michael Peter Christen; mc@anomic.de
+// first published on http://www.anomic.de
+// Frankfurt, Germany, 2004
+// last change: 05.08.2004
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//
+// Using this software in any meaning (reading, learning, copying, compiling,
+// running) means that you agree that the Author(s) is (are) not responsible
+// for cost, loss of data or any harm that may be caused directly or indirectly
+// by usage of this softare or this documentation. The usage of this software
+// is on your own risk. The installation and usage (starting/running) of this
+// software may allow other people or application to access your computer and
+// any attached devices and is highly dependent on the configuration of the
+// software which must be done by the user of the software; the author(s) is
+// (are) also not responsible for proper configuration and usage of the
+// software, even if provoked by documentation provided together with
+// the software.
+//
+// Any changes to this file according to the GPL as documented in the file
+// gpl.txt aside this file in the shipment you received can be done to the
+// lines that follows this copyright notice here, but changes must not be
+// done inside the copyright notive above. A re-distribution must contain
+// the intact and unchanged copyright notice.
+// Contributions and changes to the program code must be marked as such.
+
+// you must compile this file with
+// javac -classpath .:../Classes welcome.java
+// if the shell's current path is HTROOT
+
+import java.util.*;
+import de.anomic.tools.*;
+import de.anomic.server.*;
+import de.anomic.yacy.*;
+import de.anomic.http.*;
+
+/**
+ * Servlet behind the default welcome page of a peer's own web space.
+ */
+public class welcome {
+
+    /**
+     * Collects the template replacements for welcome.html.
+     *
+     * @param header request header; only the "CLIENTIP" entry is read
+     * @param post   request arguments; not used by this page
+     * @param env    server environment, read for "peerName", "port" and "htDocsPath"
+     * @return the replacement properties for the template
+     */
+    public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) {
+        // return variable that accumulates replacements
+        serverObjects prop = new serverObjects();
+
+        // update seed info
+        yacyCore.peerActions.updateMySeed();
+
+        // identity of this peer
+        String peerName = env.getConfig("peerName", "");
+        prop.put("peername", peerName);
+        prop.put("peerdomain", peerName.toLowerCase());
+        // change: the peer-type lookup below already allows for a missing own seed,
+        // so the address lookup must not dereference it unconditionally either
+        prop.put("peeraddress", (yacyCore.seedDB.mySeed == null) ? "" : yacyCore.seedDB.mySeed.getAddress());
+        prop.put("hostname", serverCore.publicIP().getHostName());
+        prop.put("hostip", serverCore.publicIP().getHostAddress());
+        prop.put("port", env.getConfig("port", "8080"));
+        prop.put("clientip", header.get("CLIENTIP", ""));
+
+        // only senior and principal peers are reachable from outside
+        String peertype = (yacyCore.seedDB.mySeed == null) ? "virgin" : yacyCore.seedDB.mySeed.get("PeerType", "virgin");
+        boolean senior = (peertype.equals("senior")) || (peertype.equals("principal"));
+        if (senior) {
+            prop.put("couldcan", "can");
+            prop.put("seniorinfo", "This peer runs in senior mode which means that your peer can be accessed using the addresses shown above.");
+        } else {
+            prop.put("couldcan", "could");
+            prop.put("seniorinfo", "Nobody can access your peer from the outside of your intranet. You must open your firewall and/or set a 'virtual server' at your router settings to enable access to the addresses as shown below.");
+        }
+        prop.put("wwwpath", "/" + env.getConfig("htDocsPath", "DATA/HTDOCS"));
+
+        // return rewrite properties
+        return prop;
+    }
+
+}
diff --git a/htroot/index.html b/htroot/index.html
new file mode 100644
index 000000000..915a3af23
--- /dev/null
+++ b/htroot/index.html
@@ -0,0 +1,141 @@
+ + + +YACY: Search Page +#[metas]# + + + +#[header]# +

    +

    SEARCH WITH



    +
    #[promoteSearchPageGreeting]#

    + + +
    +
    +enter search word list, separated by space: +
    + + +
    +max. number of results: + +  order by: + +
    +resource: + +     max. search time (seconds): + +
    +url mask: +#(urlmaskoptions)# + +:: + restrict on + show all +#(/urlmaskoptions)# +
    +
    +

    +#(excluded)# +:: +The following words are stop-words and had been excluded from the search: #[stopwords]#. +#(/excluded)# +#(num-results)# +:: +No Results. +:: +No Results. (length of search words must be at least 3 characters) +:: +

    No Results. If you think this is unsatisfactory then you may consider supporting +the global index by running your own proxy/peer. +If everybody contributes, the results will get better.

    +

    Other possible reasons for no result:
    +

      +
    • the search time was too short. Search again with same query to catch up 'late peers'.
    • +
    • there is currently no support for German umlauts. Please use ae/oe/ue instead
    • +
    • words of length < 3 are not indexed. please omit such words
    • +
    • yacy tries to index singular instead of plural words. please use the singular form
    • +
    • only complete words are indexed, not parts of words
    • +
    • don't use stopwords as search words
    • +
    • during this test phase the reaction time of remote peers is unknown. +Please repeat your search to see if there are late-responses from remote peers
    • +

    +

    If you think the information you searched should exist in the global index, +then please run your own peer and start a crawl of your wanted information to make it +available for everyone. Then stay online to support crawls from other peers. Thank you!

    +:: +#[linkcount]# results from a total number of #[totalcount]# known links. +You can try to +catch up more links +from 'late' peers to enrich this search result. +#(/num-results)# +#(combine)# +:: +
    Please search for several words simultaneously! Click on one of these constraints:
    +#{words}# +#[word]# +#{/words}# +#(/combine)# +

    + + +#{results}# + +

    +#[description]# +
    +#[urlname]#
    +#[date]#

    + +#{/results}# + + +

    +#(resultbottomline)# +:: +The global search resulted in #[globalresults]# link contributions from other yacy peers +:: +You can enrich the search results by using the 'global' option: this will search also other yacy peers +:: +You cannot get global search results because you are not connected to another yacy peer. +To connect you must first use the proxy. +See here for an installation guide. +Alternatively, you can run the proxy in permanent online mode, which also grants global search. +To do this, press this button: +

    + +
    +:: +You can enrich the search results by using the 'global' option; you must also switch to online mode +(by using the proxy) to contribute to the global search. +#(/resultbottomline)# +


    +
    +
    YACY is a GPL'ed project +with the target of implementing a P2P-based global search engine.
    +Architecture and implementation (C) by Michael Peter Christen, +
    + +#[footer]# + +
diff --git a/htroot/index.java b/htroot/index.java
new file mode 100644
index 000000000..4316ccf30
--- /dev/null
+++ b/htroot/index.java
@@ -0,0 +1,262 @@
+// index.java
+// -----------------------
+// part of the AnomicHTTPD caching proxy
+// (C) by Michael Peter Christen; mc@anomic.de
+// first published on http://www.anomic.de
+// Frankfurt, Germany, 2004
+// last major change: 12.07.2003
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//
+// Using this software in any meaning (reading, learning, copying, compiling,
+// running) means that you agree that the Author(s) is (are) not responsible
+// for cost, loss of data or any harm that may be caused directly or indirectly
+// by usage of this softare or this documentation. The usage of this software
+// is on your own risk. The installation and usage (starting/running) of this
+// software may allow other people or application to access your computer and
+// any attached devices and is highly dependent on the configuration of the
+// software which must be done by the user of the software; the author(s) is
+// (are) also not responsible for proper configuration and usage of the
+// software, even if provoked by documentation provided together with
+// the software.
+//
+// Any changes to this file according to the GPL as documented in the file
+// gpl.txt aside this file in the shipment you received can be done to the
+// lines that follows this copyright notice here, but changes must not be
+// done inside the copyright notive above. A re-distribution must contain
+// the intact and unchanged copyright notice.
+// Contributions and changes to the program code must be marked as such.
+
+
+// you must compile this file with
+// javac -classpath .:../classes index.java
+// if the shell's current path is HTROOT
+
+import java.io.*;
+import java.util.*;
+import java.net.*;
+import de.anomic.tools.*;
+import de.anomic.server.*;
+import de.anomic.htmlFilter.*;
+import de.anomic.yacy.*;
+import de.anomic.http.*;
+import de.anomic.kelondro.*;
+import de.anomic.plasma.*;
+
+/**
+ * Servlet behind the search page (index.html).
+ */
+public class index {
+
+    /**
+     * Produces the template replacements for the search page.
+     * Without request arguments the external referrer (if any) is recorded
+     * and an empty search form is returned; otherwise the query is cleaned,
+     * stop-words are removed, the search is run and the result is decorated
+     * with the values needed to re-render the form.
+     *
+     * @param header request header ("Referer", "CLIENTIP", "User-Agent" are read)
+     * @param post   request arguments, or null when the plain page is requested
+     * @param env    server environment; cast to plasmaSwitchboard
+     * @return the replacement properties for the template
+     */
+    public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) {
+        plasmaSwitchboard sb = (plasmaSwitchboard) env;
+
+        // case if no values are requested
+        if ((post == null) || (env == null)) {
+
+            // save referrer
+            //System.out.println("HEADER=" + header.toString());
+            String referer = (String) header.get("Referer");
+            if (referer != null) {
+                URL url;
+                try { url = new URL(referer); } catch (MalformedURLException e) { url = null; }
+                if ((url != null) && (serverCore.isNotLocal(url))) {
+                    HashMap referrerprop = new HashMap();
+                    referrerprop.put("count", "1");
+                    referrerprop.put("clientip", header.get("CLIENTIP"));
+                    referrerprop.put("useragent", header.get("User-Agent"));
+                    referrerprop.put("date", (new serverDate()).toShortString(false));
+                    // best effort: a failed statistics update must not break the page
+                    try { sb.facilityDB.update("backlinks", referer, referrerprop); } catch (IOException e) {}
+                }
+            }
+
+            // we create empty entries for template strings
+            serverObjects prop = new serverObjects();
+            prop.put("promoteSearchPageGreeting", env.getConfig("promoteSearchPageGreeting", ""));
+            prop.put("former", "");
+            prop.put("num-results", 0);
+            prop.put("excluded", 0);
+            prop.put("combine", 0);
+            prop.put("resultbottomline", 0);
+            prop.put("count-10", 0);
+            prop.put("count-50", 0);
+            prop.put("count-100", 0);
+            prop.put("count-1000", 0);
+            prop.put("order-quality", 0);
+            prop.put("order-date", 0);
+            prop.put("resource-global", 0);
+            prop.put("resource-local", 0);
+            prop.put("time-1", 0);
+            prop.put("time-3", 0);
+            prop.put("time-10", 1);
+            prop.put("time-30", 0);
+            prop.put("time-60", 0);
+            prop.put("results", "");
+            prop.put("urlmaskoptions", 0);
+            prop.put("urlmaskoptions_urlmaskfilter", ".*");
+            return prop;
+        }
+
+        // process search words
+        String querystring = (String) post.get("search", "");
+        // best effort: a failed statistics update must not break the search
+        try { sb.facilityDB.update("zeitgeist", querystring, post); } catch (IOException e) {}
+        TreeSet query = cleanQuery(querystring);
+        // filter out stopwords
+        TreeSet filtered = kelondroMSetTools.joinConstructive(query, plasmaSwitchboard.stopwords);
+        if (filtered.size() > 0) kelondroMSetTools.excludeDestructive(query, plasmaSwitchboard.stopwords);
+
+        // prepare search order
+        // change: count and time come from the request; a non-numeric value
+        // now falls back to the default instead of raising NumberFormatException
+        String order = (String) post.get("order", "");
+        int count = parseInt((String) post.get("count", "10"), 10);
+        boolean global = ((String) post.get("resource", "global")).equals("global");
+        long searchtime = 1000 * parseLong((String) post.get("time", "1"), 1);
+        boolean yacyonline = ((yacyCore.seedDB != null) &&
+                              (yacyCore.seedDB.mySeed != null) &&
+                              (yacyCore.seedDB.mySeed.getAddress() != null));
+
+        String order1 = (order.equals("Quality-Date")) ? "quality" : "date";
+        String order2 = (order.equals("Quality-Date")) ? "date" : "quality";
+        String urlmask = "";
+        if (post.containsKey("urlmask") && post.get("urlmask").equals("no")) {
+            urlmask = ".*";
+        } else {
+            urlmask = (post.containsKey("urlmaskfilter")) ? (String) post.get("urlmaskfilter") : ".*";
+        }
+
+        // do the search; a global search is skipped when the same query is repeated
+        serverObjects prop = ((plasmaSwitchboard) env).searchFromLocal(query, order1, order2, count,
+                ((global) && (yacyonline) && (!(env.getConfig("last-search","").equals(querystring)))),
+                searchtime, urlmask);
+
+        // change: decide whether the search returned anything BEFORE template keys
+        // are added; the former test ran after prop.put(..), which dereferenced a
+        // possibly null result and made size() == 0 impossible
+        boolean emptyResult = (prop == null) || (prop.size() == 0);
+        if (prop == null) prop = new serverObjects();
+
+        // remember the last search expression
+        env.setConfig("last-search", querystring);
+
+        // process result of search
+        prop.put("resultbottomline", 0);
+        if (filtered.size() > 0) {
+            prop.put("excluded", 1);
+            prop.put("excluded_stopwords", filtered.toString());
+        } else {
+            prop.put("excluded", 0);
+        }
+        if (emptyResult) {
+            if (((String) post.get("search", "")).length() < 3)
+                prop.put("num-results", 2);//no results - at least 3 chars
+            else
+                prop.put("num-results", 1);//no results
+
+        } else {
+            int linkcount = parseInt(prop.get("linkcount", "0"), 0);
+            int totalcount = parseInt(prop.get("totalcount", "0"), 0);
+            if (totalcount > 10) {
+                Object[] references = (Object[]) prop.get("references", new String[0]);
+                prop.put("num-results", 4);
+                prop.put("num-results_linkcount", linkcount);
+                prop.put("num-results_totalcount", totalcount);
+                // offer at most 16 additional words to narrow the search
+                int hintcount = references.length;
+                if (hintcount > 0) {
+                    if (hintcount > 16) hintcount = 16;
+                    prop.put("combine", 1);
+                    String word;
+                    for (int i = 0; i < hintcount; i++) {
+                        word = (String) references[i];
+                        if (word != null) {
+                            prop.put("combine_words_" + i + "_word", word);
+                            prop.put("combine_words_" + i + "_newsearch", ((String) post.get("search", "")).replace(' ', '+') + "+" + word);
+                            prop.put("combine_words_" + i + "_count", count);
+                            prop.put("combine_words_" + i + "_order", order);
+                            prop.put("combine_words_" + i + "_resource", ((global) ? "global" : "local"));
+                            prop.put("combine_words_" + i + "_time", (searchtime / 1000));
+                        }
+                        // NOTE(review): stores the last index, not the number of entries;
+                        // confirm against the template engine's loop semantics
+                        prop.put("combine_words", i);
+                    }
+                }
+            } else {
+                if (totalcount == 0)
+                    prop.put("num-results", 3);//long
+                else {
+                    prop.put("num-results", 4);
+                    prop.put("num-results_linkcount", linkcount);
+                    prop.put("num-results_totalcount", totalcount);
+                }
+            }
+        }
+
+        if (urlmask.equals(".*")) {
+            prop.put("urlmaskoptions", 0);
+        } else {
+            prop.put("urlmaskoptions", 1);
+        }
+
+        prop.put("urlmaskoptions_urlmaskfilter", urlmask);
+
+        if (yacyonline) {
+            if (global) {
+                prop.put("resultbottomline", 1);
+                prop.put("resultbottomline_globalresults", prop.get("globalresults", "0"));
+            } else {
+                prop.put("resultbottomline", 2);
+            }
+        } else {
+            if (global)
+                prop.put("resultbottomline", 3); // change: key was misspelled "resultbottomlien"
+            else
+                prop.put("resultbottomline", 4);
+        }
+
+        prop.put("count-10", ((count == 10)) ? 1 : 0);
+        prop.put("count-50", ((count == 50)) ? 1 : 0);
+        prop.put("count-100", ((count == 100)) ? 1 : 0);
+        prop.put("count-1000", ((count == 1000)) ? 1 : 0);
+        prop.put("order-quality", ((order.equals("Quality-Date")) ? 1 : 0));
+        prop.put("order-date", ((order.equals("Date-Quality")) ? 1 : 0));
+        prop.put("resource-global", ((global) ? 1 : 0));
+        prop.put("resource-local", ((!global) ? 1 : 0));
+        prop.put("time-1", ((searchtime == 1000) ? 1 : 0));
+        prop.put("time-3", ((searchtime == 3000) ? 1 : 0));
+        prop.put("time-10", ((searchtime == 10000) ? 1 : 0));
+        prop.put("time-30", ((searchtime == 30000) ? 1 : 0));
+        prop.put("time-60", ((searchtime == 60000) ? 1 : 0));
+        prop.put("former", (String) post.get("search", ""));
+
+        // 'enrich search' variables
+        prop.put("num-results_former", (String) post.get("search", ""));
+        prop.put("num-results_time", searchtime / 1000);
+        prop.put("num-results_count", count);
+        prop.put("num-results_resource", (global) ? "global" : "local");
+        prop.put("num-results_order", order);
+
+        // return rewrite properties
+        prop.put("promoteSearchPageGreeting", env.getConfig("promoteSearchPageGreeting", ""));
+        return prop;
+    }
+
+    /**
+     * Turns a raw query string into the set of search words.
+     * Umlauts are converted, the text is lower-cased, every separator
+     * character is treated as a blank and empty tokens are dropped.
+     *
+     * @param words the query as typed by the user
+     * @return the distinct words, ordered by kelondroMSetTools.fastStringComparator
+     */
+    public static TreeSet cleanQuery(String words) {
+        // convert Umlaute
+        words = htmlFilterContentScraper.convertUmlaute(new serverByteBuffer(words.getBytes())).toString();
+
+        // remove funny symbols
+        // change: replace EVERY occurrence of a separator; formerly only the
+        // first occurrence of each one was replaced ("a,b,c" gave "a" and "b,c")
+        String seps = "' .,:/-&";
+        words = words.toLowerCase().trim();
+        for (int i = 0; i < seps.length(); i++) {
+            words = words.replace(seps.charAt(i), ' ');
+        }
+
+        // the string is clean now, but we must generate a set out of it
+        String[] a = words.split(" ");
+        TreeSet query = new TreeSet(kelondroMSetTools.fastStringComparator);
+        for (int i = 0; i < a.length; i++) {
+            // change: adjacent separators produce empty tokens, which are not words
+            if (a[i].length() > 0) query.add(a[i]);
+        }
+        return query;
+    }
+
+    // parses a decimal int; returns dflt when the text is missing or not a number
+    private static int parseInt(String s, int dflt) {
+        if (s == null) return dflt;
+        try {
+            return Integer.parseInt(s.trim());
+        } catch (NumberFormatException e) {
+            return dflt;
+        }
+    }
+
+    // parses a decimal long; returns dflt when the text is missing or not a number
+    private static long parseLong(String s, long dflt) {
+        if (s == null) return dflt;
+        try {
+            return Long.parseLong(s.trim());
+        } catch (NumberFormatException e) {
+            return dflt;
+        }
+    }
+}
diff --git a/htroot/index.rss b/htroot/index.rss
new file mode 100644
index 000000000..ad13b3494
--- /dev/null
+++ b/htroot/index.rss
@@ -0,0 +1,13 @@
+ + + + Search for #[former]# + Search for #[former]# + #{results}# + + #[description]# + #[url]# + + #{/results}# + +
diff --git a/htroot/profile.html b/htroot/profile.html
new file mode 100644
index 000000000..2dbf84277
--- /dev/null
+++ b/htroot/profile.html
@@ -0,0 +1,2 @@
+#{list}##[key]#=#[value]#
+#{/list}#
diff --git a/htroot/proxymsg/error.html b/htroot/proxymsg/error.html
new file mode 100644
index 000000000..9239617c2
--- /dev/null
+++ b/htroot/proxymsg/error.html
@@ -0,0 +1,60 @@
+ + + +YaCy: Proxy Error Message + + + + + + + + + + + + +
    + + + + + + +
    + + + + +
    Proxy Error
    +
    +
    +
    +

    + + + + + + +
    +









    +

    +

    Error with url '#[url]#':



    +#[httperror]#

    +

    +#(errormessage)# +unspecified error +:: +not-yet-assigned error +:: +You don't have an active internet connection. Please go online. +:: +Could not load resource. The file is not available. +:: +#(/errormessage)# +

    +

    +
    + + diff --git a/htroot/sharedBlacklist_p.html b/htroot/sharedBlacklist_p.html new file mode 100644 index 000000000..8c3ad1a81 --- /dev/null +++ b/htroot/sharedBlacklist_p.html @@ -0,0 +1,36 @@ + + + +YACY: shared Blacklist +#[metas]# + + + +#[header]# +

    +

    Add Items to Blacklist

    + +

    These are all new Blacklist Entries from "#[name]#":

    +

    #[status]#

    + +
    + + +

    + +#[table]# + +
    Hostname/File/URLBlocked Server 

    +
    + +#[footer]# + + diff --git a/htroot/sharedBlacklist_p.java b/htroot/sharedBlacklist_p.java new file mode 100644 index 000000000..6997b5539 --- /dev/null +++ b/htroot/sharedBlacklist_p.java @@ -0,0 +1,234 @@ +// sharedBlacklist_p.java +// ----------------------- +// part of the AnomicHTTPProxy +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2004 +// +// This File is contributed by Alexander Schier +// last change: 04.07.2004 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. 
The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. +// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. + +// you must compile this file with +// javac -classpath .:../Classes Blacklist_p.java +// if the shell's current path is HTROOT + +import java.util.*; +import java.io.*; +import java.net.*; +import de.anomic.tools.*; +import de.anomic.server.*; +import de.anomic.yacy.*; +import de.anomic.net.*; +import de.anomic.http.*; +import de.anomic.plasma.*; + +public class sharedBlacklist_p { + + public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) { + // return variable that accumulates replacements + serverObjects prop = new serverObjects(); + plasmaSwitchboard switchboard = (plasmaSwitchboard) env; + File listsPath = new File(switchboard.getRootPath(), env.getConfig("listsPath", "DATA/LISTS")); + String filename = ""; + String line = ""; + String out = ""; + String HTMLout = ""; + HashSet Blacklist = new HashSet(); + Vector otherBlacklist = new Vector(); + String status = ""; + int num = 0; + int i = 0; //loop-var + int count = 0; + String IP = "127.0.0.1"; //should be replaced later + String Port = "8080"; //aua! 
+ String Name = ""; + String Hash = ""; + String address = ""; + + if( post != null && post.containsKey("filename") ){ + filename = (String)post.get("filename"); + } + try{ + //Read the List + BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(new File(listsPath, filename)))); + while((line = br.readLine()) != null){ + if(! (line.startsWith("#") || line.equals("")) ){ + Blacklist.add(line); + out += line + serverCore.crlfString; + } + } + br.close(); + }catch(IOException e){} + + if( post != null && post.containsKey("hash") ){ //Step 1: retrieve the Items + Hash = (String) post.get("hash"); + status = "Proxy \"" + Name + "\" not found"; //will later be resetted + + yacySeed seed; + if( yacyCore.seedDB != null ){ //no nullpointer error.. + Enumeration e = yacyCore.seedDB.seedsConnected(true, false, null); + while (e.hasMoreElements()) { + seed = (yacySeed) e.nextElement(); + if (seed != null && seed.hash.equals(Hash) ) { + IP = seed.get("IP", "127.0.0.1"); + Port = seed.get("Port", "8080"); + Name = (String) seed.get("Name", "<" + IP + ":" + Port + ">"); + status = ""; + }else{ + //status = "No Seed found"; //wrong? The Name not known? + } + } + } + //DEBUG + //IP = "217.234.127.107"; + //Port = "8080"; + //Name = "RootServer"; + + //Make Adresse + address = "http://" + IP + ":" + Port + "/yacy/list.html?col=black"; + try { + otherBlacklist = httpc.wget(new URL(address), 6000, null, null, switchboard.remoteProxyHost, switchboard.remoteProxyPort); //get List + } catch (Exception e) {} + + //Make HTML-Optionlist with retrieved items + for(i = 0; i <= (otherBlacklist.size() -1); i++){ + String tmp = (String) otherBlacklist.get(i); + if( !Blacklist.contains(tmp) && (!tmp.equals("")) ){ + //newBlacklist.add(tmp); + count++; + HTMLout += "
    " + Name + "" + tmp + "
    " + Name + "" + tmp + "
    " + Name + "" + tmp + "
    \n\r"; + byte[] col; + int x, i, l; + for (int y = height - 1; y >= 0; y--) { + s += ""; + x = 0; + while (x < width) { + i = x + 1; + col = grid[x][y]; + while ((i < width) && (equalCol(grid[i][y], col))) i++; + l = i - x; + x = i; + if (l == 1) + s += ""; + else + s += ""; + } + s += "\n\r"; + } + s += "
    \n\r"; + return s; + } + + public void draw(int Ax, int Ay, int Bx, int By, String col) { + // Bresenham's line drawing algorithm + byte[] Color = parseCol(col); + int dX = Math.abs(Bx-Ax); + int dY = Math.abs(By-Ay); + int Xincr, Yincr; + if (Ax > Bx) Xincr=-1; else Xincr=1; + if (Ay > By) Yincr=-1; else Yincr=1; + if (dX >= dY) { + int dPr = dY<<1; + int dPru = dPr - (dX<<1); + int P = dPr - dX; + for (; dX>=0; dX--) { + plot(Ax, Ay, Color); + if (P > 0) { + Ax+=Xincr; + Ay+=Yincr; + P+=dPru; + } else { + Ax+=Xincr; + P+=dPr; + } + } + } else { + int dPr = dX<<1; + int dPru = dPr - (dY<<1); + int P = dPr - dY; + for (; dY>=0; dY--) { + plot(Ax, Ay, Color); + if (P > 0) { + Ax+=Xincr; + Ay+=Yincr; + P+=dPru; + } else { + Ay+=Yincr; + P+=dPr; + } + } + } + } + + public static void main(String[] args) { + htmlPlotter plotter = new htmlPlotter(200, 100, "FFFFFF"); + plotter.plot(0,0,"000000"); plotter.plot(33,50,"000000"); plotter.plot(36,50,"000000"); + //plotter.draw(10,10,170,88,"AAAAAA"); + try { + serverFileUtils.write(("" + plotter.toHTML() + "").getBytes(), new File("D:\\bin\\test.html")); + } catch (IOException e) { + e.printStackTrace(); + } + System.out.println(plotter.toHTML()); + } +} + +/* +/============================================================================ +// b r e s l i n e . c +// +// VERSION 1: draws only from one end and calculates both x and y. +// +// Programmer: Kenny Hoff +// Date: 10/25/95 +// Purpose: To implement the Bresenham's line drawing algorithm for all +// slopes and line directions (using minimal routines). 
+//============================================================================ + +#include + +// EXTERNALLY DEFINED FRAMEBUFFER AND FRAMEBUFFER DIMENSIONS (WIDTH)) +extern unsigned char far* FrameBuffer; +extern int WIDTH; +#define SetPixel(x,y,c) FrameBuffer[y*WIDTH+x]=c; + +//============================================================================ +// Fills the intermediate points along a line between the two given endpoints +// using Bresenham's line drawing algorithm. NOTE: this routine does no clipping +// so the coordinate values must be within the FrameBuffer bounds. +// NOTE: USE (Ax,Ay) as the starting point (values that are incremented) +//============================================================================ +void BresLine(int Ax, int Ay, int Bx, int By, unsigned char Color) +{ + //------------------------------------------------------------------------ + // INITIALIZE THE COMPONENTS OF THE ALGORITHM THAT ARE NOT AFFECTED BY THE + // SLOPE OR DIRECTION OF THE LINE + //------------------------------------------------------------------------ + int dX = abs(Bx-Ax); // store the change in X and Y of the line endpoints + int dY = abs(By-Ay); + + //------------------------------------------------------------------------ + // DETERMINE "DIRECTIONS" TO INCREMENT X AND Y (REGARDLESS OF DECISION) + //------------------------------------------------------------------------ + int Xincr, Yincr; + if (Ax > Bx) { Xincr=-1; } else { Xincr=1; } // which direction in X? + if (Ay > By) { Yincr=-1; } else { Yincr=1; } // which direction in Y? + + //------------------------------------------------------------------------ + // DETERMINE INDEPENDENT VARIABLE (ONE THAT ALWAYS INCREMENTS BY 1 (OR -1) ) + // AND INITIATE APPROPRIATE LINE DRAWING ROUTINE (BASED ON FIRST OCTANT + // ALWAYS). THE X AND Y'S MAY BE FLIPPED IF Y IS THE INDEPENDENT VARIABLE. 
+ //------------------------------------------------------------------------ + if (dX >= dY) // if X is the independent variable + { + int dPr = dY<<1; // amount to increment decision if right is chosen (always) + int dPru = dPr - (dX<<1); // amount to increment decision if up is chosen + int P = dPr - dX; // decision variable start value + + for (; dX>=0; dX--) // process each point in the line one at a time (just use dX) + { + SetPixel(Ax, Ay, Color); // plot the pixel + if (P > 0) // is the pixel going right AND up? + { + Ax+=Xincr; // increment independent variable + Ay+=Yincr; // increment dependent variable + P+=dPru; // increment decision (for up) + } + else // is the pixel just going right? + { + Ax+=Xincr; // increment independent variable + P+=dPr; // increment decision (for right) + } + } + } + else // if Y is the independent variable + { + int dPr = dX<<1; // amount to increment decision if right is chosen (always) + int dPru = dPr - (dY<<1); // amount to increment decision if up is chosen + int P = dPr - dY; // decision variable start value + + for (; dY>=0; dY--) // process each point in the line one at a time (just use dY) + { + SetPixel(Ax, Ay, Color); // plot the pixel + if (P > 0) // is the pixel going up AND right? + { + Ax+=Xincr; // increment dependent variable + Ay+=Yincr; // increment independent variable + P+=dPru; // increment decision (for up) + } + else // is the pixel just going up? 
+ { + Ay+=Yincr; // increment independent variable + P+=dPr; // increment decision (for right) + } + } + } +} + +*/ \ No newline at end of file diff --git a/source/de/anomic/tools/loaderCore.java b/source/de/anomic/tools/loaderCore.java new file mode 100644 index 000000000..566bbae9e --- /dev/null +++ b/source/de/anomic/tools/loaderCore.java @@ -0,0 +1,99 @@ +// loaderCore.java +// --------------------------- +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2004 +// last major change: 29.09.2004 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. 
The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. +// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. + + +package de.anomic.tools; + +import java.util.*; + +public abstract class loaderCore implements loaderProcess { + + // status constants + public static final int STATUS_IDLE = -1; // not yet initialized + public static final int STATUS_READY = 0; // initialized, but not yet started + public static final int STATUS_RUNNING = 1; // started and running + public static final int STATUS_ABORTED = 2; // terminated before completion + public static final int STATUS_FAILED = 3; // failed before completion + public static final int STATUS_COMPLETED = 4; // completed; may run again + public static final int STATUS_FINALIZED = 9; // completed; may not run again + + // class variables + protected Exception error = null; + protected int status = STATUS_IDLE; + protected Properties result = new Properties(); + protected boolean run = true; + protected int completion = 0; + + // steering methods + public abstract void feed(Vector v); // returns true if process was successful; should be always synchronized + + public void terminate() { + // if terminated before completion, completed() shows x < 100 + run = false; + } 
+ + // feed-back methods + public Properties result() { + return result; + } + + public int completed() { + // guess of completion status. shall be 100 if totally completed. + return completion; + } + + // error control + public int status() { + // -1=idle, 0=ready, 1=running, 2=aborted, 3=failed, 4=completed, 9=finalized + return status; + } + + public boolean available() { + // true if it is ok to feed with feed() + return (status() == STATUS_READY) || + ((status() == STATUS_COMPLETED) && ((result == null) || (result.size() == 0))); + } + + public Exception error() { + // if in error status: this returnes exception + return error; + } + +} diff --git a/source/de/anomic/tools/loaderProcess.java b/source/de/anomic/tools/loaderProcess.java new file mode 100644 index 000000000..d371bc6aa --- /dev/null +++ b/source/de/anomic/tools/loaderProcess.java @@ -0,0 +1,61 @@ +// loaderProcess.java +// --------------------------- +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2004 +// last major change: 28.09.2004 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. 
/**
 * Contract between a loader thread and the process that consumes what was
 * loaded. See loaderCore for the status constants.
 */
public interface loaderProcess {

    // steering methods

    /**
     * Hands the loaded input to the process. Nothing is returned; the outcome
     * is visible through status() and error(). Should always be synchronized.
     */
    void feed(Vector v);

    /** Stops the process; if terminated before completion, completed() shows x < 100. */
    void terminate();

    // feed-back methods

    /** The result produced by the process. */
    Properties result();

    /** Guess of completion status. Shall be 100 if totally completed. */
    int completed();

    // error control

    /** Current status; see loaderCore status constants. */
    int status();

    /** True if it is ok to feed with feed(). */
    boolean available();

    /** If in error status: the exception that was recorded. */
    Exception error();

}
The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. +// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. + +package de.anomic.tools; + +import java.io.*; +import java.net.*; +import java.util.*; +import de.anomic.http.*; + +public class loaderThreads { + + // global values for loader threads + private int timeout; + private String user; + private String password; + private String remoteProxyHost; + private int remoteProxyPort; + + // management objects for collection of threads + Hashtable threads; + int completed, failed; + + public loaderThreads() { + this(null, 0); + } + + public loaderThreads(String remoteProxyHost, int remoteProxyPort) { + this(10000, null, null, remoteProxyHost, remoteProxyPort); + } + + public loaderThreads(int timeout, String user, String password, + String remoteProxyHost, int remoteProxyPort) { + this.timeout = timeout; + this.user = user; + this.password = password; + this.remoteProxyHost = remoteProxyHost; + this.remoteProxyPort = remoteProxyPort; + this.threads = new Hashtable(); + this.completed = 0; + this.failed = 0; + } + + public void newPropLoaderThread(String name, URL url) { + newThread(name, url, new propLoader()); + } + + public void newThread(String name, URL url, 
loaderProcess process) { + Thread t = new loaderThread(url, process); + threads.put(name, t); + t.start(); + } + + public void terminateThread(String name) { + loaderThread t = (loaderThread) threads.get(name); + if (t == null) throw new RuntimeException("no such thread: " + name); + else t.terminate(); + } + + public int threadCompleted(String name) { + loaderThread t = (loaderThread) threads.get(name); + if (t == null) throw new RuntimeException("no such thread: " + name); + else return t.completed(); + } + + public int threadStatus(String name) { + loaderThread t = (loaderThread) threads.get(name); + if (t == null) throw new RuntimeException("no such thread: " + name); + else return t.status(); + } + + public int completed() { + return completed; + } + + public int failed() { + return failed; + } + + public int count() { + return threads.size(); + } + + public Exception threadError(String name) { + loaderThread t = (loaderThread) threads.get(name); + if (t == null) throw new RuntimeException("no such thread: " + name); + else return t.error(); + } + + protected class loaderThread extends Thread { + private URL url; + private Exception error; + private long starttime; + private loaderProcess process; + private Vector page; + private boolean loaded; + + public loaderThread(URL url, loaderProcess process) { + this.url = url; + this.process = process; + this.error = null; + this.starttime = System.currentTimeMillis(); + this.page = null; + this.loaded = false; + } + + public void run() { + try { + page = httpc.wget(url, timeout, user, password, remoteProxyHost, remoteProxyPort); + loaded = true; + process.feed(page); + if (process.status() == loaderCore.STATUS_FAILED) { + error = process.error(); + } + if ((process.status() == loaderCore.STATUS_COMPLETED) || + (process.status() == loaderCore.STATUS_FINALIZED)) completed++; + if ((process.status() == loaderCore.STATUS_ABORTED) || + (process.status() == loaderCore.STATUS_FAILED)) failed++; + } catch (Exception e) { + 
error = e; + failed++; + } + } + + public void terminate() { + process.terminate(); + } + + public boolean loaded() { + return loaded; + } + + public int completed() { + if (process.status() == loaderCore.STATUS_READY) return 1; + if (process.status() == loaderCore.STATUS_RUNNING) return 9 + ((process.completed() * 9) / 10); + if (process.status() == loaderCore.STATUS_COMPLETED) return 100; + return 0; + } + + public int status() { + return process.status(); // see constants in loaderCore + } + + public Exception error() { + return error; + } + + } + + public class propLoader extends loaderCore implements loaderProcess { + + public propLoader() { + this.status = STATUS_READY; + } + + public synchronized void feed(Vector v) { + this.status = STATUS_RUNNING; + this.completion = 1; + int line = 0; + String s, key, value; + int p; + try { + while ((this.run) && (line < v.size())) { + // parse line and construct a property + s = (String) v.elementAt(line); + if ((s != null) && ((p = s.indexOf('=')) > 0)) { + key = s.substring(0, p).trim(); + value = s.substring(p + 1).trim(); + if (key.length() > 0) result.put(key, value); + } + // update thread information + line++; + this.completion = 100 * line / v.size(); + } + if (line == v.size()) { + this.status = STATUS_COMPLETED; + return; + } else { + this.status = STATUS_ABORTED; + return; + } + } catch (Exception e) { + this.status = STATUS_FAILED; + this.error = e; + return; + } + } + } + + public static void main(String[] args) { + loaderThreads loader = new loaderThreads("192.168.1.122", 3128); + try { + loader.newPropLoaderThread("load1", new URL("http://www.anomic.de/superseed.txt")); + } catch (MalformedURLException e) { + + } + } + +} diff --git a/source/de/anomic/tools/nxTools.java b/source/de/anomic/tools/nxTools.java new file mode 100644 index 000000000..80149215e --- /dev/null +++ b/source/de/anomic/tools/nxTools.java @@ -0,0 +1,103 @@ +// nxTools.java +// ------------------------------------- +// (C) by Michael 
Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2004 +// last major change: 04.05.2004 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. +// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notice above. 
/**
 * Small text helpers named after the Unix tools they imitate. All methods
 * work on Vectors of String lines or on single Strings.
 *
 * All methods tolerate null arguments: table() and grep() return an empty
 * collection, tail1() an empty string and awk() null.
 */
public class nxTools {

    /**
     * Parses "key=value" lines into a map. Each line is trimmed, then split
     * at the first '='; key and value are trimmed as well. Lines without '='
     * or with '=' as first character are ignored, as are null entries.
     *
     * @param list lines to parse; may be null
     * @return a new map from key to value, never null
     */
    public static HashMap table(Vector list) {
        HashMap props = new HashMap();
        if (list == null) return props;
        Enumeration i = list.elements();
        int pos;
        String line;
        while (i.hasMoreElements()) {
            line = (String) i.nextElement();
            if (line == null) continue; // Vector permits null elements
            line = line.trim();
            pos = line.indexOf("=");
            if (pos > 0) props.put(line.substring(0, pos).trim(), line.substring(pos + 1).trim());
        }
        return props;
    }

    /**
     * Returns the lines that contain pattern as a plain substring, each
     * followed by up to afterContext subsequent lines.
     *
     * @param list         lines to search; may be null
     * @param afterContext number of lines to copy after every matching line
     * @param pattern      substring to look for (not a regular expression); may be null
     * @return a new Vector with the selected lines in original order, never null
     */
    public static Vector grep(Vector list, int afterContext, String pattern) {
        Vector result = new Vector();
        if ((list == null) || (pattern == null)) return result;
        Enumeration i = list.elements();
        int ac = 0; // context lines that still have to be copied
        String line;
        while (i.hasMoreElements()) {
            line = (String) i.nextElement();
            if ((line != null) && (line.indexOf(pattern) >= 0)) {
                result.add(line);
                ac = afterContext; // a new match restarts the context window
            } else if (ac > 0) {
                // a null entry uses up a context slot but is not copied
                if (line != null) result.add(line);
                ac--;
            }
        }
        return result;
    }

    /**
     * Returns the last line of the list, or "" if the list is null or empty.
     */
    public static String tail1(Vector list) {
        if ((list == null) || (list.size() == 0)) return "";
        return (String) list.lastElement();
    }

    /**
     * Returns the nth word of sentence, where count is the counter and the
     * first word has the number 1. The words are separated by the separator;
     * consecutive separators are not merged, so they delimit empty words.
     *
     * @return the requested word, or null if an argument is null, count is
     *         less than 1 or the sentence has fewer than count words. An empty
     *         word after a trailing separator is reported as null as well.
     */
    public static String awk(String sentence, String separator, int count) {
        if ((sentence == null) || (separator == null) || (count < 1)) return null;
        int pos;
        while ((count >= 1) && (sentence.length() > 0)) {
            pos = sentence.indexOf(separator);
            if (pos < 0) {
                // last word of the sentence
                if (count == 1) return sentence; else return null;
            } else {
                if (count == 1) return sentence.substring(0, pos);
                // drop the leading word and continue with the remainder
                sentence = sentence.substring(pos + separator.length());
                count--;
            }
        }
        return null;
    }

}
b/source/de/anomic/yacy/yacyClient.java @@ -0,0 +1,637 @@ +// yacyClient.java +// ------------------------------------- +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2004 +// last major change: 02.12.2004 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. 
+// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notice above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. + +package de.anomic.yacy; + +import java.io.*; +import java.util.*; +import java.net.*; +import de.anomic.tools.*; +import de.anomic.plasma.*; +import de.anomic.net.*; +import de.anomic.http.*; +import de.anomic.server.*; + +public class yacyClient { + + public static int publishMySeed(String address, String otherHash) { + // this is called to enrich the seed information by + // - own address (if peer is behind a nat/router) + // - check peer type (virgin/junior/senior/principal) + // to do this, we send a 'Hello' to another peer + // this carries the following information: + // 'iam' - own hash + // 'youare' - remote hash, to verify that we are correct + // 'key' - a session key that the remote peer may use to answer + // and the own seed string + // we expect the following information to be send back: + // - 'yourip' the ip of the connection peer (we) + // - 'yourtype' the type of this peer that the other peer checked by asking for a specific word + // and the remote seed string + // the number of new seeds are returned + // one exceptional failure case is when we know the other's peers hash, the other peers responds correctly + // but they appear to be another peer by comparisment of the other peer's hash + // this works of course only if we know the other peer's hash. 
+ + String key = crypt.randomSalt(); + HashMap result = null; + try { + /* + URL url = new URL("http://" + address + "/yacy/hello.html?iam=" + yacyCore.seedCache.mySeed.hash + + "&pattern=&count=20" + + "&key=" + key + "&seed=" + yacyCore.seedCache.mySeed.genSeedStr(key)); + yacyCore.log.logDebug("HELLO to URL " + url.toString()); + result = nxTools.table(httpc.wget(url, + 10000, null, null, yacyCore.seedCache.sb.remoteProxyHost, yacyCore.seedCache.sb.remoteProxyPort)); + */ + + URL url = new URL("http://" + address + "/yacy/hello.html"); + serverObjects obj = new serverObjects(); + obj.put("iam", yacyCore.seedDB.mySeed.hash); + obj.put("pattern", ""); + obj.put("count", "20"); + obj.put("key", key); + obj.put("mytime", yacyCore.universalDateShortString()); + obj.put("seed", yacyCore.seedDB.mySeed.genSeedStr(key)); + result = nxTools.table(httpc.wput(url, + 20000, null, null, + yacyCore.seedDB.sb.remoteProxyHost, + yacyCore.seedDB.sb.remoteProxyPort, + obj)); + } catch (Exception e) { + yacyCore.log.logDebug("yacyClient.publishMySeed exception:" + e.getMessage()); + return -1; + } + if ((result == null) || (result.size() < 3)) { + yacyCore.log.logDebug("yacyClient.publishMySeed result error: " + + ((result == null) ? 
"result null" : ("result=" + result.toString()))); + return -1; + } + + Date remoteTime = yacyCore.parseUniversalDate((String) result.get("mytime")); // read remote time + + // check consistency with expectation + if ((otherHash != null ) && (otherHash.length() > 0)) { + yacySeed otherPeer = yacySeed.genRemoteSeed((String) result.get("seed0"), key, remoteTime); + if ((otherPeer == null) || (!(otherPeer.hash.equals(otherHash)))) { + yacyCore.log.logDebug("yacyClient.publishMySeed consistency error: other peer wrong"); + return -1; // no success + } + } + + // set my own seed according to new information + yacySeed mySeedBkp = (yacySeed) yacyCore.seedDB.mySeed.clone(); + yacyCore.seedDB.mySeed.put("IP", (String) result.get("yourip")); + String mytype = (String) result.get("yourtype"); + if (mytype == null) mytype = "junior"; + if ((yacyCore.seedDB.mySeed.get("PeerType", "junior").equals("principal")) && (mytype.equals("senior"))) mytype = "principal"; + yacyCore.seedDB.mySeed.put("PeerType", mytype); + + if (!(yacyCore.seedDB.mySeed.isProper())) { + yacyCore.seedDB.mySeed = mySeedBkp; + yacyCore.log.logDebug("yacyClient.publishMySeed mySeed error: not proper"); + return -1; + } + + // read the seeds that the peer returned and integrate them into own database + int i = 0; + String seedStr; + int count = 0; + while ((seedStr = (String) result.get("seed" + i++)) != null) { + // integrate new seed into own database + // the first seed, "seed0" is the seed of the responding peer + if (yacyCore.peerActions.peerArrival(yacySeed.genRemoteSeed(seedStr, key, remoteTime), (i == 1))) count++; + } + return count; + } + + + public static yacySeed querySeed(yacySeed target, String seedHash) { + String key = crypt.randomSalt(); + try { + HashMap result = nxTools.table(httpc.wget( + new URL("http://" + target.getAddress() + + "/yacy/query.html?iam=" + yacyCore.seedDB.mySeed.hash + + "&youare=" + target.hash + "&key=" + key + + "&object=seed&env=" + seedHash), + 10000, null, null, 
yacyCore.seedDB.sb.remoteProxyHost, yacyCore.seedDB.sb.remoteProxyPort)); + if ((result == null) || (result.size() == 0)) return null; + Date remoteTime = yacyCore.parseUniversalDate((String) result.get("mytime")); // read remote time + return yacySeed.genRemoteSeed((String) result.get("response"), key, remoteTime); + } catch (Exception e) { + yacyCore.log.logError("yacyClient.querySeed error:" + e.getMessage()); + return null; + } + } + + public static int queryRWICount(yacySeed target, String wordHash) { + try { + HashMap result = nxTools.table(httpc.wget( + new URL("http://" + target.getAddress() + + "/yacy/query.html?iam=" + yacyCore.seedDB.mySeed.hash + + "&youare=" + target.hash + "&key=" + + "&object=rwicount&env=" + wordHash + + "&ttl=0"), + 10000, null, null, yacyCore.seedDB.sb.remoteProxyHost, yacyCore.seedDB.sb.remoteProxyPort)); + if ((result == null) || (result.size() == 0)) return -1; + return Integer.parseInt((String) result.get("response")); + } catch (Exception e) { + yacyCore.log.logError("yacyClient.queryRWICount error:" + e.getMessage()); + return -1; + } + } + + public static int queryUrlCount(yacySeed target) { + if (target == null) return -1; + if (yacyCore.seedDB.mySeed == null) return -1; + String querystr = + "http://" + target.getAddress() + + "/yacy/query.html?iam=" + yacyCore.seedDB.mySeed.hash + + "&youare=" + target.hash + + "&key=" + + "&object=lurlcount&env=&ttl=0"; + try { + HashMap result = nxTools.table(httpc.wget( + new URL(querystr), 5000, null, null, + yacyCore.seedDB.sb.remoteProxyHost, yacyCore.seedDB.sb.remoteProxyPort)); + //yacyCore.log("DEBUG QUERY: query=" + querystr + "; result = " + result.toString()); + if ((result == null) || (result.size() == 0)) return -1; + String resp = (String) result.get("response"); + if (resp == null) return -1; else return Integer.parseInt(resp); + } catch (Exception e) { + //yacyCore.log.logError("yacyClient.queryUrlCount error asking peer '" + target.getName() + "':" + e.toString()); + 
return -1; + } + } + + public static int search(String wordhashes, int count, boolean global, + yacySeed targetPeer, plasmaCrawlLURL urlManager, plasmaSearch searchManager, + long duetime) { + // send a search request to peer with remote Hash + // this mainly converts the words into word hashes + + // INPUT: + // iam : complete seed of the requesting peer + // youare : seed hash of the target peer, used for testing network stability + // key : transmission key for response + // search : a list of search words + // hsearch : a string of word hashes + // fwdep : forward depth. if "0" then peer may NOT ask another peer for more results + // fwden : forward deny, a list of seed hashes. They may NOT be target of forward hopping + // count : maximum number of wanted results + // global : if "true", then result may consist of answers from other peers + // duetime : maximum time that a peer should spent to create a result + + // request result + String key = crypt.randomSalt(); + try { + String url = "http://" + targetPeer.getAddress() + "/yacy/search.html"; + /* + String url = "http://" + targetPeer.getAddress() + + "/yacy/search.html?myseed=" + yacyCore.seedCache.mySeed.genSeedStr(key) + + "&youare=" + targetPeer.hash + "&key=" + key + + "&myseed=" + yacyCore.seedCache.mySeed.genSeedStr(key) + + "&count=" + count + "&resource=" + ((global) ? "global" : "local") + + "&query=" + wordhashes; + */ + serverObjects obj = new serverObjects(); + obj.put("myseed", yacyCore.seedDB.mySeed.genSeedStr(key)); + obj.put("youare", targetPeer.hash); + obj.put("key", key); + obj.put("count", count); + obj.put("resource", ((global) ? 
"global" : "local")); + obj.put("query", wordhashes); + obj.put("ttl", "0"); + obj.put("duetime", "" + duetime); + obj.put("mytime", yacyCore.universalDateShortString()); + //yacyCore.log.logDebug("yacyClient.search url=" + url); + long timestamp = System.currentTimeMillis(); + HashMap result = nxTools.table(httpc.wput(new URL(url), + 300000, null, null, + yacyCore.seedDB.sb.remoteProxyHost, + yacyCore.seedDB.sb.remoteProxyPort, + obj)); + long totalrequesttime = System.currentTimeMillis() - timestamp; + + /* + HashMap result = nxTools.table(httpc.wget(new URL(url), + 300000, null, null, yacyCore.seedCache.remoteProxyHost, yacyCore.seedCache.remoteProxyPort)); + */ + // OUTPUT: + // version : application version of responder + // uptime : uptime in seconds of responder + // total : number of total available LURL's for this search + // count : number of returned LURL's for this search + // resource : LURL of search + // fwhop : hops (depth) of forwards that had been performed to construct this result + // fwsrc : peers that helped to construct this result + // fwrec : peers that would have helped to construct this result (recommendations) + // searchtime : time that the peer actually spent to create the result + // references : references (search hints) that was calculated during search + + // now create a plasmaIndex out of this result + //System.out.println("yacyClient: search result = " + result.toString()); // debug + int results = Integer.parseInt((String) result.get("count")); + //System.out.println("***result count " + results); + plasmaCrawlLURL.entry link; + String wordhash; + for (int n = 0; n < results; n++) { + link = urlManager.newEntry((String) result.get("resource" + n), true, yacyCore.seedDB.mySeed.hash, targetPeer.hash, 2); + for (int m = 0; m < wordhashes.length() / plasmaCrawlLURL.urlHashLength; m++) { + wordhash = wordhashes.substring(m * plasmaCrawlLURL.urlHashLength, (m + 1) * plasmaCrawlLURL.urlHashLength); + 
searchManager.addWordIndex(link.url(), link.hash(), link.moddate(), link.quality(), + wordhash, link.wordCount(), 0, 0, 0, link.language(), link.doctype(), false); + } + } + long searchtime; + try { + searchtime = Integer.parseInt("" + (String) result.get("searchtime")); + } catch (NumberFormatException e) { + searchtime = totalrequesttime; + } + yacyCore.log.logDebug("yacyClient.search: processed " + results + " links from peer " + targetPeer.hash + "; duetime=" + duetime + ", searchtime=" + searchtime + ", netdelay=" + (totalrequesttime - searchtime) + ", references=" + result.get("references")); + return results; + } catch (Exception e) { + yacyCore.log.logError("yacyClient.search error: '" + targetPeer.get("Name", "anonymous") + "' failed - " + e); + //e.printStackTrace(); + return 0; + } + } + + public static HashMap permissionMessage(String targetHash) { + // ask for allowed message size and attachement size + // if this replies null, the peer does not answer + if ((yacyCore.seedDB == null) || (yacyCore.seedDB.mySeed == null)) return null; + serverObjects post = new serverObjects(); + String key = crypt.randomSalt(); + post.put("key", key); + post.put("process", "permission"); + post.put("iam", yacyCore.seedDB.mySeed.hash); + post.put("youare", targetHash); + post.put("mytime", yacyCore.universalDateShortString()); + String address; + if (targetHash.equals(yacyCore.seedDB.mySeed.hash)) { + address = yacyCore.seedDB.mySeed.getAddress(); + //System.out.println("local address: " + address); + } else { + yacySeed targetSeed = yacyCore.seedDB.getConnected(targetHash); + if (targetSeed == null) return null; + address = targetSeed.getAddress(); + //System.out.println("remote address: " + address); + } + if (address == null) address = "localhost:8080"; + try { + return nxTools.table(httpc.wput( + new URL("http://" + address + "/yacy/message.html"), + 8000, null, null, yacyCore.seedDB.sb.remoteProxyHost, yacyCore.seedDB.sb.remoteProxyPort, post)); + } catch (Exception 
e) { + // most probably a network time-out exception + yacyCore.log.logError("yacyClient.permissionMessage error:" + e.getMessage()); + return null; + } + } + + public static HashMap postMessage(String targetHash, String subject, byte[] message) { + // this post a message to the remote message board + serverObjects post = new serverObjects(); + String key = crypt.randomSalt(); + post.put("key", key); + post.put("process", "post"); + post.put("myseed", yacyCore.seedDB.mySeed.genSeedStr(key)); + post.put("youare", targetHash); + post.put("subject", subject); + post.put("mytime", yacyCore.universalDateShortString()); + post.put("message", new String(message)); + String address; + if (targetHash.equals(yacyCore.seedDB.mySeed.hash)) + address = yacyCore.seedDB.mySeed.getAddress(); + else + address = yacyCore.seedDB.getConnected(targetHash).getAddress(); + if (address == null) address = "localhost:8080"; + //System.out.println("DEBUG POST " + address + "/yacy/message.html" + post.toString()); + try { + Vector v = httpc.wput(new URL("http://" + address + "/yacy/message.html"), 20000, null, null, + yacyCore.seedDB.sb.remoteProxyHost, yacyCore.seedDB.sb.remoteProxyPort, post); + //System.out.println("V=" + v.toString()); + return nxTools.table(v); + } catch (Exception e) { + yacyCore.log.logError("yacyClient.postMessage error:" + e.getMessage()); + return null; + } + } + + public static HashMap crawlOrder(yacySeed targetSeed, String url, String referrer, int depth) { + // this post a message to the remote message board + if (targetSeed == null) return null; + if (yacyCore.seedDB.mySeed == null) return null; + if (yacyCore.seedDB.mySeed == targetSeed) return null; + + // construct request + String key = crypt.randomSalt(); + String address = targetSeed.getAddress(); + if (address == null) return null; + try { + return nxTools.table(httpc.wget( + new URL("http://" + address + "/yacy/crawlOrder.html?"+ + "key=" + key + + "&process=crawl" + + "&youare=" + targetSeed.hash + + 
"&iam=" + yacyCore.seedDB.mySeed.hash + + "&url=" + crypt.simpleEncode(url) + + "&referrer=" + crypt.simpleEncode(referrer) + + "&depth=" + depth + + "&ttl=0" + ), + 10000, null, null, yacyCore.seedDB.sb.remoteProxyHost, yacyCore.seedDB.sb.remoteProxyPort)); + } catch (Exception e) { + // most probably a network time-out exception + yacyCore.log.logError("yacyClient.crawlOrder error: peer=" + targetSeed.getName() + ", error=" + e.getMessage()); + return null; + } + } + + /* + Test: + http://217.234.95.114:5777/yacy/crawlOrder.html?key=abc&iam=S-cjM67KhtcJ&youare=EK31N7RgRqTn&process=crawl&referrer=&depth=0&url=p|http://www.heise.de/newsticker/meldung/53245 + version=0.297 uptime=225 accepted=true reason=ok delay=30 depth=0 + -er crawlt, Ergebnis erscheint aber unter falschem initiator + */ + + public static HashMap crawlReceipt(yacySeed targetSeed, String process, String result, String reason, plasmaCrawlLURL.entry entry, String wordhashes) { + if (targetSeed == null) return null; + if (yacyCore.seedDB.mySeed == null) return null; + if (yacyCore.seedDB.mySeed == targetSeed) return null; + + /* + the result can have one of the following values: + negative cases, no retry + unavailable - the resource is not avaiable (a broken link); not found or interrupted + robot - a robot-file has denied to crawl that resource + + negative cases, retry possible + rejected - the peer has rejected to load the resource + dequeue - peer too busy - rejected to crawl + + positive cases with crawling + fill - the resource was loaded and processed + update - the resource was already in database but re-loaded and processed + + positive cases without crawling + known - the resource is already in database, believed to be fresh and not reloaded + stale - the resource was reloaded but not processed because source had no changes + + */ + + // construct request + String key = crypt.randomSalt(); + + String address = targetSeed.getAddress(); + if (address == null) return null; + try { + return 
nxTools.table(httpc.wget( + new URL("http://" + address + "/yacy/crawlReceipt.html?" + + "iam=" + yacyCore.seedDB.mySeed.hash + + "&youare=" + targetSeed.hash + + "&process=" + process + + "&key=" + key + + "&urlhash=" + ((entry == null) ? "" : entry.hash()) + + "&result=" + result + + "&reason=" + reason + + "&wordh=" + wordhashes + + "&lurlEntry=" + ((entry == null) ? "" : crypt.simpleEncode(entry.toString(), key)) + ), + 60000, null, null, yacyCore.seedDB.sb.remoteProxyHost, yacyCore.seedDB.sb.remoteProxyPort)); + } catch (Exception e) { + // most probably a network time-out exception + yacyCore.log.logError("yacyClient.crawlReceipt error:" + e.getMessage()); + return null; + } + } + /* + public static byte[] singleGET(String host, int port, String path, int timeout, + String user, String password, + httpHeader requestHeader) throws IOException { + */ + + public static String transferIndex(yacySeed targetSeed, plasmaWordIndexEntity[] indexes, plasmaCrawlLURL urlDB) { + HashMap in = transferRWI(targetSeed, indexes, urlDB); + if (in == null) return "no_connection_1"; + String result = (String) in.get("result"); + if (result == null) return "no_result_1"; + if (!(result.equals("ok"))) return result; + // in now contains a list of unknown hashes + String uhss = (String) in.get("unknownURL"); + if (uhss == null) return "no_unknownURL_tag_in_response"; + if (uhss.length() == 0) return null; // all url's known, we are ready here + String[] uhs = uhss.split(","); + //System.out.println("DEBUG yacyClient.transferIndex: " + uhs.length + " urls unknown"); + if (uhs.length == 0) return null; // all url's known + // extract the urlCache from the result + HashMap urlCache = (HashMap) in.get("$URLCACHE$"); + plasmaCrawlLURL.entry[] urls = new plasmaCrawlLURL.entry[uhs.length]; + for (int i = 0; i < uhs.length; i++) { + urls[i] = (plasmaCrawlLURL.entry) urlCache.get(uhs[i]); + if (urls[i] == null) System.out.println("DEBUG transferIndex: error with requested url hash '" + 
uhs[i] + "', unknownURL='" + uhss + "'"); + } + in = transferURL(targetSeed, urls); + if (in == null) return "no_connection_2"; + result = (String) in.get("result"); + if (result == null) return "no_result_2"; + if (!(result.equals("ok"))) return result; + int doubleentries = Integer.parseInt((String) in.get("double")); + //System.out.println("DEBUG tansferIndex: transferred " + uhs.length + " URL's, double=" + doubleentries); + return null; + } + + private static HashMap transferRWI(yacySeed targetSeed, plasmaWordIndexEntity[] indexes, plasmaCrawlLURL urlDB) { + String address = targetSeed.getAddress(); + if (address == null) return null; + // prepare post values + serverObjects post = new serverObjects(); + String key = crypt.randomSalt(); + post.put("key", key); + post.put("iam", yacyCore.seedDB.mySeed.hash); + post.put("youare", targetSeed.hash); + post.put("wordc", "" + indexes.length); + int indexcount = 0; + String entrypost = ""; + Enumeration eenum; + plasmaWordIndexEntry entry; + HashMap urlCache = new HashMap(); + plasmaCrawlLURL.entry urlentry; + HashSet unknownURLs = new HashSet(); + for (int i = 0; i < indexes.length; i++) { + eenum = indexes[i].elements(true); + while (eenum.hasMoreElements()) { + entry = (plasmaWordIndexEntry) eenum.nextElement(); + // check if an LURL-Entry exists + if (urlCache.containsKey(entry.getUrlHash())) { + // easy case: the url is known and in the cache + entrypost += indexes[i].wordHash() + entry.toExternalForm() + serverCore.crlfString; + indexcount++; + } else if (unknownURLs.contains(entry.getUrlHash())) { + // in this case, we do nothing + } else { + // try to get the entry from the urlDB + if ((urlDB.exists(entry.getUrlHash())) && + ((urlentry = urlDB.getEntry(entry.getUrlHash())) != null)) { + // good case: store the urlentry to the cache + urlCache.put(entry.getUrlHash(), urlentry); + // add index to list + entrypost += indexes[i].wordHash() + entry.toExternalForm() + serverCore.crlfString; + indexcount++; + } else 
{ + // this is bad: the url is unknown. We put the link to a set and delete then later + unknownURLs.add(entry.getUrlHash()); + } + } + } + } + + // we loop again and delete all links where the url is unknown + Iterator it; + String urlhash; + for (int i = 0; i < indexes.length; i++) { + it = unknownURLs.iterator(); + while (it.hasNext()) { + urlhash = (String) it.next(); + try { + if (indexes[i].contains(urlhash)) indexes[i].removeEntry(urlhash, true); + } catch (IOException e) {} + } + } + + post.put("entryc", "" + indexcount); + post.put("indexes", entrypost); + try { + Vector v = httpc.wput(new URL("http://" + address + "/yacy/transferRWI.html"), 60000, null, null, + yacyCore.seedDB.sb.remoteProxyHost, yacyCore.seedDB.sb.remoteProxyPort, post); + // this should return a list of urlhashes that are unknwon + if (v != null) { + yacyCore.seedDB.mySeed.incSI(indexcount); + } + + HashMap result = nxTools.table(v); + result.put("$URLCACHE$", urlCache); + result.put("$UNKNOWNC$", "" + unknownURLs.size()); + return result; + } catch (Exception e) { + yacyCore.log.logError("yacyClient.transferRWI error:" + e.getMessage()); + return null; + } + } + + private static HashMap transferURL(yacySeed targetSeed, plasmaCrawlLURL.entry[] urls) { + // this post a message to the remote message board + String address = targetSeed.getAddress(); + if (address == null) return null; + // prepare post values + serverObjects post = new serverObjects(); + String key = crypt.randomSalt(); + post.put("key", key); + post.put("iam", yacyCore.seedDB.mySeed.hash); + post.put("youare", targetSeed.hash); + String resource = ""; + int urlc = 0; + for (int i = 0; i < urls.length; i++) { + if (urls[i] != null) { + resource = urls[i].toString(); + if (resource != null) { + post.put("url" + i, resource); + urlc++; + } + } + } + post.put("urlc", "" + urlc); + try { + Vector v = httpc.wput(new URL("http://" + address + "/yacy/transferURL.html"), 60000, null, null, + yacyCore.seedDB.sb.remoteProxyHost, 
yacyCore.seedDB.sb.remoteProxyPort, post); + if (v != null) { + yacyCore.seedDB.mySeed.incSU(urlc); + } + return nxTools.table(v); + } catch (Exception e) { + yacyCore.log.logError("yacyClient.transferRWI error:" + e.getMessage()); + return null; + } + } + + public static HashMap getProfile(yacySeed targetSeed) { + // this post a message to the remote message board + serverObjects post = new serverObjects(); + post.put("iam", yacyCore.seedDB.mySeed.hash); + post.put("youare", targetSeed.hash); + String address = targetSeed.getAddress(); + if (address == null) address = "localhost:8080"; + try { + Vector v = httpc.wput(new URL("http://" + address + "/yacy/profile.html"), 20000, null, null, + yacyCore.seedDB.sb.remoteProxyHost, yacyCore.seedDB.sb.remoteProxyPort, post); + return nxTools.table(v); + } catch (Exception e) { + yacyCore.log.logError("yacyClient.getProfile error:" + e.getMessage()); + return null; + } + } + + public static void main(String[] args) { + System.out.println("yacyClient Test"); + try { + plasmaSwitchboard sb = new plasmaSwitchboard(args[0], "httpProxy.init", "DATA/SETTINGS/httpProxy.conf"); + yacyCore core = new yacyCore(sb); + core.peerActions.loadSeedLists(); + yacySeed target = core.seedDB.getConnected(args[1]); + String wordhashe = plasmaWordIndexEntry.word2hash("test"); + //System.out.println("permission=" + permissionMessage(args[1])); + + HashMap result = nxTools.table(httpc.wget( + new URL("http://" + target.getAddress() + + "/yacy/search.html?myseed=" + core.seedDB.mySeed.genSeedStr(null) + + "&youare=" + target.hash + "&key=" + + "&myseed=" + core.seedDB.mySeed.genSeedStr(null) + + "&count=10&resource=global" + + "&query=" + wordhashe), + 5000, null, null, core.seedDB.sb.remoteProxyHost, core.seedDB.sb.remoteProxyPort)); + System.out.println("Result=" + result.toString()); + } catch (Exception e) { + e.printStackTrace(); + } + System.exit(0); + } +} diff --git a/source/de/anomic/yacy/yacyCore.java 
b/source/de/anomic/yacy/yacyCore.java new file mode 100644 index 000000000..9822deda7 --- /dev/null +++ b/source/de/anomic/yacy/yacyCore.java @@ -0,0 +1,482 @@ +// yacyCore.java +// ------------------------------------- +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2004 +// last major change: 03.12.2004 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. 
+// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. + +/* + the yacy process of getting in touch of other peers starts as follows: + - init seed cache. It is needed to determine the right peer for the Hello-Process + - create a own seed. This can be a new one or one loaded from a file + - The httpd must start up then first + - the own seed is completed by performing the 'yacyHello' process. This + process will result in a request back to the own peer to check if it runs + in server mode. This is the reason that the httpd must be started in advance. + +*/ + +// contributions: +// principal peer status via file generation by Alexander Schier [AS] + + +package de.anomic.yacy; + +import java.io.*; +import java.util.*; +import java.text.*; +import java.net.*; +import de.anomic.tools.*; +import de.anomic.plasma.*; +import de.anomic.server.*; +import de.anomic.net.*; +import de.anomic.http.*; + +public class yacyCore { + + // statics + public static long startupTime = System.currentTimeMillis(); + public static yacySeedDB seedDB = null; + public static yacyPeerActions peerActions = null; + public static yacyDHTAction dhtAgent = null; + public static serverLog log; + public static long lastOnlineTime = 0; + public static String latestVersion = ""; + public static long speedKey = 0; + public static File yacyDBPath; + + //public static yacyShare shareManager = null; + //public static boolean terminate = false; + + // class variables + private int seedCacheSizeStamp = 0; + private String oldIPStamp = ""; + private int onlineMode = 1; + private plasmaSwitchboard switchboard; + + private static TimeZone 
GMTTimeZone = TimeZone.getTimeZone("PST"); + public static SimpleDateFormat shortFormatter = new SimpleDateFormat("yyyyMMddHHmmss"); + + public static long universalTime() { + return universalDate().getTime(); + } + + public static Date universalDate() { + return new GregorianCalendar(GMTTimeZone).getTime(); + } + + public static String universalDateShortString() { + return shortFormatter.format(universalDate()); + } + + public static Date parseUniversalDate(String remoteTimeString) { + if (remoteTimeString == null) return new Date(); + try { + return yacyCore.shortFormatter.parse(remoteTimeString); + } catch (java.text.ParseException e) { + return new Date(); + } + } + + public static int yacyTime() { + // the time since startup of yacy in seconds + return (int) ((System.currentTimeMillis() - startupTime) / 1000); + } + + public yacyCore(plasmaSwitchboard sb) throws IOException { + long time = System.currentTimeMillis(); + + this.switchboard = sb; + switchboard.setConfig("yacyStatus",""); + + // set log level + log = new serverLog("YACY", Integer.parseInt(switchboard.getConfig("yacyLoglevel", "0"))); + + // create a yacy db + yacyDBPath = new File(sb.getRootPath(), sb.getConfig("yacyDB", "DATA/YACYDB")); + if (!(yacyDBPath.exists())) yacyDBPath.mkdir(); + + // read memory amount + int mem = Integer.parseInt(switchboard.getConfig("ramCacheSize", "1")) * 0x400 * + Integer.parseInt(switchboard.getConfig("ramCachePercentDHT", "1")) / 100; + log.logSystem("DHT Cache memory = " + mem + " KB"); + + // create or init seed cache + seedDB = new yacySeedDB( + sb, + new File(yacyDBPath, "seed.new.db"), + new File(yacyDBPath, "seed.old.db"), + new File(yacyDBPath, "seed.pot.db"), + mem); + + peerActions = new yacyPeerActions(seedDB, switchboard, + new File(sb.getRootPath(), sb.getConfig("superseedFile", "superseed.txt")), + switchboard.getConfig("superseedLocation", "http://www.yacy.net/yacy/superseed.txt")); + dhtAgent = new yacyDHTAction(seedDB); + 
peerActions.deploy(dhtAgent); + + // create or init index sharing + //shareManager = new yacyShare(switchboard); + + seedCacheSizeStamp = seedDB.sizeConnected(); + + log.logSystem("CORE INITIALIZED"); + // ATTENTION, VERY IMPORTANT: before starting the thread, the httpd yacy server must be running! + + speedKey = System.currentTimeMillis() - time; + + // start with a seedList update to propagate out peer, if possible + onlineMode = Integer.parseInt(switchboard.getConfig("onlineMode", "1")); + //lastSeedUpdate = universalTime(); + lastOnlineTime = 0; + + // cycle + // within cycle: update seed file, strengthen network, pass news (new, old seed's) + if (online()) + log.logSystem("you are in online mode"); + else { + log.logSystem("YOU ARE OFFLINE! ---"); + log.logSystem("--- TO START BOOTSTRAPING, YOU MUST USE THE PROXY,"); + log.logSystem("--- OR HIT THE BUTTON 'go online'"); + log.logSystem("--- ON THE STATUS PAGE http://localhost:" + switchboard.getConfig("port", "8080") + "/Status.html"); + } + } + + + synchronized static public void triggerOnlineAction() { + lastOnlineTime = universalTime(); + } + + public boolean online() { + this.onlineMode = Integer.parseInt(switchboard.getConfig("onlineMode", "1")); + return ((onlineMode == 2) || ((universalTime() - lastOnlineTime) < 10000)); + } + + public void loadSeeds() { + //new Thread(new vprobe()).start(); + peerActions.loadSeedLists(); // start to bootstrap the network here + publishSeedList(); + } + + public void publishSeedList() { + log.logDebug("triggered Seed Publish"); + + // we want to be a principal... 
+ if ((switchboard.getConfig("seedFTPPassword","").length() == 0) && + (switchboard.getConfig("seedFilePath", "").length() == 0)) { + log.logDebug("yacyCore.publishSeedList: no FTP settings present; password-len=" + + switchboard.getConfig("seedFTPPassword","").length() + ", filePath=" + + switchboard.getConfig("seedFilePath", "")); + return; + } + + /* + if (oldIPStamp.equals((String) seedDB.mySeed.get("IP", "127.0.0.1"))) + System.out.println("***DEBUG publishSeedList: oldIP is equal"); + if (seedCacheSizeStamp == seedDB.sizeConnected()) + System.out.println("***DEBUG publishSeedList: sizeConnected is equal"); + if (canReachMyself()) + System.out.println("***DEBUG publishSeedList: I can reach myself"); + */ + + if ((!(oldIPStamp.equals((String) seedDB.mySeed.get("IP", "127.0.0.1")))) || + (seedCacheSizeStamp != seedDB.sizeConnected()) || + (!(canReachMyself()))) { + // publish seed-list to ftp account, this can only a principal peer + saveSeedList(); + seedCacheSizeStamp = seedDB.sizeConnected(); + oldIPStamp = (String) seedDB.mySeed.get("IP", "127.0.0.1"); + } else { + log.logDebug("not necessary to publish: oldIP is equal, sizeConnected is equal and I can reach myself under the old IP."); + } + } + + public void peerPing() { + if (!(online())) return; + + // before publishing, update some seed data + peerActions.updateMySeed(); + + // publish own seed to other peer, this can every peer, but makes only sense for senior peers + int oldSize = seedDB.sizeConnected(); + if (oldSize == 0) { + // reload the seed lists + peerActions.loadSeedLists(); + log.logInfo("re-initialized seed list. 
received " + seedDB.sizeConnected() + " new peers"); + } + int newSeeds = publishMySeed(false); + if (newSeeds > 0) log.logInfo("received " + newSeeds + " new peers, know a total of " + + seedDB.sizeConnected() + " different peers"); + } + + private boolean canReachMyself() { + // returns true if we can reach ourself under our known peer address + // if we cannot reach ourself, we call a forced publishMySeed and return false + int urlc = yacyClient.queryUrlCount(seedDB.mySeed); + if (urlc >= 0) { + seedDB.mySeed.put("LastSeen", universalDateShortString()); + return true; + } + log.logInfo("re-connect own seed"); + String oldAddress = seedDB.mySeed.getAddress(); + int newSeeds = publishMySeed(true); + return ((oldAddress != null) && (oldAddress.equals(seedDB.mySeed.getAddress()))); + } + + + protected class publishThread extends Thread { + + public int added; + public yacySeed seed; + public Exception error; + + public publishThread(yacySeed seed) { + this.seed = seed; + this.added = 0; + this.error = null; + } + + public void run() { + try { + added = yacyClient.publishMySeed(seed.getAddress(), seed.hash); + if (added < 0) { + // no or wrong response, delete that address + log.logInfo("publish: disconnected " + seed.get("PeerType", "senior") + " peer '" + seed.getName() + "' from " + seed.getAddress()); + peerActions.peerDeparture(seed); + } else { + // success! we have published our peer to a senior peer + // update latest news from the other peer + log.logInfo("publish: handshaked " + seed.get("PeerType", "senior") + " peer '" + seed.getName() + "' at " + seed.getAddress()); + } + } catch (Exception e) { + error = e; + } + } + + } + + private int publishMySeed(boolean force) { + // call this after the httpd was started up + + // we need to find out our own ip + // This is not always easy, since the application may + // live behind a firewall or nat. 
+ // the normal way to do this is either measure the value that java gives us, + // but this is not correct if the peer lives behind a NAT/Router or has several + // addresses and not the right one can be found out. + // We have several alternatives: + // 1. ask another peer. This should be normal and the default method. + // but if no other peer lives, or we don't know them, we cannot do that + // 2. ask own NAT. This is only an option if the NAT is a DI604, because this is the + // only supported for address retrieval + // 3. ask ip respond services in the internet. There are several, and they are all + // probed until we get a valid response. + + // init yacyHello-process + String address; + int added; + yacySeed[] seeds; + int attempts = seedDB.sizeConnected(); if (attempts > 10) attempts = 10; + if (seedDB.mySeed.get("PeerType", "virgin").equals("virgin")) { + seeds = seedDB.seedsByAge(true, attempts); // best for fast connection + } else { + seeds = seedDB.seedsByAge(false, attempts); // best for seed list maintenance/cleaning + } + if (seeds == null) return 0; + Vector v = new Vector(); // memory for threads + publishThread t; + for (int i = 0; i < seeds.length; i++) { + if (seeds[i] == null) continue; + log.logDebug("HELLO #" + i + " to peer " + seeds[i].get("Name", "")); // debug + address = seeds[i].getAddress(); + if ((address == null) || (!(seeds[i].isProper()))) { + // we don't like that address, delete it + peerActions.peerDeparture(seeds[i]); + } else { + // ask senior peer + t = new publishThread(seeds[i]); + v.add(t); + t.start(); + } + + // wait + try { + if (i == 0) Thread.currentThread().sleep(2000); // after the first time wait some seconds + Thread.currentThread().sleep(1000 + 500 * v.size()); // wait a while + } catch (InterruptedException e) {} + + // check all threads + for (int j = 0; j < v.size(); j++) { + t = (publishThread) v.elementAt(j); + added = t.added; + if (!(t.isAlive())) { + //log.logDebug("PEER " + seeds[j].get("Name", "") + " 
request terminated"); // debug + if (added >= 0) { + // success! we have published our peer to a senior peer + // update latest news from the other peer + //log.logInfo("publish: handshaked " + t.seed.get("PeerType", "senior") + " peer '" + t.seed.getName() + "' at " + t.seed.getAddress()); + peerActions.saveMySeed(); + return added; + } + } + } + } + + // if we have an address, we do nothing + if ((seedDB.mySeed.isProper()) && (!(force))) return 0; + + // still no success: ask own NAT or internet responder + boolean DI604use = switchboard.getConfig("DI604use", "false").equals("true"); + String DI604pw = switchboard.getConfig("DI604pw", ""); + String ip = natLib.retrieveIP(DI604use, DI604pw); + //System.out.println("DEBUG: new IP=" + ip); + seedDB.mySeed.put("IP", ip); + if (seedDB.mySeed.get("PeerType", "junior").equals("junior")) // ??????????????? + seedDB.mySeed.put("PeerType", "senior"); // to start bootstraping, we need to be recognised as "senior" peer + log.logInfo("publish: no recipient found, asked NAT or responder; our address is " + + ((seedDB.mySeed.getAddress() == null) ? 
"unknown" : seedDB.mySeed.getAddress())); + peerActions.saveMySeed(); + return 0; + } + + + public boolean saveSeedList() { + return saveSeedList(this.switchboard); + } + + public static boolean saveSeedList(serverSwitch sb) { + String logt; + // be shure that we have something to say + if (seedDB.mySeed.getAddress() == null) return false; + // upload a seed file + String seedFTPServer = sb.getConfig("seedFTPServer",""); + String seedFTPAccount = sb.getConfig("seedFTPAccount",""); + String seedFTPPassword = sb.getConfig("seedFTPPassword",""); + File seedFTPPath = new File(sb.getConfig("seedFTPPath","")); + File seedFile = new File(sb.getConfig("seedFilePath","")); + String prevStatus = seedDB.mySeed.get("PeerType", "junior"); + if (prevStatus.equals("principal")) prevStatus = "senior"; + URL seedURL; + try{ + seedURL = new URL(sb.getConfig("seedURL","")); + }catch(MalformedURLException e){ + return false; + } + if ((seedFTPServer.length() != 0) && + (seedFTPAccount.length() != 0) && + (seedFTPPassword.length() != 0) && + (seedFTPPath.toString().length() != 0)) { + try { + seedDB.mySeed.put("PeerType", "principal"); // this information shall also be uploaded + logt = seedDB.uploadCache(seedFTPServer, seedFTPAccount, seedFTPPassword, seedFTPPath, seedURL); + log.logInfo(logt); + if (logt.indexOf("Error") >= 0) { + seedDB.mySeed.put("PeerType", prevStatus); + log.logInfo("seed upload failed (ftp error): " + logt.substring(logt.indexOf("Error") + 6)); + return false; + } + // check if seed file has arrived a public accessible location + // seedURL=http://www.yacy.net/yacy/seed.txt + + // finally, set the principal status + sb.setConfig("yacyStatus","principal"); + return true; + } catch (IOException e) { + seedDB.mySeed.put("PeerType", prevStatus); + log.logInfo("seed upload failed (IO error): " + e.getMessage()); + return false; + } + }else if(seedFile.toString().length() != 0){ // [AS] + try{ + seedDB.mySeed.put("PeerType", "principal"); // this information shall 
also be uploaded + logt = seedDB.copyCache(seedFile, seedURL); + log.logInfo(logt); + if (logt.indexOf("Error") >= 0) { + seedDB.mySeed.put("PeerType", prevStatus); + log.logInfo("seed copy failed (IO error): " + logt.substring(logt.indexOf("Error") + 6)); + return false; + } + // check if seed file has arrived a public accessible location + // seedURL=http://www.yacy.net/yacy/seed.txt + + // finally, set the principal status + sb.setConfig("yacyStatus","principal"); + return true; + } catch (IOException e) { + seedDB.mySeed.put("PeerType", prevStatus); + log.logInfo("seed copy failed (IO error): " + e.getMessage()); + return false; + } + } + seedDB.mySeed.put("PeerType", prevStatus); + sb.setConfig("yacyStatus", prevStatus); + return false; + } + + private class vprobe implements Runnable { + public vprobe() {} + public final void run() { + // read the probe URL + String probeURL=switchboard.getConfig("onetimeAction", null); + if ((probeURL == null) || (probeURL.length() == 0)) return; // not wanted + // read version and date + String proxyHost = switchboard.getConfig("remoteProxyHost", ""); + int proxyPort = Integer.parseInt(switchboard.getConfig("remoteProxyPort", "0")); + if (!(switchboard.getConfig("remoteProxyUse", "false").equals("true"))) { + proxyHost = null; proxyPort = 0; + } + String version = switchboard.getConfig("version", ""); + String date = switchboard.getConfig("vdate", ""); + probeURL = probeURL + "?version=" + version + "&date=" + date; + // open new connection + try { + latestVersion = new String(httpc.singleGET(new URL(probeURL), 10000, null, null, proxyHost, proxyPort)).trim(); + float latest = Float.parseFloat(latestVersion); + float thisver = Float.parseFloat(version); + if (thisver > latest) System.out.println("THIS SOFTWARE VERSION IS A PRE-RELEASE"); + if (thisver < latest) { + log.logSystem("****************************************************************"); + log.logSystem("* THIS SOFTWARE VERSION IS OUTDATED."); + log.logSystem("* 
PLEASE GO TO ANOMIC.DE AND DOWNLOAD THE LATEST VERSION " + latestVersion); + log.logSystem("****************************************************************"); + } + } catch (Exception e) { + // we do nothing is this case + } + } + } + +} diff --git a/source/de/anomic/yacy/yacyDHTAction.java b/source/de/anomic/yacy/yacyDHTAction.java new file mode 100644 index 000000000..a8ea0bbf8 --- /dev/null +++ b/source/de/anomic/yacy/yacyDHTAction.java @@ -0,0 +1,224 @@ +// yacyDHTAction.java +// ------------------------------------- +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2005 +// last major change: 23.02.2005 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. 
The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. +// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. + +package de.anomic.yacy; + +import java.util.*; +import de.anomic.kelondro.*; + +public class yacyDHTAction implements yacyPeerAction { + + private yacySeedDB seedDB; + private kelondroMScoreCluster seedCrawlReady; + + public yacyDHTAction(yacySeedDB seedDB) { + this.seedDB = seedDB; + this.seedCrawlReady = new kelondroMScoreCluster(); + // init crawl-ready table + try { + Enumeration en = seedDB.seedsConnected(true, false, null); + yacySeed ys; + while (en.hasMoreElements()) { + ys = (yacySeed) en.nextElement(); + if ((ys != null) && (ys.getVersion() >= ((float) 0.3))) seedCrawlReady.setScore(ys.hash, yacyCore.yacyTime()); + } + } catch (IllegalArgumentException e) { + } + } + + + public Enumeration getDHTSeeds(boolean up, String firstHash) { + // enumerates seed-type objects: all seeds with starting point in the middle, rotating at the end/beginning + return new seedDHTEnum(up, firstHash); + } + + class seedDHTEnum implements Enumeration { + + Enumeration e1, e2; + boolean up; + int steps; + + public seedDHTEnum(boolean up, String firstHash) { + this.steps = seedDB.sizeConnected(); + this.up = up; + 
this.e1 = seedDB.seedsConnected(up, false, firstHash); + this.e2 = null; + } + + public boolean hasMoreElements() { + return (steps > 0) && ((e2 == null) || (e2.hasMoreElements())); + } + + public Object nextElement() { + if (steps == 0) return null; + steps--; + if ((e1 != null) && (e1.hasMoreElements())) { + Object n = e1.nextElement(); + if (!(e1.hasMoreElements())) { + e1 = null; + e2 = seedDB.seedsConnected(up, false, null); + } + return n; + } else { + if (e2 == null) { + e1 = null; + e2 = seedDB.seedsConnected(up, false, null); + } + return e2.nextElement(); + } + } + + } + + + public Enumeration getAcceptRemoteIndexSeeds(String starthash) { + // returns an enumeration of yacySeed-Objects + // that have the AcceptRemoteIndex-Flag set + // the seeds are enumerated in the right order according DHT + return new acceptRemoteIndexSeedEnum(starthash); + } + + class acceptRemoteIndexSeedEnum implements Enumeration { + + Enumeration se; + yacySeed nextSeed; + + public acceptRemoteIndexSeedEnum(String starthash) { + se = getDHTSeeds(true, starthash); + nextSeed = nextInternal(); + } + + public boolean hasMoreElements() { + return nextSeed != null; + } + + private yacySeed nextInternal() { + yacySeed s; + while (se.hasMoreElements()) { + s = (yacySeed) se.nextElement(); + if (s == null) return null; + if (s.getFlagAcceptRemoteIndex()) return s; + } + return null; + } + + public Object nextElement() { + yacySeed next = nextSeed; + nextSeed = nextInternal(); + return next; + } + + } + + + public Enumeration getAcceptRemoteCrawlSeeds(String starthash, boolean available) { + return new acceptRemoteCrawlSeedEnum(starthash, available); + } + + class acceptRemoteCrawlSeedEnum implements Enumeration { + + Enumeration se; + yacySeed nextSeed; + boolean available; + + public acceptRemoteCrawlSeedEnum(String starthash, boolean available) { + this.se = getDHTSeeds(true, starthash); + this.available = available; + nextSeed = nextInternal(); + } + + public boolean hasMoreElements() 
{ + return nextSeed != null; + } + + private yacySeed nextInternal() { + yacySeed s; + while (se.hasMoreElements()) { + s = (yacySeed) se.nextElement(); + if (s == null) return null; + if (available) { + if (seedCrawlReady.getScore(s.hash) < yacyCore.yacyTime()) return s; + } else { + if (seedCrawlReady.getScore(s.hash) > yacyCore.yacyTime()) return s; + } + } + return null; + } + + public Object nextElement() { + yacySeed next = nextSeed; + nextSeed = nextInternal(); + return next; + } + + } + + public synchronized yacySeed getCrawlSeed(String urlHash) { + Enumeration e = getAcceptRemoteCrawlSeeds(urlHash, true); + yacySeed seed; + if (e.hasMoreElements()) seed = (yacySeed) e.nextElement(); else seed = null; + e = null; + return seed; + } + + public void setCrawlTime(String seedHash, int newYacyTime) { + if (newYacyTime < yacyCore.yacyTime()) newYacyTime = yacyCore.yacyTime(); + seedCrawlReady.setScore(seedHash, newYacyTime); + } + + public void setCrawlDelay(String seedHash, int newDelay) { + seedCrawlReady.setScore(seedHash, yacyCore.yacyTime() + newDelay); + } + + public void processPeerArrival(yacySeed peer, boolean direct) { + if (peer.getVersion() >= ((float) 0.3)) { + if (!(seedCrawlReady.existsScore(peer.hash))) seedCrawlReady.setScore(peer.hash, yacyCore.yacyTime()); + } else { + seedCrawlReady.deleteScore(peer.hash); + } + } + + public void processPeerDeparture(yacySeed peer) { + seedCrawlReady.deleteScore(peer.hash); + } + + public void processPeerPing(yacySeed peer) { + } +} diff --git a/source/de/anomic/yacy/yacyPeerAction.java b/source/de/anomic/yacy/yacyPeerAction.java new file mode 100644 index 000000000..7fbfaa319 --- /dev/null +++ b/source/de/anomic/yacy/yacyPeerAction.java @@ -0,0 +1,49 @@ +// yacyPeerAction.java +// ------------------------------------- +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2005 +// last major change: 23.02.2005 +// +// This program is free software; 
you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. +// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. 
+ +package de.anomic.yacy; + +public interface yacyPeerAction { + + public void processPeerArrival(yacySeed peer, boolean direct); + public void processPeerDeparture(yacySeed peer); + public void processPeerPing(yacySeed peer); + +} diff --git a/source/de/anomic/yacy/yacyPeerActions.java b/source/de/anomic/yacy/yacyPeerActions.java new file mode 100644 index 000000000..503086ad8 --- /dev/null +++ b/source/de/anomic/yacy/yacyPeerActions.java @@ -0,0 +1,367 @@ +// yacyPeerActions.java +// ------------------------------------- +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2005 +// last major change: 22.02.2005 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. 
The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. +// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. + +package de.anomic.yacy; + +import java.io.*; +import java.util.*; +import de.anomic.server.*; +import de.anomic.plasma.*; + +import java.net.*; +import de.anomic.kelondro.*; +import de.anomic.tools.*; +import de.anomic.http.*; + +public class yacyPeerActions { + + private yacySeedDB seedDB; + private plasmaSwitchboard sb; + private HashSet actions; + private File superseedFile; + private String superseedURL; + public long juniorConnects; + public long seniorConnects; + public long principalConnects; + public long disconnects; + + public yacyPeerActions(yacySeedDB seedDB, plasmaSwitchboard switchboard, + File superseedFile, + String superseedURL) throws IOException { + this.seedDB = seedDB; + this.sb = switchboard; + this.actions = new HashSet(); + this.superseedFile = superseedFile; + this.superseedURL = superseedURL; + this.superseedURL = superseedURL; + this.juniorConnects = 0; + this.seniorConnects = 0; + this.principalConnects = 0; + this.disconnects = 0; + } + + public void deploy(yacyPeerAction action) { + actions.add(action); + } + + public void updateMySeed() { + if (sb.getConfig("peerName", 
"nameless").equals("nameless")) sb.setConfig("peerName", serverCore.publicIP().getHostName() + yacyCore.speedKey + serverSystem.infoKey() + (System.currentTimeMillis() & 99)); + seedDB.mySeed.put("Name", sb.getConfig("peerName", "nameless")); + seedDB.mySeed.put("Port", sb.getConfig("port", "8080")); + seedDB.mySeed.put("ISpeed", "unknown"); // the speed of indexing (words/minute) of the peer + long uptime = ((yacyCore.universalTime() - Long.parseLong(sb.getConfig("startupTime", "0"))) / 1000) / 60; + seedDB.mySeed.put("Uptime", "" + uptime); // the number of minutes that the peer is up in minutes/day (moving average MA30) + seedDB.mySeed.put("LCount", "" + sb.lUrlSize()); // the number of links that the peer has stored (LURL's) + seedDB.mySeed.put("ICount", "" + sb.cacheSizeMin()); // the minimum number of words that the peer has indexed (as it says) + seedDB.mySeed.put("SCount", "" + seedDB.sizeConnected()); // the number of seeds that the peer has stored + seedDB.mySeed.put("CCount", "" + (((int) ((seedDB.sizeConnected() + seedDB.sizeDisconnected() + seedDB.sizePotential()) * 60.0 / (uptime + 1.01)) * 100) / 100.0)); // the number of clients that the peer connects (as connects/hour) + seedDB.mySeed.put("Version", sb.getConfig("version", "")); + if (seedDB.mySeed.get("PeerType","").equals("principal")) { + // attach information about seed location + seedDB.mySeed.put("seedURL", sb.getConfig("seedURL", "")); + } + seedDB.mySeed.setFlagDirectConnect(true); + seedDB.mySeed.put("LastSeen", yacyCore.universalDateShortString()); + seedDB.mySeed.setFlagAcceptRemoteCrawl(sb.getConfig("crawlResponse", "").equals("true")); + seedDB.mySeed.setFlagAcceptRemoteIndex(sb.getConfig("allowReceiveIndex", "").equals("true")); + //mySeed.setFlagAcceptRemoteIndex(true); + } + + public void saveMySeed() { + try { + seedDB.mySeed.save(seedDB.myOwnSeedFile); + } catch (IOException e) {} + } + + public void loadSeedLists() { + // uses the superseed to initialize the database with known 
seeds + + yacySeed ys; + String seedListFileURL; + URL url; + Vector seedList; + Enumeration enu; + int lc; + int sc = seedDB.sizeConnected(); + httpHeader header; + + yacyCore.log.logInfo("BOOTSTRAP: " + sc + " seeds known from previous run"); + + // - load the superseed: a list of URL's + disorderSet superseed = loadSuperseed(superseedFile, superseedURL); + + // - use the superseed to further fill up the seedDB + int ssc = 0; + for (int i = 0; i < superseed.size(); i++) { + seedListFileURL = (String) superseed.any(); + if (seedListFileURL.startsWith("http://")) { + // load the seed list + try { + url = new URL(seedListFileURL); + header = httpc.whead(url, 5000, null, null, sb.remoteProxyHost, sb.remoteProxyPort); + if ((header == null) || (header.lastModified() == null)) { + yacyCore.log.logInfo("BOOTSTRAP: seed-list url " + seedListFileURL + " not available"); + } else if ((header.age() > 86400000) && (ssc > 0)) { + yacyCore.log.logInfo("BOOTSTRAP: seed-list url " + seedListFileURL + " too old (" + (header.age() / 86400000) + " days)"); + } else { + ssc++; + seedList = httpc.wget(url, 5000, null, null, sb.remoteProxyHost, sb.remoteProxyPort); + enu = seedList.elements(); + lc = 0; + while (enu.hasMoreElements()) { + ys = yacySeed.genRemoteSeed((String) enu.nextElement(), null, new Date()); + // skip our own seed; the hashes are Strings and must be compared by value, not by reference + if ((ys != null) && (ys.isProper()) && + ((seedDB.mySeed == null) || (!(ys.hash.equals(seedDB.mySeed.hash))))) { + if (connectPeer(ys, false)) lc++; + //seedDB.writeMap(ys.hash, ys.getMap(), "init"); + //System.out.println("BOOTSTRAP: received peer " + ys.get("Name", "anonymous") + "/" + ys.getAddress()); + //lc++; + } + } + yacyCore.log.logInfo("BOOTSTRAP: " + lc + " seeds from seed-list url " + seedListFileURL + ", AGE=" + (header.age() / 3600000) + "h"); + } + + } catch (Exception e) { + // this is when wget fails; may be because of missing internet connection + // we do nothing here and go silently over it + System.out.println("BOOTSTRAP: failed to load seeds from seed-list url 
" + seedListFileURL); + } + } + } + yacyCore.log.logInfo("BOOTSTRAP: " + (seedDB.sizeConnected() - sc) + " new seeds while bootstraping."); + } + + private disorderSet loadSuperseed(File local, String url) { + // this returns a list of locations where seed list-files can be found + disorderSet supsee = new disorderSet(); + String line; + // read in local file + int lc = 0; + try { + BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(local))); + while ((line = br.readLine()) != null) { + line = line.trim(); + //System.out.println("one line in file:" + line); + if (line.length() > 0) supsee.add(line); + } + br.close(); + lc = supsee.size(); + yacyCore.log.logInfo("BOOTSTRAP: " + lc + " seed-list urls from superseed file " + local.toString()); + } catch (IOException e) { + //e.printStackTrace(); + supsee = new disorderSet(); + yacyCore.log.logInfo("BOOTSTRAP: failed to load seed-list urls from superseed file " + local.toString() + ": " + e.getMessage()); + } + // read in remote file from url + try { + Vector remote = httpc.wget(new URL(url), 5000, null, null, sb.remoteProxyHost, sb.remoteProxyPort); + if ((remote != null) && (remote.size() > 0)) { + Enumeration e = remote.elements(); + while (e.hasMoreElements()) { + line = (String) e.nextElement(); + if (line != null) { + line = line.trim(); + supsee.add(line); + } + } + } + yacyCore.log.logInfo("BOOTSTRAP: " + (supsee.size() - lc) + " seed-list urls from superseed URL " + url); + } catch (Exception e) { + supsee = new disorderSet(); + yacyCore.log.logInfo("BOOTSTRAP: failed to load seed-list urls from superseed URL " + url + ": " + e.getMessage()); + } + return supsee; + } + + synchronized public boolean connectPeer(yacySeed seed, boolean direct) { + // store a remote peer's seed + // returns true if the peer is new and previously unknown + if (seed == null) { + yacyCore.log.logInfo("connect: WRONG seed (NULL)"); + return false; + } else if (!(seed.isProper())) { + 
yacyCore.log.logInfo("connect: WRONG seed (" + seed.getName() + "/" + seed.hash + ")"); + return false; + } else if ((seedDB.mySeed != null) && (seed.hash.equals(seedDB.mySeed.hash))) { + yacyCore.log.logInfo("connect: SELF reference " + seed.getAddress()); + return false; + } else { + String peerType = seed.get("PeerType", "virgin"); + // reject unqualified seeds + if ((peerType.equals("virgin")) || (peerType.equals("junior"))) { + yacyCore.log.logDebug("connect: rejecting NOT QUALIFIED " + peerType + " seed " + seed.getName()); + return false; + } + + // we may store that seed, but still have different cases + if (seed.get("LastSeen", "").length() < 14) { + // hack for peers that do not have a LastSeen date + seed.put("LastSeen", "20040101000000"); + } + + // connection time + long ctime; + try { + ctime = yacyCore.shortFormatter.parse(seed.get("LastSeen", "20040101000000")).getTime(); + // maybe correct it slightly + if (ctime > yacyCore.universalTime()) { + ctime = ((2 * ctime) + yacyCore.universalTime()) / 3; + seed.put("LastSeen", yacyCore.shortFormatter.format(new Date(ctime))); + } + } catch (java.text.ParseException e) { + ctime = yacyCore.universalTime(); + } + + // disconnection time + long dtime; + yacySeed disconnectedSeed = seedDB.getDisconnected(seed.hash); + if (disconnectedSeed == null) { + dtime = 0; // never disconnected: virtually disconnected maximum time ago + } else try { + dtime = yacyCore.shortFormatter.parse((String) disconnectedSeed.get("disconnected", "20040101000000")).getTime(); + } catch (java.text.ParseException e) { + dtime = 0; + } + + if (direct) { + // remember the moment + ctime = yacyCore.universalTime(); + seed.put("LastSeen", yacyCore.shortFormatter.format(new Date(ctime))); + seed.setFlagDirectConnect(true); + } else { + // set connection flag + if ((yacyCore.universalTime() - ctime) > 120000) seed.setFlagDirectConnect(false); // 2 minutes + } + + // prepare to update + if (disconnectedSeed != null) { + // if the indirect 
connect aims to announce a peer that we know has been disconnected + // then we compare the dates: + // if the new peer has a LastSeen date, and that date is before the disconnection date, + // then we ignore the new peer + if (!(direct)) { + if (ctime < dtime) { + // the disconnection was later, we reject the connection + yacyCore.log.logDebug("connect: rejecting disconnected peer '" + seed.getName() + "' from " + seed.getAddress()); + return false; + } + if ((yacyCore.universalTime() - ctime) > 3600000) { + // the new connection is out-of-age (LastSeen more than one hour in the past), we reject the connection + yacyCore.log.logDebug("connect: rejecting out-dated peer '" + seed.getName() + "' from " + seed.getAddress()); + return false; + } + if ((ctime - yacyCore.universalTime()) > 3600000) { + // the new connection is future-dated (LastSeen more than one hour ahead of our clock), we reject the connection + yacyCore.log.logDebug("connect: rejecting future-dated peer '" + seed.getName() + "' from " + seed.getAddress()); + return false; + } + } + + // this is a return of a lost peer + yacyCore.log.logDebug("connect: returned KNOWN " + peerType + " peer '" + seed.getName() + "' from " + seed.getAddress()); + seedDB.addConnected(seed); + return false; + } else { + yacySeed connectedSeed = seedDB.getConnected(seed.hash); + if (connectedSeed != null) { + // the seed is known: this is an update + try { + // if the old LastSeen date is later then the other info, then we reject the info + if ((ctime < yacyCore.shortFormatter.parse(connectedSeed.get("LastSeen", "20040101000000")).getTime()) && (!(direct))) { + yacyCore.log.logDebug("connect: rejecting old info about peer '" + seed.getName() + "'"); + return false; + } + } catch (java.text.ParseException e) {} + yacyCore.log.logDebug("connect: updated KNOWN " + ((direct) ? 
"direct " : "") + peerType + " peer '" + seed.getName() + "' from " + seed.getAddress()); + seedDB.addConnected(seed); + return false; + } else { + // the seed is new + if (((String) seed.get("IP", "127.0.0.1")).equals((String) seedDB.mySeed.get("IP", "127.0.0.1"))) { + // seed from the same IP as the calling client: can be the case if there runs another one over a NAT + yacyCore.log.logDebug("connect: saved NEW seed (myself IP) " + seed.getAddress()); + } else { + // completely new seed + yacyCore.log.logDebug("connect: saved NEW " + peerType + " peer '" + seed.getName() + "' from " + seed.getAddress()); + } + if (peerType.equals("senior")) seniorConnects++; // update statistics + if (peerType.equals("principal")) principalConnects++; // update statistics + seedDB.addConnected(seed); + return true; + } + + } + } + } + + synchronized public void disconnectPeer(yacySeed seed) { + // we do this if we did not get contact with the other peer + yacyCore.log.logDebug("connect: no contact to a " + seed.get("PeerType", "virgin") + " peer '" + seed.getName() + "' at " + seed.getAddress()); + if (!(seedDB.hasDisconnected(seed.hash))) disconnects++; + seed.put("disconnected", yacyCore.universalDateShortString()); + seedDB.addDisconnected(seed); // update info + } + + public boolean peerArrival(yacySeed peer, boolean direct) { + boolean res = connectPeer(peer, direct); + // perform all actions if peer is effective new + if (res) { + Iterator i = actions.iterator(); + while (i.hasNext()) ((yacyPeerAction) i.next()).processPeerArrival(peer, direct); + } + return res; + } + + public void peerDeparture(yacySeed peer) { + //System.out.println("PEER DEPARTURE:" + peer.toString()); + disconnectPeer(peer); + // perform all actions + Iterator i = actions.iterator(); + while (i.hasNext()) ((yacyPeerAction) i.next()).processPeerDeparture(peer); + } + + public void peerPing(yacySeed peer) { + // this is called only if the peer has junior status + seedDB.addPotential(peer); + // perform 
all actions + Iterator i = actions.iterator(); + while (i.hasNext()) ((yacyPeerAction) i.next()).processPeerPing(peer); + } +} diff --git a/source/de/anomic/yacy/yacySearch.java b/source/de/anomic/yacy/yacySearch.java new file mode 100644 index 000000000..999254eed --- /dev/null +++ b/source/de/anomic/yacy/yacySearch.java @@ -0,0 +1,176 @@ +// yacySearch.java +// ------------------------------------- +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2004 +// last major change: 13.06.2004 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. 
The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. +// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notice above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. + +package de.anomic.yacy; + +import java.io.*; +import java.util.*; +import de.anomic.plasma.*; +import de.anomic.server.*; +import de.anomic.kelondro.*; +import de.anomic.htmlFilter.*; + +public class yacySearch extends Thread { + + private Set wordhashes; + private int count; + private boolean global; + private plasmaCrawlLURL urlManager; + private plasmaSearch searchManager; + private yacySeed targetPeer; + private int links; + private long duetime; + + public yacySearch(Set wordhashes, int count, boolean global, yacySeed targetPeer, + plasmaCrawlLURL urlManager, plasmaSearch searchManager, long duetime) { + this.wordhashes = wordhashes; + this.count = count; + this.global = global; + this.urlManager = urlManager; + this.searchManager = searchManager; + this.targetPeer = targetPeer; + this.links = -1; + this.duetime = duetime; + } + + public void run() { + String wh = ""; + Iterator i = wordhashes.iterator(); + while (i.hasNext()) wh = wh + (String) i.next(); + this.links = yacyClient.search(wh, count, global, targetPeer, urlManager, searchManager, duetime); + if (links != 0) { + 
//yacyCore.log.logInfo("REMOTE SEARCH - remote peer '" + targetPeer.get("Name", "anonymous") + "' contributed " + links + " links for word hash " + wordhashes); + yacyCore.seedDB.mySeed.incRI(links); + yacyCore.seedDB.mySeed.incRU(links); + } + } + + public int links() { + return this.links; + } + + private static yacySeed[] selectPeers(Set wordhashes, int seedcount) { + // find out a specific number of seeds, that would be relevant for the given word hash(es) + // the result is ordered by relevance: [0] is most relevant + // the seedcount is the maximum number of wanted results + if (yacyCore.seedDB == null) return null; + if (seedcount > yacyCore.seedDB.sizeConnected()) seedcount = yacyCore.seedDB.sizeConnected(); + + kelondroMScoreCluster ranking = new kelondroMScoreCluster(); + yacySeed seed; + Enumeration dhtEnum; + Iterator i = wordhashes.iterator(); + int c; + while (i.hasNext()) { + dhtEnum = yacyCore.dhtAgent.getDHTSeeds(true, (String) i.next()); + c = seedcount; + while ((dhtEnum.hasMoreElements()) && (c > 0)) { + seed = (yacySeed) dhtEnum.nextElement(); + ranking.addScore(seed.hash, c); + c--; + } + } + if (ranking.size() < seedcount) seedcount = ranking.size(); + yacySeed[] result = new yacySeed[seedcount]; + Iterator e = ranking.scores(false); + c = 0; + while ((e.hasNext()) && (c < result.length)) + result[c++] = yacyCore.seedDB.getConnected((String) e.next()); + + //System.out.println("DEBUG yacySearch.selectPeers = " + seedcount + " seeds:"); for (int i = 0; i < seedcount; i++) System.out.println(" #" + i + ":" + result[i]); // debug + return result; + } + + public static int search(Set querywords, plasmaCrawlLURL urlManager, plasmaSearch searchManager, + int count, int targets, long waitingtime) { + // check own peer status + if ((yacyCore.seedDB.mySeed == null) || (yacyCore.seedDB.mySeed.getAddress() == null)) return 0; + + // start delay control + long start = System.currentTimeMillis(); + + // set a duetime for clients + long duetime = 
waitingtime - 4000; // subtract network traffic overhead, guessed 4 seconds + if (duetime < 1000) duetime = 1000; + + // prepare seed targets and threads + Set wordhashes = plasmaSearch.words2hashes(querywords); + yacySeed[] targetPeers = selectPeers(wordhashes, targets); + if (targetPeers == null) return 0; + targets = targetPeers.length; + if (targets == 0) return 0; + yacySearch[] searchThreads = new yacySearch[targets]; + for (int i = 0; i < targets; i++) { + searchThreads[i]= new yacySearch(wordhashes, count, true, targetPeers[i], + urlManager, searchManager, duetime); + searchThreads[i].start(); + try {Thread.currentThread().sleep(20);} catch (InterruptedException e) {} + if ((System.currentTimeMillis() - start) > waitingtime) { + targets = i + 1; + break; + } + } + + int c; + // wait until wanted delay passed or wanted result appeared + boolean anyIdle = true; + while ((anyIdle) && ((System.currentTimeMillis() - start) < waitingtime)) { + // wait.. + try {Thread.currentThread().sleep(200);} catch (InterruptedException e) {} + // check if all threads have been finished or results so far are enough + c = 0; + anyIdle = false; + for (int i = 0; i < targets; i++) { + if (searchThreads[i].links() < 0) anyIdle = true; else c = c + searchThreads[i].links(); + } + if ((c >= count * 3) && ((System.currentTimeMillis() - start) > (waitingtime * 2 / 3))) { + System.out.println("DEBUG yacySearch: c=" + c + ", count=" + count + ", waitingtime=" + waitingtime); + break; // we have enough + } + if (c >= count * 5) break; + } + + // collect results + c = 0; + for (int i = 0; i < targets; i++) c = c + ((searchThreads[i].links() > 0) ? 
searchThreads[i].links() : 0); + return c; + } + +} diff --git a/source/de/anomic/yacy/yacySeed.java b/source/de/anomic/yacy/yacySeed.java new file mode 100644 index 000000000..e5d477895 --- /dev/null +++ b/source/de/anomic/yacy/yacySeed.java @@ -0,0 +1,417 @@ +// yacySeed.java +// ------------------------------------- +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2004 +// last major change: 11.07.2004 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. 
+// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. + +/* + YACY stands for Yet Another CYberspace + + the yacySeed Object is the object that bundles and carries all information about + a single peer in the yacy space. + The yacySeed object is carried along peers using a string representation, that can + be compressed and/or scrambled, depending on the purpose of the process. + + the yacy status + any value that is defined here will be overwritten each time the proxy is started + to prevent that the system gets confused, it should be set to "" which means + undefined. Other status' that can be reached at run-time are + junior - a peer that has no public socket, thus cannot be reached on demand + senior - a peer that has a public socked and serves search queries + principal - a peer like a senior socket and serves as gateway for network definition + +*/ + + +package de.anomic.yacy; + +import java.io.*; +import java.util.*; +import java.text.*; +import de.anomic.tools.*; +import de.anomic.server.*; +import de.anomic.plasma.*; +import de.anomic.net.*; +import de.anomic.yacy.*; + +public class yacySeed { + + // class variables + public String hash; + private Map dna; + public int available; + + public yacySeed(String hash, Map dna) { + // create a seed with a pre-defined hash map + this.hash = hash; + this.dna = dna; + this.available = 0; + } + + public yacySeed(String hash) { + dna = new HashMap(); + + // settings that can only be computed by originating peer: + // at first startup - + this.hash = hash; // the hash key of the peer - very important. 
should be static somehow, even after restart + dna.put("Name", "∅"); // the name that the peer has given itself + dna.put("BDate", "∅"); // birthdate - first startup + // later during operation - + dna.put("ISpeed", "0"); // the speed of indexing (words/minute) of the peer + dna.put("Uptime", "0"); // the number of minutes that the peer is up in minutes/day (moving average MA30) + dna.put("LCount", "0"); // the number of links that the peer has stored (LURL's) + dna.put("ICount", "0"); // the number of words that the peer has indexed (as it says) + dna.put("SCount", "0"); // the number of seeds that the peer has stored + dna.put("CCount", "0"); // the number of clients that the peer connects (as connects/hour) + dna.put("Version", "0"); // the applications version + + // settings that is created during the 'hello' phase - in first contact + dna.put("IP", ""); // 123.234.345.456 + dna.put("Port", "∅"); // + dna.put("PeerType", "virgin"); // virgin/junior/senior/principal + dna.put("IPType", "∅"); // static/dynamic (if the ip changes often for any reason) + + // settings that can only be computed by visiting peer + dna.put("LastSeen", yacyCore.universalDateShortString()); // for last-seen date + dna.put("USpeed", "0"); // the computated uplink speed of the peer + + // settings that are needed to organize the seed round-trip + dna.put("Flags", "0000"); + setFlagDirectConnect(false); + setFlagAcceptRemoteCrawl(true); + setFlagAcceptRemoteIndex(true); + + // index transfer + dna.put("sI", "0"); // send index + dna.put("rI", "0"); // received Index + dna.put("sU", "0"); // send url + dna.put("rU", "0"); // received URL + + available = 0; + } + + + public String get(String key, String dflt) { + Object o = dna.get(key); + if (o == null) return dflt; else return (String) o; + } + + public void put(String key, String value) { + dna.put(key, value); + } + + public Map getMap() { + return dna; + } + + public String getName() { + return get("Name", "∅"); + } + + public String 
getHexHash() { + return b64Hash2hexHash(hash); + } + + public void incSI(int count) { + String v = (String) dna.get("sI"); if (v == null) v = "0"; + dna.put("sI", "" + (Integer.parseInt(v) + count)); + } + public void incRI(int count) { + String v = (String) dna.get("rI"); if (v == null) v = "0"; + dna.put("rI", "" + (Integer.parseInt(v) + count)); + } + public void incSU(int count) { + String v = (String) dna.get("sU"); if (v == null) v = "0"; + dna.put("sU", "" + (Integer.parseInt(v) + count)); + } + public void incRU(int count) { + String v = (String) dna.get("rU"); if (v == null) v = "0"; + dna.put("rU", "" + (Integer.parseInt(v) + count)); + } + + // 12 * 6 bit = 72 bit = 9 byte + public static String hexHash2b64Hash(String hexHash) { + return serverCodings.enhancedCoder.encodeBase64(serverCodings.decodeHex(hexHash)); + } + + public static String b64Hash2hexHash(String b64Hash) { + // the hash string represents 12 * 6 bit = 72 bits. This is too much for a long integer. + return serverCodings.encodeHex(serverCodings.enhancedCoder.decodeBase64(b64Hash)); + } + + public float getVersion() { + try { + return Float.parseFloat(get("Version", "0")); + } catch (NumberFormatException e) { + return 0; + } + } + + public String getAddress() { + String ip = (String) dna.get("IP"); + String port = (String) dna.get("Port"); + if ((ip != null) && (ip.length() >= 8) && (port != null) && (port.length() >= 2)) return ip + ":" + port; else return null; + } + + private boolean getFlag(int flag) { + String flags = get("Flags", "0000"); + return (new bitfield(flags.getBytes())).get(flag); + } + + private void setFlag(int flag, boolean value) { + String flags = get("Flags", "0000"); + bitfield f = new bitfield(flags.getBytes()); + f.set(flag, value); + put("Flags", f.toString()); + } + + public void setFlagDirectConnect(boolean value) {setFlag(0, value);} + public void setFlagAcceptRemoteCrawl(boolean value) {setFlag(1, value);} + public void setFlagAcceptRemoteIndex(boolean value) 
{setFlag(2, value);} + public boolean getFlagDirectConnect() {return getFlag(0);} + public boolean getFlagAcceptRemoteCrawl() { + //if (getVersion() < 0.300) return false; + //if (getVersion() < 0.334) return true; + return getFlag(1); + } + public boolean getFlagAcceptRemoteIndex() { + //if (getVersion() < 0.335) return false; + return getFlag(2); + } + + public boolean isVirgin() { + return get("PeerType", "").equals("virgin"); + } + public boolean isJunior() { + return get("PeerType", "").equals("junior"); + } + public boolean isSenior() { + return get("PeerType", "").equals("senior"); + } + public boolean isPrincipal() { + return get("PeerType", "").equals("principal"); + } + public boolean isOnline() { + return (isSenior()) || (isPrincipal()); + } + + public String encodeLex(long c, int length) { + if (length < 0) length = 0; + String s = ""; + if (c == 0) s = '-' + s; + else while (c > 0) { + s = ((char) (32 + (c % 96))) + s; + c = c / 96; + } + if ((length != 0) && (s.length() > length)) + throw new RuntimeException("encodeLex result '" + s + "' exceeds demanded length of " + length + " digits"); + if (length == 0) length = 1; // rare exception for the case that c == 0 + while (s.length() < length) s = '-' + s; + return s; + } + + public long decodeLex(String s) { + long c = 0; + for (int i = 0; i < s.length(); i++) c = c * 96 + (byte) s.charAt(i) - 32; + return c; + } + + private static long maxLex(int len) { + // computes the maximum number that can be coded with a lex-encoded String of length len + long c = 0; + for (int i = 0; i < len; i++) c = c * 96 + 95; + return c; + } + + public static final long maxDHTDistance = maxLex(9); + + public long dhtDistance(String wordhash) { + // computes a virtual distance, the result must be set in relation to maxDHTDistace + // if the distance is small, this peer is more responsible for that word hash + // if the distance is big, this peer is less responsible for that word hash + long myPos = 
decodeLex(hash.substring(0,9)); + long wordPos = decodeLex(wordhash.substring(0,9)); + return (myPos > wordPos) ? (myPos - wordPos) : (myPos + maxDHTDistance - wordPos); + } + + public static yacySeed genLocalSeed(plasmaSwitchboard sb) { + // genera a seed for the local peer + // this is the birthplace of a seed, that then will start to travel to other peers + + // at first we need a good peer hash + // that hash should be as static as possible, so that it depends mainly on system + // variables and can even then be reconstructed if the local seed has disappeared + Properties sp = System.getProperties(); + String slow = + sp.getProperty("file.encoding","") + + sp.getProperty("file.separator","") + + sp.getProperty("java.class.path","") + + sp.getProperty("java.vendor","") + + sp.getProperty("os.arch","") + + sp.getProperty("os.name","") + + sp.getProperty("path.separator","") + + sp.getProperty("user.dir","") + + sp.getProperty("user.home","") + + sp.getProperty("user.language","") + + sp.getProperty("user.name","") + + sp.getProperty("user.timezone", ""); + String medium = + sp.getProperty("java.class.version","") + + sp.getProperty("java.version","") + + sp.getProperty("os.version","") + + sb.getConfig("peerName", "noname"); + String fast = "" + System.currentTimeMillis(); + // the resultinh hash does not have any information than can be used to reconstruct the + // original system information that has been collected here to create the hash + // We simply distinuguish three parts of the hash: slow, medium and fast changing character of system idenfification + // the Hash is constructed in such a way, that the slow part influences the main aerea of the distributed hash location + // more than the fast part. 
The effect is, that if the peer looses it's seed information and is reconstructed, it + // still hosts most information of the distributed hash the an appropriate 'position' + String hash = + serverCodings.encodeMD5B64(slow, true).substring(0, 4) + + serverCodings.encodeMD5B64(medium, true).substring(0, 4) + + serverCodings.encodeMD5B64(fast, true).substring(0, 4); + yacyCore.log.logInfo("init: OWN SEED = " + hash); + + if (hash.length() != yacySeedDB.commonHashLength) { + yacyCore.log.logFailure("YACY Internal error: distributed hash conceptual error"); + System.exit(-1); + } + + yacySeed newSeed = new yacySeed(hash); + + // now calculate other information about the host + newSeed.dna.put("Name", sb.getConfig("peerName", "unnamed")); + newSeed.dna.put("Port", sb.getConfig("port", "8080")); + newSeed.dna.put("BDate", yacyCore.universalDateShortString()); + newSeed.dna.put("LastSeen", newSeed.dna.get("BDate")); // just as initial setting + newSeed.dna.put("PeerType", "virgin"); + + return newSeed; + } + + public static yacySeed genRemoteSeed(String seedStr, String key, Date remoteTime) { + // this method is used to convert the external representation of a seed into a seed object + if (seedStr == null) return null; + String seed = crypt.simpleDecode(seedStr, key); + if (seed == null) return null; + HashMap dna = new HashMap(); + int pos; + pos = seed.indexOf("{"); if (pos >= 0) seed = seed.substring(pos + 1).trim(); + pos = seed.lastIndexOf("}"); if (pos >= 0) seed = seed.substring(0, pos).trim(); + StringTokenizer st = new StringTokenizer(seed, ","); + String token; + while (st.hasMoreTokens()) { + token = st.nextToken().trim(); + //System.out.println("PARSED TOKEN: " + token); + pos = token.indexOf("="); + if (pos > 0) dna.put(token.substring(0, pos).trim(), token.substring(pos + 1).trim()); + } + String hash = (String) dna.remove("Hash"); + return new yacySeed(hash, dna); + } + + public String toString() { + // set hash into seed code structure + dna.put("Hash", 
this.hash); + // generate string representation + String s = dna.toString(); + // reconstruct original: hash is stored external + dna.remove("Hash"); + // return string + return s; + } + + public String genSeedStr(String key) { + // use a default encoding + return genSeedStr('b', key); + } + + public String genSeedStr(char method, String key) { + return crypt.simpleEncode(toString(), key, method); + } + + public boolean isProper() { + // checks if everything is ok with that seed + if (this.hash == null) return false; + if (this.hash.length() != yacySeedDB.commonHashLength) return false; + String ip = (String) dna.get("IP"); + if ((ip == null) || (ip.length() < 8)) return false; + return (natLib.isProper(ip)); + } + + public void save(File f) throws IOException { + String out = genSeedStr('p', null); + FileWriter fw = new FileWriter(f); + fw.write(out, 0, out.length()); + fw.close(); + } + + + public static yacySeed load(File f) throws IOException { + FileReader fr = new FileReader(f); + char[] b = new char[(int) f.length()]; + fr.read(b, 0, b.length); + fr.close(); + return genRemoteSeed(new String(b), null, new Date()); + } + + public Object clone() { + return new yacySeed(this.hash, (HashMap) (new HashMap(dna)).clone()); + } + + /* + public static void main(String[] argv) { + try { + plasmaSwitchboard sb = new plasmaSwitchboard("../httpProxy.init", "../httpProxy.conf"); + yacySeed ys = genLocalSeed(sb); + String yp, yz, yc; + System.out.println("YACY String = " + ys.toString()); + System.out.println("YACY SeedStr/p = " + (yp = ys.genSeedStr('p', null))); + //System.out.println("YACY SeedStr/z = " + (yz = ys.genSeedStr('z', null))); + System.out.println("YACY SeedStr/c = " + (yc = ys.genSeedStr('c', "abc"))); + System.out.println("YACY remote/p = " + genRemoteSeed(yp, null).toString()); + //System.out.println("YACY remote/z = " + genRemoteSeed(yz, null).toString()); + System.out.println("YACY remote/c = " + genRemoteSeed(yc, "abc").toString()); + System.exit(0); + 
} catch (IOException e) { + e.printStackTrace(); + } + } + */ + +} diff --git a/source/de/anomic/yacy/yacySeedDB.java b/source/de/anomic/yacy/yacySeedDB.java new file mode 100644 index 000000000..45ba56063 --- /dev/null +++ b/source/de/anomic/yacy/yacySeedDB.java @@ -0,0 +1,612 @@ +// yacySeedDB.java +// ------------------------------------- +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2004, 2005 +// last major change: 22.02.2005 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. 
The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. +// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. + +package de.anomic.yacy; + +import java.io.*; +import java.util.*; +import java.net.*; +import de.anomic.kelondro.*; +import de.anomic.tools.*; +import de.anomic.http.*; +import de.anomic.server.*; +import de.anomic.net.*; +import de.anomic.plasma.*; + +public class yacySeedDB { + + // global statics + public static final int commonHashLength = 12; + // this is the lenght of the hash key that is used: + // - for seed hashes (this Object) + // - for word hashes (plasmaIndexEntry.wordHashLength) + // - for L-URL hashes (plasmaLURL.urlHashLength) + // these hashes all shall be generated by base64.enhancedCoder + + public static final String[] sortFields = new String[] {"LCount", "ICount", "Uptime", "Version", "LastSeen"}; + public static final String[] accFields = new String[] {"LCount", "ICount"}; + + // class objects + private File seedActiveDBFile, seedPassiveDBFile, seedPotentialDBFile; + + private disorderHeap seedQueue; + private kelondroMap seedActiveDB, seedPassiveDB, seedPotentialDB; + private int seedDBBufferKB; + + public plasmaSwitchboard sb; + public yacySeed mySeed; // my own seed + public 
File myOwnSeedFile; + private Hashtable nameLookupCache; + + + public yacySeedDB(plasmaSwitchboard sb, + File seedActiveDBFile, + File seedPassiveDBFile, + File seedPotentialDBFile, + int bufferkb) throws IOException { + + this.seedActiveDBFile = seedActiveDBFile; + this.seedPassiveDBFile = seedPassiveDBFile; + this.seedPotentialDBFile = seedPotentialDBFile; + this.mySeed = null; // my own seed + this.sb = sb; + + // set up seed database + seedActiveDB = openSeedTable(seedActiveDBFile); + seedPassiveDB = openSeedTable(seedPassiveDBFile); + seedPotentialDB = openSeedTable(seedPotentialDBFile); + + // create or init own seed + myOwnSeedFile = new File(sb.getRootPath(), sb.getConfig("yacyOwnSeedFile", "mySeed.txt")); + if (myOwnSeedFile.exists()) { + // load existing identity + mySeed = yacySeed.load(myOwnSeedFile); + } else { + // create new identity + mySeed = yacySeed.genLocalSeed(sb); + // save of for later use + mySeed.save(myOwnSeedFile); // in a file + //writeMap(mySeed.hash, mySeed.dna, "new"); // in a database + } + + mySeed.put("IP", ""); // we delete the old information to see what we have now + mySeed.put("Port", sb.getConfig("port", "8080")); // set my seed's correct port number + mySeed.put("PeerType", "virgin"); // markup startup condition + + // start our virtual DNS service for yacy peers with empty cache + nameLookupCache = new Hashtable(); + + // check if we are in the seedCaches: this can happen if someone else published our seed + removeMySeed(); + + // set up seed queue (for probing candidates) + seedQueue = null; + } + + public synchronized void removeMySeed() { + try { + seedActiveDB.remove(mySeed.hash); + seedPassiveDB.remove(mySeed.hash); + seedPotentialDB.remove(mySeed.hash); + } catch (IOException e) {} + } + + private synchronized kelondroMap openSeedTable(File seedDBFile) throws IOException { + if (seedDBFile.exists()) { + // open existing seed database + return new kelondroMap(new kelondroDyn(seedDBFile, seedDBBufferKB * 0x400), 
sortFields, accFields); + } else { + // create new seed database + new File(seedDBFile.getParent()).mkdir(); + return new kelondroMap(new kelondroDyn(seedDBFile, seedDBBufferKB * 0x400, commonHashLength, 480), sortFields, accFields); + } + } + + private synchronized kelondroMap resetSeedTable(kelondroMap seedDB, File seedDBFile) { + // this is an emergency function that should only be used if any problem with the + // seed.db is detected + yacyCore.log.logError("seed-db " + seedDBFile.toString() + " reset (on-the-fly)"); + try { + seedDB.close(); + seedDBFile.delete(); + // create new seed database + seedDB = openSeedTable(seedDBFile); + } catch (IOException e) { + e.printStackTrace(); + } + return seedDB; + } + + public void close() { + try { + seedActiveDB.close(); + seedPassiveDB.close(); + } catch (IOException e) { + e.printStackTrace(); + } + } + + public Enumeration seedsSortedConnected(boolean up, String field) { + // enumerates seed-type objects: all seeds sequentially ordered by field + return new seedEnum(up, field, seedActiveDB); + } + + public Enumeration seedsSortedDisconnected(boolean up, String field) { + // enumerates seed-type objects: all seeds sequentially ordered by field + return new seedEnum(up, field, seedPassiveDB); + } + + public Enumeration seedsSortedPotential(boolean up, String field) { + // enumerates seed-type objects: all seeds sequentially ordered by field + return new seedEnum(up, field, seedPotentialDB); + } + + public Enumeration seedsConnected(boolean up, boolean rot, String firstHash) { + // enumerates seed-type objects: all seeds sequentially without order + return new seedEnum(up, rot, (firstHash == null) ? null : firstHash.getBytes(), seedActiveDB); + } + + public Enumeration seedsDisconnected(boolean up, boolean rot, String firstHash) { + // enumerates seed-type objects: all seeds sequentially without order + return new seedEnum(up, rot, (firstHash == null) ? 
null : firstHash.getBytes(), seedPassiveDB); + } + + public Enumeration seedsPotential(boolean up, boolean rot, String firstHash) { + // enumerates seed-type objects: all seeds sequentially without order + return new seedEnum(up, rot, (firstHash == null) ? null : firstHash.getBytes(), seedPotentialDB); + } + + public yacySeed anySeed() { + // return just any probe candidate + yacySeed seed; + if ((seedQueue == null) || (seedQueue.size() == 0)) { + if (seedActiveDB.size() <= 0) return null; + + // fill up the queue + seedQueue = new disorderHeap(); + Iterator keyIt; + try { + keyIt = seedActiveDB.keys(true, false); // iteration of String - Objects + } catch (IOException e) { + yacyCore.log.logError("yacySeedCache.anySeed: seed.db not available: " + e.getMessage()); + keyIt = (new HashSet()).iterator(); + } + String seedHash; + String myIP = (mySeed == null) ? "" : ((String) mySeed.get("IP", "127.0.0.1")); + while (keyIt.hasNext()) { + seedHash = (String) keyIt.next(); + try { + seed = new yacySeed(seedHash, seedActiveDB.get(seedHash)); + // check here if the seed is equal to the own seed + // this should never be the case, but it happens if a redistribution circle exists + if ((mySeed != null) && (seedHash.equals(mySeed.hash))) { + // this seed should not be in the database + seedActiveDB.remove(seedHash); + } else { + // add to queue + seedQueue.add(seed); + } + } catch (IOException e) {} + } + // the queue is filled up! 
+ } + if ((seedQueue == null) || (seedQueue.size() == 0)) return null; + return (yacySeed) seedQueue.remove(); + } + + public yacySeed anySeedType(String type) { + // this returns any seed that has a special PeerType + yacySeed ys; + String t; + for (int i = 0; i < seedActiveDB.size(); i++) { + ys = anySeed(); + if (ys == null) return null; + t = (String) ys.get("PeerType", ""); + if ((t != null) && (t.equals(type))) return ys; + } + return null; + } + + public yacySeed[] seedsByAge(boolean up, int count) { + if (count > sizeConnected()) count = sizeConnected(); + + // fill a score object + kelondroMScoreCluster seedScore = new kelondroMScoreCluster(); + yacySeed ys; + String t; + long absage; + Enumeration s = seedsConnected(true, false, null); + int searchcount = 1000; + if (searchcount > sizeConnected()) searchcount = sizeConnected(); + try { + while ((s.hasMoreElements()) && (searchcount-- > 0)) { + ys = (yacySeed) s.nextElement(); + if ((ys != null) && ((t = ys.get("LastSeen", "")).length() > 10)) try { + absage = Math.abs(yacyCore.universalTime() - yacyCore.shortFormatter.parse(t).getTime()); + seedScore.addScore(ys.hash, (int) absage); + } catch (Exception e) {} + } + + // result is now in the score object; create a result vector + yacySeed[] result = new yacySeed[count]; + Iterator it = seedScore.scores(up); + int c = 0; + while ((c < count) && (it.hasNext())) result[c++] = getConnected((String) it.next()); + return result; + } catch (NullPointerException e) { + seedActiveDB = resetSeedTable(seedActiveDB, seedActiveDBFile); + System.out.println("Internal Error at yacySeedDB.seedsByAge: " + e.getMessage()); + e.printStackTrace(); + return null; + } + } + + public int sizeConnected() { + return seedActiveDB.size(); + /* + Enumeration e = seedsConnected(true, false, null); + int c = 0; while (e.hasMoreElements()) {c++; e.nextElement();} + return c; + */ + } + + public int sizeDisconnected() { + return seedPassiveDB.size(); + /* + Enumeration e = 
seedsDisconnected(true, false, null); + int c = 0; while (e.hasMoreElements()) {c++; e.nextElement();} + return c; + */ + } + + public int sizePotential() { + return seedPotentialDB.size(); + /* + Enumeration e = seedsPotential(true, false, null); + int c = 0; while (e.hasMoreElements()) {c++; e.nextElement();} + return c; + */ + } + + public long countActiveURL() { return seedActiveDB.getAcc("LCount"); } + public long countActiveRWI() { return seedActiveDB.getAcc("ICount"); } + public long countPassiveURL() { return seedPassiveDB.getAcc("LCount"); } + public long countPassiveRWI() { return seedPassiveDB.getAcc("ICount"); } + public long countPotentialURL() { return seedPotentialDB.getAcc("LCount"); } + public long countPotentialRWI() { return seedPotentialDB.getAcc("ICount"); } + + public void addConnected(yacySeed seed) { + if ((seed == null) || (!(seed.isProper()))) return; + //seed.put("LastSeen", yacyCore.shortFormatter.format(new Date(yacyCore.universalTime()))); + try { + nameLookupCache.put(seed.getName(), seed); + seedActiveDB.set(seed.hash, seed.getMap()); + seedPassiveDB.remove(seed.hash); + seedPotentialDB.remove(seed.hash); + } catch (IOException e) { + } catch (IllegalArgumentException e) { + System.out.println("ERROR add: seed.db corrupt; resetting seed.db"); + e.printStackTrace(); + seedActiveDB = resetSeedTable(seedActiveDB, seedActiveDBFile); + } + } + + public void addDisconnected(yacySeed seed) { + if ((seed == null) || (!(seed.isProper()))) return; + //seed.put("LastSeen", yacyCore.shortFormatter.format(new Date(yacyCore.universalTime()))); + try { + nameLookupCache.remove(seed.getName()); + seedPassiveDB.set(seed.hash, seed.getMap()); + seedActiveDB.remove(seed.hash); + seedPotentialDB.remove(seed.hash); + } catch (IOException e) { + } catch (IllegalArgumentException e) { + System.out.println("ERROR add: seed.db corrupt; resetting seed.db"); + e.printStackTrace(); + seedPassiveDB = resetSeedTable(seedPassiveDB, seedPassiveDBFile); + } + } + + 
public void addPotential(yacySeed seed) { + if ((seed == null) || (!(seed.isProper()))) return; + //seed.put("LastSeen", yacyCore.shortFormatter.format(new Date(yacyCore.universalTime()))); + try { + nameLookupCache.remove(seed.getName()); + seedPotentialDB.set(seed.hash, seed.getMap()); + seedActiveDB.remove(seed.hash); + seedPassiveDB.remove(seed.hash); + } catch (IOException e) { + } catch (IllegalArgumentException e) { + System.out.println("ERROR add: seed.db corrupt; resetting seed.db"); + e.printStackTrace(); + seedPassiveDB = resetSeedTable(seedPassiveDB, seedPassiveDBFile); + } + } + + public boolean hasConnected(String hash) { + try { + return (seedActiveDB.get(hash) != null); + } catch (IOException e) { + return false; + } + } + + public boolean hasDisconnected(String hash) { + try { + return (seedPassiveDB.get(hash) != null); + } catch (IOException e) { + return false; + } + } + + public boolean hasPotential(String hash) { + try { + return (seedPotentialDB.get(hash) != null); + } catch (IOException e) { + return false; + } + } + + private yacySeed get(String hash, kelondroMap database) { + if (hash == null) return null; + if ((mySeed != null) && (hash.equals(mySeed.hash))) return mySeed; + try { + Map entry = database.get(hash); + if (entry == null) return null; + return new yacySeed(hash, entry); + } catch (IOException e) { + return null; + } + } + + public yacySeed getConnected(String hash) { + return get(hash, seedActiveDB); + } + + public yacySeed getDisconnected(String hash) { + return get(hash, seedPassiveDB); + } + + public yacySeed lookupByName(String peerName) { + // reads a seed by searching by name + + // local peer? 
+ if (peerName.equals("localpeer")) return mySeed; + + // then try to use the cache + yacySeed seed = (yacySeed) nameLookupCache.get(peerName); + if (seed != null) return seed; + + // enumerate the cache and simultanous insert values + Enumeration e = seedsConnected(true, false, null); + String name; + while (e.hasMoreElements()) { + seed = (yacySeed) e.nextElement(); + if (seed != null) { + name = seed.getName().toLowerCase(); + if (seed.isProper()) nameLookupCache.put(name, seed); + if (name.equals(peerName)) return seed; + } + } + // check local seed + name = mySeed.getName().toLowerCase(); + if (mySeed.isProper()) nameLookupCache.put(name, mySeed); + if (name.equals(peerName)) return mySeed; + // nothing found + return null; + } + + public Vector storeCache(File seedFile) throws IOException { + return storeCache(seedFile, false); + } + + private Vector storeCache(File seedFile, boolean addMySeed) throws IOException { + PrintWriter pw = new PrintWriter(new FileWriter(seedFile)); + Vector v = new Vector(); + // store own seed + String line; + if ((addMySeed) && (mySeed != null)) { + line = mySeed.genSeedStr(null); + v.add(line); + pw.print(line + serverCore.crlfString); + } + // store other seeds + yacySeed ys; + for (int i = 0; i < seedActiveDB.size(); i++) { + ys = anySeed(); + if (ys != null) { + line = ys.genSeedStr(null); + v.add(line); + pw.print(line + serverCore.crlfString); + } + } + pw.close(); + return v; + } + + public String uploadCache(String seedFTPServer, + String seedFTPAccount, + String seedFTPPassword, + File seedFTPPath, + URL seedURL) throws IOException { + // upload a seed file, if possible + if (seedURL == null) return "UPLOAD - Error: URL not given"; + File seedFile = new File("seedFile.txt"); + Vector uv = storeCache(seedFile, true); + // upload the seed file + String log = ftpc.put(seedFTPServer, seedFile, seedFTPPath.getParent(), seedFTPPath.getName(), seedFTPAccount, seedFTPPassword); + try { + // check also if the result can be 
retrieved again + if (checkCache(uv, seedURL)) + log = log + "UPLOAD CHECK - Success: the result vectors are equal" + serverCore.crlfString; + else + log = log + "UPLOAD CHECK - Error: the result vector is different" + serverCore.crlfString; + } catch (IOException e) { + log = log + "UPLOAD CHECK - Error: IO problem " + e.getMessage() + serverCore.crlfString; + } + seedFile.delete(); + return log; + } + + public String copyCache(File seedFile, URL seedURL) throws IOException { + if (seedURL == null) return "COPY - Error: URL not given"; + Vector uv = storeCache(seedFile, true); + try { + // check also if the result can be retrieved again + if (checkCache(uv, seedURL)) + return "COPY CHECK - Success: the result vectors are equal" + serverCore.crlfString; + else + return "COPY CHECK - Error: the result vector is different" + serverCore.crlfString; + } catch (IOException e) { + return "COPY CHECK - Error: IO problem " + e.getMessage() + serverCore.crlfString; + } + } + + private boolean checkCache(Vector uv, URL seedURL) throws IOException { + // check if the result can be retrieved again + Vector check = httpc.wget(seedURL, 10000, null, null, sb.remoteProxyHost, sb.remoteProxyPort); + if ((check == null) || (uv == null) || (uv.size() != check.size())) { + return false; + } else { + int i; + for (i = 0; i < uv.size(); i++) { + if (!(((String) uv.elementAt(i)).equals((String) check.elementAt(i)))) return false; + } + if (i == uv.size()) return true; + } + return false; + } + + public String resolveYacyAddress(String host) { + yacySeed seed; + int p; + String subdom = null; + if (host.endsWith(".yacyh")) { + // this is not functional at the moment + // caused by lowecasing of hashes at the browser client + p = host.indexOf("."); + if ((p > 0) && (p != (host.length() - 6))) { + subdom = host.substring(0, p); + host = host.substring(p + 1); + } + // check remote seeds + seed = getConnected(host.substring(0, host.length() - 6)); // checks only remote, not local + // check 
local seed + if (seed == null) { + if (host.substring(0, host.length() - 6).equals(mySeed.hash)) + seed = mySeed; + else return null; + } + return seed.getAddress() + ((subdom == null) ? "" : ("/" + subdom)); + } else if (host.endsWith(".yacy")) { + // identify subdomain + p = host.indexOf("."); + if ((p > 0) && (p != (host.length() - 5))) { + subdom = host.substring(0, p); // no double-dot attack possible, the subdom cannot have ".." in it + host = host.substring(p + 1); // if ever, the double-dots are here but do not harm + } + // identify domain + String domain = host.substring(0, host.length() - 5).toLowerCase(); + seed = lookupByName(domain); + if (seed == null) return null; + if ((seed == mySeed) && (!(seed.isOnline()))) { + // take local ip instead of external + return serverCore.publicIP().getHostAddress() + ":" + sb.getConfig("port", "8080") + ((subdom == null) ? "" : ("/" + subdom)); + } + return seed.getAddress() + ((subdom == null) ? "" : ("/" + subdom)); + } else { + return null; + } + } + + class seedEnum implements Enumeration { + + kelondroMap.mapIterator it; + yacySeed nextSeed; + kelondroMap database; + + public seedEnum(boolean up, boolean rot, byte[] firstKey, kelondroMap database) { + this.database = database; + try { + it = (firstKey == null) ? 
database.maps(up, rot) : database.maps(up, rot, firstKey); + nextSeed = internalNext(); + } catch (Exception e) { + System.out.println("ERROR seedLinEnum: seed.db corrupt; resetting seed.db"); + e.printStackTrace(); + if (database == seedActiveDB) seedActiveDB = resetSeedTable(seedActiveDB, seedActiveDBFile); + if (database == seedPassiveDB) seedPassiveDB = resetSeedTable(seedPassiveDB, seedPassiveDBFile); + it = null; + } + } + + public seedEnum(boolean up, String field, kelondroMap database) { + this.database = database; + try { + it = database.maps(up, field); + nextSeed = internalNext(); + } catch (Exception e) { + System.out.println("ERROR seedLinEnum: seed.db corrupt; resetting seed.db"); + e.printStackTrace(); + if (database == seedActiveDB) seedActiveDB = resetSeedTable(seedActiveDB, seedActiveDBFile); + if (database == seedPassiveDB) seedPassiveDB = resetSeedTable(seedPassiveDB, seedPassiveDBFile); + it = null; + } + } + + public boolean hasMoreElements() { + return (nextSeed != null); + } + + public yacySeed internalNext() { + if ((it == null) || (!(it.hasNext()))) return null; + Map dna = (Map) it.next(); + String hash = (String) dna.remove("key"); + return new yacySeed(hash, dna); + } + + public Object nextElement() { + yacySeed seed = nextSeed; + nextSeed = internalNext(); + return seed; + } + + } + +} diff --git a/source/yacy.java b/source/yacy.java new file mode 100644 index 000000000..5aa5ea1f4 --- /dev/null +++ b/source/yacy.java @@ -0,0 +1,571 @@ +// yacy.java +// ----------------------- +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.yacy.net +// Frankfurt, Germany, 2004, 2005 +// last major change: 24.03.2005 +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. 
+// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. +// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. + +/* + This is the main class of the proxy. + From here, several threads are started: + + - one single instance of the plasmaSwitchboard is generated, + which itself starts a thread with a plasmaHTMLCache object. This object simply counts + files sizes in the cache and terminates then. 
+ It also generates a plasmaCrawlerLoader object, which may itself start + some more httpc-calling threads to load web pages. They terminate automatically when a page has loaded. + - one serverCore thread is started, which implements a multi-threaded server. + The process may itself start many more processes that handle connections. + - finally, all idle-dependent processes are written in a queue in plasmaSwitchboard + which are worked off inside an idle-sensitive loop of the main process. (here) + + On termination, the following must be done: + - stop feeding of the crawling process because it otherwise fills the indexing queue. + - say goodbye to connected peers and disable new connections. Don't wait for success. + - first terminate the serverCore thread. This prevents new cache objects from being queued + - wait until the plasmaHTMLCache terminates (normally this process has already terminated) + - then wait for termination of all loader processes of the plasmaCrawlerLoader + - work off the indexing and cache storage queue.
These values are inside a RAM cache and would be lost othervise + - write all settings + - terminate +*/ + + +import java.io.*; +import java.net.*; +import java.util.*; +import de.anomic.http.*; +import de.anomic.plasma.*; +import de.anomic.kelondro.*; +import de.anomic.tools.*; +import de.anomic.server.*; +import de.anomic.yacy.*; +//import de.anomic.http.*; + +public class yacy { + + // static objects + private static final String vString = "<>"; + private static final String vDATE = "<>"; + private static final String copyright = "[ YACY Proxy v" + vString + ", build " + vDATE + " by Michael Christen / www.yacy.net ]"; + private static final String hline = "-------------------------------------------------------------------------------"; + + private static void startup(String homePath) { + long startup = yacyCore.universalTime(); + try { + // start up + System.out.println(copyright); + System.out.println(hline); + + // check java version + try { + String[] check = "a,b".split(","); // split needs java 1.4 + } catch (NoSuchMethodError e) { + serverLog.logFailure("STARTUP", "Java Version too low. 
You need at least Java 1.4.2 to run YACY"); + Thread.currentThread().sleep(3000); + System.exit(-1); + } + + serverLog.logSystem("STARTUP", "java version " + System.getProperty("java.version", "no-java-version")); + serverLog.logSystem("STARTUP", "Application Root Path: " + homePath.toString()); + + // create data folder + File dataFolder = new File(homePath, "DATA"); + if (!(dataFolder.exists())) dataFolder.mkdir(); + + plasmaSwitchboard sb = new plasmaSwitchboard(homePath, "yacy.init", "DATA/SETTINGS/httpProxy.conf"); + sb.setConfig("version", vString); + sb.setConfig("vdate", vDATE); + sb.setConfig("applicationRoot", homePath); + sb.setConfig("startupTime", "" + startup); + serverLog.logSystem("STARTUP", "YACY Version: " + vString + ", Built " + vDATE); + + // read environment + //new + int port = Integer.parseInt(sb.getConfig("port", "8080")); + int httpdLoglevel = Integer.parseInt(sb.getConfig("httpdLoglevel", "2")); + int timeout = Integer.parseInt(sb.getConfig("httpdTimeout", "60000")); + if (timeout < 60000) timeout = 60000; + int maxSessions = Integer.parseInt(sb.getConfig("httpdMaxSessions", "100")); + + // hardcoded, forced, temporary value-migration + sb.setConfig("htTemplatePath", "htroot/env/templates"); + + // create some directories + File htRootPath = new File(sb.getRootPath(), sb.getConfig("htRootPath", "htroot")); + File htDocsPath = new File(sb.getRootPath(), sb.getConfig("htDocsPath", "DATA/HTDOCS")); + File htTemplatePath = new File(sb.getRootPath(), sb.getConfig("htTemplatePath","htdocs")); + + if (!(htDocsPath.exists())) htDocsPath.mkdir(); + File htdocsDefaultReadme = new File(htDocsPath, "readme.txt"); + if (!(htdocsDefaultReadme.exists())) try {serverFileUtils.write(( + "This is your root directory for individual Web Content\r\n" + + "\r\n" + + "Please place your html files into the www subdirectory.\r\n" + + "The URL of that path is either\r\n" + + "http://www..yacy or\r\n" + + "http://:/www\r\n" + + "\r\n" + + "Other subdirectories may 
be created; they map to corresponding sub-domains.\r\n" + + "This directory shares it's content with the applications htroot path, so you\r\n" + + "may access your yacy search page with\r\n" + + "http://.yacy/\r\n" + + "\r\n").getBytes(), htdocsDefaultReadme);} catch (IOException e) { + System.out.println("Error creating htdocs readme: " + e.getMessage()); + } + + File wwwDefaultPath = new File(htDocsPath, "www"); + if (!(wwwDefaultPath.exists())) wwwDefaultPath.mkdir(); + + File wwwDefaultClass = new File(wwwDefaultPath, "welcome.class"); + //if ((!(wwwDefaultClass.exists())) || (wwwDefaultClass.length() != (new File(htRootPath, "htdocsdefault/welcome.class")).length())) try { + if((new File(htRootPath, "htdocsdefault/welcome.java")).exists()) + serverFileUtils.copy(new File(htRootPath, "htdocsdefault/welcome.java"), new File(wwwDefaultPath, "welcome.java")); + serverFileUtils.copy(new File(htRootPath, "htdocsdefault/welcome.class"), wwwDefaultClass); + serverFileUtils.copy(new File(htRootPath, "htdocsdefault/welcome.html"), new File(wwwDefaultPath, "welcome.html")); + //} catch (IOException e) {} + + File shareDefaultPath = new File(htDocsPath, "share"); + if (!(shareDefaultPath.exists())) shareDefaultPath.mkdir(); + + File shareDefaultClass = new File(shareDefaultPath, "dir.class"); + //if ((!(shareDefaultClass.exists())) || (shareDefaultClass.length() != (new File(htRootPath, "htdocsdefault/dir.class")).length())) try { + if((new File(htRootPath, "htdocsdefault/dir.java")).exists()) + serverFileUtils.copy(new File(htRootPath, "htdocsdefault/dir.java"), new File(shareDefaultPath, "dir.java")); + serverFileUtils.copy(new File(htRootPath, "htdocsdefault/dir.class"), shareDefaultClass); + serverFileUtils.copy(new File(htRootPath, "htdocsdefault/dir.html"), new File(shareDefaultPath, "dir.html")); + //} catch (IOException e) {} + + + // set preset accounts/passwords + String acc; + if ((acc = sb.getConfig("proxyAccount", "")).length() > 0) { + 
sb.setConfig("proxyAccountBase64MD5", serverCodings.standardCoder.encodeMD5Hex(serverCodings.standardCoder.encodeBase64String(acc))); + sb.setConfig("proxyAccount", ""); + } + if ((acc = sb.getConfig("serverAccount", "")).length() > 0) { + sb.setConfig("serverAccountBase64MD5", serverCodings.standardCoder.encodeMD5Hex(serverCodings.standardCoder.encodeBase64String(acc))); + sb.setConfig("serverAccount", ""); + } + if ((acc = sb.getConfig("adminAccount", "")).length() > 0) { + sb.setConfig("adminAccountBase64MD5", serverCodings.standardCoder.encodeMD5Hex(serverCodings.standardCoder.encodeBase64String(acc))); + sb.setConfig("adminAccount", ""); + } + + // fix unsafe old passwords + if ((acc = sb.getConfig("proxyAccountBase64", "")).length() > 0) { + sb.setConfig("proxyAccountBase64MD5", serverCodings.standardCoder.encodeMD5Hex(acc)); + sb.setConfig("proxyAccountBase64", ""); + } + if ((acc = sb.getConfig("serverAccountBase64", "")).length() > 0) { + sb.setConfig("serverAccountBase64MD5", serverCodings.standardCoder.encodeMD5Hex(acc)); + sb.setConfig("serverAccountBase64", ""); + } + if ((acc = sb.getConfig("adminAccountBase64", "")).length() > 0) { + sb.setConfig("adminAccountBase64MD5", serverCodings.standardCoder.encodeMD5Hex(acc)); + sb.setConfig("adminAccountBase64", ""); + } + if ((acc = sb.getConfig("uploadAccountBase64", "")).length() > 0) { + sb.setConfig("uploadAccountBase64MD5", serverCodings.standardCoder.encodeMD5Hex(acc)); + sb.setConfig("uploadAccountBase64", ""); + } + if ((acc = sb.getConfig("downloadAccountBase64", "")).length() > 0) { + sb.setConfig("downloadAccountBase64MD5", serverCodings.standardCoder.encodeMD5Hex(acc)); + sb.setConfig("downloadAccountBase64", ""); + } + + // init parser + de.anomic.htmlFilter.htmlFilterContentScraper.mediaExt = sb.getConfig("mediaExt",""); + + // start main threads + try { + httpd protocolHandler = new httpd(sb, new httpdFileHandler(sb), new httpdProxyHandler(sb)); + serverCore server = new serverCore(port, + 
maxSessions /*sessions*/, + timeout /*control socket timeout in milliseconds*/, + true /* terminate sleeping threads */, + true /* block attacks (wrong protocol) */, + protocolHandler /*command class*/, + sb, + 30000 /*command max length incl. GET args*/, + httpdLoglevel /*loglevel*/); + if (server == null) { + serverLog.logFailure("STARTUP", "Failed to start server. Probably port " + port + " already in use."); + } else { + // first start the server + sb.deployThread("10_httpd", "HTTPD Server/Proxy", "the HTTPD, used as web server and proxy", server, null, 0, 0, 0); + //server.start(); + + // open the browser window + boolean browserPopUpTrigger = sb.getConfig("browserPopUpTrigger", "true").equals("true"); + if (browserPopUpTrigger) { + String browserPopUpPage = sb.getConfig("browserPopUpPage", "Status.html"); + String browserPopUpApplication = sb.getConfig("browserPopUpApplication", "netscape"); + serverSystem.openBrowser("http://localhost:" + port + "/" + browserPopUpPage, browserPopUpApplication); + } + + // loop and wait + while (!(sb.terminate)) try { + Thread.currentThread().sleep(1000); // wait a while + // System.gc(); // prevent that we catch too much memory + } catch (Exception e) { + serverLog.logError("MAIN CONTROL LOOP", "PANIK: " + e.getMessage()); + e.printStackTrace(); + } + + // shut down + serverLog.logSystem("SHUTDOWN", "catched termination signal"); + server.terminate(false); + server.interrupt(); + if (server.isAlive()) try { + httpc.wget(new URL("http://localhost:" + port), 1000, null, null, null, 0); // kick server + serverLog.logSystem("SHUTDOWN", "sent termination signal to server socket"); + } catch (IOException ee) { + serverLog.logSystem("SHUTDOWN", "termination signal to server socket missed (server shutdown, ok)"); + } + + // idle until the processes are down + while (server.isAlive()) { + Thread.currentThread().sleep(2000); // wait a while + } + serverLog.logSystem("SHUTDOWN", "server has terminated"); + sb.close(); + } + } catch 
(Exception e) { + serverLog.logError("STARTUP", "" + e); + e.printStackTrace(); + //System.exit(1); + } + } catch (Exception ee) { + serverLog.logFailure("STARTUP", "FATAL ERROR: " + ee.getMessage()); + ee.printStackTrace(); + } + serverLog.logSystem("SHUTDOWN", "goodby. (this is the last line)"); + try {System.exit(0);} catch (Exception e) {} // was once stopped by de.anomic.net.ftpc$sm.checkExit(ftpc.java:1790) + } + + + + private static Properties configuration(String mes, String homePath) { + serverLog.logSystem(mes, "Application Root Path: " + homePath.toString()); + + // read data folder + File dataFolder = new File(homePath, "DATA"); + if (!(dataFolder.exists())) { + serverLog.logError(mes, "Application was never started or root path wrong."); + System.exit(-1); + } + + Properties config = new Properties(); + try { + config.load(new FileInputStream(new File(homePath, "DATA/SETTINGS/httpProxy.conf"))); + } catch (FileNotFoundException e) { + serverLog.logError(mes, "could not find configuration file."); + System.exit(-1); + } catch (IOException e) { + serverLog.logError(mes, "could not read configuration file."); + System.exit(-1); + } + + return config; + } + + private static void shutdown(String homePath) { + // start up + System.out.println(copyright); + System.out.println(hline); + + Properties config = configuration("REMOTE-SHUTDOWN", homePath); + + // read port + int port = Integer.parseInt((String) config.get("port")); + + // read password + String encodedPassword = (String) config.get("adminAccountBase64MD5"); + if (encodedPassword == null) encodedPassword = ""; // not defined + + // send 'wget' to web interface + httpHeader requestHeader = new httpHeader(); + requestHeader.put("Authorization", "realm=" + encodedPassword); // for http-authentify + try { + httpc con = new httpc("localhost", port, 10000, false); + httpc.response res = con.GET("Steering.html?shutdown=", requestHeader); + + // read response + if (res.status.startsWith("2")) { + 
serverLog.logSystem("REMOTE-SHUTDOWN", "YACY accepted shutdown command."); + serverLog.logSystem("REMOTE-SHUTDOWN", "Stand by for termination, which may last some seconds."); + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + res.writeContent(bos, null); + con.close(); + } else { + serverLog.logError("REMOTE-SHUTDOWN", "error response from YACY socket: " + res.status); + System.exit(-1); + } + } catch (IOException e) { + serverLog.logError("REMOTE-SHUTDOWN", "could not establish connection to YACY socket: " + e.getMessage()); + System.exit(-1); + } + + // finished + serverLog.logSystem("REMOTE-SHUTDOWN", "SUCCESSFULLY FINISHED remote-shutdown:"); + serverLog.logSystem("REMOTE-SHUTDOWN", "YACY will terminate after working off all enqueued tasks."); + } + + private static void genWordstat(String homePath) { + // start up + System.out.println(copyright); + System.out.println(hline); + + Properties config = configuration("GEN-WORDSTAT", homePath); + + // load words + serverLog.logInfo("GEN-WORDSTAT", "loading words..."); + HashMap words = loadWordMap(new File(homePath, "yacy.words")); + + // find all hashes + serverLog.logInfo("GEN-WORDSTAT", "searching all word-hash databases..."); + File dbRoot = new File(homePath, config.getProperty("dbPath")); + enumerateFiles ef = new enumerateFiles(new File(dbRoot, "WORDS"), true, false, true); + File f; + String h; + kelondroMScoreCluster hs = new kelondroMScoreCluster(); + while (ef.hasMoreElements()) { + f = (File) ef.nextElement(); + h = f.getName().substring(0, plasmaURL.urlHashLength); + hs.addScore(h, (int) f.length()); + } + + // list the hashes in reverse order + serverLog.logInfo("GEN-WORDSTAT", "listing words in reverse size order..."); + String w; + Iterator i = hs.scores(false); + while (i.hasNext()) { + h = (String) i.next(); + w = (String) words.get(h); + if (w == null) System.out.print("# " + h); else System.out.print(w); + System.out.println(" - " + hs.getScore(h)); + } + + // finished + 
serverLog.logSystem("GEN-WORDSTAT", "FINISHED"); + } + + + private static HashMap loadWordMap(File wordlist) { + // returns a hash-word - Relation + HashMap wordmap = new HashMap(); + try { + String word; + BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(wordlist))); + while ((word = br.readLine()) != null) wordmap.put(plasmaWordIndexEntry.word2hash(word),word); + br.close(); + } catch (IOException e) {} + return wordmap; + } + + private static HashSet loadWordSet(File wordlist) { + // returns a set of words + HashSet wordset = new HashSet(); + try { + String word; + BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(wordlist))); + while ((word = br.readLine()) != null) wordset.add(word); + br.close(); + } catch (IOException e) {} + return wordset; + } + + private static void cleanwordlist(String wordlist, int minlength, int maxlength) { + // start up + System.out.println(copyright); + System.out.println(hline); + serverLog.logSystem("CLEAN-WORDLIST", "START"); + + String word; + TreeSet wordset = new TreeSet(); + int count = 0; + try { + BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(wordlist))); + String seps = "' .,:/-&"; + while ((word = br.readLine()) != null) { + word = word.toLowerCase().trim(); + for (int i = 0; i < seps.length(); i++) { + if (word.indexOf(seps.charAt(i)) >= 0) word = word.substring(0, word.indexOf(seps.charAt(i))); + } + if ((word.length() >= minlength) && (word.length() <= maxlength)) wordset.add(word); + count++; + } + br.close(); + + if (wordset.size() != count) { + count = count - wordset.size(); + BufferedWriter bw = new BufferedWriter(new PrintWriter(new FileWriter(wordlist))); + while (wordset.size() > 0) { + word = (String) wordset.first(); + bw.write(word + "\n"); + wordset.remove(word); + } + bw.close(); + serverLog.logInfo("CLEAN-WORDLIST", "shrinked wordlist by " + count + " words."); + } else { + serverLog.logInfo("CLEAN-WORDLIST", 
"not necessary to change wordlist"); + } + } catch (IOException e) { + serverLog.logError("CLEAN-WORDLIST", "ERROR: " + e.getMessage()); + System.exit(-1); + } + + // finished + serverLog.logSystem("CLEAN-WORDLIST", "FINISHED"); + } + + private static void deleteStopwords(String homePath) { + // start up + System.out.println(copyright); + System.out.println(hline); + serverLog.logSystem("DELETE-STOPWORDS", "START"); + + Properties config = configuration("DELETE-STOPWORDS", homePath); + File dbRoot = new File(homePath, config.getProperty("dbPath")); + + // load stopwords + HashSet stopwords = loadWordSet(new File(homePath, "yacy.stopwords")); + serverLog.logInfo("DELETE-STOPWORDS", "loaded stopwords, " + stopwords.size() + " entries in list, starting scanning"); + + // find all hashes + File f; + String w; + int count = 0; + long thisamount, totalamount = 0; + Iterator i = stopwords.iterator(); + while (i.hasNext()) { + w = (String) i.next(); + f = plasmaWordIndexEntity.wordHash2path(dbRoot, plasmaWordIndexEntry.word2hash(w)); + if (f.exists()) { + thisamount = f.length(); + if (f.delete()) { + count++; + totalamount += thisamount; + serverLog.logInfo("DELETE-STOPWORDS", "deleted index for word '" + w + "', " + thisamount + " bytes"); + } + } + } + + serverLog.logInfo("DELETE-STOPWORDS", "TOTALS: deleted " + count + " indexes; " + (totalamount / 1024) + " kbytes"); + + // finished + serverLog.logSystem("DELETE-STOPWORDS", "FINISHED"); + } + + // application wrapper + public static void main(String args[]) { + String applicationRoot = System.getProperty("user.dir"); + if ((args.length >= 1) && ((args[0].equals("-startup")) || (args[0].equals("-start")))) { + // normal start-up of yacy + if (args.length == 2) applicationRoot= args[1]; + startup(applicationRoot); + } else if ((args.length >= 1) && ((args[0].equals("-shutdown")) || (args[0].equals("-stop")))) { + // normal shutdown of yacy + if (args.length == 2) applicationRoot= args[1]; + shutdown(applicationRoot); + 
} else if ((args.length >= 1) && (args[0].equals("-deletestopwords"))) { + // delete those words in the index that are listed in the stopwords file + if (args.length == 2) applicationRoot= args[1]; + deleteStopwords(applicationRoot); + } else if ((args.length >= 1) && (args[0].equals("-genwordstat"))) { + // this can help to create a stop-word list + // to use this, you need a 'yacy.words' file in the root path + // start this with "java -classpath classes yacy -genwordstat []" + if (args.length == 2) applicationRoot= args[1]; + genWordstat(applicationRoot); + } else if ((args.length == 4) && (args[0].equals("-cleanwordlist"))) { + // this can be used to organize and clean a word-list + // start this with "java -classpath classes yacy -cleanwordlist " + int minlength = Integer.parseInt(args[2]); + int maxlength = Integer.parseInt(args[3]); + cleanwordlist(args[1], minlength, maxlength); + } else { + if (args.length == 1) applicationRoot= args[0]; + startup(applicationRoot); + } + } + +} + +/* + +package de; +import java.io.BufferedReader; +import java.io.InputStreamReader; +import java.io.OutputStreamWriter; +import java.io.Writer; +import java.net.Socket; + +import javax.net.ssl.SSLSocketFactory; + +public class ssltest { + + public static final String TARGET_HTTPS_SERVER = "www.verisign.com"; + public static final int TARGET_HTTPS_PORT = 443; + + public static void main(String[] args) throws Exception { + + Socket socket = SSLSocketFactory.getDefault(). 
+ createSocket(TARGET_HTTPS_SERVER, TARGET_HTTPS_PORT); + try { + Writer out = new OutputStreamWriter( + socket.getOutputStream(), "ISO-8859-1"); + out.write("GET / HTTP/1.1\r\n"); + out.write("Host: " + TARGET_HTTPS_SERVER + ":" + + TARGET_HTTPS_PORT + "\r\n"); + out.write("Agent: SSL-TEST\r\n"); + out.write("\r\n"); + out.flush(); + BufferedReader in = new BufferedReader( + new InputStreamReader(socket.getInputStream(), "ISO-8859-1")); + String line = null; + while ((line = in.readLine()) != null) { + System.out.println(line); + } + } finally { + socket.close(); + } + } +} + +*/ + diff --git a/startYACY.bat b/startYACY.bat new file mode 100644 index 000000000..cd885c50f --- /dev/null +++ b/startYACY.bat @@ -0,0 +1 @@ +@java -classpath classes yacy diff --git a/startYACY.command b/startYACY.command new file mode 100755 index 000000000..44b994508 --- /dev/null +++ b/startYACY.command @@ -0,0 +1,2 @@ +cd `dirname $0` +java -classpath classes -server yacy diff --git a/startYACY.sh b/startYACY.sh new file mode 100755 index 000000000..339f41a9e --- /dev/null +++ b/startYACY.sh @@ -0,0 +1,18 @@ +#!/bin/sh +if [ "$(id -u)" -eq 0 ] # $UID is a bash variable and is unset under a plain POSIX /bin/sh such as dash +then + echo + echo "For security reasons, you should not run this as root!" + echo +else + cd "$(dirname "$0")" || exit 1 # run from the install directory; abort if it cannot be entered + if [ "x$1" != "x-d" ] + then + nohup java -classpath classes yacy >> yacy.log & + echo "YaCy started as daemon process.
View it's activity in yacy.log" + echo "To stop YaCy, please execute stopYACY.sh and wait some seconds" + echo "To administrate YaCy, start your web browser and open http://localhost:8080" + else + java -classpath classes yacy + fi +fi diff --git a/startYACY_noconsole.bat b/startYACY_noconsole.bat new file mode 100644 index 000000000..c01b26b15 --- /dev/null +++ b/startYACY_noconsole.bat @@ -0,0 +1,2 @@ +@javaw -classpath classes yacy +@echo You can close the console safely \ No newline at end of file diff --git a/stopYACY.bat b/stopYACY.bat new file mode 100644 index 000000000..5704cc083 --- /dev/null +++ b/stopYACY.bat @@ -0,0 +1,2 @@ +@java -classpath classes yacy -shutdown + diff --git a/stopYACY.command b/stopYACY.command new file mode 100755 index 000000000..b5e0e9ac6 --- /dev/null +++ b/stopYACY.command @@ -0,0 +1,2 @@ +cd `dirname $0` +java -classpath classes yacy -shutdown diff --git a/stopYACY.sh b/stopYACY.sh new file mode 100755 index 000000000..650ead523 --- /dev/null +++ b/stopYACY.sh @@ -0,0 +1,5 @@ +#!/bin/sh +cd `dirname $0` +java -classpath classes yacy -shutdown +echo "please wait until the YaCy daemon process terminates" +echo "you can monitor this with 'tail -f yacy.log' and 'fuser yacy.log'" \ No newline at end of file diff --git a/superseed.txt b/superseed.txt new file mode 100644 index 000000000..7dd19ed6b --- /dev/null +++ b/superseed.txt @@ -0,0 +1,2 @@ +http://www.yacy.net/yacy/seed.txt +http://www.suma-lab.de/yacy/seed.txt \ No newline at end of file diff --git a/yacy.init b/yacy.init new file mode 100644 index 000000000..a2874c3c5 --- /dev/null +++ b/yacy.init @@ -0,0 +1,406 @@ +### +### YACY Init File +### +# These properties will be loaded upon installation. +# They are used only once for set-up. 
+# If you make changes to this file and want these to make any effect, +# you must delete the httpProxy.conf file in DATA/SETTINGS + +# ---------------------------------------------------------------------------- +# the http service configurations + +# port number of server +port = 8080 + +# shutdown time limit +# this is the time that a peer takes at most for shutdown +# the shutdown-procedure is difficult since there are many +# caches that must be flushed first +# measured in seconds +shutdownWaiting = 120 + +# time-out of client control socket in milliseconds +# since this applies only to the client-proxy connection, +# it can be rather short +# milliseconds +clientTimeout = 8000 + +# maximal number of httpd sessions +# a client may open several connections at one, and the maxSessions value sets +# a limit on the number of concurrent connections +httpdMaxSessions = 150 + +# default root path for the file server +# may be overridden by the htdocs parameter +# users shall be encouraged to use the htdocs path for individual content, +# not this path defined here +htRootPath = htroot +htTemplatePath = htroot/env/templates + +# individual htroot folder +# every user may publicise her/his own web pages +# these pages shall be placed in the path defined here +# the htdocs path shares its content with the htroot path +htDocsPath = DATA/HTDOCS + +# the default files (typically index.html), if no file name is given +# The complete path to this file is created by combination with the rootPath +# you can set a list of defaults, separated by comma +# the first one is priorized +defaultFiles = index.html,default.html,search.html,console.html,control.html,welcome.html,wiki.html,forum.html,blog.html,email.html,content.html,monitor.html,share.html,dir.html,readme.txt + +# virtual host for httpdFileServlet access +# for example http:/// shall access the file servlet and +# return the defaultFile at rootPath +# either way, http:/// denotes the same as http://localhost:/ +# for 
the preconfigured value 'localpeer', the url is: +# http://localpeer/ +fileHost = localpeer + +# root path for message files +messPath = C:/AnomicServer + +# specify the path to the MIME matching file table +mimeConfig = httpd.mime + +# specify the path to message resource file +messConfig = httpd.messages + +# proxy use. This server can also act as an caching proxy. +# to enable that function, set proxy=true +proxy=true + +# a path to the proxy's file cache. +# This will be used if the server is addressed as a proxy +proxyCache = DATA/HTCACHE + +# the proxy's maximum disc cache size in megabytes +# there should be enough space for the browsing load of an internet caffee +# running at 56kbit/s modem speed (this time not unusual) +# during 3 days, 8 hours a day +# necessary space = 3 * 8 * 60 * 60 * 56 / 8 = 604800 KB = ca. 590 MB +# since 600 MB is not much these days (it's below one GB!) +# we recommend using that space +#proxyCacheSize = 600 +#for testing: +proxyCacheSize = 200 + +# the following mime-types are the whitelist for indexing +parseableMime=application/xhtml+xml,text/html,text/plain + +# media extension string +# a comma-separated list of extensions that denote media file formats +# this is important to recognize - tags as not-html reference +# These files will be excluded from indexing +mediaExt=swf,wmv,jpg,jpeg,jpe,rm,mov,mpg,mpeg,mp3,asf,gif,png,avi,zip,rar,sit,hqx,img,dmg,tar,gz,ps,pdf,doc,xls,ppt,ram,bz2,arj,jar,deb,torrent,ogg,iso,bin,ace,tgz,rpm + +# the proxy's and indexing maximum ram cache size in megabytes +ramCacheSize = 12 +# ram cache is partitioned into different separate caches +# we define percentual shares of the complete ram cache +# values here are computed from usage profiles and should not be changed +# it is not computed if the complete sum of percentages computes to 100 +ramCachePercentRWI = 85 +ramCachePercentHTTP = 6 +ramCachePercentLURL = 4 +ramCachePercentDHT = 3 +ramCachePercentMessage = 1 +ramCachePercentWiki = 1 + +# 
Promotion Strings +# These strings appear in the Web Mask of the YACY search client +# Set these Strings to customize your peer and give any message to +# other peer users +promoteSearchPageGreeting = + +# the log level, each for a specific software part +# loglevels are: +# NULL = 0: no output at all +# FAILURE = 1: system-level error, internal cause, critical and not fixable (i.e. inconsistency) +# ERROR = 2: exceptional error, catchable and non-critical (i.e. file error) +# WARNING = 3: uncritical service failure, may require user activity (i.e. input required, wrong authorization) +# SYSTEM = 4: regular system status information (i.e. start-up messages) +# INFO = 5: regular action information (i.e. any httpd request URL) +# DEBUG = 6: in-function status debug output +httpdLoglevel=4 +proxyLoglevel=5 +plasmaLoglevel=5 +yacyLoglevel=5 + +# the path to the PLASMA database, especially the reverse word index +dbPath=DATA/PLASMADB + +# the path to the LISTS files. Most lists are used to filter web content +listsPath=DATA/LISTS + +# the path to the SKINS files.
+skinPath=DATA/SKINS + +# the yellow-list; url's elements +# (the core of an url; like 'yahoo' in 'de.yahoo.com') +# appearing in this list will not get a manipulated user agent string +proxyYellowList=yacy.yellow + +# the black-list; url's appearing in this list will not be loaded; +# instead always a 404 is returned +# all these files will be placed in the listsPath +proxyBlackLists=url.default.black +proxyBlackListsActive=url.default.black +proxyBlackListsShared=url.default.black +proxyCookieBlackList=cookie.default.black +proxyCookieWhiteList=cookie.default.black + +# the blue-list; +# no search result is locally presented that has any word of the bluelist +# in the search words, the url or the url's description +plasmaBlueList=yacy.blue + +# this proxy may in turn again access another proxy +# if you wish to do that, specify it here +# if you want to switch on the proxy use, set remoteProxyUse=true +# remoteProxyNoProxy is a no-proxy pattern list for the remote proxy +remoteProxyHost=192.168.2.2 +remoteProxyPort=4239 +remoteProxyNoProxy=192.*,10.*,127.*,localhost +remoteProxyUse=false +#remoteProxyUse=true + +# the proxy may filter the content of transferred web pages +# this is archieved using a special filtering class that can be +# exchanged like a transformation plug-in +# If you want to do this, you must implement the htmlFilterTransformer +# -Interface and set the name of the implementing class here. 
+# As a default, we use a filtering Transformer that takes a blacklist +# and blocks all text fragments where a word from the blacklist appears +# as the blacklist, we use the search-engine's blue-list +# please see that class as an implementation example for your own transformers +pageTransformerClass=htmlFilterContentTransformer +pageTransformerArg=yacy.blue + +# security settings +# we provide proxy and server security through a 2-stage security gate: +# 1st stage: firewall-like access control through ip filter for clients +# 2nd stage: password settings for proxy, server and server administrators +# by default, these settings are weak to simplify set-up and testing +# every user/administrator shall be encouraged to change these settings +# you can also change them online during run-time on +# http://localhost:8080/ + +# proxyClient: client-ip's that may connect the proxy for proxy service +# if several ip's are allowed then they must be separated by a ',' +# any ip may contain the wildcard-sign '*' +#proxyClient=192.168.0.4 +proxyClient=localhost,127.0.0.1,192.168*,10* + +# serverClient: client-ip's that may connect to the web server, +# thus are allowed to use the search service +# if you set this to another value, search requests from others +# are blocked, but you will also be blocked from using others +# search services. +serverClient=* + +# proxyAccount: a user:password - pair for proxy authentication +# leave empty for no authentication +# example: +#proxyAccount=jim:knopf +proxyAccount= +proxyAccountBase64MD5= + +# serverAccount: a user:password - pair for web server access +# this is the access to the 'public' pages on the server +# should be always open, but you get the option here +# if set to a user:password, you get a conflict with the administration account +# future versions will check if the server is unprotected, +# because the p2p-index-sharing function will use the http server for +# data exchange.
+# example +#serverAccount=dicke:berta +serverAccount= +serverAccountBase64MD5= + +# adminAccount: a user:password - pair for administration of +# settings through the web interface +# should be set to a secret. By default it is without a password +# but you are encouraged to set it to another value on the page +# http://localhost:8080/ +#adminAccount=admin:anomic +adminAccount= +adminAccountBase64MD5= + +# peer-to-peer construction for distributed search +# we have several stages: +# 1st: a file within every distribution that has a list of URL's: +# -> this is the superseed file +# 2nd: the files that can be retrieved by the superseed's URL's +# are called seed list-files. +# -> the seed list-files contain IP/port combinations of running +# AnomicHTTPProxies +# 3rd: the peers that are targeted within the seed files are called superpeers +# 4th: the superpeers hold and share a list of all client/search/crawl peers +# +# some superpeers should be able to create again seed list-files. +# These superpeers must upload their IP or their list of peer-IP's to a +# ftp location to create the seed list-file. +# Everyone who does so should mail his/her new seed location to mcanomic.de +# The seed list-file location will then be included in the superseed file. +# This superseed file is available then at two locations: +# - it is included in every distribution and +# - updated through a specific URL-location +# we see the file name and the url of the superseed here: +superseedFile=superseed.txt +superseedLocation=http://www.yacy.net/superseed.txt + +# if you are running a principal peer, you must update the following variables +# This is an ftp account with all relevant information +# the cycle parameter is the update period in minutes. +# The update is only made if there had been changes in between.
+seedFTPServer= +seedFTPAccount= +seedFTPPassword= +seedFTPPath= +seedURL= + +# alternatively to an FTP account, a peer can also become a principal peer +# if the seed-list can be generated as a file and that file is also accessible from +# the internet. In this case, omit any ftp settings and set this path here. +# if this path stays empty, an ftp account is considered +# however, you must always set a seedURL because it is used to check if the +# file is actually accessible from the internet +seedFilePath= + +# every peer should have a name. indeed, we try to give every peer a unique ID, +# which is necessary for internal organization of the index sharing, but the +# peer's name is purely informal. No function but information is applied. +# please change this at your pleasure +peerName=nameless + +# every peer periodically scans for other peers. you can set the time +# of the period here (minutes) +peerCycle=2 + +# The p2p maintenance can run in either of two online modes: +# - process any job only if we are online, which is technically only the case +# if the proxy is used -> mode 1 +# - process jobs periodically, with periods according to peerCycle -> mode 2 +#onlineMode=1 +onlineMode=2 + +# Debug mode for YACY network: this will trigger that also local ip's are +# accepted as peer addresses +yacyDebugMode=false + +# if the process is running behind a NAT or ROUTER, we cannot easily identify +# the public IP of the process. We can ask a public IP responder, but cannot +# rely on it. Therefore, AnomicHTTPProxy includes its own responder. +# But for the first running peer this is not an option. +# The author uses a DI-604 router, which can be +# asked for the public IP.
If you own a DI-604 as well, please set the +# DI604use to true and put in your router password, it will not be used for any +# other purpose than asking for the IP +#DI604use=true +DI604use=false +DI604pw= + +# each time the proxy starts up, it can trigger the local browser to show the +# status page. This is active by default, to make it easier for first-time +# users to understand what this application does. You can disable browser +# pop-up here or set a different start page, like the search page +# the browser type is optional and works only under certain conditions +#browserPopUpTrigger=false +browserPopUpTrigger=true +#browserPopUpPage=index.html +browserPopUpPage=Status.html +browserPopUpApplication=netscape + +# the proxy saves its own seed information. It is positive for the network if +# the seed does not change its configuration often (or not at all). +# The reason for that is that the seed hash is the target for the +# distributed hash table distribution function. +# The following file will contain the saved seed: +yacyOwnSeedFile=DATA/YACYDB/mySeed.txt +yacyDB=DATA/YACYDB + +# index sharing attributes +# by default, sharing is on.
If you want to use the proxy only for +# local indexing, you may switch this off +allowDistributeIndex=true +allowReceiveIndex=true + +# the frequency is the number of links per minute, that the peer allows +# _every_ other peer to send to this peer +defaultWordReceiveFrequency=100 +defaultLinkReceiveFrequency=30 +# the default may be overridden for each peer individually, these +# settings are only available through the online interface + +# prefetch parameters +# the prefetch depth assigns a specific depth to the prefetch mechanism +# prefetch of 0 means no prefetch; a prefetch of 1 means to prefetch all +# embedded url's, but since embedded image links are loaded by the browser +# this means that only embedded anchors are prefetched additionally +# a prefetch of 2 would result in loading of all images and anchor pages +# of all embedded anchors. Be careful with this value, since even a prefetch +# of 2 would result in hundreds of prefetched urls for each single proxy fill. +proxyPrefetchDepth=0 +proxyStoreHTCache=true + +# From the 'IndexCreate' menu point you can also define a crawling start point. +# The crawling works the same way as the prefetch, but it is possible to +# assign a different crawling depth. +# Be careful with this number. Consider a branching factor of average 20; +# A prefetch-depth of 8 would index 25.600.000.000 pages, maybe the whole WWW. +crawlingDepth=2 +localIndexing=true + +# Filter for crawling; may be used to restrict a crawl to a specific domain +# URL's are only indexed and further crawled if they match this filter +crawlingFilter=.* +crawlingQ=false +storeHTCache=false +storeTXCache=true + +# peers may initiate remote crawling tasks.
+# every peer may allow or disallow to be used as crawling-peer; +# you can also set a maximum crawl depth that can be requested or accepted +# order=parameters for requester; response=parameters for responder +# these values apply only for senior-senior - communication +# The delay value is the number of seconds between two separate orders +crawlOrder=true +crawlOrderDepth=0 +crawlOrderDelay=8 +crawlResponse=true +crawlResponseDepth=0 +crawlResponseDelay=30 + +# indexing-exclusion - rules +# These rules are important to reduce the number of words that are indexed +# We distinguish three different sets of stop-words: +# static - excludes all words given in the file yacy.stopwords from indexing, +# dynamic - excludes all words from indexing which are listed by statistic rules, +# parental - excludes all words from indexing which had been indexed in the parent web page. +xsstopw=true +xdstopw=true +xpstopw=true + + +# performance-settings +# delay-times for permanent loops (milliseconds) +# the idlesleep is the pause that a process sleeps if the last call to the +# process job was without execution of anything; +# the busysleep is the pause after a full job execution +20_dhtdistribution_idlesleep=20000 +20_dhtdistribution_busysleep=5000 +30_peerping_idlesleep=120000 +30_peerping_busysleep=120000 +40_peerseedcycle_idlesleep=1800000 +40_peerseedcycle_busysleep=1200000 +50_localcrawl_idlesleep=5000 +50_localcrawl_busysleep=1000 +60_globalcrawl_idlesleep=60000 +60_globalcrawl_busysleep=5000 +70_cachemanager_idlesleep=10000 +70_cachemanager_busysleep=4000 +80_dequeue_idlesleep=4000 +80_dequeue_busysleep=1000 +90_cleanup_idlesleep=300000 +90_cleanup_busysleep=300000 diff --git a/yacy.stopwords b/yacy.stopwords new file mode 100644 index 000000000..99da3c6bb --- /dev/null +++ b/yacy.stopwords @@ -0,0 +1,796 @@ +aber +about +above +according +across +actually +addition +adj +aehnlich +aenderung +after +afterwards +again +against +aktuell +aktuelle +aktuellen +alignright +all
+alle +allein +allem +allen +aller +allerdings +alles +allg +allgemein +allgemeine +almost +alone +along +already +als +also +alte +alten +although +always +among +amongst +and +andere +anderen +anderes +anders +another +antwort +any +anyhow +anyone +anything +anywhere +are +aren +around +arpa +atop +auch +auf +aus +ausser +ausserdem +back +bald +band +became +because +become +becomes +becoming +been +before +beforehand +begin +beginn +beginning +behind +bei +beide +beiden +beim +being +bekannt +bekommen +below +bereich +bereichen +bereits +berichtet +beside +besides +besonders +besser +besteht +besten +besuchen +between +bevor +beyond +bieten +bietet +bis +bisher +bislang +bitte +bleiben +bleibt +blieb +both +bringen +but +buy +bzw +can +cannot +center +copy +could +couldn +dabei +dadurch +dafuer +dagegen +daher +damals +damit +dann +daran +darauf +daraus +darf +darueber +darum +das +dass +date +datum +davon +davor +dazu +deine +dem +den +denen +denn +dennoch +der +derem +deren +derzeit +des +deshalb +dessen +deutlich +deutsch +deutsche +did +didn +die +dies +diese +diesem +diesen +dieser +dieses +direkt +doch +document +does +doesn +don +dort +down +dran +drauf +due +duerfen +duerfte +durch +during +each +eben +ebenfalls +ebenso +eher +eigene +eigenen +eigentlich +ein +eine +einem +einen +einer +eines +einfach +einige +einigen +einmal +either +else +elsewhere +end +ende +ending +enough +erhalten +erklaert +erklaerte +erneut +erreicht +erst +erste +erstellt +ersten +erstmals +erwartet +etc +etwa +etwas +euch +even +ever +every +everyone +everything +everywhere +except +fast +fest +few +find +finden +findet +firma +first +folgende +folgenden +for +form +former +formerly +forty +found +free +from +fuehren +fuehrt +fuer +further +ganz +ganze +ganzem +ganzen +ganzer +ganzes +gar +gebe +geben +gebracht +gefunden +gegeben +gegen +gegenueber +gehen +gehoeren +gehoert +geht +gekommen +gemacht +genau +gerade +gestellt +gestern +get +gewesen +geworden +gibt +gif +gilt +ging 
+gleich +going +got +groessten +grosse +grossen +gute +habe +haben +had +haelfte +haelt +haette +haetten +hallo +halten +has +hasn +hat +hatte +hatten +have +haven +having +heisst +help +hence +her +here +hereafter +hereby +herein +hereupon +hers +herself +heute +hier +hiess +hilfe +him +himself +hinter +hinweise +his +hohen +how +however +ich +ihm +ihnen +ihr +ihre +ihrem +ihren +ihrer +ihres +immer +inc +indeed +innerhalb +ins +insgesamt +instead +int +into +inzwischen +isn +ist +its +itself +jede +jedem +jeden +jeder +jedes +jedoch +jene +jenem +jenen +jener +jenes +jetzt +jeweils +just +kann +kaum +kein +keine +keinem +keinen +keiner +keines +klar +kleine +kleinen +klicken +knapp +know +koenne +koennen +koennte +koennten +kommen +kommenden +kommt +konnte +konnten +kuenftig +laendern +laengst +laesst +lage +land +lang +lange +lassen +last +later +latter +laut +least +leben +lediglich +leicht +leider +less +let +letzte +letzten +liegen +liegt +liess +like +likely +links +ltd +machen +macht +machte +made +make +makes +many +mark +may +maybe +meantime +meanwhile +mehr +mehrere +mein +meine +meiner +meint +meist +meisten +met +mich +might +miss +mit +mitte +moechte +moechten +moeglich +monaten +more +moreover +most +mostly +mrs +much +muesse +muessen +muesste +muessten +muss +musste +must +myself +nach +nachdem +naechsten +name +namely +natuerlich +nbsp +neben +nehmen +neither +neu +neue +neuem +neuen +neuer +neues +never +next +nicht +nichts +nimmt +nobody +noch +non +none +noone +nor +not +nothing +now +nowhere +nun +nur +oder +off +offenbar +often +ohne +once +online +only +onto +orthopoint +other +others +otherwise +our +ours +ourselves +out +over +overall +own +paar +page +per +perhaps +post +privat +private +problem +prof +put +quite +rather +recent +recently +recht +relevant +relevante +relevanz +reserved +ring +rund +sache +sagen +sagt +sagte +same +scheint +schnell +schon +schwer +see +seem +seemed +seeming +seems +sehen +sehr +sei +seien +sein +seine 
+seinem +seinen +seiner +seines +seit +seite +seiten +selbst +service +setzt +several +she +should +shouldn +sich +sicher +sie +sieht +since +sind +site +sodass +sofort +sogar +solch +solche +solchem +solchen +solcher +solches +soll +sollen +sollte +sollten +some +somehow +someone +something +sometime +sometimes +somewhere +sondern +sonst +soviel +sowie +sowohl +spaeter +spiel +spielen +stand +stark +start +startseite +statt +stehen +steht +stellen +stellt +still +stop +stueck +subject +such +suche +suchen +taking +teil +text +than +that +the +their +them +thema +themselves +then +thence +there +thereafter +thereby +therefore +therein +thereupon +these +they +this +those +though +through +throughout +thru +thus +tipps +tips +titel +title +too +top +toward +towards +trotz +try +ueber +ueberblick +ueberhaupt +uebersicht +und +under +unless +unlike +unlikely +uns +unser +unsere +unserem +unseren +unserer +unseres +unter +until +untitled +upon +use +used +using +version +versucht +verwendet +very +via +viel +viele +vielen +vieles +vielleicht +voellig +vom +von +vor +vorhanden +waehrend +waere +wann +want +war +waren +warum +was +wasn +wegen +weil +weiss +weit +weiter +weitere +weiteren +weiterhin +welche +welcome +well +wenig +wenige +weniger +wenn +went +wer +werde +werden +were +weren +werk +weshalb +wessen +what +whatever +when +whence +whenever +where +whereafter +whereas +whereby +wherein +whereupon +wherever +whether +which +while +whither +who +whoever +whole +whom +whomever +whose +why +wie +wieder +wieso +wieviel +will +wir +wird +wirklich +with +within +without +wohl +wolle +wollen +wollte +wollten +worden +would +wouldn +wuerde +wuerden +wurde +wurden +yet +you +your +yours +yourself +yourselves +zeigen +zeigt +zeit +zubehoer +zuletzt +zum +zumindest +zunaechst +zur +zurueck +zusammen +zuvor +zwar +zweite +zweiten +zwischen diff --git a/yacy.yellow b/yacy.yellow new file mode 100644 index 000000000..c52b9b878 --- /dev/null +++ b/yacy.yellow @@ -0,0 +1,8 @@ 
+# proxy yellow list +# if any keyword in this list appears in a domain name, +# then the proxy passes the client's user agent to the domain's server +google +yahoo +heise +ebay +stern \ No newline at end of file