*) some minor changes for better code readability

*) added more SVN properties

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6787 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
low012 15 years ago
parent 7a3c19846f
commit b97ad0f380

@ -85,6 +85,7 @@ public class CrawlProfile {
this.profileTable = null; this.profileTable = null;
} }
@Override
public void finalize() { public void finalize() {
this.close(); this.close();
} }
@ -319,6 +320,7 @@ public class CrawlProfile {
doms = new ConcurrentHashMap<String, DomProfile>(); doms = new ConcurrentHashMap<String, DomProfile>();
} }
@Override
public String toString() { public String toString() {
final StringBuilder str = new StringBuilder(); final StringBuilder str = new StringBuilder();

@ -535,6 +535,7 @@ public class CrawlQueues {
return System.currentTimeMillis() - start; return System.currentTimeMillis() - start;
} }
@Override
public void run() { public void run() {
try { try {
// checking robots.txt for http(s) resources // checking robots.txt for http(s) resources

@ -4,9 +4,9 @@
// //
// This is a part of YaCy, a peer-to-peer based web search engine // This is a part of YaCy, a peer-to-peer based web search engine
// //
// $LastChangedDate: 2009-05-28 01:51:34 +0200 (Do, 28 Mai 2009) $ // $LastChangedDate$
// $LastChangedRevision: 5988 $ // $LastChangedRevision$
// $LastChangedBy: orbiter $ // $LastChangedBy$
// //
// LICENSE // LICENSE
// //

@ -30,7 +30,7 @@ public class ImporterManager {
final Thread[] importThreads = new Thread[this.runningJobs.activeCount()*2]; final Thread[] importThreads = new Thread[this.runningJobs.activeCount()*2];
final int activeCount = this.runningJobs.enumerate(importThreads); final int activeCount = this.runningJobs.enumerate(importThreads);
final Importer[] importers = new Importer[activeCount]; final Importer[] importers = new Importer[activeCount];
for (int i=0; i<activeCount; i++) { for (int i = 0; i < activeCount; i++) {
importers[i] = (Importer) importThreads[i]; importers[i] = (Importer) importThreads[i];
} }
return importers; return importers;
@ -43,10 +43,9 @@ public class ImporterManager {
public Importer getImporterByID(final int jobID) { public Importer getImporterByID(final int jobID) {
final Thread[] importThreads = new Thread[this.runningJobs.activeCount()*2]; final Thread[] importThreads = new Thread[this.runningJobs.activeCount()*2];
final int activeCount = this.runningJobs.enumerate(importThreads);
for (int i=0; i < activeCount; i++) { for(final Thread importThread : importThreads) {
final Importer currThread = (Importer) importThreads[i]; final Importer currThread = (Importer) importThread;
if (currThread.getJobID() == jobID) { if (currThread.getJobID() == jobID) {
return currThread; return currThread;
} }
@ -73,8 +72,7 @@ public class ImporterManager {
try { try {
// trying to gracefully stop all still running sessions ... // trying to gracefully stop all still running sessions ...
log.logInfo("Signaling shutdown to " + threadCount + " remaining dbImporter threads ..."); log.logInfo("Signaling shutdown to " + threadCount + " remaining dbImporter threads ...");
for ( int currentThreadIdx = 0; currentThreadIdx < threadCount; currentThreadIdx++ ) { for (final Thread currentThread : threadList) {
final Thread currentThread = threadList[currentThreadIdx];
if (currentThread.isAlive()) { if (currentThread.isAlive()) {
((Importer)currentThread).stopIt(); ((Importer)currentThread).stopIt();
} }
@ -89,10 +87,10 @@ public class ImporterManager {
// we need to use a timeout here because of missing interruptable session threads ... // we need to use a timeout here because of missing interruptable session threads ...
if (log.isFine()) log.logFine("Waiting for " + runningJobs.activeCount() + " remaining dbImporter threads to finish shutdown ..."); if (log.isFine()) log.logFine("Waiting for " + runningJobs.activeCount() + " remaining dbImporter threads to finish shutdown ...");
for ( int currentThreadIdx = 0; currentThreadIdx < threadCount; currentThreadIdx++ ) { int currentThreadIdx = 0;
final Thread currentThread = threadList[currentThreadIdx]; for (final Thread currentThread : threadList) {
if (currentThread.isAlive()) { if (currentThread.isAlive()) {
if (log.isFine()) log.logFine("Waiting for dbImporter thread '" + currentThread.getName() + "' [" + currentThreadIdx + "] to finish shutdown."); if (log.isFine()) log.logFine("Waiting for dbImporter thread '" + currentThread.getName() + "' [" + currentThreadIdx++ + "] to finish shutdown.");
try { currentThread.join(500); } catch (final InterruptedException ex) {} try { currentThread.join(500); } catch (final InterruptedException ex) {}
} }
} }

@ -4,9 +4,9 @@
// //
// This is a part of YaCy, a peer-to-peer based web search engine // This is a part of YaCy, a peer-to-peer based web search engine
// //
// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $ // $LastChangedDate$
// $LastChangedRevision: 1986 $ // $LastChangedRevision$
// $LastChangedBy: orbiter $ // $LastChangedBy$
// //
// LICENSE // LICENSE
// //

@ -5,9 +5,9 @@
// first published on http://yacy.net // first published on http://yacy.net
// Frankfurt, Germany, 2004 // Frankfurt, Germany, 2004
// //
// $LastChangedDate: 2008-03-16 23:31:54 +0100 (So, 16 Mrz 2008) $ // $LastChangedDate$
// $LastChangedRevision: 4575 $ // $LastChangedRevision$
// $LastChangedBy: orbiter $ // $LastChangedBy$
// //
// This program is free software; you can redistribute it and/or modify // This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by // it under the terms of the GNU General Public License as published by

@ -28,12 +28,11 @@
package de.anomic.crawler; package de.anomic.crawler;
import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.Date; import java.util.Date;
import java.util.Iterator;
import java.util.LinkedHashMap; import java.util.LinkedHashMap;
import java.util.LinkedList; import java.util.LinkedList;
import java.util.List;
import java.util.Map; import java.util.Map;
import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.data.meta.DigestURI;
@ -53,7 +52,7 @@ public class RobotsEntry {
// this is a simple record structure that holds all properties of a single crawl start // this is a simple record structure that holds all properties of a single crawl start
private Map<String, byte[]> mem; private Map<String, byte[]> mem;
private LinkedList<String> allowPathList, denyPathList; private List<String> allowPathList, denyPathList;
String hostName; String hostName;
public RobotsEntry(final String hostName, final Map<String, byte[]> mem) { public RobotsEntry(final String hostName, final Map<String, byte[]> mem) {
@ -88,8 +87,8 @@ public class RobotsEntry {
public RobotsEntry( public RobotsEntry(
final DigestURI theURL, final DigestURI theURL,
final ArrayList<String> allowPathList, final List<String> allowPathList,
final ArrayList<String> disallowPathList, final List<String> disallowPathList,
final Date loadedDate, final Date loadedDate,
final Date modDate, final Date modDate,
final String eTag, final String eTag,
@ -114,8 +113,8 @@ public class RobotsEntry {
this.allowPathList.addAll(allowPathList); this.allowPathList.addAll(allowPathList);
final StringBuilder pathListStr = new StringBuilder(allowPathList.size() * 30); final StringBuilder pathListStr = new StringBuilder(allowPathList.size() * 30);
for (int i=0; i<allowPathList.size();i++) { for (String element : allowPathList) {
pathListStr.append(allowPathList.get(i)) pathListStr.append(element)
.append(ROBOTS_DB_PATH_SEPARATOR); .append(ROBOTS_DB_PATH_SEPARATOR);
} }
this.mem.put(ALLOW_PATH_LIST, pathListStr.substring(0,pathListStr.length()-1).getBytes()); this.mem.put(ALLOW_PATH_LIST, pathListStr.substring(0,pathListStr.length()-1).getBytes());
@ -125,8 +124,8 @@ public class RobotsEntry {
this.denyPathList.addAll(disallowPathList); this.denyPathList.addAll(disallowPathList);
final StringBuilder pathListStr = new StringBuilder(disallowPathList.size() * 30); final StringBuilder pathListStr = new StringBuilder(disallowPathList.size() * 30);
for (int i=0; i<disallowPathList.size();i++) { for (String element : disallowPathList) {
pathListStr.append(disallowPathList.get(i)) pathListStr.append(element)
.append(ROBOTS_DB_PATH_SEPARATOR); .append(ROBOTS_DB_PATH_SEPARATOR);
} }
this.mem.put(DISALLOW_PATH_LIST,pathListStr.substring(0, pathListStr.length()-1).getBytes()); this.mem.put(DISALLOW_PATH_LIST,pathListStr.substring(0, pathListStr.length()-1).getBytes());
@ -138,6 +137,7 @@ public class RobotsEntry {
return this.mem; return this.mem;
} }
@Override
public String toString() { public String toString() {
final StringBuilder str = new StringBuilder(6000); final StringBuilder str = new StringBuilder(6000);
str.append((this.hostName == null) ? "null" : this.hostName).append(": "); str.append((this.hostName == null) ? "null" : this.hostName).append(": ");
@ -198,12 +198,10 @@ public class RobotsEntry {
// escaping all occurrences of ; because this char is used as special char in the Robots DB // escaping all occurrences of ; because this char is used as special char in the Robots DB
else path = path.replaceAll(ROBOTS_DB_PATH_SEPARATOR,"%3B"); else path = path.replaceAll(ROBOTS_DB_PATH_SEPARATOR,"%3B");
final Iterator<String> pathIter = this.denyPathList.iterator(); for (String element : this.denyPathList) {
while (pathIter.hasNext()) {
final String nextPath = pathIter.next();
// disallow rule // disallow rule
if (path.startsWith(nextPath)) { if (path.startsWith(element)) {
return true; return true;
} }
} }

@ -4,9 +4,9 @@
// //
// This is a part of YaCy, a peer-to-peer based web search engine // This is a part of YaCy, a peer-to-peer based web search engine
// //
// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $ // $LastChangedDate$
// $LastChangedRevision: 1986 $ // $LastChangedRevision$
// $LastChangedBy: orbiter $ // $LastChangedBy$
// //
// LICENSE // LICENSE
// //

Loading…
Cancel
Save