@ -752,12 +752,12 @@ public final class Fulltext {
}
}
// export methods
// export methods
public Export export ( final File f , final String filter , final int format , final boolean dom ) {
public Export export ( final File f , final String filter , final String query , final int format , final boolean dom ) {
if ( ( this . exportthread ! = null ) & & ( this . exportthread . isAlive ( ) ) ) {
if ( ( this . exportthread ! = null ) & & ( this . exportthread . isAlive ( ) ) ) {
ConcurrentLog . warn ( "LURL-EXPORT" , "cannot start another export thread, already one running" ) ;
ConcurrentLog . warn ( "LURL-EXPORT" , "cannot start another export thread, already one running" ) ;
return this . exportthread ;
return this . exportthread ;
}
}
this . exportthread = new Export ( f , filter , format, dom ) ;
this . exportthread = new Export ( f , filter , query, format, dom ) ;
this . exportthread . start ( ) ;
this . exportthread . start ( ) ;
return this . exportthread ;
return this . exportthread ;
}
}
@ -770,14 +770,15 @@ public final class Fulltext {
private final File f ;
private final File f ;
private final Pattern pattern ;
private final Pattern pattern ;
private int count ;
private int count ;
private String failure ;
private String failure , query ;
private final int format ;
private final int format ;
private final boolean dom ;
private final boolean dom ;
private Export ( final File f , final String filter , final int format , boolean dom ) {
private Export ( final File f , final String filter , final String query , final int format , boolean dom ) {
// format: 0=text, 1=html, 2=rss/xml
// format: 0=text, 1=html, 2=rss/xml
this . f = f ;
this . f = f ;
this . pattern = filter = = null ? null : Pattern . compile ( filter ) ;
this . pattern = filter = = null ? null : Pattern . compile ( filter ) ;
this . query = query = = null ? "*:*" : query ;
this . count = 0 ;
this . count = 0 ;
this . failure = null ;
this . failure = null ;
this . format = format ;
this . format = format ;
@ -806,7 +807,7 @@ public final class Fulltext {
if ( this . dom ) {
if ( this . dom ) {
Map < String , ReversibleScoreMap < String > > scores = Fulltext . this . getDefaultConnector ( ) . getFacets ( CollectionSchema . httpstatus_i . getSolrFieldName ( ) + ":200" , 100000000 , CollectionSchema . host_s . getSolrFieldName ( ) ) ;
Map < String , ReversibleScoreMap < String > > scores = Fulltext . this . getDefaultConnector ( ) . getFacets ( this . query + " AND " + CollectionSchema . httpstatus_i . getSolrFieldName ( ) + ":200" , 100000000 , CollectionSchema . host_s . getSolrFieldName ( ) ) ;
ReversibleScoreMap < String > stats = scores . get ( CollectionSchema . host_s . getSolrFieldName ( ) ) ;
ReversibleScoreMap < String > stats = scores . get ( CollectionSchema . host_s . getSolrFieldName ( ) ) ;
for ( final String host : stats ) {
for ( final String host : stats ) {
if ( this . pattern ! = null & & ! this . pattern . matcher ( host ) . matches ( ) ) continue ;
if ( this . pattern ! = null & & ! this . pattern . matcher ( host ) . matches ( ) ) continue ;
@ -815,21 +816,19 @@ public final class Fulltext {
this . count + + ;
this . count + + ;
}
}
} else {
} else {
BlockingQueue < SolrDocument > docs = Fulltext . this . getDefaultConnector ( ) . concurrentDocumentsByQuery ( CollectionSchema . httpstatus_i . getSolrFieldName ( ) + ":200" , 0 , 100000000 , 10 * 60 * 60 * 1000 , 100 ,
BlockingQueue < SolrDocument > docs = Fulltext . this . getDefaultConnector ( ) . concurrentDocumentsByQuery ( this . query + " AND " + CollectionSchema . httpstatus_i . getSolrFieldName ( ) + ":200" , 0 , 100000000 , 10 * 60 * 60 * 1000 , 100 ,
CollectionSchema . id . getSolrFieldName ( ) , CollectionSchema . sku . getSolrFieldName ( ) , CollectionSchema . title . getSolrFieldName ( ) ,
CollectionSchema . id . getSolrFieldName ( ) , CollectionSchema . sku . getSolrFieldName ( ) , CollectionSchema . title . getSolrFieldName ( ) ,
CollectionSchema . author . getSolrFieldName ( ) , CollectionSchema . description_txt . getSolrFieldName ( ) , CollectionSchema . size_i . getSolrFieldName ( ) , CollectionSchema . last_modified . getSolrFieldName ( ) ) ;
CollectionSchema . author . getSolrFieldName ( ) , CollectionSchema . description_txt . getSolrFieldName ( ) , CollectionSchema . size_i . getSolrFieldName ( ) , CollectionSchema . last_modified . getSolrFieldName ( ) ) ;
SolrDocument doc ;
SolrDocument doc ;
ArrayList < ? > title ;
String url , hash , title , author , description ;
String url , author , hash ;
String [ ] descriptions ;
Integer size ;
Integer size ;
Date date ;
Date date ;
while ( ( doc = docs . take ( ) ) ! = AbstractSolrConnector . POISON_DOCUMENT ) {
while ( ( doc = docs . take ( ) ) ! = AbstractSolrConnector . POISON_DOCUMENT ) {
hash = ( String ) doc . getFieldValue ( CollectionSchema . id . getSolrFieldName ( ) ) ;
hash = getStringFrom ( doc . getFieldValue ( CollectionSchema . id . getSolrFieldName ( ) ) ) ;
url = ( String ) doc . getFieldValue ( CollectionSchema . sku . getSolrFieldName ( ) ) ;
url = getStringFrom ( doc . getFieldValue ( CollectionSchema . sku . getSolrFieldName ( ) ) ) ;
title = ( ArrayList < ? > ) doc . getFieldValue ( CollectionSchema . title . getSolrFieldName ( ) ) ;
title = getStringFrom ( doc . getFieldValue ( CollectionSchema . title . getSolrFieldName ( ) ) ) ;
author = ( String ) doc . getFieldValue ( CollectionSchema . author . getSolrFieldName ( ) ) ;
author = getStringFrom ( doc . getFieldValue ( CollectionSchema . author . getSolrFieldName ( ) ) ) ;
description s = ( String [ ] ) doc . getFieldValue ( CollectionSchema . description_txt . getSolrFieldName ( ) ) ;
description = getStringFrom ( doc . getFieldValue ( CollectionSchema . description_txt . getSolrFieldName ( ) ) ) ;
size = ( Integer ) doc . getFieldValue ( CollectionSchema . size_i . getSolrFieldName ( ) ) ;
size = ( Integer ) doc . getFieldValue ( CollectionSchema . size_i . getSolrFieldName ( ) ) ;
date = ( Date ) doc . getFieldValue ( CollectionSchema . last_modified . getSolrFieldName ( ) ) ;
date = ( Date ) doc . getFieldValue ( CollectionSchema . last_modified . getSolrFieldName ( ) ) ;
if ( this . pattern ! = null & & ! this . pattern . matcher ( url ) . matches ( ) ) continue ;
if ( this . pattern ! = null & & ! this . pattern . matcher ( url ) . matches ( ) ) continue ;
@ -837,16 +836,14 @@ public final class Fulltext {
pw . println ( url ) ;
pw . println ( url ) ;
}
}
if ( this . format = = 1 ) {
if ( this . format = = 1 ) {
if ( title ! = null ) pw . println ( "<a href=\"" + MultiProtocolURL . escape ( url ) + "\">" + CharacterCoding . unicode2xml ( ( String ) title . iterator ( ) . next ( ) , true ) + "</a>" ) ;
if ( title ! = null ) pw . println ( "<a href=\"" + MultiProtocolURL . escape ( url ) + "\">" + CharacterCoding . unicode2xml ( title , true ) + "</a>" ) ;
}
}
if ( this . format = = 2 ) {
if ( this . format = = 2 ) {
pw . println ( "<item>" ) ;
pw . println ( "<item>" ) ;
if ( title ! = null ) pw . println ( "<title>" + CharacterCoding . unicode2xml ( ( String ) title . iterator ( ) . next ( ) , true ) + "</title>" ) ;
if ( title ! = null ) pw . println ( "<title>" + CharacterCoding . unicode2xml ( title , true ) + "</title>" ) ;
pw . println ( "<link>" + MultiProtocolURL . escape ( url ) + "</link>" ) ;
pw . println ( "<link>" + MultiProtocolURL . escape ( url ) + "</link>" ) ;
if ( author ! = null & & ! author . isEmpty ( ) ) pw . println ( "<author>" + CharacterCoding . unicode2xml ( author , true ) + "</author>" ) ;
if ( author ! = null & & ! author . isEmpty ( ) ) pw . println ( "<author>" + CharacterCoding . unicode2xml ( author , true ) + "</author>" ) ;
if ( descriptions ! = null & & descriptions . length > 0 ) {
if ( description ! = null & & ! description . isEmpty ( ) ) pw . println ( "<description>" + CharacterCoding . unicode2xml ( description , true ) + "</description>" ) ;
for ( String d : descriptions ) pw . println ( "<description>" + CharacterCoding . unicode2xml ( d , true ) + "</description>" ) ;
}
if ( date ! = null ) pw . println ( "<pubDate>" + HeaderFramework . formatRFC1123 ( date ) + "</pubDate>" ) ;
if ( date ! = null ) pw . println ( "<pubDate>" + HeaderFramework . formatRFC1123 ( date ) + "</pubDate>" ) ;
if ( size ! = null ) pw . println ( "<yacy:size>" + size . intValue ( ) + "</yacy:size>" ) ;
if ( size ! = null ) pw . println ( "<yacy:size>" + size . intValue ( ) + "</yacy:size>" ) ;
pw . println ( "<guid isPermaLink=\"false\">" + hash + "</guid>" ) ;
pw . println ( "<guid isPermaLink=\"false\">" + hash + "</guid>" ) ;
@ -885,6 +882,13 @@ public final class Fulltext {
return this . count ;
return this . count ;
}
}
@SuppressWarnings ( "unchecked" )
private String getStringFrom ( final Object o ) {
if ( o = = null ) return "" ;
if ( o instanceof ArrayList ) return ( ( ArrayList < String > ) o ) . get ( 0 ) ;
return ( String ) o ;
}
}
}
public Iterator < HostStat > statistics ( int count , final ScoreMap < String > domainScore ) {
public Iterator < HostStat > statistics ( int count , final ScoreMap < String > domainScore ) {