@ -35,6 +35,7 @@ import net.yacy.cora.lod.vocabulary.YaCyMetadata;
import net.yacy.cora.protocol.RequestHeader ;
import net.yacy.cora.protocol.RequestHeader ;
import net.yacy.document.LibraryProvider ;
import net.yacy.document.LibraryProvider ;
import net.yacy.kelondro.data.meta.DigestURI ;
import net.yacy.kelondro.data.meta.DigestURI ;
import net.yacy.kelondro.data.meta.URIMetadataRow ;
import net.yacy.kelondro.logging.Log ;
import net.yacy.kelondro.logging.Log ;
import net.yacy.search.Switchboard ;
import net.yacy.search.Switchboard ;
import net.yacy.search.SwitchboardConstants ;
import net.yacy.search.SwitchboardConstants ;
@ -50,44 +51,69 @@ public class Vocabulary_p {
Collection < Tagging > vocs = LibraryProvider . autotagging . getVocabularies ( ) ;
Collection < Tagging > vocs = LibraryProvider . autotagging . getVocabularies ( ) ;
String vocabularyName = ( post = = null ) ? null : post . get ( "vocabulary" , null ) ;
String vocabularyName = ( post = = null ) ? null : post . get ( "vocabulary" , null ) ;
String discovername = ( post = = null ) ? null : post . get ( "discovername" , null ) ;
Tagging vocabulary = vocabularyName = = null ? null : LibraryProvider . autotagging . getVocabulary ( vocabularyName ) ;
Tagging vocabulary = vocabularyName = = null ? null : LibraryProvider . autotagging . getVocabulary ( vocabularyName ) ;
if ( vocabulary = = null ) vocabularyName = null ;
if ( vocabulary = = null ) vocabularyName = null ;
int count = 0 ;
for ( Tagging v : vocs ) {
prop . put ( "vocabularyset_" + count + "_name" , v . getName ( ) ) ;
prop . put ( "vocabularyset_" + count + "_selected" , ( vocabularyName ! = null & & vocabularyName . equals ( v . getName ( ) ) ) ? 1 : 0 ) ;
count + + ;
}
prop . put ( "vocabularyset" , count ) ;
if ( post ! = null ) {
if ( post ! = null ) {
try {
try {
if ( vocabulary = = null ) {
if ( vocabulary = = null ) {
// create a vocabulary
// create a vocabulary
String discovername = post . get ( "discovername" , "" ) ;
if ( discovername ! = null & & discovername . length ( ) > 0 ) {
if ( discovername . length ( ) > 0 ) {
String discoverobjectspace = post . get ( "discoverobjectspace" , "" ) ;
String discoverobjectspace = post . get ( "discoverobjectspace" , "" ) ;
MultiProtocolURI discoveruri = null ;
MultiProtocolURI discoveruri = null ;
if ( discoverobjectspace . length ( ) > 0 ) try { discoveruri = new MultiProtocolURI ( discoverobjectspace ) ; } catch ( MalformedURLException e ) { }
if ( discoverobjectspace . length ( ) > 0 ) try { discoveruri = new MultiProtocolURI ( discoverobjectspace ) ; } catch ( MalformedURLException e ) { }
if ( discoveruri = = null ) discoverobjectspace = "" ;
if ( discoveruri = = null ) discoverobjectspace = "" ;
Map < String , Tagging . SOTuple > table = new TreeMap < String , Tagging . SOTuple > ( ) ;
Map < String , Tagging . SOTuple > table = new TreeMap < String , Tagging . SOTuple > ( ) ;
File propFile = LibraryProvider . autotagging . getVocabularyFile ( discovername ) ;
File propFile = LibraryProvider . autotagging . getVocabularyFile ( discovername ) ;
boolean discoverFromPath = post . get ( "discovermethod" , "" ) . equals ( "path" ) ;
boolean discoverFromTitle = post . get ( "discovermethod" , "" ) . equals ( "title" ) ;
boolean discoverFromTitleSplitted = post . get ( "discovermethod" , "" ) . equals ( "titlesplitted" ) ;
boolean discoverFromAuthor = post . get ( "discovermethod" , "" ) . equals ( "author" ) ;
if ( discoveruri ! = null ) {
if ( discoveruri ! = null ) {
String segmentName = sb . getConfig ( SwitchboardConstants . SEGMENT_PUBLIC , "default" ) ;
String segmentName = sb . getConfig ( SwitchboardConstants . SEGMENT_PUBLIC , "default" ) ;
Segment segment = sb . indexSegments . segment ( segmentName ) ;
Segment segment = sb . indexSegments . segment ( segmentName ) ;
Iterator < DigestURI > ui = segment . urlSelector ( discoveruri ) ;
Iterator < DigestURI > ui = segment . urlSelector ( discoveruri ) ;
String t ;
while ( ui . hasNext ( ) ) {
while ( ui . hasNext ( ) ) {
DigestURI u = ui . next ( ) ;
DigestURI u = ui . next ( ) ;
String u0 = u . toNormalform ( true , false ) ;
String u0 = u . toNormalform ( true , false ) ;
String t = u0 . substring ( discoverobjectspace . length ( ) ) ;
t = "" ;
if ( t . indexOf ( '/' ) > = 0 ) continue ;
if ( discoverFromPath ) {
int p = t . indexOf ( '.' ) ;
t = u0 . substring ( discoverobjectspace . length ( ) ) ;
if ( p > = 0 ) t = t . substring ( 0 , p ) ;
if ( t . indexOf ( '/' ) > = 0 ) continue ;
while ( ( p = t . indexOf ( ':' ) ) > = 0 ) t = t . substring ( p + 1 ) ;
int p = t . indexOf ( '.' ) ;
while ( ( p = t . indexOf ( '=' ) ) > = 0 ) t = t . substring ( p + 1 ) ;
if ( p > = 0 ) t = t . substring ( 0 , p ) ;
if ( p > = 0 ) t = t . substring ( p + 1 ) ;
while ( ( p = t . indexOf ( ':' ) ) > = 0 ) t = t . substring ( p + 1 ) ;
while ( ( p = t . indexOf ( '=' ) ) > = 0 ) t = t . substring ( p + 1 ) ;
if ( p > = 0 ) t = t . substring ( p + 1 ) ;
}
if ( discoverFromTitle | | discoverFromTitleSplitted ) {
URIMetadataRow m = segment . urlMetadata ( ) . load ( u . hash ( ) ) ;
if ( m ! = null ) t = m . dc_title ( ) ;
}
if ( discoverFromAuthor ) {
URIMetadataRow m = segment . urlMetadata ( ) . load ( u . hash ( ) ) ;
if ( m ! = null ) t = m . dc_creator ( ) ;
}
t = t . replaceAll ( "\"" , " " ) . replaceAll ( "'" , " " ) . replaceAll ( "," , " " ) . replaceAll ( " " , " " ) . trim ( ) ;
if ( t . length ( ) = = 0 ) continue ;
if ( t . length ( ) = = 0 ) continue ;
table . put ( t , new Tagging . SOTuple ( "" , u0 ) ) ;
if ( discoverFromTitleSplitted ) {
String [ ] ts = t . split ( " " ) ;
for ( String s : ts ) {
if ( s . length ( ) = = 0 ) continue ;
table . put ( s , new Tagging . SOTuple ( "" , u0 ) ) ;
}
} else if ( discoverFromAuthor ) {
String [ ] ts = t . split ( ";" ) ; // author names are often separated by ';'
for ( String s : ts ) {
if ( s . length ( ) = = 0 ) continue ;
int p = s . indexOf ( ',' ) ; // check if there is a reversed method to mention the name
if ( p > = 0 ) s = s . substring ( p + 1 ) . trim ( ) + " " + s . substring ( 0 , p ) . trim ( ) ;
table . put ( s , new Tagging . SOTuple ( "" , u0 ) ) ;
}
} else {
table . put ( t , new Tagging . SOTuple ( "" , u0 ) ) ;
}
}
}
}
}
Tagging newvoc = new Tagging ( discovername , propFile , discoverobjectspace , table ) ;
Tagging newvoc = new Tagging ( discovername , propFile , discoverobjectspace , table ) ;
@ -143,6 +169,14 @@ public class Vocabulary_p {
}
}
}
}
int count = 0 ;
for ( Tagging v : vocs ) {
prop . put ( "vocabularyset_" + count + "_name" , v . getName ( ) ) ;
prop . put ( "vocabularyset_" + count + "_selected" , ( ( vocabularyName ! = null & & vocabularyName . equals ( v . getName ( ) ) ) | | ( discovername ! = null & & discovername . equals ( v . getName ( ) ) ) ) ? 1 : 0 ) ;
count + + ;
}
prop . put ( "vocabularyset" , count ) ;
prop . put ( "create" , vocabularyName = = null ? 1 : 0 ) ;
prop . put ( "create" , vocabularyName = = null ? 1 : 0 ) ;
if ( vocabulary = = null ) {
if ( vocabulary = = null ) {