package net.yacy; // migration.java // ----------------------- // (C) by Alexander Schier // // $LastChangedDate$ // $LastChangedRevision$ // $LastChangedBy$ // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation; either version 2 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with this program; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA import net.yacy.search.index.ReindexSolrBusyThread; import java.io.File; import java.io.IOException; import java.util.Collection; import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Locale; import java.util.Set; import java.util.StringTokenizer; import net.yacy.cora.order.Base64Order; import net.yacy.cora.order.Digest; import net.yacy.kelondro.util.FileUtils; import net.yacy.search.Switchboard; import net.yacy.search.SwitchboardConstants; import com.google.common.io.Files; import net.yacy.cora.protocol.TimeoutRequest; import net.yacy.cora.storage.Configuration.Entry; import net.yacy.cora.util.ConcurrentLog; import net.yacy.kelondro.workflow.BusyThread; import net.yacy.search.schema.CollectionConfiguration; import net.yacy.search.schema.CollectionSchema; import org.apache.solr.client.solrj.SolrServerException; import org.apache.solr.client.solrj.response.LukeResponse.FieldInfo; public class migration { //SVN constants (version & revision format = v.vvv0rrrr) public static final double TAGDB_WITH_TAGHASH=0.43101635; //tagDB keys are tagHashes instead of plain tagname. public static final double NEW_OVERLAYS =0.56504422; public static final double IDX_HOST_VER =0.99007724; // api for index retrieval: host index public static final double SSLPORT_CFG =1.67009578; // https port in cfg /** Removal of deprecated IPAccessHandler for white list implementation (serverClient setting) */ public static final double NEW_IPPATTERNS = 1.92109489; /** * Migrates older configuratin to current version * @param sb * @param fromVer the long version & revision (example 1.83009123) * @param toRev to current version */ public static void migrate(final Switchboard sb, final double fromVer, final double toVer){ if(fromVer < toVer){ if(fromVer < TAGDB_WITH_TAGHASH){ migrateBookmarkTagsDB(sb); } if(fromVer < NEW_OVERLAYS){ migrateDefaultFiles(sb); } if (fromVer < NEW_IPPATTERNS) { migrateServerClientSetting(sb); } // use String.format to cut-off small rounding errors ConcurrentLog.info("MIGRATION", "Migrating from "+ String.format(Locale.US, "%.8f",fromVer) + " to " + String.format(Locale.US, "%.8f",toVer)); if (fromVer < 0.47d) { presetPasswords(sb); migrateSwitchConfigSettings(sb); migrateWorkFiles(sb); } } installSkins(sb); // FIXME: yes, bad fix for quick release 0.47 // ssl/https support currently on hardcoded default port 8443 (v1.67/9563) // make sure YaCy can start (disable ssl/https support if port is used) if (sb.getConfigBool("server.https", false)) { int sslport = 8443; if (fromVer > SSLPORT_CFG) { sslport = sb.getConfigInt(SwitchboardConstants.SERVER_SSLPORT, 8443); } if (TimeoutRequest.ping("127.0.0.1", sslport, 3000)) { sb.setConfig("server.https", false); ConcurrentLog.config("MIGRATION", "disabled https support (reason: port already used)"); } } } /* * remove the static defaultfiles. We use them through a overlay now. */ public static void migrateDefaultFiles(final Switchboard sb){ File file=new File(sb.htDocsPath, "share/dir.html"); if(file.exists()) delete(file); file=new File(sb.htDocsPath, "share/dir.class"); if(file.exists()) delete(file); file=new File(sb.htDocsPath, "share/dir.java"); if(file.exists()) delete(file); file=new File(sb.htDocsPath, "www/welcome.html"); if(file.exists()) delete(file); file=new File(sb.htDocsPath, "www/welcome.java"); if(file.exists()) delete(file); file=new File(sb.htDocsPath, "www/welcome.class"); if(file.exists()) delete(file); } /* * copy skins from the release to DATA/SKINS. */ public static void installSkins(final Switchboard sb){ final File skinsPath = sb.getDataPath("skinPath", SwitchboardConstants.SKINS_PATH_DEFAULT); final File defaultSkinsPath = new File(sb.getAppPath(), "skins"); if (defaultSkinsPath.exists()) { final List skinFiles = FileUtils.getDirListing(defaultSkinsPath.getAbsolutePath()); mkdirs(skinsPath); for (final String skinFile : skinFiles){ if (skinFile.endsWith(".css")){ final File from = new File(defaultSkinsPath, skinFile); final File to = new File(skinsPath, skinFile); if (from.lastModified() > to.lastModified()) try { Files.copy(from, to); } catch (final IOException e) {} } } } String skin=sb.getConfig("currentSkin", "default"); if(skin.equals("")){ skin="default"; } final File skinsDir=sb.getDataPath("skinPath", SwitchboardConstants.SKINS_PATH_DEFAULT); final File skinFile=new File(skinsDir, skin+".css"); final File htdocsPath=new File(sb.getDataPath(SwitchboardConstants.HTDOCS_PATH, SwitchboardConstants.HTROOT_PATH_DEFAULT), "env"); final File styleFile=new File(htdocsPath, "style.css"); if(!skinFile.exists()){ if(styleFile.exists()){ ConcurrentLog.info("MIGRATION", "Skin "+skin+" not found. Keeping old skin."); }else{ ConcurrentLog.severe("MIGRATION", "Skin "+skin+" and no existing Skin found."); } }else{ try { mkdirs(styleFile.getParentFile()); Files.copy(skinFile, styleFile); ConcurrentLog.info("MIGRATION", "copied new Skinfile"); } catch (final IOException e) { ConcurrentLog.severe("MIGRATION", "Cannot copy skinfile."); } } } /** * @param path */ private static void mkdirs(final File path) { if (!path.exists()) { if(!path.mkdirs()) ConcurrentLog.warn("MIGRATION", "could not create directories for "+ path); } } public static void migrateBookmarkTagsDB(final Switchboard sb){ if (sb.bookmarksDB != null) sb.bookmarksDB.close(); final File tagsDBFile=new File(sb.workPath, "bookmarkTags.db"); if(tagsDBFile.exists()){ delete(tagsDBFile); ConcurrentLog.info("MIGRATION", "Migrating bookmarkTags.db to use wordhashs as keys."); } try { sb.initBookmarks(); } catch (final IOException e) { ConcurrentLog.logException(e); } } /** * @param filename */ private static void delete(final File filename) { if(!filename.delete()) ConcurrentLog.warn("MIGRATION", "could not delete "+ filename); } public static void migrateWorkFiles(final Switchboard sb){ File file=new File(sb.getDataPath(), "DATA/SETTINGS/wiki.db"); File file2; if (file.exists()) { ConcurrentLog.info("MIGRATION", "Migrating wiki.db to "+ sb.workPath); sb.wikiDB.close(); file2 = new File(sb.workPath, "wiki.db"); try { Files.copy(file, file2); file.delete(); } catch (final IOException e) { } file = new File(sb.getDataPath(), "DATA/SETTINGS/wiki-bkp.db"); if (file.exists()) { ConcurrentLog.info("MIGRATION", "Migrating wiki-bkp.db to "+ sb.workPath); file2 = new File(sb.workPath, "wiki-bkp.db"); try { Files.copy(file, file2); file.delete(); } catch (final IOException e) {} } try { sb.initWiki(); } catch (final IOException e) { ConcurrentLog.logException(e); } } file=new File(sb.getDataPath(), "DATA/SETTINGS/message.db"); if(file.exists()){ ConcurrentLog.info("MIGRATION", "Migrating message.db to "+ sb.workPath); sb.messageDB.close(); file2=new File(sb.workPath, "message.db"); try { Files.copy(file, file2); file.delete(); } catch (final IOException e) {} try { sb.initMessages(); } catch (final IOException e) { ConcurrentLog.logException(e); } } } public static void presetPasswords(final Switchboard sb) { // set preset accounts/passwords String acc; if ((acc = sb.getConfig(SwitchboardConstants.ADMIN_ACCOUNT, "")).length() > 0) { sb.setConfig(SwitchboardConstants.ADMIN_ACCOUNT_B64MD5, Digest.encodeMD5Hex(Base64Order.standardCoder.encodeString(acc))); } // fix unsafe old passwords if ((acc = sb.getConfig("proxyAccountBase64", "")).length() > 0) { sb.setConfig("proxyAccountBase64MD5", Digest.encodeMD5Hex(acc)); sb.setConfig("proxyAccountBase64", ""); } if ((acc = sb.getConfig("uploadAccountBase64", "")).length() > 0) { sb.setConfig("uploadAccountBase64MD5", Digest.encodeMD5Hex(acc)); sb.setConfig("uploadAccountBase64", ""); } if ((acc = sb.getConfig("downloadAccountBase64", "")).length() > 0) { sb.setConfig("downloadAccountBase64MD5", Digest.encodeMD5Hex(acc)); sb.setConfig("downloadAccountBase64", ""); } } public static void migrateSwitchConfigSettings(final Switchboard sb) { // migration for additional parser settings String value = ""; //Locales in DATA, because DATA must be writable, htroot not. if(sb.getConfig("locale.translated_html", "DATA/LOCALE/htroot").equals("htroot/locale")){ sb.setConfig("locale.translated_html", "DATA/LOCALE/htroot"); } // migration for blacklists if ((value = sb.getConfig("proxyBlackListsActive","")).length() > 0) { sb.setConfig("proxy.BlackLists", value); sb.setConfig("crawler.BlackLists", value); sb.setConfig("dht.BlackLists", value); sb.setConfig("search.BlackLists", value); sb.setConfig("surftips.BlackLists", value); sb.setConfig("BlackLists.Shared",sb.getConfig("proxyBlackListsShared","")); sb.setConfig("proxyBlackListsActive", ""); } // migration of http specific crawler settings if ((value = sb.getConfig("crawler.acceptLanguage","")).length() > 0) { sb.setConfig("crawler.http.acceptEncoding", sb.getConfig("crawler.acceptEncoding","gzip,deflate")); sb.setConfig("crawler.http.acceptLanguage", sb.getConfig("crawler.acceptLanguage","en-us,en;q=0.5")); sb.setConfig("crawler.http.acceptCharset", sb.getConfig("crawler.acceptCharset","ISO-8859-1,utf-8;q=0.7,*;q=0.7")); } // patch the blacklist because of a release strategy change from 0.7 and up if ((value = sb.getConfig("update.blacklist","")).equals("....[123]")) { value = ""; // no default (remove prev. setting "...[123]" as it hits "1.71" release, added 2014-04-13) sb.setConfig("update.blacklist", value); } } /** * Setting "serverClient" : migrate eventual address patterns using deprecated * formats previously supported by the IPAccessHandler and IPAddressMap classes. */ public static void migrateServerClientSetting(final Switchboard sb) { final String patternSeparator = ","; final String white = sb.getConfig("serverClient", "*"); if (!white.equals("*")) { final StringBuilder migrated = new StringBuilder(); boolean hasDeprecated = migrateIPAddressPatterns(patternSeparator, white, migrated); if (hasDeprecated) { sb.setConfig("serverClient", migrated.toString()); ConcurrentLog.info("MIGRATION", "Migrated serverClient setting from " + white + " to " + migrated); } } } /** * Convert eventual address patterns using deprecated formats previously * supported by the IPAccessHandler and IPAddressMap classes. All parameters * must be not null. * * @param patternSeparator * pattern separator * @param patterns * patterns to convert * @param migrated * the result of the conversion. Equals the patterns String when it * contained no pattern using a deprecated format. * @return true when patterns contained at least one pattern using a deprecated * format. */ protected static boolean migrateIPAddressPatterns(final String patternSeparator, final String patterns, final StringBuilder migrated) { final StringTokenizer st = new StringTokenizer(patterns, patternSeparator); boolean hasDeprecated = false; while (st.hasMoreTokens()) { final String pattern = st.nextToken(); int idx; if (pattern.indexOf('|') > 0) { idx = pattern.indexOf('|'); } else { idx = pattern.indexOf('/'); if (idx >= 0) { /* * First "/" character of the URI pattern used to separate it from the internet * address. But it can now be used in CIDR notation */ final String intPart = pattern.substring(idx + 1); try { int intValue = Integer.parseInt(intPart); if (intValue >= 0 && intValue <= 128) { idx = -1; } else { /* No a valid CIDR notation : maybe a path with only numbers? */ hasDeprecated = true; } } catch (final NumberFormatException e) { hasDeprecated = true; } } } String addr = idx > 0 ? pattern.substring(0, idx) : pattern; String path = idx > 0 ? pattern.substring(idx) : "/*"; if (addr.endsWith(".")) { /* * Migrating prefix wildcard specification range format (e.g. "10.10." becomes * "10.10.0.0-10.10.255.255") . */ hasDeprecated = true; final String[] parts = addr.split("\\."); final StringBuilder migratedAddr = new StringBuilder(addr.substring(0, addr.length() - 1)); for (int i = parts.length; i < 4; i++) { migratedAddr.append(".0"); } migratedAddr.append("-").append(addr.substring(0, addr.length() - 1)); for (int i = parts.length; i < 4; i++) { migratedAddr.append(".255"); } addr = migratedAddr.toString(); } if (path.startsWith("|") || path.startsWith("/*.")) { path = path.substring(1); } if (migrated.length() > 0) { migrated.append(patternSeparator); } migrated.append(addr); if (!"/*".equals(path)) { migrated.append("|").append(path); } } return hasDeprecated; } /** * Reindex embedded solr index * - all documents with inactive fields (according to current schema) * - all documents with obsolete fields * A worker thread is initialized with fieldnames or a solr query which selects the documents for reindexing * implemented via deployed BusyThread which is called repeatedly by system * reindexes a fixed chunk of documents per cycle (allowing to easy interrupt process after completion of a chunck) * and monitoring in default process monitor (PerformanceQueues_p.html) */ public static int reindexToschema (final Switchboard sb) { BusyThread bt = sb.getThread(ReindexSolrBusyThread.THREAD_NAME); // a reindex job is already running if (bt != null) { return bt.getJobCount(); } boolean lukeCheckok = false; Set omitFields = new HashSet(4); omitFields.add(CollectionSchema.author_sxt.getSolrFieldName()); // special fields to exclude from disabled check omitFields.add(CollectionSchema.coordinate_p_0_coordinate.getSolrFieldName()); omitFields.add(CollectionSchema.coordinate_p_1_coordinate.getSolrFieldName()); omitFields.add("_version_"); // exclude internal Solr std. field from obsolete check CollectionConfiguration colcfg = Switchboard.getSwitchboard().index.fulltext().getDefaultConfiguration(); ReindexSolrBusyThread reidx = new ReindexSolrBusyThread(null); // ("*:*" would reindex all); try { // get all fields contained in index Collection solrfields = Switchboard.getSwitchboard().index.fulltext().getDefaultEmbeddedConnector().getFields(); for (FieldInfo solrfield : solrfields) { if (!colcfg.contains(solrfield.getName()) && !omitFields.contains(solrfield.getName()) && !solrfield.getName().startsWith(CollectionSchema.VOCABULARY_PREFIX)) { // add found fields not in config for reindexing but omit the vocabulary fields reidx.addSelectFieldname(solrfield.getName()); } } lukeCheckok = true; } catch (final SolrServerException ex) { ConcurrentLog.logException(ex); } if (!lukeCheckok) { // if luke failed alternatively use config and manual list // add all disabled fields Iterator itcol = colcfg.entryIterator(); while (itcol.hasNext()) { // check for disabled fields in config Entry etr = itcol.next(); if (!etr.enabled() && !omitFields.contains(etr.key())) { reidx.addSelectFieldname(etr.key()); } } // add obsolete fields (not longer part of main index) reidx.addSelectFieldname("author_s"); reidx.addSelectFieldname("css_tag_txt"); reidx.addSelectFieldname("css_url_txt"); reidx.addSelectFieldname("scripts_txt"); reidx.addSelectFieldname("images_tag_txt"); reidx.addSelectFieldname("images_urlstub_txt"); reidx.addSelectFieldname("canonical_t"); reidx.addSelectFieldname("frames_txt"); reidx.addSelectFieldname("iframes_txt"); reidx.addSelectFieldname("inboundlinks_tag_txt"); reidx.addSelectFieldname("inboundlinks_relflags_val"); reidx.addSelectFieldname("inboundlinks_name_txt"); reidx.addSelectFieldname("inboundlinks_rel_sxt"); reidx.addSelectFieldname("inboundlinks_text_txt"); reidx.addSelectFieldname("inboundlinks_text_chars_val"); reidx.addSelectFieldname("inboundlinks_text_words_val"); reidx.addSelectFieldname("inboundlinks_alttag_txt"); reidx.addSelectFieldname("outboundlinks_tag_txt"); reidx.addSelectFieldname("outboundlinks_relflags_val"); reidx.addSelectFieldname("outboundlinks_name_txt"); reidx.addSelectFieldname("outboundlinks_rel_sxt"); reidx.addSelectFieldname("outboundlinks_text_txt"); reidx.addSelectFieldname("outboundlinks_text_chars_val"); reidx.addSelectFieldname("outboundlinks_text_words_val"); reidx.addSelectFieldname("outboundlinks_alttag_txt"); } sb.deployThread(ReindexSolrBusyThread.THREAD_NAME, "Reindex Solr", "reindex documents with obsolete fields in embedded Solr index", "/IndexReIndexMonitor_p.html",reidx , 0); return 0; } }