You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
268 lines
11 KiB
268 lines
11 KiB
18 years ago
|
// Diff.java
// ---------
// part of YaCy
// (C) by Michael Peter Christen; mc@yacy.net
// first published on http://www.anomic.de
// Frankfurt, Germany, 2007
// Created 03.02.2007
//
// This file is contributed by Franz Brausze
//
// $LastChangedDate$
// $LastChangedRevision$
// $LastChangedBy$
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||
|
|
||
12 years ago
|
package net.yacy.data;
|
||
18 years ago
|
|
||
|
import java.util.ArrayList;
|
||
|
|
||
15 years ago
|
import net.yacy.document.parser.html.CharacterCoding;
|
||
|
|
||
16 years ago
|
|
||
18 years ago
|
/**
|
||
|
* This class provides a diff-functionality.
|
||
|
*/
|
||
14 years ago
|
public class Diff {
|
||
18 years ago
|
|
||
16 years ago
|
private final ArrayList <Part> parts = new ArrayList<Part>();
|
||
15 years ago
|
protected final Object[] original;
|
||
|
protected final Object[] changed;
|
||
18 years ago
|
|
||
|
/**
|
||
15 years ago
|
* @param original the original <code>String</code>
|
||
|
* @param changed the new <code>String</code>
|
||
18 years ago
|
* @throws NullPointerException if one of the arguments is <code>null</code>
|
||
|
*/
|
||
14 years ago
|
public Diff(final String original, final String changed) {
|
||
15 years ago
|
this(original, changed, 1);
|
||
18 years ago
|
}
|
||
|
|
||
|
/**
|
||
15 years ago
|
* @param original the original <code>String</code>
|
||
|
* @param changed the new <code>String</code>
|
||
18 years ago
|
* @param minConsecutive the minimum number of consecutive equal characters in
|
||
|
* both Strings. Smaller seperations will only be performed on the end of either
|
||
|
* String if needed
|
||
15 years ago
|
* @throws NullPointerException if <code>original</code> or <code>changed</code> is
|
||
18 years ago
|
* <code>null</code>
|
||
|
*/
|
||
14 years ago
|
public Diff(final String original, final String changed, final int minConsecutive) {
|
||
15 years ago
|
if (original == null || changed == null) throw new NullPointerException("input Strings must be null");
|
||
|
this.original = new Comparable[original.length()];
|
||
|
for (int i=0; i<original.length(); i++)
|
||
|
this.original[i] = Character.valueOf(original.charAt(i));
|
||
|
this.changed = new Comparable[changed.length()];
|
||
|
for (int i=0; i<changed.length(); i++)
|
||
|
this.changed[i] = Character.valueOf(changed.charAt(i));
|
||
18 years ago
|
parse((minConsecutive > 0) ? minConsecutive : 1);
|
||
|
}
|
||
|
|
||
14 years ago
|
public Diff(final Object[] original, final Object[] changed, final int minConsecutive) {
|
||
15 years ago
|
if (original == null || changed == null) throw new NullPointerException("input Objects must be null");
|
||
|
this.original = original;
|
||
|
this.changed = changed;
|
||
18 years ago
|
parse((minConsecutive > 0) ? minConsecutive : 1);
|
||
|
}
|
||
|
|
||
17 years ago
|
private void parse(final int minLength) {
|
||
18 years ago
|
/* Matrix: find as long diagonals as possible,
|
||
|
* delete the old horizontally and add the new vertically
|
||
|
*
|
||
|
* ~ OLD ~
|
||
|
* |T|H|E| |F|I|R|S|T| |S|E|N|T|E|N|C|E|
|
||
|
* T|#| | | | | | | |#| | | | |#| | | | |
|
||
|
* H| |#| | | | | | | | | | | | | | | | |
|
||
|
* E| | |#| | | | | | | | |#| | |#| | |#|
|
||
|
* | | | |#| | | | | |#| | | | | | | | |
|
||
|
* N| | | | | | | | | | | | |#| | |#| | |
|
||
|
* E| | |#| | | | | | | | |#| | |#| | |#|
|
||
|
* ~ X| | | | | | | | | | | | | | | | | | |
|
||
|
* N T|#| | | | | | | |#| | | | |#| | | | |
|
||
|
* E | | | |#| | | | | |#| | | | | | | | |
|
||
|
* W S| | | | | | | |#| | |#| | | | | | | |
|
||
|
* ~ E| | |#| | | | | | | | |#| | |#| | |#|
|
||
|
* N| | | | | | | | | | | | |#| | |#| | |
|
||
|
* T|#| | | | | | | |#| | | | |#| | | | |
|
||
|
* E| | |#| | | | | | | | |#| | |#| | |#|
|
||
|
* N| | | | | | | | | | | | |#| | |#| | |
|
||
|
* C| | | | | | | | | | | | | | | | |#| |
|
||
18 years ago
|
* E| | |#| | | | | | | | |#| | |#| | |#|
|
||
18 years ago
|
*/
|
||
15 years ago
|
final boolean[][] matrix = new boolean[this.changed.length][this.original.length];
|
||
|
for (int y=0; y<this.changed.length; y++)
|
||
|
for (int x=0; x<this.original.length; x++)
|
||
|
matrix[y][x] = this.original[x].equals(this.changed[y]);
|
||
18 years ago
|
|
||
|
int s = 0, t = 0;
|
||
18 years ago
|
int[] tmp;
|
||
|
while ((tmp = findDiagonal(s, t, matrix, minLength)) != null) {
|
||
18 years ago
|
addReplacementParts(s, t, tmp[0], tmp[1]);
|
||
|
this.parts.add(new Part(Part.UNCHANGED, tmp[0], s = tmp[0] + tmp[2]));
|
||
|
t = tmp[1] + tmp[2];
|
||
|
}
|
||
15 years ago
|
addReplacementParts(s, t, this.original.length, this.changed.length);
|
||
18 years ago
|
}
|
||
|
|
||
17 years ago
|
private void addReplacementParts(final int startx, final int starty, final int endx, final int endy) {
|
||
18 years ago
|
if (startx < endx) this.parts.add(new Part(Part.DELETED, startx, endx));
|
||
|
if (starty < endy) this.parts.add(new Part(Part.ADDED, starty, endy));
|
||
|
}
|
||
|
|
||
18 years ago
|
/** Search for a diagonal with minimal length <code>minLength</code> line by line in a submatrix
|
||
|
* <code>{ x, y, matrix[0].length, matrix.length}</code> of the <code>matrix</code>:<br>
|
||
|
* <code> {_1,__,__} -> X axis</code><br>
|
||
|
* <code> ,{__,_1,__} </code><br>
|
||
|
* <code> ,{__,__,_1} </code><br>
|
||
|
* <ul>
|
||
18 years ago
|
* TODO: some optimisation ideas
|
||
18 years ago
|
* <li>search for a better algorithm on the inet!!! :) </li>
|
||
|
* <li>pass only the part of the matrix where the search takes place - not the whole matrix everytime</li>
|
||
|
* <li>break the inner loop if the rest of the matrix is smaller than minLength (and no diagonal has been found yet) </li>
|
||
|
* <li>return diagonal topologicaly closest to the {0,0} </li>
|
||
|
* </ul>
|
||
|
* @param x the starting position of the search on the optical horizontal axis
|
||
|
* @param y the starting position of the search on the optical vertical axis<br>
|
||
|
* @param matrix the matrix to search through
|
||
|
* @param minLength the minimal desired length of a diagonal to find
|
||
|
* @return a vector in the form <code>{ diagStartX, diagStartY, diagLength }</code> where <code> diagLength >= minLength</code>
|
||
|
*/
|
||
17 years ago
|
private static int[] findDiagonal(final int x, final int y, final boolean[][] matrix, final int minLength) {
|
||
18 years ago
|
int rx, ry, yy, xx, i;
|
||
|
for (yy=y; yy<matrix.length; yy++)
|
||
|
for (xx=x; xx<matrix[yy].length; xx++)
|
||
18 years ago
|
if (matrix[yy][xx]) { // reverse order! [y][x]
|
||
18 years ago
|
// save position
|
||
18 years ago
|
rx = xx;
|
||
|
ry = yy;
|
||
18 years ago
|
// follow diagonal as long as far as possible
|
||
18 years ago
|
for (i=1; (yy + i)<matrix.length && (xx + i)<matrix[yy].length; i++)
|
||
18 years ago
|
if (!matrix[yy + i][xx + i])
|
||
|
break;
|
||
|
if (i >= minLength)
|
||
|
return new int[] { rx, ry, i }; // swap back the x and y axes for better readability
|
||
18 years ago
|
}
|
||
|
return null;
|
||
|
}
|
||
|
|
||
|
/**
|
||
18 years ago
|
* @return the original <code>Object[]</code> passed to this class on instantiation
|
||
18 years ago
|
*/
|
||
15 years ago
|
public Object[] getOriginal() { return this.original; }
|
||
18 years ago
|
|
||
|
/**
|
||
18 years ago
|
* @return the new <code>Object[]</code> passed to this class on instantiation
|
||
18 years ago
|
*/
|
||
15 years ago
|
public Object[] getNew() { return this.changed; }
|
||
18 years ago
|
|
||
|
/**
|
||
|
* A diff is composed of different parts. Each of these parts stands for an
|
||
|
* operation, like "do nothing", "add" or "delete".
|
||
|
*
|
||
|
* @see Part
|
||
|
* @return all parts this diff consists of in correct order
|
||
|
*/
|
||
17 years ago
|
public Part[] getParts() { return this.parts.toArray(new Part[this.parts.size()]); }
|
||
18 years ago
|
|
||
15 years ago
|
@Override
|
||
18 years ago
|
public String toString() {
|
||
16 years ago
|
final StringBuilder sb = new StringBuilder(this.parts.size() * 20);
|
||
15 years ago
|
for (final Part part :parts)
|
||
|
sb.append(part.toString()).append("\n");
|
||
|
return sb.toString();
|
||
18 years ago
|
}
|
||
|
|
||
18 years ago
|
/**
|
||
|
* This class represents a part of the diff, meaning one operation
|
||
|
* (or one line of a "normal" diff)
|
||
|
*/
|
||
|
public class Part {
|
||
|
|
||
|
/** The string this diff-part cares about has not been changed */
|
||
|
public static final int UNCHANGED = 0;
|
||
|
/** The string this diff-part cares about has been added in the new version */
|
||
|
public static final int ADDED = 1;
|
||
|
/** The string this diff-part cares about has been removed in the new version */
|
||
|
public static final int DELETED = 2;
|
||
|
|
||
|
private final int action;
|
||
|
private final int posOld;
|
||
|
private final int posNew;
|
||
|
|
||
17 years ago
|
Part(final int action, final int posOld, final int posNew) {
|
||
18 years ago
|
this.action = action;
|
||
|
this.posOld = posOld;
|
||
|
this.posNew = posNew;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* @return whether the string shan't be changed, shall be added or deleted
|
||
|
*/
|
||
|
public int getAction() { return this.action; }
|
||
|
public int getPosOld() { return this.posOld; }
|
||
|
public int getPosNew() { return this.posNew; }
|
||
|
|
||
|
/**
|
||
|
* @return the plain string this diff-part cares about
|
||
|
*/
|
||
|
public String getString() {
|
||
16 years ago
|
final StringBuilder sb = new StringBuilder(this.posNew - this.posOld);
|
||
18 years ago
|
if (this.action == ADDED) {
|
||
15 years ago
|
for (int i = this.posOld; i < this.posNew; i++)
|
||
14 years ago
|
sb.append(Diff.this.changed[i]);
|
||
18 years ago
|
} else {
|
||
15 years ago
|
for (int i = this.posOld; i < this.posNew; i++)
|
||
14 years ago
|
sb.append(Diff.this.original[i]);
|
||
18 years ago
|
}
|
||
15 years ago
|
return sb.toString();
|
||
18 years ago
|
}
|
||
|
|
||
|
/**
|
||
|
* @return the string this diff-part cares about in typical diff-notation:
|
||
|
* <dl>
|
||
|
* <dt>unchanged</dt><dd>"<code> STRING</code>"</dd>
|
||
|
* <dt>added</dt><dd>"<code>+ STRING</code>"</dd>
|
||
|
* <dt>deleted</dt><dd>"<code>- STRING</code>"</dd>
|
||
|
* </dl>
|
||
|
*/
|
||
15 years ago
|
@Override
|
||
18 years ago
|
public String toString() {
|
||
|
return ((this.action == UNCHANGED) ? " " :
|
||
|
(this.action == ADDED) ? "+" : "-") + " " + getString();
|
||
|
}
|
||
|
}
|
||
18 years ago
|
|
||
14 years ago
|
public static String toHTML(final Diff[] diffs) {
|
||
16 years ago
|
final StringBuilder sb = new StringBuilder(diffs.length * 60);
|
||
14 years ago
|
Diff.Part[] ps;
|
||
|
for (Diff d : diffs) {
|
||
18 years ago
|
sb.append("<p class=\"diff\">\n");
|
||
15 years ago
|
ps = d.getParts();
|
||
14 years ago
|
for (Diff.Part part :ps) {
|
||
18 years ago
|
sb.append("<span\nclass=\"");
|
||
15 years ago
|
switch (part.getAction()) {
|
||
14 years ago
|
case Diff.Part.UNCHANGED: sb.append("unchanged"); break;
|
||
|
case Diff.Part.ADDED: sb.append("added"); break;
|
||
|
case Diff.Part.DELETED: sb.append("deleted"); break;
|
||
18 years ago
|
}
|
||
15 years ago
|
sb.append("\">").append(CharacterCoding.unicode2html(part.getString(), true).replaceAll("\n", "<br />"));
|
||
18 years ago
|
sb.append("</span>");
|
||
|
}
|
||
|
sb.append("</p>");
|
||
|
}
|
||
15 years ago
|
return sb.toString();
|
||
18 years ago
|
}
|
||
18 years ago
|
}
|