Merge pull request #12 from Kabhal/normalization
Normalization + hardbreaks fixes
This commit is contained in:
@@ -48,6 +48,8 @@ dependencies {
|
||||
compile 'com.google.guava:guava'
|
||||
compile "commons-codec:commons-codec"
|
||||
testCompile 'junit:junit'
|
||||
testCompile "org.mockito:mockito-core"
|
||||
|
||||
testCompile 'ch.qos.logback:logback-classic'
|
||||
}
|
||||
|
||||
@@ -55,6 +57,7 @@ dependencyManagement {
|
||||
dependencies {
|
||||
dependency "org.slf4j:slf4j-api:1.7.12"
|
||||
dependency "junit:junit:4.11"
|
||||
dependency "org.mockito:mockito-core:1.9.5"
|
||||
dependency "ch.qos.logback:logback-classic:1.1.2"
|
||||
dependency "commons-collections:commons-collections:3.2.1"
|
||||
dependency "org.apache.commons:commons-lang3:3.2.1"
|
||||
|
||||
@@ -36,8 +36,8 @@ import java.util.regex.Pattern;
|
||||
*/
|
||||
public abstract class AbstractMarkupDocBuilder implements MarkupDocBuilder {
|
||||
|
||||
private static final Pattern ANCHOR_FORBIDDEN_PATTERN = Pattern.compile("[^0-9a-zA-Z-_]+");
|
||||
private static final Pattern ANCHOR_PUNCTUATION_PATTERN = Pattern.compile("\\p{Punct}+");
|
||||
private static final Pattern ANCHOR_UNIGNORABLE_PATTERN = Pattern.compile("[^0-9a-zA-Z-_]+");
|
||||
private static final Pattern ANCHOR_IGNORABLE_PATTERN = Pattern.compile("[\\p{InCombiningDiacriticalMarks}@#&(){}\\[\\]!$*%+=/:.;,?\\\\<>|]+");
|
||||
private static final Pattern ANCHOR_SPACE_PATTERN = Pattern.compile("[\\s]+");
|
||||
|
||||
protected StringBuilder documentBuilder = new StringBuilder();
|
||||
@@ -159,15 +159,15 @@ public abstract class AbstractMarkupDocBuilder implements MarkupDocBuilder {
|
||||
*/
|
||||
protected String normalizeAnchor(Markup spaceEscape, String anchor) {
|
||||
String normalizedAnchor = anchor.trim();
|
||||
String trimAnchor = normalizedAnchor;
|
||||
normalizedAnchor = Normalizer.normalize(normalizedAnchor, Normalizer.Form.NFD);
|
||||
normalizedAnchor = ANCHOR_PUNCTUATION_PATTERN.matcher(normalizedAnchor).replaceAll("");
|
||||
normalizedAnchor = ANCHOR_IGNORABLE_PATTERN.matcher(normalizedAnchor).replaceAll("");
|
||||
normalizedAnchor = normalizedAnchor.trim();
|
||||
normalizedAnchor = normalizedAnchor.toLowerCase();
|
||||
normalizedAnchor = ANCHOR_SPACE_PATTERN.matcher(normalizedAnchor).replaceAll(spaceEscape.toString());
|
||||
|
||||
String validAnchor = ANCHOR_FORBIDDEN_PATTERN.matcher(normalizedAnchor).replaceAll("");
|
||||
String validAnchor = ANCHOR_UNIGNORABLE_PATTERN.matcher(normalizedAnchor).replaceAll("");
|
||||
if (validAnchor.length() != normalizedAnchor.length())
|
||||
normalizedAnchor = DigestUtils.md5Hex(trimAnchor);
|
||||
normalizedAnchor = DigestUtils.md5Hex(normalizedAnchor);
|
||||
else
|
||||
normalizedAnchor = validAnchor;
|
||||
|
||||
|
||||
@@ -30,7 +30,7 @@ public enum AsciiDoc implements Markup {
|
||||
TABLE_COLUMN_DELIMITER("|"),
|
||||
TABLE_COLUMN_DELIMITER_ESCAPE("\\|"), // AsciiDoctor supports both \| and {vbar}
|
||||
LISTING("----"),
|
||||
HARDBREAKS(":hardbreaks:"),
|
||||
HARDBREAKS("[%hardbreaks]"),
|
||||
DOCUMENT_TITLE("= "),
|
||||
SECTION_TITLE_LEVEL1("== "),
|
||||
SECTION_TITLE_LEVEL2("=== "),
|
||||
|
||||
@@ -0,0 +1,45 @@
|
||||
package io.github.robwin.markup.builder;
|
||||
|
||||
import io.github.robwin.markup.builder.asciidoc.AsciiDoc;
|
||||
import org.apache.commons.codec.digest.DigestUtils;
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
import org.mockito.Mockito;
|
||||
|
||||
import static org.junit.Assert.*;
|
||||
import static org.mockito.Mockito.mock;
|
||||
|
||||
public class AbstractMarkupDocBuilderTest {
|
||||
|
||||
AbstractMarkupDocBuilder builder;
|
||||
|
||||
@Before
|
||||
public void setUp() {
|
||||
builder = mock(AbstractMarkupDocBuilder.class, Mockito.CALLS_REAL_METHODS);
|
||||
}
|
||||
|
||||
private String normalize(String anchor) {
|
||||
return builder.normalizeAnchor(AsciiDoc.SPACE_ESCAPE, anchor);
|
||||
}
|
||||
|
||||
private void assertNormalization(String result, String anchor) {
|
||||
assertEquals(result, normalize(anchor));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNormalizeAnchor() throws Exception {
|
||||
assertNormalization("", "");
|
||||
assertNormalization("anchor", "anchor");
|
||||
assertNormalization("anchor", "aNcHoR");
|
||||
assertNormalization("__anchor__", "_ anchor _");
|
||||
assertNormalization("-_anchor_-", "- anchor -");
|
||||
assertNormalization("classic-simple_anchor", "classic-simple_anchor");
|
||||
assertNormalization("an_chor", " an chor ");
|
||||
assertNormalization("anchor", "# anchor &");
|
||||
assertNormalization(DigestUtils.md5Hex("\u0240"), "\u0240");
|
||||
assertNormalization(normalize("\u0240"), " \u0240 ");
|
||||
assertNormalization(DigestUtils.md5Hex("µu_\u0240this_-_"), " µ&|ù \u0240This .:/-_# ");
|
||||
assertNormalization("this_is_a_funky_string", "Tĥïŝ ĩš â fůňķŷ Šťŕĭńġ");
|
||||
assertNormalization("", " @#&(){}[]!$*%+=/:.;,?\\<>| ");
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user