Merge pull request #10 from Kabhal/normalization

fixed #9 Refactor anchor and cross-references
This commit is contained in:
Robert Winkler
2016-02-11 19:40:54 +01:00
8 changed files with 79 additions and 56 deletions

View File

@@ -46,6 +46,7 @@ dependencies {
compile 'org.apache.commons:commons-lang3'
compile "commons-io:commons-io"
compile 'com.google.guava:guava'
compile "commons-codec:commons-codec"
testCompile 'junit:junit'
testCompile 'ch.qos.logback:logback-classic'
}
@@ -59,6 +60,7 @@ dependencyManagement {
dependency "org.apache.commons:commons-lang3:3.2.1"
dependency "commons-io:commons-io:2.4"
dependency "com.google.guava:guava:18.0"
dependency "commons-codec:commons-codec:1.6"
}
}

View File

@@ -18,6 +18,7 @@
*/
package io.github.robwin.markup.builder;
import org.apache.commons.codec.digest.DigestUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -26,35 +27,49 @@ import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.text.Normalizer;
import java.util.List;
import java.util.Locale;
import java.util.regex.Pattern;
/**
* @author Robert Winkler
*/
public abstract class AbstractMarkupDocBuilder implements MarkupDocBuilder {
private static final Pattern ANCHOR_FORBIDDEN_PATTERN = Pattern.compile("[^0-9a-zA-Z-_]+");
private static final Pattern ANCHOR_SPACE_PATTERN = Pattern.compile("[\\s]+");
protected StringBuilder documentBuilder = new StringBuilder();
protected String newLine = System.getProperty("line.separator");
protected Logger logger = LoggerFactory.getLogger(getClass());
/**
 * Writes the document title.
 * <p>
 * An anchor built from the title is emitted first (followed by a new line),
 * then the markup token and the title text, then two line separators.
 *
 * @param markup markup token appended directly before the title text
 * @param title  title text; also used as the anchor source
 */
protected void documentTitle(Markup markup, String title) {
    anchor(title).newLine();
    documentBuilder.append(markup);
    documentBuilder.append(title);
    documentBuilder.append(newLine);
    documentBuilder.append(newLine);
}
/**
 * Writes a level-1 section title.
 * <p>
 * Output order: one line separator, an anchor built from the title (followed
 * by a new line), then the markup token, the title text and a line separator.
 *
 * @param markup markup token appended directly before the title text
 * @param title  title text; also used as the anchor source
 */
protected void sectionTitleLevel1(Markup markup, String title){
    documentBuilder.append(newLine);
    anchor(title).newLine();
    // The title is written exactly once, after its anchor, mirroring documentTitle.
    documentBuilder.append(markup).append(title).append(newLine);
}
/**
 * Writes a level-2 section title.
 * <p>
 * Output order: one line separator, an anchor built from the title (followed
 * by a new line), then the markup token, the title text and a line separator.
 *
 * @param markup markup token appended directly before the title text
 * @param title  title text; also used as the anchor source
 */
protected void sectionTitleLevel2(Markup markup, String title){
    documentBuilder.append(newLine);
    anchor(title).newLine();
    // The title is written exactly once, after its anchor, mirroring documentTitle.
    documentBuilder.append(markup).append(title).append(newLine);
}
/**
 * Writes a level-3 section title.
 * <p>
 * Output order: one line separator, an anchor built from the title (followed
 * by a new line), then the markup token, the title text and a line separator.
 *
 * @param markup markup token appended directly before the title text
 * @param title  title text; also used as the anchor source
 */
protected void sectionTitleLevel3(Markup markup, String title){
    documentBuilder.append(newLine);
    anchor(title).newLine();
    // The title is written exactly once, after its anchor, mirroring documentTitle.
    documentBuilder.append(markup).append(title).append(newLine);
}
/**
 * Writes a level-4 section title.
 * <p>
 * Output order: one line separator, an anchor built from the title (followed
 * by a new line), then the markup token, the title text and a line separator.
 *
 * @param markup markup token appended directly before the title text
 * @param title  title text; also used as the anchor source
 */
protected void sectionTitleLevel4(Markup markup, String title){
    documentBuilder.append(newLine);
    anchor(title).newLine();
    // The title is written exactly once, after its anchor, mirroring documentTitle.
    documentBuilder.append(markup).append(title).append(newLine);
}
@Override
@@ -114,20 +129,38 @@ public abstract class AbstractMarkupDocBuilder implements MarkupDocBuilder {
return anchor(anchor, null);
}
/**
 * Generic anchor normalization algorithm shared by all markups.
 * <p>
 * Steps, in order:
 * <ol>
 *   <li>trim the anchor;</li>
 *   <li>apply Unicode NFD normalization;</li>
 *   <li>lower-case it (locale-independently);</li>
 *   <li>replace every whitespace run with {@code spaceEscape.toString()};</li>
 *   <li>strip every character outside {@code [0-9a-zA-Z-_]}.</li>
 * </ol>
 * If the last step had to remove anything, the stripped form is discarded and
 * the MD5 hex digest of the original, untrimmed {@code anchor} is returned instead.
 *
 * @param spaceEscape markup whose string form replaces each whitespace run
 * @param anchor      raw anchor text; must not be {@code null}
 * @return the normalized anchor, or the MD5 hex digest of {@code anchor} when
 *         it contains characters that are not allowed in an anchor
 */
protected String normalizeAnchor(Markup spaceEscape, String anchor) {
    String normalizedAnchor = anchor.trim();
    normalizedAnchor = Normalizer.normalize(normalizedAnchor, Normalizer.Form.NFD);
    // Locale.ROOT keeps generated anchors identical on every JVM; with the default
    // locale, e.g. Turkish maps 'I' to a dotless i, which fails the ASCII whitelist
    // below and would turn an otherwise valid anchor into a hash.
    normalizedAnchor = normalizedAnchor.toLowerCase(Locale.ROOT);
    normalizedAnchor = ANCHOR_SPACE_PATTERN.matcher(normalizedAnchor).replaceAll(spaceEscape.toString());
    String validAnchor = ANCHOR_FORBIDDEN_PATTERN.matcher(normalizedAnchor).replaceAll("");
    // A length difference means at least one forbidden character was removed.
    if (validAnchor.length() != normalizedAnchor.length()) {
        return DigestUtils.md5Hex(anchor);
    }
    return validAnchor;
}
@Override
public MarkupDocBuilder crossReferenceAnchor(String document, String anchor, String text) {
documentBuilder.append(crossReferenceAnchorAsString(document, anchor, text));
public MarkupDocBuilder crossReferenceRaw(String document, String anchor, String text) {
documentBuilder.append(crossReferenceRawAsString(document, anchor, text));
return this;
}
@Override
public MarkupDocBuilder crossReferenceAnchor(String anchor, String text) {
return crossReferenceAnchor(null, anchor, text);
public MarkupDocBuilder crossReferenceRaw(String anchor, String text) {
return crossReferenceRaw(null, anchor, text);
}
@Override
public MarkupDocBuilder crossReferenceAnchor(String anchor) {
return crossReferenceAnchor(null, anchor, null);
public MarkupDocBuilder crossReferenceRaw(String anchor) {
return crossReferenceRaw(null, anchor, null);
}
@Override

View File

@@ -66,13 +66,13 @@ public interface MarkupDocBuilder {
String anchorAsString(String anchor, String text);
MarkupDocBuilder crossReferenceAnchor(String document, String anchor, String text);
MarkupDocBuilder crossReferenceRaw(String document, String anchor, String text);
MarkupDocBuilder crossReferenceAnchor(String anchor, String text);
MarkupDocBuilder crossReferenceRaw(String anchor, String text);
MarkupDocBuilder crossReferenceAnchor(String anchor);
MarkupDocBuilder crossReferenceRaw(String anchor);
String crossReferenceAnchorAsString(String document, String anchor, String text);
String crossReferenceRawAsString(String document, String anchor, String text);
MarkupDocBuilder crossReference(String document, String title, String text);

View File

@@ -43,7 +43,8 @@ public enum AsciiDoc implements Markup {
CROSS_REFERENCE_END(">>"),
ANCHOR_START("[["),
ANCHOR_END("]]"),
FILE_EXTENSION("adoc");
FILE_EXTENSION("adoc"),
SPACE_ESCAPE("_");
private final String markup;

View File

@@ -38,10 +38,6 @@ import static org.apache.commons.lang3.StringUtils.*;
*/
public class AsciiDocBuilder extends AbstractMarkupDocBuilder {
private static final Pattern ANCHOR_FORBIDDEN_PATTERN = Pattern.compile("[^\\p{ASCII}]+");
private static final Pattern SPACE_PATTERN = Pattern.compile("[\\s]+");
@Override
public MarkupDocBuilder documentTitle(String title){
documentTitle(AsciiDoc.DOCUMENT_TITLE, title);
@@ -128,17 +124,12 @@ public class AsciiDocBuilder extends AbstractMarkupDocBuilder {
}
private String normalizeAnchor(String anchor) {
return "_" + SPACE_PATTERN.matcher(ANCHOR_FORBIDDEN_PATTERN.matcher(anchor.trim().toLowerCase()).replaceAll("")).replaceAll("_");
}
String normalizedAnchor = "_" + normalizeAnchor(AsciiDoc.SPACE_ESCAPE, anchor);
private String normalizeTitle(String document, String title) {
if (document == null)
return title.trim();
else {
// Reference to a title in another document is not yet supported in AsciiDoctor.
// The following workaround works with AsciiDoctor HTML output.
return "_" + title.trim().toLowerCase();
}
if (normalizedAnchor.endsWith("-"))
normalizedAnchor += "_";
return normalizedAnchor;
}
/**
@@ -161,8 +152,8 @@ public class AsciiDocBuilder extends AbstractMarkupDocBuilder {
return stringBuilder.toString();
}
protected String normalizedCrossReferenceAsString(String document, String anchor, String text) {
@Override
public String crossReferenceRawAsString(String document, String anchor, String text) {
StringBuilder stringBuilder = new StringBuilder();
stringBuilder.append(AsciiDoc.CROSS_REFERENCE_START);
if (document != null)
@@ -171,17 +162,11 @@ public class AsciiDocBuilder extends AbstractMarkupDocBuilder {
if (text != null)
stringBuilder.append(",").append(text);
stringBuilder.append(AsciiDoc.CROSS_REFERENCE_END);
return stringBuilder.toString();
}
@Override
public String crossReferenceAnchorAsString(String document, String anchor, String text) {
return normalizedCrossReferenceAsString(normalizeDocument(document), normalizeAnchor(anchor), text);
}
return stringBuilder.toString(); }
/**
 * Builds a cross-reference to a section identified by its title.
 * <p>
 * The document is passed through {@code normalizeDocument} and the title through
 * {@code normalizeAnchor} before being handed to {@link #crossReferenceRawAsString}.
 *
 * @param document target document, or {@code null}
 * @param title    title of the referenced section
 * @param text     link text, or {@code null}
 * @return the cross-reference markup
 */
@Override
public String crossReferenceAsString(String document, String title, String text) {
    // Single return: a second return statement here was unreachable and does not compile.
    return crossReferenceRawAsString(normalizeDocument(document), normalizeAnchor(title), text);
}
private String escapeTableCell(String cell) {

View File

@@ -37,7 +37,8 @@ public enum Markdown implements Markup {
BOLD("**"),
ITALIC("*"),
LIST_ENTRY("* "),
FILE_EXTENSION("md");
FILE_EXTENSION("md"),
SPACE_ESCAPE("-");
private final String markup;

View File

@@ -29,7 +29,6 @@ import org.apache.commons.lang3.Validate;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import java.util.regex.Pattern;
import static org.apache.commons.lang3.StringUtils.defaultString;
import static org.apache.commons.lang3.StringUtils.join;
@@ -39,9 +38,6 @@ import static org.apache.commons.lang3.StringUtils.join;
*/
public class MarkdownBuilder extends AbstractMarkupDocBuilder
{
private static final Pattern ANCHOR_FORBIDDEN_PATTERN = Pattern.compile("[^0-9a-zA-Z-_\\s]+");
private static final Pattern ANCHOR_SPACE_PATTERN = Pattern.compile("[\\s]+");
@Override
public MarkupDocBuilder documentTitle(String title){
documentTitle(Markdown.DOCUMENT_TITLE, title);
@@ -149,29 +145,32 @@ public class MarkdownBuilder extends AbstractMarkupDocBuilder
return this;
}
private static String normalizeReferenceAnchor(String anchor) {
return ANCHOR_SPACE_PATTERN.matcher(ANCHOR_FORBIDDEN_PATTERN.matcher(anchor.trim().toLowerCase()).replaceAll("")).replaceAll("-");
private String normalizeAnchor(String anchor) {
return normalizeAnchor(Markdown.SPACE_ESCAPE, anchor);
}
/**
 * Builds an HTML anchor tag whose {@code name} attribute is the normalized anchor.
 *
 * @param anchor raw anchor text, normalized via {@code normalizeAnchor}
 * @param text   not used by this implementation
 * @return the {@code <a name="..."></a>} element
 */
@Override
public String anchorAsString(String anchor, String text) {
    StringBuilder stringBuilder = new StringBuilder();
    // Emit the tag exactly once; appending it twice produced two anchors per call.
    stringBuilder.append("<a name=\"").append(normalizeAnchor(anchor)).append("\"></a>");
    return stringBuilder.toString();
}
@Override
public String crossReferenceAnchorAsString(String document, String anchor, String text) {
public String crossReferenceRawAsString(String document, String anchor, String text) {
StringBuilder stringBuilder = new StringBuilder();
if (text == null)
text = anchor.trim();
stringBuilder.append("[").append(text).append("](#").append(normalizeReferenceAnchor(anchor)).append(")");
stringBuilder.append("[").append(text).append("]").append("(");
if (document != null)
stringBuilder.append(document);
stringBuilder.append("#").append(anchor).append(")");
return stringBuilder.toString();
}
/**
 * Builds a cross-reference to a section identified by its title.
 * <p>
 * The title is normalized with {@code normalizeAnchor} first, because
 * {@link #crossReferenceRawAsString} writes the anchor it receives verbatim.
 *
 * @param document target document, or {@code null}
 * @param title    title of the referenced section
 * @param text     link text, or {@code null}
 * @return the cross-reference markup
 */
@Override
public String crossReferenceAsString(String document, String title, String text) {
    // Single return: a second return statement here was unreachable and does not compile.
    return crossReferenceRawAsString(document, normalizeAnchor(title), text);
}
private String escapeTableCell(String cell) {

View File

@@ -75,9 +75,10 @@ public class MarkupDocBuilderTest {
.italicTextLine("Italic text line b")
.unorderedList(Arrays.asList("Entry1", "Entry2", "Entry 2"))
.anchor("anchor", "text").newLine()
.anchor(" Simple anchor").newLine()
.anchor(" \u0240 µ&|ù This .:/-_# ").newLine()
.crossReferenceAnchor("./document.adoc", "anchor", "text").newLine()
.crossReferenceAnchor(" \u0240 µ&|ù This .:/-_ ").newLine()
.crossReferenceRaw("./document.adoc", "anchor", "text").newLine()
.crossReferenceRaw(" \u0240 µ&|ù This .:/-_ ").newLine()
.crossReference("./document.adoc", "anchor", "text").newLine()
.crossReference(" \u0240 µ&|ù This .:/-_ ").newLine()
.writeToFile("build/tmp", "test", StandardCharsets.UTF_8);
@@ -104,10 +105,11 @@ public class MarkupDocBuilderTest {
.italicTextLine("Italic text line b")
.unorderedList(Arrays.asList("Entry1", "Entry2", "Entry 2"))
.anchor("anchor", "text").newLine()
.anchor(" Simple anchor").newLine()
.anchor(" \u0240 µ&|ù This .:/-_# ").newLine()
.crossReferenceAnchor("./document.adoc", "anchor", "text").newLine()
.crossReferenceAnchor(" \u0240 µ&|ù This .:/-_ ").newLine()
.crossReference("./document.adoc", "anchor", "text").newLine()
.crossReferenceRaw("./document.md", "anchor", "text").newLine()
.crossReferenceRaw(" \u0240 µ&|ù This .:/-_ ").newLine()
.crossReference("./document.md", "anchor", "text").newLine()
.crossReference(" \u0240 µ&|ù This .:/-_ ").newLine()
.writeToFile("build/tmp", "test", StandardCharsets.UTF_8);
}