Merge pull request #10 from Kabhal/normalization
fixed #9 Refactor anchor and cross-references
This commit is contained in:
@@ -46,6 +46,7 @@ dependencies {
|
||||
compile 'org.apache.commons:commons-lang3'
|
||||
compile "commons-io:commons-io"
|
||||
compile 'com.google.guava:guava'
|
||||
compile "commons-codec:commons-codec"
|
||||
testCompile 'junit:junit'
|
||||
testCompile 'ch.qos.logback:logback-classic'
|
||||
}
|
||||
@@ -59,6 +60,7 @@ dependencyManagement {
|
||||
dependency "org.apache.commons:commons-lang3:3.2.1"
|
||||
dependency "commons-io:commons-io:2.4"
|
||||
dependency "com.google.guava:guava:18.0"
|
||||
dependency "commons-codec:commons-codec:1.6"
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -18,6 +18,7 @@
|
||||
*/
|
||||
package io.github.robwin.markup.builder;
|
||||
|
||||
import org.apache.commons.codec.digest.DigestUtils;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
@@ -26,35 +27,49 @@ import java.io.IOException;
|
||||
import java.nio.charset.Charset;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Paths;
|
||||
import java.text.Normalizer;
|
||||
import java.util.List;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
/**
|
||||
* @author Robert Winkler
|
||||
*/
|
||||
public abstract class AbstractMarkupDocBuilder implements MarkupDocBuilder {
|
||||
|
||||
|
||||
private static final Pattern ANCHOR_FORBIDDEN_PATTERN = Pattern.compile("[^0-9a-zA-Z-_]+");
|
||||
private static final Pattern ANCHOR_SPACE_PATTERN = Pattern.compile("[\\s]+");
|
||||
|
||||
protected StringBuilder documentBuilder = new StringBuilder();
|
||||
protected String newLine = System.getProperty("line.separator");
|
||||
protected Logger logger = LoggerFactory.getLogger(getClass());
|
||||
|
||||
protected void documentTitle(Markup markup, String title){
|
||||
anchor(title).newLine();
|
||||
documentBuilder.append(markup).append(title).append(newLine).append(newLine);
|
||||
}
|
||||
|
||||
protected void sectionTitleLevel1(Markup markup, String title){
|
||||
documentBuilder.append(newLine).append(markup).append(title).append(newLine);
|
||||
documentBuilder.append(newLine);
|
||||
anchor(title).newLine();
|
||||
documentBuilder.append(markup).append(title).append(newLine);
|
||||
}
|
||||
|
||||
protected void sectionTitleLevel2(Markup markup, String title){
|
||||
documentBuilder.append(newLine).append(markup).append(title).append(newLine);
|
||||
documentBuilder.append(newLine);
|
||||
anchor(title).newLine();
|
||||
documentBuilder.append(markup).append(title).append(newLine);
|
||||
}
|
||||
|
||||
protected void sectionTitleLevel3(Markup markup, String title){
|
||||
documentBuilder.append(newLine).append(markup).append(title).append(newLine);
|
||||
documentBuilder.append(newLine);
|
||||
anchor(title).newLine();
|
||||
documentBuilder.append(markup).append(title).append(newLine);
|
||||
}
|
||||
|
||||
protected void sectionTitleLevel4(Markup markup, String title){
|
||||
documentBuilder.append(newLine).append(markup).append(title).append(newLine);
|
||||
documentBuilder.append(newLine);
|
||||
anchor(title).newLine();
|
||||
documentBuilder.append(markup).append(title).append(newLine);
|
||||
}
|
||||
|
||||
@Override
|
||||
@@ -114,20 +129,38 @@ public abstract class AbstractMarkupDocBuilder implements MarkupDocBuilder {
|
||||
return anchor(anchor, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Generic normalization algorithm for all markups
|
||||
*/
|
||||
protected String normalizeAnchor(Markup spaceEscape, String anchor) {
|
||||
String normalizedAnchor = anchor.trim();
|
||||
normalizedAnchor = Normalizer.normalize(normalizedAnchor, Normalizer.Form.NFD);
|
||||
normalizedAnchor = normalizedAnchor.toLowerCase();
|
||||
normalizedAnchor = ANCHOR_SPACE_PATTERN.matcher(normalizedAnchor).replaceAll(spaceEscape.toString());
|
||||
|
||||
String validAnchor = ANCHOR_FORBIDDEN_PATTERN.matcher(normalizedAnchor).replaceAll("");
|
||||
if (validAnchor.length() != normalizedAnchor.length())
|
||||
normalizedAnchor = DigestUtils.md5Hex(anchor);
|
||||
else
|
||||
normalizedAnchor = validAnchor;
|
||||
|
||||
return normalizedAnchor;
|
||||
}
|
||||
|
||||
@Override
|
||||
public MarkupDocBuilder crossReferenceAnchor(String document, String anchor, String text) {
|
||||
documentBuilder.append(crossReferenceAnchorAsString(document, anchor, text));
|
||||
public MarkupDocBuilder crossReferenceRaw(String document, String anchor, String text) {
|
||||
documentBuilder.append(crossReferenceRawAsString(document, anchor, text));
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public MarkupDocBuilder crossReferenceAnchor(String anchor, String text) {
|
||||
return crossReferenceAnchor(null, anchor, text);
|
||||
public MarkupDocBuilder crossReferenceRaw(String anchor, String text) {
|
||||
return crossReferenceRaw(null, anchor, text);
|
||||
}
|
||||
|
||||
@Override
|
||||
public MarkupDocBuilder crossReferenceAnchor(String anchor) {
|
||||
return crossReferenceAnchor(null, anchor, null);
|
||||
public MarkupDocBuilder crossReferenceRaw(String anchor) {
|
||||
return crossReferenceRaw(null, anchor, null);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
||||
@@ -66,13 +66,13 @@ public interface MarkupDocBuilder {
|
||||
|
||||
String anchorAsString(String anchor, String text);
|
||||
|
||||
MarkupDocBuilder crossReferenceAnchor(String document, String anchor, String text);
|
||||
MarkupDocBuilder crossReferenceRaw(String document, String anchor, String text);
|
||||
|
||||
MarkupDocBuilder crossReferenceAnchor(String anchor, String text);
|
||||
MarkupDocBuilder crossReferenceRaw(String anchor, String text);
|
||||
|
||||
MarkupDocBuilder crossReferenceAnchor(String anchor);
|
||||
MarkupDocBuilder crossReferenceRaw(String anchor);
|
||||
|
||||
String crossReferenceAnchorAsString(String document, String anchor, String text);
|
||||
String crossReferenceRawAsString(String document, String anchor, String text);
|
||||
|
||||
MarkupDocBuilder crossReference(String document, String title, String text);
|
||||
|
||||
|
||||
@@ -43,7 +43,8 @@ public enum AsciiDoc implements Markup {
|
||||
CROSS_REFERENCE_END(">>"),
|
||||
ANCHOR_START("[["),
|
||||
ANCHOR_END("]]"),
|
||||
FILE_EXTENSION("adoc");
|
||||
FILE_EXTENSION("adoc"),
|
||||
SPACE_ESCAPE("_");
|
||||
|
||||
private final String markup;
|
||||
|
||||
|
||||
@@ -38,10 +38,6 @@ import static org.apache.commons.lang3.StringUtils.*;
|
||||
*/
|
||||
public class AsciiDocBuilder extends AbstractMarkupDocBuilder {
|
||||
|
||||
private static final Pattern ANCHOR_FORBIDDEN_PATTERN = Pattern.compile("[^\\p{ASCII}]+");
|
||||
private static final Pattern SPACE_PATTERN = Pattern.compile("[\\s]+");
|
||||
|
||||
|
||||
@Override
|
||||
public MarkupDocBuilder documentTitle(String title){
|
||||
documentTitle(AsciiDoc.DOCUMENT_TITLE, title);
|
||||
@@ -128,17 +124,12 @@ public class AsciiDocBuilder extends AbstractMarkupDocBuilder {
|
||||
}
|
||||
|
||||
private String normalizeAnchor(String anchor) {
|
||||
return "_" + SPACE_PATTERN.matcher(ANCHOR_FORBIDDEN_PATTERN.matcher(anchor.trim().toLowerCase()).replaceAll("")).replaceAll("_");
|
||||
}
|
||||
String normalizedAnchor = "_" + normalizeAnchor(AsciiDoc.SPACE_ESCAPE, anchor);
|
||||
|
||||
private String normalizeTitle(String document, String title) {
|
||||
if (document == null)
|
||||
return title.trim();
|
||||
else {
|
||||
// Reference to a title in another document is not yet supported in AsciiDoctor.
|
||||
// The following workaround works with AsciiDoctor HTML output.
|
||||
return "_" + title.trim().toLowerCase();
|
||||
}
|
||||
if (normalizedAnchor.endsWith("-"))
|
||||
normalizedAnchor += "_";
|
||||
|
||||
return normalizedAnchor;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -161,8 +152,8 @@ public class AsciiDocBuilder extends AbstractMarkupDocBuilder {
|
||||
return stringBuilder.toString();
|
||||
}
|
||||
|
||||
|
||||
protected String normalizedCrossReferenceAsString(String document, String anchor, String text) {
|
||||
@Override
|
||||
public String crossReferenceRawAsString(String document, String anchor, String text) {
|
||||
StringBuilder stringBuilder = new StringBuilder();
|
||||
stringBuilder.append(AsciiDoc.CROSS_REFERENCE_START);
|
||||
if (document != null)
|
||||
@@ -171,17 +162,11 @@ public class AsciiDocBuilder extends AbstractMarkupDocBuilder {
|
||||
if (text != null)
|
||||
stringBuilder.append(",").append(text);
|
||||
stringBuilder.append(AsciiDoc.CROSS_REFERENCE_END);
|
||||
return stringBuilder.toString();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String crossReferenceAnchorAsString(String document, String anchor, String text) {
|
||||
return normalizedCrossReferenceAsString(normalizeDocument(document), normalizeAnchor(anchor), text);
|
||||
}
|
||||
return stringBuilder.toString(); }
|
||||
|
||||
@Override
|
||||
public String crossReferenceAsString(String document, String title, String text) {
|
||||
return normalizedCrossReferenceAsString(normalizeDocument(document), normalizeTitle(document, title), text);
|
||||
return crossReferenceRawAsString(normalizeDocument(document), normalizeAnchor(title), text);
|
||||
}
|
||||
|
||||
private String escapeTableCell(String cell) {
|
||||
|
||||
@@ -37,7 +37,8 @@ public enum Markdown implements Markup {
|
||||
BOLD("**"),
|
||||
ITALIC("*"),
|
||||
LIST_ENTRY("* "),
|
||||
FILE_EXTENSION("md");
|
||||
FILE_EXTENSION("md"),
|
||||
SPACE_ESCAPE("-");
|
||||
|
||||
private final String markup;
|
||||
|
||||
|
||||
@@ -29,7 +29,6 @@ import org.apache.commons.lang3.Validate;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import static org.apache.commons.lang3.StringUtils.defaultString;
|
||||
import static org.apache.commons.lang3.StringUtils.join;
|
||||
@@ -39,9 +38,6 @@ import static org.apache.commons.lang3.StringUtils.join;
|
||||
*/
|
||||
public class MarkdownBuilder extends AbstractMarkupDocBuilder
|
||||
{
|
||||
private static final Pattern ANCHOR_FORBIDDEN_PATTERN = Pattern.compile("[^0-9a-zA-Z-_\\s]+");
|
||||
private static final Pattern ANCHOR_SPACE_PATTERN = Pattern.compile("[\\s]+");
|
||||
|
||||
@Override
|
||||
public MarkupDocBuilder documentTitle(String title){
|
||||
documentTitle(Markdown.DOCUMENT_TITLE, title);
|
||||
@@ -149,29 +145,32 @@ public class MarkdownBuilder extends AbstractMarkupDocBuilder
|
||||
return this;
|
||||
}
|
||||
|
||||
private static String normalizeReferenceAnchor(String anchor) {
|
||||
return ANCHOR_SPACE_PATTERN.matcher(ANCHOR_FORBIDDEN_PATTERN.matcher(anchor.trim().toLowerCase()).replaceAll("")).replaceAll("-");
|
||||
private String normalizeAnchor(String anchor) {
|
||||
return normalizeAnchor(Markdown.SPACE_ESCAPE, anchor);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String anchorAsString(String anchor, String text) {
|
||||
StringBuilder stringBuilder = new StringBuilder();
|
||||
stringBuilder.append("<a name=\"").append(normalizeReferenceAnchor(anchor)).append("\"></a>");
|
||||
stringBuilder.append("<a name=\"").append(normalizeAnchor(anchor)).append("\"></a>");
|
||||
return stringBuilder.toString();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String crossReferenceAnchorAsString(String document, String anchor, String text) {
|
||||
public String crossReferenceRawAsString(String document, String anchor, String text) {
|
||||
StringBuilder stringBuilder = new StringBuilder();
|
||||
if (text == null)
|
||||
text = anchor.trim();
|
||||
stringBuilder.append("[").append(text).append("](#").append(normalizeReferenceAnchor(anchor)).append(")");
|
||||
stringBuilder.append("[").append(text).append("]").append("(");
|
||||
if (document != null)
|
||||
stringBuilder.append(document);
|
||||
stringBuilder.append("#").append(anchor).append(")");
|
||||
return stringBuilder.toString();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String crossReferenceAsString(String document, String title, String text) {
|
||||
return crossReferenceAnchorAsString(document, title, text);
|
||||
return crossReferenceRawAsString(document, normalizeAnchor(title), text);
|
||||
}
|
||||
|
||||
private String escapeTableCell(String cell) {
|
||||
|
||||
@@ -75,9 +75,10 @@ public class MarkupDocBuilderTest {
|
||||
.italicTextLine("Italic text line b")
|
||||
.unorderedList(Arrays.asList("Entry1", "Entry2", "Entry 2"))
|
||||
.anchor("anchor", "text").newLine()
|
||||
.anchor(" Simple anchor").newLine()
|
||||
.anchor(" \u0240 µ&|ù This .:/-_# ").newLine()
|
||||
.crossReferenceAnchor("./document.adoc", "anchor", "text").newLine()
|
||||
.crossReferenceAnchor(" \u0240 µ&|ù This .:/-_ ").newLine()
|
||||
.crossReferenceRaw("./document.adoc", "anchor", "text").newLine()
|
||||
.crossReferenceRaw(" \u0240 µ&|ù This .:/-_ ").newLine()
|
||||
.crossReference("./document.adoc", "anchor", "text").newLine()
|
||||
.crossReference(" \u0240 µ&|ù This .:/-_ ").newLine()
|
||||
.writeToFile("build/tmp", "test", StandardCharsets.UTF_8);
|
||||
@@ -104,10 +105,11 @@ public class MarkupDocBuilderTest {
|
||||
.italicTextLine("Italic text line b")
|
||||
.unorderedList(Arrays.asList("Entry1", "Entry2", "Entry 2"))
|
||||
.anchor("anchor", "text").newLine()
|
||||
.anchor(" Simple anchor").newLine()
|
||||
.anchor(" \u0240 µ&|ù This .:/-_# ").newLine()
|
||||
.crossReferenceAnchor("./document.adoc", "anchor", "text").newLine()
|
||||
.crossReferenceAnchor(" \u0240 µ&|ù This .:/-_ ").newLine()
|
||||
.crossReference("./document.adoc", "anchor", "text").newLine()
|
||||
.crossReferenceRaw("./document.md", "anchor", "text").newLine()
|
||||
.crossReferenceRaw(" \u0240 µ&|ù This .:/-_ ").newLine()
|
||||
.crossReference("./document.md", "anchor", "text").newLine()
|
||||
.crossReference(" \u0240 µ&|ù This .:/-_ ").newLine()
|
||||
.writeToFile("build/tmp", "test", StandardCharsets.UTF_8);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user