Regex clean-up after converting Word to Markdown with pandoc

Question

After a Word (.doc/.docx) file is converted to a Markdown file with pandoc, every image is emitted in the following form:

![](./url path){width="3.46875in" height="1.0729166666666667in"}

However, the front end renders the Markdown with the v-md-preview component, which does not recognize the trailing width/height attribute block and displays it as literal text, so it has to be removed. Removing it has no noticeable effect on how the image is displayed.

Method

Backend processing:

Thoughts

Use a Java regular expression to match the substring that starts with `{` and ends with `}`, and replace it with the empty string "".

Regex: `(\{width)(.*?)(\})` matches a substring that starts with `{width` and ends with `}`

 public static void main(String[] args) {<!-- -->
        //string
        String line = "abcd{width=jfdksljfsdfjdslk}11111";
        // regular expression
        String pattern = "(\{width)(.*?)(\})"; //Java regular expressions are grouped in brackets, the first bracket means starting with "{width", the third bracket means starting with } at the end, the middle bracket is the target value
        String replaceStr = "";
        Pattern r = Pattern.compile(pattern);
        Matcher m = r. matcher(line);
        while (m. find()) {<!-- -->
           m.group();
           replaceStr = m.replaceAll(""); //Clear the matched content
        }

Final output: abcd11111

solve!
Reference from: regular match

Follow-up

In practice, pandoc often puts a line break between the width and height attributes, so the single regex above no longer matches, and I could not find a regex that matches across lines. The removal is therefore split into two halves, one match-and-replace for the width part and one for the height part, and replacements for a few other possible conversion glitches were added. More will be added whenever further conversion errors turn up.

0818
1. Newly added picture centering processing (regular replacement, changing md picture style to html picture tag, handwriting centering style)
2. Added centering of image captions (similar to 1: the caption is wrapped in `<center></center>`)

3. Added first-line indentation for lettered sub-headings such as a. b. c. by prefixing them with `&nbsp;&nbsp;&nbsp;&nbsp;`

Item 3 has a problem: when the text is saved, the entity is stored as an ordinary space, so the line is still not indented when the page is actually displayed. For now the only option is to manually do a global replace of the four spaces; adding neither `\` nor `&nbsp;` works. A better approach may be added later.

package Tool;

import cn.hutool.core.io.FileUtil;

import java.io.File;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Cleans up the Markdown that pandoc produces from a Word document so that it renders
 * correctly in the front-end preview component.
 *
 * <p>Each transformation is a pure {@code String -> String} function that returns its input
 * unchanged when there is nothing to rewrite. {@link #main(String[])} chains them over a file.
 */
public class FileTrans {

    /** Input file read by {@link #main(String[])} when no argument is supplied. */
    private static final String DEFAULT_INPUT = "D:\\pandoc\\Test2.md";

    /** Output file written by {@link #main(String[])} when no second argument is supplied. */
    private static final String DEFAULT_OUTPUT = "D:\\pandoc\\new.md";

    /**
     * Opening half of a pandoc attribute block: {@code {width=VALUE}, where VALUE is either a
     * double-quoted string or a run of non-blank, non-brace characters. The closing brace is
     * consumed too when it follows directly (width-only block).
     */
    private static final Pattern WIDTH_ATTR =
            Pattern.compile("\\{\\s*width=(?:\"[^\"]*\"|[^\\s{}]*)(?:\\s*\\})?");

    /**
     * Closing half of a pandoc attribute block: {@code height=VALUE}} together with the
     * whitespace (possibly a line break) in front of it. An opening brace is consumed too when
     * present (height-only block).
     */
    private static final Pattern HEIGHT_ATTR =
            Pattern.compile("\\{?\\s*height=(?:\"[^\"]*\"|[^\\s{}]*)\\s*\\}");

    /**
     * A figure caption: {@code Figure} immediately followed by a digit, up to the end of the
     * line. The look-behind skips captions that are already wrapped in {@code <center>}.
     */
    private static final Pattern FIGURE_CAPTION =
            Pattern.compile("(?<!<center>)Figure\\d[^\\r\\n]*");

    /**
     * A Markdown image {@code ![alt](target)} on a single line. Group 1 is the target, which
     * may itself contain one level of balanced parentheses. Possessive quantifiers keep the
     * match linear on long lines.
     */
    private static final Pattern MD_IMAGE =
            Pattern.compile("!\\[[^\\]\\r\\n]*\\]\\(((?:[^()\\r\\n]++|\\([^()\\r\\n]*+\\))*+)\\)");

    /** A lettered list marker ({@code a.}, {@code b.}, ...) at the start of any line. */
    private static final Pattern LETTERED_ITEM = Pattern.compile("(?m)^[a-z]\\.");

    /** Replacement for {@link #LETTERED_ITEM}: four non-breaking spaces, then the marker. */
    private static final String INDENTED_ITEM = " &nbsp; &nbsp; &nbsp; &nbsp;$0";

    /**
     * Replaces every match of a regular expression in the document.
     *
     * <p>This is a thin wrapper around {@link String#replaceAll(String, String)}, so
     * {@code strOld} is a regular expression (use {@link Pattern#quote(String)} for literal
     * text) and {@code $}/{@code \} in {@code strNew} have their usual replacement meaning.
     *
     * @param mdStr  the Markdown text to process
     * @param strOld the regular expression to search for
     * @param strNew the replacement string
     * @return the text with every match replaced
     */
    public static String turnStr(String mdStr, String strOld, String strNew) {
        return mdStr.replaceAll(strOld, strNew);
    }

    /**
     * Removes the {@code {width=...} part of the attribute blocks pandoc appends to images.
     *
     * <p>Only the width attribute is removed here because pandoc may put the height attribute
     * on the following line; {@link #delUnUsefulStr1(String)} removes the remainder.
     *
     * @param text the Markdown text to process; may be {@code null} or empty
     * @return the text without width attributes, or {@code text} itself when it is
     *         {@code null}, empty, or contains none
     */
    public static String delUnUsefulStr(String text) {
        if (text == null || text.isEmpty()) {
            return text;
        }
        // replaceAll returns the input unchanged when nothing matches, so a document
        // without attribute blocks is never emptied.
        return WIDTH_ATTR.matcher(text).replaceAll("");
    }

    /**
     * Removes the {@code height=...}} part of the attribute blocks pandoc appends to images,
     * including the whitespace or line break that precedes it.
     *
     * @param text the Markdown text to process; may be {@code null} or empty
     * @return the text without height attributes, or {@code text} itself when it is
     *         {@code null}, empty, or contains none
     */
    public static String delUnUsefulStr1(String text) {
        if (text == null || text.isEmpty()) {
            return text;
        }
        return HEIGHT_ATTR.matcher(text).replaceAll("");
    }

    /**
     * Centers figure captions by wrapping them in {@code <center></center>}.
     *
     * <p>A caption is the text from {@code Figure} followed by a digit up to the end of that
     * line. Each caption becomes {@code <center>caption</center>} plus a {@code \n}; the
     * original line terminator is left in place after it. Captions that are already wrapped
     * are skipped, so the method can be applied repeatedly.
     *
     * @param text the Markdown text to process; may be {@code null} or empty
     * @return the text with every caption wrapped
     */
    public static String centerStr(String text) {
        if (text == null || text.isEmpty()) {
            return text;
        }
        Matcher matcher = FIGURE_CAPTION.matcher(text);
        StringBuffer result = new StringBuffer(text.length() + 64);
        // One pass over the matches: each caption is rewritten exactly once, even when the
        // same caption text occurs several times.
        while (matcher.find()) {
            String centered = "<center>" + matcher.group() + "</center>\n";
            // The caption is arbitrary user text; quote it so "$" and "\" stay literal.
            matcher.appendReplacement(result, Matcher.quoteReplacement(centered));
        }
        matcher.appendTail(result);
        return result.toString();
    }

    /**
     * Centers images by converting Markdown image syntax to an HTML image tag.
     *
     * <p>{@code ![](/pss/abf/template/xxx/image4.png)} becomes
     * {@code <img src="/pss/abf/template/xxx/image4.png" style="display:block; margin:auto">}.
     * The alt text is dropped. Text that only resembles an image (for example a missing
     * {@code (}) is left untouched.
     *
     * @param text the Markdown text to process; may be {@code null} or empty
     * @return the text with every Markdown image replaced by a centered {@code <img>} tag
     */
    public static String centerPicture(String text) {
        if (text == null || text.isEmpty()) {
            return text;
        }
        Matcher matcher = MD_IMAGE.matcher(text);
        StringBuffer result = new StringBuffer(text.length() + 64);
        while (matcher.find()) {
            // Attributes are separated by whitespace only; a comma between them is not
            // valid HTML and stops strict Markdown parsers from recognizing the tag.
            String imgTag = "<img src=\"" + matcher.group(1)
                    + "\" style=\"display:block; margin:auto\">";
            matcher.appendReplacement(result, Matcher.quoteReplacement(imgTag));
        }
        matcher.appendTail(result);
        return result.toString();
    }

    /**
     * Indents lettered sub-headings ({@code a.}, {@code b.}, {@code c.}, ...) by prefixing the
     * marker with four {@code &nbsp;} entities.
     *
     * <p>Only a single lowercase letter directly followed by a period at the very start of a
     * line is treated as a marker, so ordinary sentences and words ending in a period are not
     * affected.
     *
     * @param text the Markdown text to process; may be {@code null} or empty
     * @return the text with every lettered marker indented
     */
    public static String turnIndentation(String text) {
        if (text == null || text.isEmpty()) {
            return text;
        }
        return LETTERED_ITEM.matcher(text).replaceAll(INDENTED_ITEM);
    }

    /**
     * Runs the whole clean-up pipeline on a Markdown file.
     *
     * @param args optional: {@code args[0]} is the input file and {@code args[1]} the output
     *             file; the built-in default paths are used for whichever is missing
     * @throws IOException if the input cannot be read or the output cannot be written
     */
    public static void main(String[] args) throws IOException {
        String inputPath = args.length > 0 ? args[0] : DEFAULT_INPUT;
        String outputPath = args.length > 1 ? args[1] : DEFAULT_OUTPUT;

        String mdStr = new String(Files.readAllBytes(Paths.get(inputPath)), StandardCharsets.UTF_8);
        // Delete width
        String s1 = delUnUsefulStr(mdStr);
        // Delete height
        String s2 = delUnUsefulStr1(s1);
        // Replace the image path. The production storage path is
        // /pss/template/<third-level menu>/images/1.png (business requirement).
        // The source path is literal text, so it is quoted to keep "." from matching any character.
        String s3 = turnStr(s2, Pattern.quote("./test2/images/media"), "/pss/abf/template//images");
        // Replace the redundant * around a period in titles ("**.**" -> ".").
        // The period is escaped so that bold single characters such as "**a**" are kept.
        String s4 = turnStr(s3, "\\*\\*\\.\\*\\*", ".");
        // Replace runs of four *
        String s5 = turnStr(s4, "\\*\\*\\*\\*", " ");
        // Center the figure captions
        String centerStr = centerStr(s5);
        // Convert Markdown images to centered HTML image tags
        String centerPic = centerPicture(centerStr);
        // Indent the first line of sub-headings such as a. b. c.
        String finalStr = turnIndentation(centerPic);

        // Write the processed text back to a file, creating the target directory if needed.
        Path target = Paths.get(outputPath);
        if (target.getParent() != null) {
            Files.createDirectories(target.getParent());
        }
        Files.write(target, finalStr.getBytes(StandardCharsets.UTF_8));
    }
}


Form processing

Table formatting is always lost when pandoc converts Word to Markdown, so this step also has to be handled manually. I found an online conversion site: copy the table directly from Word, convert it there, and paste the result over the broken table.

markdown table conversion