Java操作Word用到的工具类库基于POI 4.1.0版本,可参考POI官方API,阅读时可以使用Google自带的全文翻译,很方便。注意文章中操作的Word都是docx后缀的,即Word 2007及以上版本的格式,如果需要操作Word 2003版本(doc后缀)还需自行转换。
后续将更新:从Excel读取表格数据写入到Word、从另一个Word读取模板表格到当前Word。代码中每一个功能都提供了test类,你只需要拉下代码、修改文件目录即可执行,一步到位。
下面进入主题。文章中只贴关键代码,全部代码请到GitHub拉取;如果感觉对你有帮助,请在GitHub上点亮你尊贵的小星星。码砖不易,转载请注明出处,谢谢。
pom.xml
<?xml version="1.0" encoding="UTF-8"?> <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> <modelVersion>4.0.0</modelVersion> <groupId>com.corey</groupId> <artifactId>wordtools</artifactId> <version>1.0-SNAPSHOT</version> <dependencies> <!-- !! POI依赖包 --> <dependency> <groupId>org.apache.poi</groupId> <artifactId>poi</artifactId> <version>4.1.0</version> </dependency> <dependency> <groupId>org.apache.poi</groupId> <artifactId>poi-scratchpad</artifactId> <version>4.1.0</version> </dependency> <dependency> <groupId>org.apache.poi</groupId> <artifactId>poi-ooxml</artifactId> <version>4.1.0</version> </dependency> <dependency> <groupId>org.apache.poi</groupId> <artifactId>poi-ooxml-schemas</artifactId> <version>4.1.0</version> </dependency> <!-- POI依赖包 !!--> <!--out net begin --> <dependency> <groupId>org.docx4j</groupId> <artifactId>docx4j</artifactId> <version>3.3.6</version> </dependency> <dependency> <groupId>org.docx4j</groupId> <artifactId>docx4j-ImportXHTML</artifactId> <version>3.3.6</version> </dependency> <dependency> <groupId>org.docx4j</groupId> <artifactId>docx4j-export-fo</artifactId> <version>3.3.6</version> </dependency> <dependency> <groupId>org.jsoup</groupId> <artifactId>jsoup</artifactId> <version>1.11.2</version> </dependency> <!--out net end --> <!-- https://mvnrepository.com/artifact/org.springframework/spring-core --> <!--只是使用到用spring的工具类--> <dependency> <groupId>org.springframework</groupId> <artifactId>spring-core</artifactId> <version>5.2.1.RELEASE</version> </dependency> <dependency> <groupId>commons-io</groupId> <artifactId>commons-io</artifactId> <version>2.5</version> </dependency> <!-- https://mvnrepository.com/artifact/javax.servlet/javax.servlet-api --> <dependency> <groupId>javax.servlet</groupId> <artifactId>javax.servlet-api</artifactId> <version>4.0.1</version> 
<scope>provided</scope> </dependency> </dependencies> </project> 复制代码
POI合并文档的基本思路:docx格式的Word文档正文本身是一个xml文件,先把不同文档xml的xmlns去重合并,添加固定的格式标签,然后把不同xml里面的元素都拼接到一起,组成一个新的xml文件,输出成为一个新的Word。更多代码请查看项目的magerword目录。
package magerword;

import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.xmlbeans.XmlOptions;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTBody;
import org.springframework.util.ObjectUtils;

import java.io.*;
import java.util.*;

/**
 * Merges several .docx files into one document by concatenating the XML of their bodies.
 *
 * <p>Each body is serialized as an {@code <xml-fragment ...>} string; the namespace
 * declarations of all bodies are de-duplicated into one header, the inner elements are
 * concatenated, and the result replaces the body of the first document.
 *
 * @author corey
 */
public class MagerUtil {

    /** Token that starts every namespace declaration in a serialized body header. */
    private static final String XMLNS = "xmlns";

    /**
     * Merges the given Word documents, in order, and writes the result to the hard-coded
     * output path below (replace it with a path valid on your machine).
     *
     * <p>Nothing is written when no file path is supplied. A source file that cannot be
     * opened now fails with its real exception instead of being swallowed and surfacing
     * later as a {@code NullPointerException}.
     *
     * @param filepaths paths of the .docx files to merge; the first one is the base document
     * @throws Exception if a source cannot be read, the merged XML cannot be parsed,
     *                   or the output cannot be written
     */
    public static void mergeDoc(String... filepaths) throws Exception {
        List<CTBody> ctBodyList = new ArrayList<>();
        List<XWPFDocument> srcDocuments = new ArrayList<>();
        try {
            for (String filepath : filepaths) {
                XWPFDocument srcDocument;
                try (InputStream in = new FileInputStream(filepath)) {
                    srcDocument = new XWPFDocument(OPCPackage.open(in));
                }
                srcDocuments.add(srcDocument);
                ctBodyList.add(srcDocument.getDocument().getBody());
            }
            if (!ObjectUtils.isEmpty(ctBodyList)) {
                appendBody(ctBodyList);
                // Opened only once there is something to write, and always closed.
                try (OutputStream dest =
                             new FileOutputStream("/Users/corey/Desktop/temp/wordtools/合并文档3.docx")) {
                    srcDocuments.get(0).write(dest);
                }
            }
        } finally {
            for (XWPFDocument document : srcDocuments) {
                try {
                    document.close();
                } catch (IOException e) {
                    // Best effort: a failed close must not mask the primary outcome.
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Concatenates the content of every body and stores the result in the first body.
     *
     * @param ctBodyList bodies to merge; element 0 receives the merged content
     * @throws Exception if the assembled XML cannot be parsed back into a body
     */
    private static void appendBody(List<CTBody> ctBodyList) throws Exception {
        StringBuilder allXmlns = new StringBuilder();
        StringBuilder allElements = new StringBuilder();
        for (CTBody body : ctBodyList) {
            // Default serialization wraps the content in <xml-fragment ...>, which is the
            // wrapper re-created by distinctXmlns and the closing tag appended below.
            String xml = body.xmlText();
            int headerEnd = xml.indexOf(">");
            if (headerEnd < 0) {
                continue;
            }
            // An empty body serializes as a self-closing tag and has no inner elements.
            boolean selfClosing = headerEnd > 0 && xml.charAt(headerEnd - 1) == '/';
            int xmlnsStart = xml.indexOf(XMLNS);
            if (xmlnsStart >= 0 && xmlnsStart < headerEnd) {
                // The trailing space keeps declarations of consecutive documents apart.
                allXmlns.append(xml, xmlnsStart, selfClosing ? headerEnd - 1 : headerEnd).append(' ');
            }
            int contentEnd = xml.lastIndexOf("</");
            if (!selfClosing && contentEnd > headerEnd) {
                allElements.append(xml, headerEnd + 1, contentEnd);
            }
        }
        String distinctPrefix = distinctXmlns(allXmlns.toString());
        CTBody makeBody = CTBody.Factory.parse(distinctPrefix + allElements + "</xml-fragment>");
        ctBodyList.get(0).set(makeBody);
    }

    /**
     * Builds an {@code <xml-fragment ...>} opening tag containing each namespace prefix
     * found in {@code prefix} exactly once.
     *
     * <p>The input is split at every occurrence of {@code xmlns}; the final declaration,
     * which has no following {@code xmlns} to delimit it, is included as well.
     *
     * @param prefix concatenated namespace declarations, e.g. {@code xmlns:a="x" xmlns:b="y"}
     * @return the opening tag carrying the de-duplicated declarations
     */
    public static String distinctXmlns(String prefix) {
        Set<String> declarations = new LinkedHashSet<>();
        int start = prefix.indexOf(XMLNS);
        while (start >= 0) {
            int end = prefix.indexOf(XMLNS, start + 1);
            String declaration = end < 0 ? prefix.substring(start) : prefix.substring(start, end);
            declarations.add(declaration.trim());
            start = end;
        }
        StringBuilder sb = new StringBuilder("<xml-fragment ");
        for (Map.Entry<String, String> entry : distinctXmlns(declarations).entrySet()) {
            sb.append(" ");
            sb.append(entry.getKey());
            sb.append("=");
            sb.append(entry.getValue());
        }
        sb.append(">");
        return sb.toString();
    }

    /**
     * Collapses namespace declarations by prefix. The same prefix may appear with different
     * URIs; in that case the declaration encountered last in iteration order wins.
     *
     * @param set declarations of the form {@code xmlns:prefix="uri"}; entries without
     *            {@code =} are skipped
     * @return map from the text before {@code =} to the text after it, in encounter order
     */
    public static Map<String, String> distinctXmlns(Set<?> set) {
        Map<String, String> map = new LinkedHashMap<>();
        for (Object item : set) {
            String xmlns = ((String) item).trim();
            int separator = xmlns.indexOf("=");
            if (separator > 0) {
                map.put(xmlns.substring(0, separator), xmlns.substring(separator + 1));
            }
        }
        return map;
    }

    /**
     * Closes the given streams, ignoring {@code null} entries and printing (not rethrowing)
     * any {@link IOException}.
     *
     * @param inputStream streams to close
     */
    public static void closeStream(InputStream... inputStream) {
        if (inputStream == null) {
            return;
        }
        for (InputStream stream : inputStream) {
            if (stream != null) {
                try {
                    stream.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }
}
替换占位符的思路:首先需要遍历文档中所有的段落和表格,再逐个匹配占位符与你需要替换的参数。Word中段落是XWPFParagraph对象,表格是XWPFTable对象。更多代码请查看项目的replacemark目录。
package replacemark;

import org.apache.poi.xwpf.usermodel.*;
import org.springframework.util.StringUtils;

import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Replaces placeholders in the paragraphs and tables of a Word document.
 *
 * @author corey
 * @version 1.0
 */
public class ReplaceUtil {

    /**
     * Matches a {@code ${name}} placeholder. The previous pattern used forward slashes
     * where backslash escapes were intended, which made {@link Pattern#compile} throw.
     */
    private static final Pattern PLACEHOLDER =
            Pattern.compile("\\$\\{(.+?)\\}", Pattern.CASE_INSENSITIVE);

    /**
     * Replaces placeholders in every non-empty body paragraph of the document.
     *
     * @param doc    document to modify
     * @param params replacements: key = placeholder text, value = actual value
     */
    public static void replaceInPara(XWPFDocument doc, Map<String, Object> params) {
        Iterator<XWPFParagraph> paragraphs = doc.getParagraphsIterator();
        while (paragraphs.hasNext()) {
            XWPFParagraph para = paragraphs.next();
            if (!StringUtils.isEmpty(para.getParagraphText())) {
                replaceInPara(para, params);
            }
        }
    }

    /**
     * Replaces placeholders in a single paragraph.
     *
     * <p>Only {@code String} values are applied. When at least one key was found, all
     * runs of the paragraph are removed and a single new run holding the replaced text is
     * created, so the original run formatting is lost (known limitation).
     *
     * @param para   paragraph to modify
     * @param params replacements: key = placeholder text, value = actual value
     */
    public static void replaceInPara(XWPFParagraph para, Map<String, Object> params) {
        if (params == null || params.isEmpty()) {
            return;
        }
        String sourceText = para.getParagraphText();
        boolean replaced = false;
        for (Map.Entry<String, Object> entry : params.entrySet()) {
            String key = entry.getKey();
            Object value = entry.getValue();
            // An empty key would match between every pair of characters.
            if (key == null || key.isEmpty() || !(value instanceof String)) {
                continue;
            }
            if (sourceText.contains(key)) {
                sourceText = sourceText.replace(key, (String) value);
                replaced = true;
            }
        }
        if (!replaced) {
            return;
        }
        // Remove from the last valid index downwards so remaining indices stay stable.
        for (int i = para.getRuns().size() - 1; i >= 0; i--) {
            para.removeRun(i);
        }
        para.createRun().setText(sourceText);
    }

    /**
     * Replaces placeholders inside tables.
     *
     * <p>Only tables with more than one row whose text contains a {@code ${...}}
     * placeholder are processed; every paragraph of every cell is then handled by
     * {@link #replaceInPara(XWPFParagraph, Map)}.
     *
     * @param doc    document to modify
     * @param params replacements: key = placeholder text, value = actual value
     */
    public static void replaceTable(XWPFDocument doc, Map<String, Object> params) {
        Iterator<XWPFTable> tables = doc.getTablesIterator();
        while (tables.hasNext()) {
            XWPFTable table = tables.next();
            if (table.getRows().size() <= 1 || !matcher(table.getText()).find()) {
                continue;
            }
            for (XWPFTableRow row : table.getRows()) {
                for (XWPFTableCell cell : row.getTableCells()) {
                    for (XWPFParagraph para : cell.getParagraphs()) {
                        replaceInPara(para, params);
                    }
                }
            }
        }
    }

    /**
     * Creates a matcher that looks for {@code ${...}} placeholders in the given text.
     *
     * @param str text to scan
     * @return matcher over {@code str}
     */
    private static Matcher matcher(String str) {
        return PLACEHOLDER.matcher(str);
    }
}
富文本转成Word的思路:富文本本身就是一段HTML字符串,可以直接把这段字符串当做一个段落写入到Word中,但这样会丢失HTML样式,所以需要将识别到的HTML标签替换成Word标签,这也是难点所在。因此需要设计一个大而全的样式替换工具,目前笔者的项目中只做了H1/H2/H3/段落/表格/图片(img的src是url)的转换(base64流放在富文本中太大了,不易识别)。再提一句,这些替换的工具可以设计为责任链模式,笔者也还没有这样做。更多代码在项目的insertword目录。
package insertword;

import org.apache.poi.util.Units;
import org.apache.poi.xwpf.usermodel.*;
import org.apache.xmlbeans.XmlCursor;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.springframework.util.ObjectUtils;
import org.springframework.util.StringUtils;

import java.io.*;

/**
 * Helpers for writing rich text (HTML) into a Word document.
 *
 * @author corey
 * @version 1.0
 */
public class HtmlUtil {

    /**
     * Tags every element whose tag name is known to {@code ElementEnum} with the
     * {@code CommonConStant.COMMONATTR} attribute, so it can be selected later.
     *
     * @param document parsed HTML document
     * @throws NullPointerException if {@code document} is null
     */
    public static void addElement(Document document) {
        if (ObjectUtils.isEmpty(document)) {
            throw new NullPointerException("不允许为空的对象添加元素");
        }
        for (Element element : document.getAllElements()) {
            String attrName = ElementEnum.getValueByCode(element.tag().getName());
            if (!StringUtils.isEmpty(attrName)) {
                element.attr(CommonConStant.COMMONATTR, attrName);
            }
        }
    }

    /**
     * Writes rich-text content into a Word document at the position of {@code paragraph}.
     *
     * <p>Handled element kinds: h1, h2, h3, paragraph, image (by {@code src}) and table.
     * The kinds "title", "subtitle" and "imgbase64" are recognised but produce no output.
     * Any exception stops the conversion and is printed, not rethrown.
     *
     * @param ritchText rich-text (HTML) content
     * @param doc       target document; needed to insert paragraphs, pictures and tables
     * @param paragraph paragraph whose position is used as the insertion point
     */
    public static void resolveHtml(String ritchText, XWPFDocument doc, XWPFParagraph paragraph) {
        // The two-argument overload takes a base URI, not a charset, so "UTF-8" was
        // meaningless there; only raw attribute values and text are read below.
        Document document = Jsoup.parseBodyFragment(ritchText);
        try {
            HtmlUtil.addElement(document);
            Elements elements = document.select("[" + CommonConStant.COMMONATTR + "]");
            for (Element em : elements) {
                XmlCursor xmlCursor = paragraph.getCTP().newCursor();
                switch (em.attr(CommonConStant.COMMONATTR)) {
                    case "imgurl": {
                        // NOTE(review): src is opened as a local file path; an http(s) URL
                        // would fail here - confirm what callers pass.
                        String url = em.attr("src");
                        try (InputStream inputStream = new FileInputStream(url)) {
                            XWPFParagraph imageParagraph = doc.insertNewParagraph(xmlCursor);
                            ParagraphStyleUtil.setImageCenter(imageParagraph);
                            imageParagraph.createRun().addPicture(inputStream,
                                    XWPFDocument.PICTURE_TYPE_PNG, "图片.jpeg",
                                    Units.toEMU(200), Units.toEMU(200));
                        }
                        break;
                    }
                    case "table": {
                        XWPFTable xwpfTable = doc.insertNewTbl(xmlCursor);
                        addTable(xwpfTable, em);
                        // Centre the table, then centre the cell content.
                        ParagraphStyleUtil.setTableLocation(xwpfTable, "center");
                        ParagraphStyleUtil.setCellLocation(xwpfTable, "CENTER", "center");
                        break;
                    }
                    case "h1": {
                        XWPFRun run = doc.insertNewParagraph(xmlCursor).createRun();
                        run.setText(em.text());
                        ParagraphStyleUtil.setTitle(run, TitleFontEnum.H1.getTitle());
                        break;
                    }
                    case "h2": {
                        XWPFRun run = doc.insertNewParagraph(xmlCursor).createRun();
                        run.setText(em.text());
                        ParagraphStyleUtil.setTitle(run, TitleFontEnum.H2.getTitle());
                        break;
                    }
                    case "h3": {
                        XWPFRun run = doc.insertNewParagraph(xmlCursor).createRun();
                        run.setText(em.text());
                        ParagraphStyleUtil.setTitle(run, TitleFontEnum.H3.getTitle());
                        break;
                    }
                    case "paragraph": {
                        XWPFParagraph textParagraph = doc.insertNewParagraph(xmlCursor);
                        // Leading whitespace acts as the paragraph indent.
                        // NOTE(review): the original comment says four spaces - confirm.
                        textParagraph.createRun().setText(" " + em.text());
                        break;
                    }
                    case "title":
                    case "subtitle":
                    case "imgbase64":
                    default:
                        break;
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Reads a text file line by line.
     *
     * <p>Every line, including the first, is preceded by the system line separator, so a
     * non-empty file yields a result that starts with a line break. Read errors are
     * printed and whatever was read so far is returned.
     *
     * @param file file to read
     * @return file content
     */
    public static String txt2String(File file) {
        StringBuilder result = new StringBuilder();
        try (BufferedReader br = new BufferedReader(new FileReader(file))) {
            String line;
            while ((line = br.readLine()) != null) {
                result.append(System.lineSeparator()).append(line);
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
        return result.toString();
    }

    /**
     * Copies the rows of an HTML table element into a Word table.
     *
     * @param xwpfTable target Word table
     * @param table     HTML table element
     */
    private static void addTable(XWPFTable xwpfTable, Element table) {
        int rownum = 0;
        for (Element tr : table.getElementsByTag("tr")) {
            addTableTr(xwpfTable, tr, rownum);
            rownum++;
        }
    }

    /**
     * Copies the cells of one HTML row into the Word table.
     *
     * <p>{@code th} cells are used when present, otherwise {@code td} cells. Row 0 reuses
     * the table's existing first row; every other row is newly created. Cells missing from
     * the target row are added, so a row wider than the first one no longer fails.
     *
     * @param xwpfTable target Word table
     * @param tr        HTML row element
     * @param rownum    zero-based index of the row being copied
     */
    private static void addTableTr(XWPFTable xwpfTable, Element tr, int rownum) {
        Elements headerCells = tr.getElementsByTag("th");
        Elements tds = headerCells.isEmpty() ? tr.getElementsByTag("td") : headerCells;
        if (tds.isEmpty()) {
            return;
        }
        XWPFTableRow row = rownum == 0 ? xwpfTable.getRow(0) : xwpfTable.createRow();
        for (int i = 0, j = tds.size(); i < j; i++) {
            XWPFTableCell cell = row.getCell(i);
            if (cell == null) {
                cell = row.addNewTableCell();
            }
            cell.setText(tds.get(i).text());
        }
    }

    /**
     * Closes the given resources, ignoring {@code null} entries and printing (not
     * rethrowing) any {@link IOException}.
     *
     * @param closeables resources to close
     */
    public static void closeStream(Closeable... closeables) {
        if (closeables == null) {
            return;
        }
        for (Closeable closeable : closeables) {
            if (closeable != null) {
                try {
                    closeable.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }
}
Word添加水印的思路:利用XWPFHeader对象创建页眉,给页眉添加文字,设置字体、大小、颜色、旋转角度即可。代码在项目的insertword目录。
package insertword;

import com.microsoft.schemas.office.office.CTLock;
import com.microsoft.schemas.vml.*;
import org.apache.poi.wp.usermodel.HeaderFooterType;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFHeader;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.*;

import java.util.stream.Stream;

/**
 * Adds a text watermark to a Word document by drawing VML text shapes in the default header.
 *
 * @author corey
 * @version 1.0
 */
public class WatermarkUtil {

    /** Font family of the watermark text. */
    private static final String FONT_NAME = "宋体";
    /** Font size of the watermark text. */
    private static final String FONT_SIZE = "0.2pt";
    /** Fill colour of the watermark text. */
    private static final String FONT_COLOR = "#d0d0d0";
    /** Assumed average width of one character in pt; shape width = text length * this. */
    private static final int WIDTH_PER_WORD = 10;
    /** Rotation of the watermark text. */
    private static final String STYLE_ROTATION = "45";

    /**
     * Covers the whole page with watermark lines: 25 lines are added, with top offsets
     * running from -500 to 1900 in steps of 100.
     *
     * @param doc        document to watermark
     * @param customText watermark text
     */
    public static void waterMarkDocXDocument(XWPFDocument doc, String customText) {
        for (int lineIndex = -5; lineIndex < 20; lineIndex++) {
            addWatermarkLine(doc, customText, 100 * lineIndex);
        }
    }

    /**
     * Adds a single watermark line at top offset 0.
     *
     * <p>The offset used to come from a mutable static field shared with
     * {@link #waterMarkDocXDocument}, so the result depended on earlier calls; it is now
     * always the field's initial value, 0.
     *
     * @param doc        the docx document to process
     * @param customText watermark text
     */
    public static void waterMarkDocXDocument_0(XWPFDocument doc, String customText) {
        addWatermarkLine(doc, customText, 0);
    }

    /**
     * Adds one line of repeated watermark text to the default header.
     *
     * @param doc        document to watermark
     * @param customText watermark text
     * @param marginTop  value written as the shape's {@code margin-top}
     */
    private static void addWatermarkLine(XWPFDocument doc, String customText, int marginTop) {
        // Separate repetitions with 8 spaces, then repeat the text 10 times per line.
        customText = customText + repeatString(" ", 8);
        customText = repeatString(customText, 10);

        // Per the original author's note, an already existing DEFAULT header is reused.
        XWPFHeader header = doc.createHeader(HeaderFooterType.DEFAULT);
        if (header.getParagraphs().isEmpty()) {
            header.createParagraph();
        }
        CTP ctp = header.getParagraphArray(0).getCTP();

        // Copy revision ids from the first body paragraph when there is one; a document
        // without body paragraphs previously failed on getPArray(0).
        CTBody body = doc.getDocument().getBody();
        if (body.sizeOfPArray() > 0) {
            CTP firstParagraph = body.getPArray(0);
            byte[] rsidR = firstParagraph.getRsidR();
            byte[] rsidRDefault = firstParagraph.getRsidRDefault();
            if (rsidR != null) {
                ctp.setRsidP(rsidR);
            }
            if (rsidRDefault != null) {
                ctp.setRsidRDefault(rsidRDefault);
            }
        }

        // This runs once per watermark line on the same paragraph: reuse the paragraph
        // properties instead of adding a duplicate element each time.
        CTPPr ppr = ctp.isSetPPr() ? ctp.getPPr() : ctp.addNewPPr();
        if (!ppr.isSetPStyle()) {
            ppr.addNewPStyle();
        }
        ppr.getPStyle().setVal("Header");

        // Build the watermark shape.
        CTR ctr = ctp.addNewR();
        CTRPr ctrpr = ctr.addNewRPr();
        ctrpr.addNewNoProof();
        CTGroup group = CTGroup.Factory.newInstance();
        CTShapetype shapetype = group.addNewShapetype();
        CTTextPath shapeTypeTextPath = shapetype.addNewTextpath();
        shapeTypeTextPath.setOn(STTrueFalse.T);
        shapeTypeTextPath.setFitshape(STTrueFalse.T);
        CTLock lock = shapetype.addNewLock();
        lock.setExt(STExt.VIEW);
        CTShape shape = group.addNewShape();
        shape.setId("PowerPlusWaterMarkObject");
        shape.setSpid("_x0000_s102");
        shape.setType("#_x0000_t136");
        // Shape style: rotation, position, size and so on.
        shape.setStyle(getShapeStyle(customText, marginTop));
        shape.setFillcolor(FONT_COLOR);
        // No outline stroke, so the text is drawn filled.
        shape.setStroked(STTrueFalse.FALSE);
        // Text path that renders the watermark text with the configured font.
        CTTextPath shapeTextPath = shape.addNewTextpath();
        shapeTextPath.setStyle("font-family:" + FONT_NAME + ";font-size:" + FONT_SIZE);
        shapeTextPath.setString(customText);
        CTPicture pict = ctr.addNewPict();
        pict.set(group);
    }

    /**
     * Builds the style string of the watermark shape.
     *
     * @param customText full text of the line; its length determines the shape width
     * @param marginTop  value written as {@code margin-top}
     * @return style string
     */
    private static String getShapeStyle(String customText, int marginTop) {
        StringBuilder sb = new StringBuilder();
        sb.append("position: ").append("absolute");
        // Width of the text = number of characters * assumed width per character.
        sb.append(";width: ").append(customText.length() * WIDTH_PER_WORD).append("pt");
        sb.append(";height: ").append("20pt");
        sb.append(";z-index: ").append("-251654144");
        sb.append(";mso-wrap-edited: ").append("f");
        // Per the original author: the offset must be given as margin-top; top does not work.
        sb.append(";margin-top: ").append(marginTop);
        sb.append(";mso-position-horizontal-relative: ").append("page");
        sb.append(";mso-position-vertical-relative: ").append("page");
        sb.append(";mso-position-vertical: ").append("left");
        sb.append(";mso-position-horizontal: ").append("center");
        sb.append(";rotation: ").append(STYLE_ROTATION);
        return sb.toString();
    }

    /**
     * Repeats a string.
     *
     * @param pattern string to repeat
     * @param repeats number of repetitions
     * @return {@code pattern} concatenated {@code repeats} times
     */
    private static String repeatString(String pattern, int repeats) {
        StringBuilder buffer = new StringBuilder(pattern.length() * repeats);
        for (int i = 0; i < repeats; i++) {
            buffer.append(pattern);
        }
        return buffer.toString();
    }
}