资源描述
1,导入所需的 jar 包(Apache POI 及 commons-io)。
2,Word2Html.java
1. package com.poi;
2. import java.io.BufferedWriter;
3. import java.io.File;
4. import java.io.FileInputStream;
5. import java.io.FileNotFoundException;
6. import java.io.FileOutputStream;
7. import java.io.IOException;
8. import java.io.OutputStream;
9. import java.io.OutputStreamWriter;
10. import java.util.List;
11.
12. import javax.xml.parsers.DocumentBuilderFactory;
13. import javax.xml.parsers.ParserConfigurationException;
14. import javax.xml.transform.OutputKeys;
15. import javax.xml.transform.Transformer;
16. import javax.xml.transform.TransformerException;
17. import javax.xml.transform.TransformerFactory;
18. import javax.xml.transform.dom.DOMSource;
19. import javax.xml.transform.stream.StreamResult;
20.
21. import org.apache.commons.io.output.ByteArrayOutputStream;
22. import org.apache.poi.hwpf.HWPFDocument;
23. import org.apache.poi.hwpf.converter.PicturesManager;
24. import org.apache.poi.hwpf.converter.WordToHtmlConverter;
25. import org.apache.poi.hwpf.model.PicturesTable;
26. import org.apache.poi.hwpf.usermodel.CharacterRun;
27. import org.apache.poi.hwpf.usermodel.Paragraph;
28. import org.apache.poi.hwpf.usermodel.Picture;
29. import org.apache.poi.hwpf.usermodel.PictureType;
30. import org.apache.poi.hwpf.usermodel.Range;
31. import org.apache.poi.hwpf.usermodel.Table;
32. import org.apache.poi.hwpf.usermodel.TableCell;
33. import org.apache.poi.hwpf.usermodel.TableIterator;
34. import org.apache.poi.hwpf.usermodel.TableRow;
35. import org.w3c.dom.Document;
36. /**
37. * @author: Chembo Huang
38. * @since: May 3, 2012
39. * @modified: May 3, 2012
40. * @version:
41. */
42. public class Word2Html {
43.
44. public static void main(String argv[]) {
45. try {
46. convert2Html("E://test//33.doc","E://test//abc.html");
47. } catch (Exception e) {
48. e.printStackTrace();
49. }
50. }
51.
52. public static void writeFile(String content, String path) {
53. FileOutputStream fos = null;
54. BufferedWriter bw = null;
55. try {
56. File file = new File(path);
57. fos = new FileOutputStream(file);
58. bw = new BufferedWriter(new OutputStreamWriter(fos,"GB2312"));
59. bw.write(content);
60. } catch (FileNotFoundException fnfe) {
61. fnfe.printStackTrace();
62. } catch (IOException ioe) {
63. ioe.printStackTrace();
64. } finally {
65. try {
66. if (bw != null)
67. bw.close();
68. if (fos != null)
69. fos.close();
70. } catch (IOException ie) {
71. }
72. }
73. }
74.
75. public static void convert2Html(String fileName, String outPutFile)
76. throws TransformerException, IOException,
77. ParserConfigurationException {
78. HWPFDocument wordDocument = new HWPFDocument(new FileInputStream(fileName));//WordToHtmlUtils.loadDoc(new FileInputStream(inputFile));
79. WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
80. DocumentBuilderFactory.newInstance().newDocumentBuilder()
81. .newDocument());
82. wordToHtmlConverter.setPicturesManager( new PicturesManager()
83. {
84. public String savePicture( byte[] content,
85. PictureType pictureType, String suggestedName,
86. float widthInches, float heightInches )
87. {
88. return suggestedName;
89. }
90. } );
91. wordToHtmlConverter.processDocument(wordDocument);
92. //save pictures
93. List pics=wordDocument.getPicturesTable().getAllPictures();
94. if(pics!=null){
95. for(int i=0;i<pics.size();i++){
96. Picture pic = (Picture)pics.get(i);
97. System.out.println();
98. try {
99. pic.writeImageContent(new FileOutputStream("E:/test/"
100. + pic.suggestFullFileName()));
101. } catch (FileNotFoundException e) {
102. e.printStackTrace();
103. }
104. }
105. }
106. Document htmlDocument = wordToHtmlConverter.getDocument();
107. ByteArrayOutputStream out = new ByteArrayOutputStream();
108. DOMSource domSource = new DOMSource(htmlDocument);
109. StreamResult streamResult = new StreamResult(out);
110.
111. TransformerFactory tf = TransformerFactory.newInstance();
112. Transformer serializer = tf.newTransformer();
113. serializer.setOutputProperty(OutputKeys.ENCODING, "GB2312");
114. serializer.setOutputProperty(OutputKeys.INDENT, "yes");
115. serializer.setOutputProperty(OutputKeys.METHOD, "html");
116. serializer.transform(domSource, streamResult);
117. out.close();
118. writeFile(new String(out.toByteArray()), outPutFile);
119. }
120. }
展开阅读全文