├── .gitignore ├── tests └── org │ └── rtf │ └── test │ ├── TestSuite.java │ ├── TextTest.java │ ├── ReaderTest.java │ └── FontTest.java ├── src └── org │ └── rtf │ ├── RtfParseException.java │ ├── RtfText.java │ ├── RtfControlWord.java │ ├── RtfControlSymbol.java │ ├── RtfElement.java │ ├── RtfGroup.java │ ├── RtfState.java │ ├── RtfReader.java │ └── RtfHtml.java ├── LICENSE └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | bin/ 2 | .classpath 3 | .project 4 | -------------------------------------------------------------------------------- /tests/org/rtf/test/TestSuite.java: -------------------------------------------------------------------------------- 1 | package org.rtf.test; 2 | 3 | import org.junit.runner.RunWith; 4 | import org.junit.runners.Suite; 5 | 6 | @RunWith(Suite.class) 7 | 8 | @Suite.SuiteClasses({ FontTest.class, ReaderTest.class, TextTest.class }) 9 | 10 | public class TestSuite { 11 | } -------------------------------------------------------------------------------- /src/org/rtf/RtfParseException.java: -------------------------------------------------------------------------------- 1 | package org.rtf; 2 | 3 | /** 4 | * This exception is thrown if errors occur when parsing RTF strings, e.g. with 5 | * an invalid structure. 6 | * 7 | * @author Kay Schröer 8 | */ 9 | public class RtfParseException extends Exception { 10 | private static final long serialVersionUID = 0L; 11 | 12 | /** 13 | * Creates the new exception. 14 | * 15 | * @param message 16 | * error details 17 | */ 18 | public RtfParseException(String message) { 19 | super(message); 20 | } 21 | } -------------------------------------------------------------------------------- /src/org/rtf/RtfText.java: -------------------------------------------------------------------------------- 1 | package org.rtf; 2 | 3 | /** 4 | * This class represents an RTF text element in the element tree. 
5 | * 6 | * @author Kay Schröer 7 | */ 8 | public class RtfText extends RtfElement { 9 | /** 10 | * Plain text 11 | */ 12 | public String text; 13 | 14 | /* 15 | * (non-Javadoc) 16 | * 17 | * @see org.rtf.RtfElement#dump(int) 18 | */ 19 | @Override 20 | public void dump(int level) { 21 | System.out.println("
"); 22 | indent(level); 23 | System.out.println("TEXT " + text); 24 | System.out.println("
"); 25 | } 26 | } -------------------------------------------------------------------------------- /src/org/rtf/RtfControlWord.java: -------------------------------------------------------------------------------- 1 | package org.rtf; 2 | 3 | /** 4 | * This class represents an RTF control word in the element tree. 5 | * 6 | * @author Kay Schröer 7 | */ 8 | public class RtfControlWord extends RtfElement { 9 | /** 10 | * Control word, e.g. fs 11 | */ 12 | public String word; 13 | 14 | /** 15 | * Word parameter, e.g. 22 16 | */ 17 | public int parameter; 18 | 19 | /* 20 | * (non-Javadoc) 21 | * 22 | * @see org.rtf.RtfElement#dump(int) 23 | */ 24 | @Override 25 | public void dump(int level) { 26 | System.out.println("
"); 27 | indent(level); 28 | System.out.println("WORD " + word + " (" + parameter + ")"); 29 | System.out.println("
"); 30 | } 31 | } -------------------------------------------------------------------------------- /src/org/rtf/RtfControlSymbol.java: -------------------------------------------------------------------------------- 1 | package org.rtf; 2 | 3 | /** 4 | * This class represents an RTF control symbol in the element tree. 5 | * 6 | * @author Kay Schröer 7 | */ 8 | public class RtfControlSymbol extends RtfElement { 9 | /** 10 | * Control symbol, e.g. * 11 | */ 12 | public char symbol; 13 | 14 | /** 15 | * Symbol parameter, e.g. 0 16 | */ 17 | public int parameter = 0; 18 | 19 | /* 20 | * (non-Javadoc) 21 | * 22 | * @see org.rtf.RtfElement#dump(int) 23 | */ 24 | @Override 25 | public void dump(int level) { 26 | System.out.println("
"); 27 | indent(level); 28 | System.out.println("SYMBOL " + symbol + " (" + parameter + ")"); 29 | System.out.println("
"); 30 | } 31 | } -------------------------------------------------------------------------------- /src/org/rtf/RtfElement.java: -------------------------------------------------------------------------------- 1 | package org.rtf; 2 | 3 | /** 4 | * This class provides the base technology for debugging and is used as 5 | * superclass for specific RTF elements like groups, control words, control 6 | * symbols and texts. 7 | * 8 | * @author Kay Schröer 9 | */ 10 | public abstract class RtfElement { 11 | /** 12 | * Outputs debug information. 13 | * 14 | * @param level 15 | * a value greater than or equal to 0 that specifies the number 16 | * of spaces by which the text should be indented 17 | */ 18 | protected abstract void dump(int level); 19 | 20 | /** 21 | * Outputs a text indent. 22 | * 23 | * @param level 24 | * a value greater than or equal to 0 that specifies the number 25 | * of spaces 26 | */ 27 | protected void indent(int level) { 28 | for (int i = 0; i < level; i++) { 29 | System.out.println(" "); 30 | } 31 | } 32 | } -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | MIT License 3 | 4 | Copyright (c) 2018 Kay Schröer 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | 8 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
9 | 10 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 11 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # rtf-html-java 2 | @author Kay Schröer (acsf.dev@gmail.com) 3 | 4 | Background of the project: For some of my applications, I was looking for a way to convert RTF texts to HTML so that they could be displayed in an embedded web view. 5 | Googling around turned up several matches, the most common being the use of `javax.swing.text.EditorKit`: 6 | 7 | ```java 8 | public String toHTML(File file) throws Exception { 9 | JEditorPane p = new JEditorPane(); 10 | p.setContentType("text/rtf"); 11 | 12 | EditorKit kitRtf = p.getEditorKitForContentType("text/rtf"); 13 | kitRtf.read(new FileReader(file), p.getDocument(), 0); 14 | kitRtf = null; 15 | 16 | EditorKit kitHtml = p.getEditorKitForContentType("text/html"); 17 | Writer writer = new StringWriter(); 18 | kitHtml.write(writer, p.getDocument(), 0, p.getDocument().getLength()); 19 | 20 | return writer.toString(); 21 | } 22 | ``` 23 | 24 | The problem with this solution is its close connection to the Swing toolkit, which I do not use in my applications. In addition, this snippet is difficult to port to other platforms (such as Android), which was also one of my requirements. 25 | 26 | Another commonly recommended method is the use of command-line tools. However, this would involve building an infrastructure with a server and e.g. require a REST API. 
I really did not want to go to that effort. 27 | 28 | So what I had in mind was an API that manages with a handful of classes and is written in pure Java. I found [rtf-html-php](https://github.com/henck/rtf-html-php). The project is written entirely in PHP, but perfectly meets all my requirements and was easy to port. I had to make some changes, such as setting data types for all variables or replacing the expandable arrays with ArrayList, but my project is a very accurate adaptation of the PHP library. 29 | 30 | ## Features 31 | 32 | - Parsing of files, streams and strings 33 | - Including paragraphs 34 | - Support of font styles like bold, italic, underline, strikethrough and hidden 35 | - Handling of different font sizes, text colors and background colors 36 | - Escaping special characters 37 | - Building HTML entities from Unicode characters 38 | 39 | ## Usage 40 | 41 | ```java 42 | import java.io.File; 43 | 44 | import org.rtf.RtfHtml; 45 | import org.rtf.RtfParseException; 46 | import org.rtf.RtfReader; 47 | 48 | public class Demo { 49 | 50 | public static void main(String[] args) { 51 | File file = new File(args[0]); 52 | RtfReader reader = new RtfReader(); 53 | RtfHtml formatter = new RtfHtml(); 54 | 55 | try { 56 | reader.parse(file); 57 | System.out.println(formatter.format(reader.root, true)); 58 | } catch (RtfParseException e) { 59 | e.printStackTrace(); 60 | } 61 | } 62 | } 63 | ``` 64 | -------------------------------------------------------------------------------- /src/org/rtf/RtfGroup.java: -------------------------------------------------------------------------------- 1 | package org.rtf; 2 | 3 | import java.util.ArrayList; 4 | import java.util.List; 5 | 6 | /** 7 | * This class represents an RTF group in the element tree.
8 | * 9 | * @author Kay Schröer 10 | */ 11 | public class RtfGroup extends RtfElement { 12 | /** 13 | * Instance of the parent group element 14 | */ 15 | public RtfGroup parent; 16 | 17 | /** 18 | * List of child elements (group, control word, control symbol, text) 19 | */ 20 | public List children; 21 | 22 | /** 23 | * Creates a new group element. 24 | */ 25 | public RtfGroup() { 26 | parent = null; 27 | children = new ArrayList<>(); 28 | } 29 | 30 | /** 31 | * Gets the group type. 32 | * 33 | * @return control word of the first child as type or an empty string if 34 | * there are no children or the first child is not a control word 35 | */ 36 | public String getType() { 37 | // No children? 38 | if (children.isEmpty()) { 39 | return ""; 40 | } 41 | 42 | // First child not a control word? 43 | RtfElement child = children.get(0); 44 | if (!(child instanceof RtfControlWord)) { 45 | return ""; 46 | } 47 | 48 | return ((RtfControlWord) child).word; 49 | } 50 | 51 | /** 52 | * Checks if the group is a destination. 53 | * 54 | * @return {@code true} if a certain control word is referred 55 | */ 56 | public boolean isDestination() { 57 | // No children? 58 | if (children.isEmpty()) { 59 | return false; 60 | } 61 | 62 | // First child not a control symbol? 63 | RtfElement child = children.get(0); 64 | if (!(child instanceof RtfControlSymbol)) { 65 | return false; 66 | } 67 | 68 | return ((RtfControlSymbol) child).symbol == '*'; 69 | } 70 | 71 | /** 72 | * Outputs debug information. 73 | */ 74 | public void dump() { 75 | dump(0); 76 | } 77 | 78 | /* 79 | * (non-Javadoc) 80 | * 81 | * @see org.rtf.RtfElement#dump(int) 82 | */ 83 | @Override 84 | public void dump(int level) { 85 | System.out.println("
"); 86 | indent(level); 87 | System.out.println("{"); 88 | System.out.println("
"); 89 | 90 | for (RtfElement child : children) { 91 | if (child instanceof RtfGroup) { 92 | RtfGroup group = (RtfGroup) child; 93 | 94 | // Can we ignore this group? 95 | if (group.getType().equals("fonttbl")) { 96 | continue; 97 | } 98 | if (group.getType().equals("colortbl")) { 99 | continue; 100 | } 101 | if (group.getType().equals("stylesheet")) { 102 | continue; 103 | } 104 | if (group.getType().equals("info")) { 105 | continue; 106 | } 107 | 108 | // Skip any pictures and destinations. 109 | if (group.getType().length() >= 4 && group.getType().substring(0, 4).equals("pict")) { 110 | continue; 111 | } 112 | if (group.isDestination()) { 113 | continue; 114 | } 115 | } 116 | 117 | child.dump(level + 2); 118 | } 119 | 120 | System.out.println("
"); 121 | indent(level); 122 | System.out.println("}"); 123 | System.out.println("
"); 124 | } 125 | } -------------------------------------------------------------------------------- /src/org/rtf/RtfState.java: -------------------------------------------------------------------------------- 1 | package org.rtf; 2 | 3 | /** 4 | * This class specifies a structure of layout information used for text 5 | * formatting in the span tag and obtained from RTF control words. 6 | * 7 | * @author Kay Schröer 8 | */ 9 | public class RtfState implements Cloneable { 10 | /** 11 | * Attribute that specifies that text should be written in bold 12 | */ 13 | public boolean bold; 14 | 15 | /** 16 | * Attribute that specifies that text should be written in italic 17 | */ 18 | public boolean italic; 19 | 20 | /** 21 | * Attribute that specifies that text should be underlined 22 | */ 23 | public boolean underline; 24 | 25 | /** 26 | * Attribute that specifies that text should be striked through 27 | */ 28 | public boolean strike; 29 | 30 | /** 31 | * Attribute that specifies that text should be hidden 32 | */ 33 | public boolean hidden; 34 | 35 | /** 36 | * Attribute that specifies that the text should be beneath the baseline ("down", negative) or above the baseline ("up", positive) by N. 37 | *
RTF "dnN" moves the text down by N half-points; it does not imply a font size reduction, so the font size is given separately --> negative value taken from the parameter, font size unchanged. 38 | *
RTF "upN" moves the text up by N half-points; it does not imply a font size reduction, so the font size is given separately --> positive value taken from the parameter, font size unchanged. 39 | */ 40 | public int dnup; 41 | 42 | /** 43 | * Attribute that specifies that the text should be subscript. Switches off superscript. 44 | *
RTF "sub" denotes subscript and implies a font size reduction --> true, the effective font size is 1/2 of the current font size. 45 | *
Turned off by \nosupersub. 46 | */ 47 | public boolean subscript; 48 | 49 | /** 50 | * Attribute that specifies that the text should be superscript. Switches off subscript. 51 | *
RTF "super" denotes superscript and implies a font size reduction --> true, the effective font size is 1/2 of the current font size. 52 | *
Turned off by \nosupersub. 53 | */ 54 | public boolean superscript; 55 | 56 | /** 57 | * Font size in pixels 58 | */ 59 | public int fontSize; 60 | 61 | /** 62 | * Font as a position in the font table 63 | */ 64 | public int font; 65 | 66 | /** 67 | * Text color as a position in the color table 68 | */ 69 | public int textColor; 70 | 71 | /** 72 | * Background color as a position in the color table 73 | */ 74 | public int background; 75 | 76 | /** 77 | * Creates a new RTF state. 78 | */ 79 | public RtfState() { 80 | reset(); 81 | } 82 | 83 | /** 84 | * Clones the layout information. 85 | * 86 | * @return a copy of this object 87 | */ 88 | @Override 89 | public Object clone() { 90 | RtfState newState = new RtfState(); 91 | newState.bold = this.bold; 92 | newState.italic = this.italic; 93 | newState.underline = this.underline; 94 | newState.strike = this.strike; 95 | newState.hidden = this.hidden; 96 | newState.dnup = this.dnup; 97 | newState.subscript = this.subscript; 98 | newState.superscript = this.superscript; 99 | newState.fontSize = this.fontSize; 100 | newState.font = this.font; 101 | newState.textColor = this.textColor; 102 | newState.background = this.background; 103 | return newState; 104 | } 105 | 106 | /** 107 | * Compares two states for equality.
108 | * 109 | * @param obj 110 | * the object to compare with 111 | * @return {@code true} if and only if the argument is not {@code null} and 112 | * is a {@code RtfState} object that contains the same layout 113 | * information as this object 114 | */ 115 | @Override 116 | public boolean equals(Object obj) { 117 | if (obj == null) { 118 | return false; 119 | } 120 | if (!(obj instanceof RtfState)) { 121 | return false; 122 | } 123 | 124 | RtfState anotherState = (RtfState) obj; 125 | return this.bold == anotherState.bold && this.italic == anotherState.italic 126 | && this.underline == anotherState.underline && this.strike == anotherState.strike 127 | && this.dnup == anotherState.dnup 128 | && this.subscript == anotherState.subscript && this.superscript == anotherState.superscript 129 | && this.hidden == anotherState.hidden && this.fontSize == anotherState.fontSize 130 | && this.font == anotherState.font 131 | && this.textColor == anotherState.textColor && this.background == anotherState.background; 132 | } 133 | 134 | /** 135 | * Sets the attributes to default values. 136 | */ 137 | public void reset() { 138 | bold = false; 139 | italic = false; 140 | underline = false; 141 | strike = false; 142 | hidden = false; 143 | dnup = 0; 144 | subscript = false; 145 | superscript = false; 146 | fontSize = 0; 147 | font = 0; 148 | textColor = 0; 149 | background = 0; 150 | } 151 | } -------------------------------------------------------------------------------- /tests/org/rtf/test/TextTest.java: -------------------------------------------------------------------------------- 1 | package org.rtf.test; 2 | 3 | import org.junit.Assert; 4 | import org.junit.Test; 5 | import org.rtf.RtfHtml; 6 | import org.rtf.RtfParseException; 7 | import org.rtf.RtfReader; 8 | 9 | public class TextTest { 10 | @Test 11 | public void testParagraphs() throws RtfParseException { 12 | String expectedString = "

This is the first line.

And this is the second one.

"; 13 | 14 | StringBuilder rtfBuilder = new StringBuilder(); 15 | rtfBuilder.append( 16 | "{\\rtf1\\ansi\\ansicpg1252\\deff0\\nouicompat\\deflang1031{\\fonttbl{\\f0\\fnil\\fcharset0 Calibri;}}\r\n"); 17 | rtfBuilder.append("{\\*\\generator Riched20 6.3.9600}\\viewkind4\\uc1 \r\n"); 18 | rtfBuilder.append("\\pard\\sa200\\sl276\\slmult1\\f0\\fs24\\lang7 This is the first line.\\par\r\n"); 19 | rtfBuilder.append("\\fs28 And this is the second one.\\par\r\n"); 20 | rtfBuilder.append("}\r\n"); 21 | String rtfString = rtfBuilder.toString(); 22 | 23 | RtfReader reader = new RtfReader(); 24 | reader.parse(rtfString); 25 | 26 | RtfHtml formatter = new RtfHtml(); 27 | String htmlString = formatter.format(reader.root); 28 | 29 | Assert.assertEquals(expectedString, htmlString); 30 | } 31 | 32 | @Test 33 | public void testParagraphsWithUnchangedFontFormat() throws RtfParseException { 34 | String expectedString = "

This is the first line.

" 35 | + "

And this is the second one with unchanged font format.

"; 36 | 37 | StringBuilder rtfBuilder = new StringBuilder(); 38 | rtfBuilder.append( 39 | "{\\rtf1\\ansi\\ansicpg1252\\deff0\\nouicompat\\deflang1031\r\n"); 40 | rtfBuilder.append("{\\fonttbl{\\f0\\fnil\\fcharset0 Calibri;}}\r\n"); 41 | rtfBuilder.append("{\\*\\generator Riched20 6.3.9600}\\viewkind4\\uc1 \r\n"); 42 | rtfBuilder.append("\\pard\\sa200\\sl276\\slmult1\\f0\\fs22\\lang7 This is the first line."); 43 | rtfBuilder.append("\\par And this is the second one with unchanged font format."); 44 | rtfBuilder.append("\\par}\r\n"); 45 | String rtfString = rtfBuilder.toString(); 46 | 47 | RtfReader reader = new RtfReader(); 48 | reader.parse(rtfString); 49 | 50 | RtfHtml formatter = new RtfHtml(); 51 | String htmlString = formatter.format(reader.root); 52 | 53 | Assert.assertEquals(expectedString, htmlString); 54 | } 55 | 56 | @Test 57 | public void testEscapeSequences() throws RtfParseException { 58 | String expectedString = "

Hello {World}

"; 59 | 60 | StringBuilder rtfBuilder = new StringBuilder(); 61 | rtfBuilder.append( 62 | "{\\rtf1\\ansi\\ansicpg1252\\deff0\\nouicompat\\deflang1031{\\fonttbl{\\f0\\fnil\\fcharset0 Calibri;}}\r\n"); 63 | rtfBuilder.append("{\\*\\generator Riched20 6.3.9600}\\viewkind4\\uc1 \r\n"); 64 | rtfBuilder.append("\\pard\\sa200\\sl276\\slmult1\\f0\\fs22\\lang7 Hello \\{World\\}\\par\r\n"); 65 | rtfBuilder.append("}\r\n"); 66 | String rtfString = rtfBuilder.toString(); 67 | 68 | RtfReader reader = new RtfReader(); 69 | reader.parse(rtfString); 70 | 71 | RtfHtml formatter = new RtfHtml(); 72 | String htmlString = formatter.format(reader.root); 73 | 74 | Assert.assertEquals(expectedString, htmlString); 75 | } 76 | 77 | @Test 78 | public void testUnicodeCharacters() throws RtfParseException { 79 | String expectedString = "

Kay Schröer

"; 80 | 81 | StringBuilder rtfBuilder = new StringBuilder(); 82 | rtfBuilder.append( 83 | "{\\rtf1\\ansi\\ansicpg1252\\deff0\\nouicompat\\deflang1031{\\fonttbl{\\f0\\fnil\\fcharset0 Calibri;}}\r\n"); 84 | rtfBuilder.append("{\\*\\generator Riched20 6.3.9600}\\viewkind4\\uc1 \r\n"); 85 | rtfBuilder.append("\\pard\\sa200\\sl276\\slmult1\\f0\\fs22\\lang7 Kay Schr\\'f6er\\par\r\n"); 86 | rtfBuilder.append("}\r\n"); 87 | String rtfString = rtfBuilder.toString(); 88 | 89 | RtfReader reader = new RtfReader(); 90 | reader.parse(rtfString); 91 | 92 | RtfHtml formatter = new RtfHtml(); 93 | String htmlString = formatter.format(reader.root); 94 | 95 | Assert.assertEquals(expectedString, htmlString); 96 | } 97 | 98 | @Test 99 | public void testEntities() throws RtfParseException { 100 | String expectedString = "

Hello –   World

"; 101 | 102 | StringBuilder rtfBuilder = new StringBuilder(); 103 | rtfBuilder.append("{\\rtf1\\ansi\\ansicpg1252\\deff0\\nouicompat\\deflang1031"); 104 | rtfBuilder.append("{\\fonttbl{\\f0\\fnil\\fcharset0 Calibri;}{\\f1\\fnil Tahoma;}}\r\n"); 105 | rtfBuilder.append("{\\*\\generator Riched20 6.3.9600}\\viewkind4\\uc1 \r\n"); 106 | rtfBuilder.append("\\pard\\sa200\\sl276\\slmult1\\f0\\fs22\\lang7 Hello "); 107 | rtfBuilder.append("\\f1\\endash \\~ World\\f0\\par\r\n"); 108 | rtfBuilder.append("}\r\n"); 109 | String rtfString = rtfBuilder.toString(); 110 | 111 | RtfReader reader = new RtfReader(); 112 | reader.parse(rtfString); 113 | 114 | RtfHtml formatter = new RtfHtml(); 115 | String htmlString = formatter.format(reader.root); 116 | 117 | Assert.assertEquals(expectedString, htmlString); 118 | } 119 | } -------------------------------------------------------------------------------- /tests/org/rtf/test/ReaderTest.java: -------------------------------------------------------------------------------- 1 | package org.rtf.test; 2 | 3 | import java.io.ByteArrayOutputStream; 4 | import java.io.IOException; 5 | import java.io.PrintStream; 6 | 7 | import org.junit.Assert; 8 | import org.junit.Test; 9 | import org.rtf.RtfHtml; 10 | import org.rtf.RtfParseException; 11 | import org.rtf.RtfReader; 12 | 13 | public class ReaderTest { 14 | @Test 15 | public void testHtmlPage() throws RtfParseException { 16 | StringBuilder expectedBuilder = new StringBuilder(); 17 | expectedBuilder.append("\n"); 18 | expectedBuilder.append("\n"); 19 | expectedBuilder.append(" \n"); 20 | expectedBuilder.append(" \n"); 21 | expectedBuilder.append(" \n"); 22 | expectedBuilder.append(" \n"); 23 | expectedBuilder.append("

Hello World

\n"); 24 | expectedBuilder.append(" \n"); 25 | expectedBuilder.append("\n"); 26 | String expectedString = expectedBuilder.toString(); 27 | 28 | StringBuilder rtfBuilder = new StringBuilder(); 29 | rtfBuilder.append( 30 | "{\\rtf1\\ansi\\ansicpg1252\\deff0\\nouicompat\\deflang1031{\\fonttbl{\\f0\\fnil\\fcharset0 Calibri;}}\r\n"); 31 | rtfBuilder.append("{\\*\\generator Riched20 6.3.9600}\\viewkind4\\uc1 \r\n"); 32 | rtfBuilder.append("\\pard\\sa200\\sl276\\slmult1\\f0\\fs22\\lang7 Hello World\\par\r\n"); 33 | rtfBuilder.append("}\r\n"); 34 | String rtfString = rtfBuilder.toString(); 35 | 36 | RtfReader reader = new RtfReader(); 37 | reader.parse(rtfString); 38 | 39 | RtfHtml formatter = new RtfHtml(); 40 | String htmlString = formatter.format(reader.root, true); 41 | 42 | Assert.assertEquals(expectedString, htmlString); 43 | } 44 | 45 | @Test 46 | public void testFormatDump() throws IOException, RtfParseException { 47 | StringBuilder dumpBuilder = new StringBuilder(); 48 | dumpBuilder.append("

\r\n"); 49 | dumpBuilder.append("{\r\n"); 50 | dumpBuilder.append("
\r\n"); 51 | dumpBuilder.append("
\r\n"); 52 | dumpBuilder.append(" \r\n"); 53 | dumpBuilder.append(" \r\n"); 54 | dumpBuilder.append("WORD rtf (1)\r\n"); 55 | dumpBuilder.append("
\r\n"); 56 | dumpBuilder.append("
\r\n"); 57 | dumpBuilder.append(" \r\n"); 58 | dumpBuilder.append(" \r\n"); 59 | dumpBuilder.append("WORD ansi (1)\r\n"); 60 | dumpBuilder.append("
\r\n"); 61 | dumpBuilder.append("
\r\n"); 62 | dumpBuilder.append(" \r\n"); 63 | dumpBuilder.append(" \r\n"); 64 | dumpBuilder.append("WORD ansicpg (1252)\r\n"); 65 | dumpBuilder.append("
\r\n"); 66 | dumpBuilder.append("
\r\n"); 67 | dumpBuilder.append(" \r\n"); 68 | dumpBuilder.append(" \r\n"); 69 | dumpBuilder.append("WORD deff (0)\r\n"); 70 | dumpBuilder.append("
\r\n"); 71 | dumpBuilder.append("
\r\n"); 72 | dumpBuilder.append(" \r\n"); 73 | dumpBuilder.append(" \r\n"); 74 | dumpBuilder.append("WORD nouicompat (1)\r\n"); 75 | dumpBuilder.append("
\r\n"); 76 | dumpBuilder.append("
\r\n"); 77 | dumpBuilder.append(" \r\n"); 78 | dumpBuilder.append(" \r\n"); 79 | dumpBuilder.append("WORD deflang (1031)\r\n"); 80 | dumpBuilder.append("
\r\n"); 81 | dumpBuilder.append("
\r\n"); 82 | dumpBuilder.append(" \r\n"); 83 | dumpBuilder.append(" \r\n"); 84 | dumpBuilder.append("WORD viewkind (4)\r\n"); 85 | dumpBuilder.append("
\r\n"); 86 | dumpBuilder.append("
\r\n"); 87 | dumpBuilder.append(" \r\n"); 88 | dumpBuilder.append(" \r\n"); 89 | dumpBuilder.append("WORD uc (1)\r\n"); 90 | dumpBuilder.append("
\r\n"); 91 | dumpBuilder.append("
\r\n"); 92 | dumpBuilder.append(" \r\n"); 93 | dumpBuilder.append(" \r\n"); 94 | dumpBuilder.append("WORD pard (1)\r\n"); 95 | dumpBuilder.append("
\r\n"); 96 | dumpBuilder.append("
\r\n"); 97 | dumpBuilder.append(" \r\n"); 98 | dumpBuilder.append(" \r\n"); 99 | dumpBuilder.append("WORD sa (200)\r\n"); 100 | dumpBuilder.append("
\r\n"); 101 | dumpBuilder.append("
\r\n"); 102 | dumpBuilder.append(" \r\n"); 103 | dumpBuilder.append(" \r\n"); 104 | dumpBuilder.append("WORD sl (276)\r\n"); 105 | dumpBuilder.append("
\r\n"); 106 | dumpBuilder.append("
\r\n"); 107 | dumpBuilder.append(" \r\n"); 108 | dumpBuilder.append(" \r\n"); 109 | dumpBuilder.append("WORD slmult (1)\r\n"); 110 | dumpBuilder.append("
\r\n"); 111 | dumpBuilder.append("
\r\n"); 112 | dumpBuilder.append(" \r\n"); 113 | dumpBuilder.append(" \r\n"); 114 | dumpBuilder.append("WORD f (0)\r\n"); 115 | dumpBuilder.append("
\r\n"); 116 | dumpBuilder.append("
\r\n"); 117 | dumpBuilder.append(" \r\n"); 118 | dumpBuilder.append(" \r\n"); 119 | dumpBuilder.append("WORD fs (22)\r\n"); 120 | dumpBuilder.append("
\r\n"); 121 | dumpBuilder.append("
\r\n"); 122 | dumpBuilder.append(" \r\n"); 123 | dumpBuilder.append(" \r\n"); 124 | dumpBuilder.append("WORD lang (7)\r\n"); 125 | dumpBuilder.append("
\r\n"); 126 | dumpBuilder.append("
\r\n"); 127 | dumpBuilder.append(" \r\n"); 128 | dumpBuilder.append(" \r\n"); 129 | dumpBuilder.append("TEXT Hello World\r\n"); 130 | dumpBuilder.append("
\r\n"); 131 | dumpBuilder.append("
\r\n"); 132 | dumpBuilder.append(" \r\n"); 133 | dumpBuilder.append(" \r\n"); 134 | dumpBuilder.append("WORD par (1)\r\n"); 135 | dumpBuilder.append("
\r\n"); 136 | dumpBuilder.append("
\r\n"); 137 | dumpBuilder.append("}\r\n"); 138 | dumpBuilder.append("
\r\n"); 139 | String expectedString = dumpBuilder.toString(); 140 | 141 | StringBuilder rtfBuilder = new StringBuilder(); 142 | rtfBuilder.append( 143 | "{\\rtf1\\ansi\\ansicpg1252\\deff0\\nouicompat\\deflang1031{\\fonttbl{\\f0\\fnil\\fcharset0 Calibri;}}\r\n"); 144 | rtfBuilder.append("{\\*\\generator Riched20 6.3.9600}\\viewkind4\\uc1 \r\n"); 145 | rtfBuilder.append("\\pard\\sa200\\sl276\\slmult1\\f0\\fs22\\lang7 Hello World\\par\r\n"); 146 | rtfBuilder.append("}\r\n"); 147 | String rtfString = rtfBuilder.toString(); 148 | 149 | RtfReader reader = new RtfReader(); 150 | reader.parse(rtfString); 151 | 152 | try (ByteArrayOutputStream baos = new ByteArrayOutputStream()) { 153 | System.setOut(new PrintStream(baos)); 154 | reader.root.dump(); 155 | Assert.assertEquals(expectedString, baos.toString()); 156 | } 157 | } 158 | 159 | @Test 160 | public void testParseError() { 161 | String rtfString = "This text is not a valid RTF string."; 162 | Throwable t = null; 163 | 164 | try { 165 | RtfReader reader = new RtfReader(); 166 | reader.parse(rtfString); 167 | } catch (Exception e) { 168 | t = e; 169 | } 170 | 171 | Assert.assertNotNull(t); 172 | Assert.assertTrue(t instanceof RtfParseException); 173 | } 174 | } -------------------------------------------------------------------------------- /src/org/rtf/RtfReader.java: -------------------------------------------------------------------------------- 1 | package org.rtf; 2 | 3 | import java.io.BufferedReader; 4 | import java.io.File; 5 | import java.io.FileInputStream; 6 | import java.io.IOException; 7 | import java.io.InputStream; 8 | import java.io.InputStreamReader; 9 | import java.util.stream.Collectors; 10 | 11 | /** 12 | * This class parses RTF strings and documents and provides the read RTF 13 | * structure as an element tree for further processing. 
14 | * 15 | * @author Kay Schröer 16 | */ 17 | public class RtfReader { 18 | private String rtf; 19 | private int pos; 20 | private int len; 21 | private char tchar; 22 | private RtfGroup group; 23 | 24 | /** 25 | * Root element of an element tree that contains the processed RTF groups 26 | */ 27 | public RtfGroup root = null; 28 | 29 | /** 30 | * Reads the next character from the RTF string at a time and stores it in 31 | * global variable for later interpretation. 32 | */ 33 | protected void getChar() { 34 | if (pos < rtf.length()) { 35 | tchar = rtf.charAt(pos++); 36 | } 37 | } 38 | 39 | /** 40 | * Converts a hexadecimal string to a decimal value. 41 | * 42 | * @param s 43 | * hex string, e.g. "a0" 44 | * @return number 45 | */ 46 | protected int hexdec(String s) { 47 | return Integer.parseInt(s, 16); 48 | } 49 | 50 | /** 51 | * Checks if the previously read character is a digit. 52 | * 53 | * @return {@code true} if the character is one of 0-9 54 | */ 55 | protected boolean isDigit() { 56 | if (tchar >= 48 && tchar <= 57) { 57 | return true; 58 | } 59 | return false; 60 | } 61 | 62 | /** 63 | * Checks if the previously read character is a letter. 64 | * 65 | * @return {@code true} if the character is one of a-z or A-Z 66 | */ 67 | protected boolean isLetter() { 68 | if (tchar >= 65 && tchar <= 90) { 69 | return true; 70 | } 71 | if (tchar >= 97 && tchar <= 122) { 72 | return true; 73 | } 74 | return false; 75 | } 76 | 77 | /** 78 | * Handles the start of a group represented by an opening brace. 79 | */ 80 | protected void parseStartGroup() { 81 | // Store state of document on stack. 82 | RtfGroup newGroup = new RtfGroup(); 83 | if (group != null) { 84 | newGroup.parent = group; 85 | } 86 | if (root == null) { 87 | group = newGroup; 88 | root = newGroup; 89 | } else { 90 | group.children.add(newGroup); 91 | group = newGroup; 92 | } 93 | } 94 | 95 | /** 96 | * Handles the end of a group represented by a closing brace. 
97 | */ 98 | protected void parseEndGroup() { 99 | // Retrieve state of document from stack. 100 | group = group.parent; 101 | } 102 | 103 | /** 104 | * Gets the name and parameter of the control word and finally adds a new 105 | * word element to the current group. 106 | */ 107 | protected void parseControlWord() { 108 | getChar(); 109 | String word = ""; 110 | 111 | while (isLetter()) { 112 | word += tchar; 113 | getChar(); 114 | } 115 | 116 | // Read parameter (if any) consisting of digits. 117 | // Paramater may be negative. 118 | int parameter = -1; 119 | boolean negative = false; 120 | if (tchar == '-') { 121 | getChar(); 122 | negative = true; 123 | } 124 | 125 | while (isDigit()) { 126 | if (parameter == -1) { 127 | parameter = 0; 128 | } 129 | parameter = parameter * 10 + Integer.parseInt(tchar + ""); 130 | getChar(); 131 | } 132 | 133 | if (parameter == -1) { 134 | parameter = 1; 135 | } 136 | if (negative) { 137 | parameter = -parameter; 138 | } 139 | 140 | // If this is u, then the parameter will be followed by a character. 141 | if (word.equals("u")) { 142 | // Ignore space delimiter. 143 | if (tchar == ' ') { 144 | getChar(); 145 | } 146 | 147 | // If the replacement character is encoded as hexadecimal value \'hh 148 | // then jump over it. 149 | if (tchar == '\\' && rtf.charAt(pos) == '\'') { 150 | pos += 3; 151 | } 152 | 153 | // Convert to UTF unsigned decimal code. 154 | if (negative) { 155 | parameter += 65536; 156 | } 157 | } 158 | // If the current character is a space, then it is a delimiter. It is 159 | // consumed. 160 | // If it's not a space, then it's part of the next item in the text, so 161 | // put the character back. 
162 | else { 163 | if (tchar != ' ') { 164 | pos--; 165 | } 166 | } 167 | 168 | RtfControlWord rtfWord = new RtfControlWord(); 169 | rtfWord.word = word; 170 | rtfWord.parameter = parameter; 171 | group.children.add(rtfWord); 172 | } 173 | 174 | /** 175 | * Gets the name and parameter of the control symbol and finally adds a new 176 | * symbol element to the current group. 177 | */ 178 | protected void parseControlSymbol() { 179 | // Read symbol (one character only). 180 | getChar(); 181 | char symbol = tchar; 182 | 183 | // Symbols ordinarily have no parameter. However, if this is \', then it 184 | // is followed by a 2-digit hex-code. 185 | int parameter = 0; 186 | if (symbol == '\'') { 187 | getChar(); 188 | String firstChar = tchar + ""; 189 | getChar(); 190 | String secondChar = tchar + ""; 191 | parameter = hexdec(firstChar + secondChar); 192 | } 193 | 194 | RtfControlSymbol rtfSymbol = new RtfControlSymbol(); 195 | rtfSymbol.symbol = symbol; 196 | rtfSymbol.parameter = parameter; 197 | group.children.add(rtfSymbol); 198 | } 199 | 200 | /** 201 | * Reads the next character from the string and identifies it as start of a 202 | * control word or control symbol. 203 | */ 204 | protected void parseControl() { 205 | // Beginning of an RTF control word or control symbol. 206 | // Look ahead by one character to see if it starts with a letter 207 | // (control word) or another symbol (control symbol). 208 | getChar(); 209 | pos--; 210 | if (isLetter()) { 211 | parseControlWord(); 212 | } else { 213 | parseControlSymbol(); 214 | } 215 | } 216 | 217 | /** 218 | * Iteratively reads the next characters from the string and handles them as 219 | * plain text. Finally, a new text element is added to the current group. 220 | * 221 | * @throws RtfParseException 222 | * is thrown if errors occur when parsing RTF strings 223 | */ 224 | protected void parseText() throws RtfParseException { 225 | // Parse plain text up to backslash or brace, unless escaped. 
226 | String text = ""; 227 | boolean terminate = false; 228 | 229 | do { 230 | terminate = false; 231 | 232 | // Is this an escape? 233 | if (tchar == '\\') { 234 | // Perform lookahead to see if this is really an escape 235 | // sequence. 236 | getChar(); 237 | switch (tchar) { 238 | case '\\': 239 | case '{': 240 | case '}': 241 | break; 242 | default: 243 | // Not an escape. Roll back. 244 | pos -= 2; 245 | terminate = true; 246 | break; 247 | } 248 | } else if (tchar == '{' || tchar == '}') { 249 | pos--; 250 | terminate = true; 251 | } 252 | 253 | if (!terminate) { 254 | text += tchar; 255 | getChar(); 256 | } 257 | } while (!terminate && pos < len); 258 | 259 | RtfText rtfText = new RtfText(); 260 | rtfText.text = text; 261 | 262 | // If group does not exist, then this is not a valid RTF file. Throw an 263 | // exception. 264 | if (group == null) { 265 | throw new RtfParseException("Invalid RTF file."); 266 | } 267 | 268 | group.children.add(rtfText); 269 | } 270 | 271 | /** 272 | * Parses RTF. 273 | * 274 | * @param rtfFile 275 | * local file containing the rich text 276 | * @throws RtfParseException 277 | * is thrown if errors occur when parsing RTF strings 278 | */ 279 | public void parse(File rtfFile) throws RtfParseException { 280 | try { 281 | try (FileInputStream fis = new FileInputStream(rtfFile)) { 282 | parse(fis); 283 | } 284 | } catch (IOException e) { 285 | throw new RtfParseException(e.getMessage()); 286 | } 287 | } 288 | 289 | /** 290 | * Parses RTF. 291 | * 292 | * @param rtfStream 293 | * stream containing the rich text 294 | * @throws RtfParseException 295 | * is thrown if errors occur when parsing RTF strings 296 | */ 297 | public void parse(InputStream rtfStream) throws RtfParseException { 298 | String rtfSource = new BufferedReader(new InputStreamReader(rtfStream)).lines() 299 | .collect(Collectors.joining("\n")); 300 | parse(rtfSource); 301 | } 302 | 303 | /** 304 | * Parses RTF. 
305 | * 306 | * @param rtfSource 307 | * string containing the rich text 308 | * @throws RtfParseException 309 | * is thrown if errors occur when parsing RTF strings 310 | */ 311 | public void parse(String rtfSource) throws RtfParseException { 312 | rtf = rtfSource; 313 | pos = 0; 314 | len = rtf.length(); 315 | group = null; 316 | root = null; 317 | 318 | while (pos < len) { 319 | // Read next character. 320 | getChar(); 321 | 322 | // Ignore \r and \n. 323 | if (tchar == '\n' || tchar == '\r') { 324 | continue; 325 | } 326 | 327 | // What type of character is this? 328 | switch (tchar) { 329 | case '{': 330 | parseStartGroup(); 331 | break; 332 | case '}': 333 | parseEndGroup(); 334 | break; 335 | case '\\': 336 | parseControl(); 337 | break; 338 | default: 339 | parseText(); 340 | break; 341 | } 342 | } 343 | } 344 | } -------------------------------------------------------------------------------- /tests/org/rtf/test/FontTest.java: -------------------------------------------------------------------------------- 1 | package org.rtf.test; 2 | 3 | import org.junit.Assert; 4 | import org.junit.Test; 5 | import org.rtf.RtfHtml; 6 | import org.rtf.RtfParseException; 7 | import org.rtf.RtfReader; 8 | 9 | public class FontTest { 10 | @Test 11 | public void testFontNormal() throws RtfParseException { 12 | String expectedString = "

Hello World

"; 13 | 14 | StringBuilder rtfBuilder = new StringBuilder(); 15 | rtfBuilder.append( 16 | "{\\rtf1\\ansi\\ansicpg1252\\deff0\\nouicompat\\deflang1031{\\fonttbl{\\f0\\fnil\\fcharset0 Calibri;}}\r\n"); 17 | rtfBuilder.append("{\\*\\generator Riched20 6.3.9600}\\viewkind4\\uc1 \r\n"); 18 | rtfBuilder.append("\\pard\\sa200\\sl276\\slmult1\\f0\\fs22\\lang7 Hello World\\par\r\n"); 19 | rtfBuilder.append("}\r\n"); 20 | String rtfString = rtfBuilder.toString(); 21 | 22 | RtfReader reader = new RtfReader(); 23 | reader.parse(rtfString); 24 | 25 | RtfHtml formatter = new RtfHtml(); 26 | String htmlString = formatter.format(reader.root); 27 | 28 | Assert.assertEquals(expectedString, htmlString); 29 | } 30 | 31 | @Test 32 | public void testDifferentFontSizes() throws RtfParseException { 33 | String expectedString = "

Hello World

"; 34 | 35 | StringBuilder rtfBuilder = new StringBuilder(); 36 | rtfBuilder.append( 37 | "{\\rtf1\\ansi\\ansicpg1252\\deff0\\nouicompat\\deflang1031{\\fonttbl{\\f0\\fnil\\fcharset0 Calibri;}}\r\n"); 38 | rtfBuilder.append("{\\*\\generator Riched20 6.3.9600}\\viewkind4\\uc1 \r\n"); 39 | rtfBuilder.append("\\pard\\sa200\\sl276\\slmult1\\f0\\fs24\\lang7 Hello\\fs28 \\fs32 World\\fs22\\par\r\n"); 40 | rtfBuilder.append("}\r\n"); 41 | String rtfString = rtfBuilder.toString(); 42 | 43 | RtfReader reader = new RtfReader(); 44 | reader.parse(rtfString); 45 | 46 | RtfHtml formatter = new RtfHtml(); 47 | String htmlString = formatter.format(reader.root); 48 | 49 | Assert.assertEquals(expectedString, htmlString); 50 | } 51 | 52 | @Test 53 | public void testSuperscriptSubscriptByRTFUpDn() throws RtfParseException { 54 | String expectedString = "

" 55 | + "Hello" 56 | + " " 57 | + "World" 58 | + "down by 4px and smaller" 59 | + "up by 4px and smaller" 60 | + "

"; 61 | 62 | StringBuilder rtfBuilder = new StringBuilder(); 63 | rtfBuilder.append("{\\rtf1\\ansi\\ansicpg1252\\deff0\\nouicompat\\deflang1031\r\n"); 64 | rtfBuilder.append("{\\fonttbl{\\f0\\fnil\\fcharset0 Calibri;}}\r\n"); 65 | rtfBuilder.append("{\\*\\generator Riched20 6.3.9600}\\viewkind4\\uc1 \r\n"); 66 | rtfBuilder.append("\\pard\\sa200\\sl276\\slmult1"); 67 | rtfBuilder.append("\\f0\\fs24\\lang7 Hello"); 68 | rtfBuilder.append("\\fs28 "); 69 | rtfBuilder.append("\\fs32 World"); 70 | rtfBuilder.append("\\plain\\f0\\fs22\\dn5 down by 4px and smaller"); 71 | rtfBuilder.append("\\plain\\f0\\fs22\\up5 up by 4px and smaller"); 72 | rtfBuilder.append("\\par\r\n"); 73 | rtfBuilder.append("}\r\n"); 74 | String rtfString = rtfBuilder.toString(); 75 | 76 | RtfReader reader = new RtfReader(); 77 | reader.parse(rtfString); 78 | 79 | RtfHtml formatter = new RtfHtml(); 80 | String htmlString = formatter.format(reader.root); 81 | 82 | Assert.assertEquals(expectedString, htmlString); 83 | } 84 | 85 | @Test 86 | public void testSuperscriptSubscriptByRTFSuperSub() throws RtfParseException { 87 | String expectedString = "

" 88 | + "Hello" 89 | + " " 90 | + "World" 91 | + "down by sub" 92 | + "up by super" 93 | + "regular again" 94 | + "

"; 95 | 96 | StringBuilder rtfBuilder = new StringBuilder(); 97 | rtfBuilder.append("{\\rtf1\\ansi\\ansicpg1252\\deff0\\nouicompat\\deflang1031\r\n"); 98 | rtfBuilder.append("{\\fonttbl{\\f0\\fnil\\fcharset0 Calibri;}}\r\n"); 99 | rtfBuilder.append("{\\*\\generator Riched20 6.3.9600}\\viewkind4\\uc1 \r\n"); 100 | rtfBuilder.append("\\pard\\sa200\\sl276\\slmult1"); 101 | rtfBuilder.append("\\f0\\fs24\\lang7 Hello"); 102 | rtfBuilder.append("\\fs28 "); 103 | rtfBuilder.append("\\fs32 World"); 104 | rtfBuilder.append("\\plain\\f0\\fs22\\sub down by sub"); 105 | rtfBuilder.append("\\plain\\f0\\fs22\\super up by super"); 106 | rtfBuilder.append("\\nosupersub regular again"); 107 | rtfBuilder.append("\\par\r\n"); 108 | rtfBuilder.append("}\r\n"); 109 | String rtfString = rtfBuilder.toString(); 110 | 111 | RtfReader reader = new RtfReader(); 112 | reader.parse(rtfString); 113 | 114 | RtfHtml formatter = new RtfHtml(); 115 | String htmlString = formatter.format(reader.root); 116 | 117 | Assert.assertEquals(expectedString, htmlString); 118 | } 119 | 120 | @Test 121 | public void testFontColor() throws RtfParseException { 122 | String expectedString = "

Hello World

"; 123 | 124 | StringBuilder rtfBuilder = new StringBuilder(); 125 | rtfBuilder.append( 126 | "{\\rtf1\\ansi\\ansicpg1252\\deff0\\nouicompat\\deflang1031{\\fonttbl{\\f0\\fnil\\fcharset0 Calibri;}}\r\n"); 127 | rtfBuilder.append("{\\colortbl ;\\red143\\green176\\blue140;}\r\n"); 128 | rtfBuilder.append("{\\*\\generator Riched20 6.3.9600}\\viewkind4\\uc1 \r\n"); 129 | rtfBuilder.append("\\pard\\sa200\\sl276\\slmult1\\cf1\\f0\\fs22\\lang7 Hello World\\par\r\n"); 130 | rtfBuilder.append("}\r\n"); 131 | String rtfString = rtfBuilder.toString(); 132 | 133 | RtfReader reader = new RtfReader(); 134 | reader.parse(rtfString); 135 | 136 | RtfHtml formatter = new RtfHtml(); 137 | String htmlString = formatter.format(reader.root); 138 | 139 | Assert.assertEquals(expectedString, htmlString); 140 | } 141 | 142 | @Test 143 | public void testBold() throws RtfParseException { 144 | String expectedString = "

Hello World

"; 145 | 146 | StringBuilder rtfBuilder = new StringBuilder(); 147 | rtfBuilder.append( 148 | "{\\rtf1\\ansi\\ansicpg1252\\deff0\\nouicompat\\deflang1031{\\fonttbl{\\f0\\fnil\\fcharset0 Calibri;}}\r\n"); 149 | rtfBuilder.append("{\\*\\generator Riched20 6.3.9600}\\viewkind4\\uc1 \r\n"); 150 | rtfBuilder.append("\\pard\\sa200\\sl276\\slmult1\\b\\f0\\fs22\\lang7 Hello World\\par\r\n"); 151 | rtfBuilder.append("}\r\n"); 152 | String rtfString = rtfBuilder.toString(); 153 | 154 | RtfReader reader = new RtfReader(); 155 | reader.parse(rtfString); 156 | 157 | RtfHtml formatter = new RtfHtml(); 158 | String htmlString = formatter.format(reader.root); 159 | 160 | Assert.assertEquals(expectedString, htmlString); 161 | } 162 | 163 | @Test 164 | public void testItalic() throws RtfParseException { 165 | String expectedString = "

Hello World

"; 166 | 167 | StringBuilder rtfBuilder = new StringBuilder(); 168 | rtfBuilder.append( 169 | "{\\rtf1\\ansi\\ansicpg1252\\deff0\\nouicompat\\deflang1031{\\fonttbl{\\f0\\fnil\\fcharset0 Calibri;}}\r\n"); 170 | rtfBuilder.append("{\\*\\generator Riched20 6.3.9600}\\viewkind4\\uc1 \r\n"); 171 | rtfBuilder.append("\\pard\\sa200\\sl276\\slmult1\\i\\f0\\fs22\\lang7 Hello World\\par\r\n"); 172 | rtfBuilder.append("}\r\n"); 173 | String rtfString = rtfBuilder.toString(); 174 | 175 | RtfReader reader = new RtfReader(); 176 | reader.parse(rtfString); 177 | 178 | RtfHtml formatter = new RtfHtml(); 179 | String htmlString = formatter.format(reader.root); 180 | 181 | Assert.assertEquals(expectedString, htmlString); 182 | } 183 | 184 | @Test 185 | public void testUnderline() throws RtfParseException { 186 | String expectedString = "

Hello World

"; 187 | 188 | StringBuilder rtfBuilder = new StringBuilder(); 189 | rtfBuilder.append( 190 | "{\\rtf1\\ansi\\ansicpg1252\\deff0\\nouicompat\\deflang1031{\\fonttbl{\\f0\\fnil\\fcharset0 Calibri;}}\r\n"); 191 | rtfBuilder.append("{\\*\\generator Riched20 6.3.9600}\\viewkind4\\uc1 \r\n"); 192 | rtfBuilder.append("\\pard\\sa200\\sl276\\slmult1\\ul\\f0\\fs22\\lang7 Hello World\\par\r\n"); 193 | rtfBuilder.append("}\r\n"); 194 | String rtfString = rtfBuilder.toString(); 195 | 196 | RtfReader reader = new RtfReader(); 197 | reader.parse(rtfString); 198 | 199 | RtfHtml formatter = new RtfHtml(); 200 | String htmlString = formatter.format(reader.root); 201 | 202 | Assert.assertEquals(expectedString, htmlString); 203 | } 204 | 205 | @Test 206 | public void testStrikethrough() throws RtfParseException { 207 | String expectedString = "

Hello World

"; 208 | 209 | StringBuilder rtfBuilder = new StringBuilder(); 210 | rtfBuilder.append( 211 | "{\\rtf1\\ansi\\ansicpg1252\\deff0\\nouicompat\\deflang1031{\\fonttbl{\\f0\\fnil\\fcharset0 Calibri;}}\r\n"); 212 | rtfBuilder.append("{\\*\\generator Riched20 6.3.9600}\\viewkind4\\uc1 \r\n"); 213 | rtfBuilder.append("\\pard\\sa200\\sl276\\slmult1\\strike\\f0\\fs22\\lang7 Hello World\\par\r\n"); 214 | rtfBuilder.append("}\r\n"); 215 | String rtfString = rtfBuilder.toString(); 216 | 217 | RtfReader reader = new RtfReader(); 218 | reader.parse(rtfString); 219 | 220 | RtfHtml formatter = new RtfHtml(); 221 | String htmlString = formatter.format(reader.root); 222 | 223 | Assert.assertEquals(expectedString, htmlString); 224 | } 225 | } -------------------------------------------------------------------------------- /src/org/rtf/RtfHtml.java: -------------------------------------------------------------------------------- 1 | package org.rtf; 2 | 3 | import java.util.ArrayList; 4 | import java.util.LinkedHashMap; 5 | import java.util.List; 6 | import java.util.Map; 7 | import java.util.Stack; 8 | 9 | /** 10 | * This class is the HTML formatter. 11 | * 12 | * @author Kay Schröer 13 | */ 14 | public class RtfHtml { 15 | private String output; 16 | private Stack states; 17 | private RtfState state; 18 | private RtfState previousState; 19 | private Map openedTags; 20 | private List fonttbl; 21 | private List colortbl; 22 | private boolean newRootPar; 23 | 24 | /** 25 | * Transforms an RTF group with all children into HTML tags. 26 | * 27 | * @param root 28 | * element from which the formatting should be started 29 | * @return HTML string 30 | */ 31 | public String format(RtfGroup root) { 32 | return format(root, false); 33 | } 34 | 35 | /** 36 | * Transforms an RTF group with all children into HTML tags. 
37 | * 38 | * @param root 39 | * element from which the formatting should be started 40 | * @param page 41 | * defines whether a complete HTML page should be generated or 42 | * the new tags should be returned as snippet 43 | * @return HTML string 44 | */ 45 | public String format(RtfGroup root, boolean page) { 46 | // Keeping track of style modifications. 47 | previousState = null; 48 | openedTags = new LinkedHashMap<>(); 49 | openedTags.put("span", false); 50 | openedTags.put("p", true); 51 | 52 | // Create a stack of states and put an initial standard state onto the 53 | // stack. 54 | states = new Stack<>(); 55 | state = new RtfState(); 56 | states.push(state); 57 | 58 | // Do the job. 59 | output = "

"; 60 | newRootPar = true; 61 | formatGroup(root); 62 | if (page) { 63 | wrapTags(); 64 | } 65 | 66 | return output; 67 | } 68 | 69 | /** 70 | * @param fontTblGrp 71 | * list with child elements of the "fonttbl" group element 72 | */ 73 | protected void extractFontTable(List fontTblGrp) { 74 | // {\fonttbl 75 | // {\f0\fswiss\fcharset0\fprq2 Arial;} 76 | // {\f1\froman\fcharset2\fprq2 Symbol;} 77 | // } 78 | // index 0 is the "default" font (in fact: default font is declared by /deffN in RTF header section) 79 | List fonttbl = new ArrayList<>(); 80 | 81 | int c = fontTblGrp.size(); 82 | 83 | for (int i = 1; i < c; i++) { 84 | // assume that font table entries are present in order of their index, i. e. f0, f1, f2... 85 | if (fontTblGrp.get(i) instanceof RtfGroup) { 86 | RtfGroup fontDesc = (RtfGroup) fontTblGrp.get(i); 87 | String fontFamily = ""; 88 | // process font description group 89 | List fontAttrs = fontDesc.children; 90 | // assume that the font index is the first (at least) RtfElement in the font descriptor RtfGroup. 91 | // Only RtfControlWord and RtfText elements are processed here. RtfGroups are not processed. 
92 | for (int fa = 1; fa < fontAttrs.size(); fa++) { 93 | RtfElement faElem = fontAttrs.get(fa); 94 | if (faElem instanceof RtfControlWord) { 95 | // font attribute 96 | RtfControlWord fontAttr = (RtfControlWord) faElem; 97 | // font family (has only one of): 98 | if (fontAttr.word.equals("fnil")) { 99 | // font family Unknown/Default -> no font name applicable so far 100 | } else 101 | if (fontAttr.word.equals("froman")) { 102 | // font family Roman (proportionally spaced, serif) 103 | fontFamily = "Times,serif"; 104 | } else 105 | if (fontAttr.word.equals("fswiss")) { 106 | // font family Swiss (proportionally spaced, sans-serif) 107 | fontFamily = "Helvetica,Swiss,sans-serif"; 108 | } else 109 | if (fontAttr.word.equals("fmodern")) { 110 | // font family Fixed-pitch (typewriter) 111 | fontFamily = "Courier,monospace"; 112 | } else 113 | if (fontAttr.word.equals("fscript")) { 114 | // font family Script (like handwritten) 115 | fontFamily = "Cursive"; 116 | } else 117 | if (fontAttr.word.equals("fdecor")) { 118 | // font family Decorative 119 | fontFamily = "'ITC Zapf Chancery'"; 120 | } else 121 | if (fontAttr.word.equals("ftech")) { 122 | // font family Non-Unicode, technical, symbol 123 | fontFamily = "Symbol,Wingdings"; 124 | } else 125 | if (fontAttr.word.equals("fbidi")) { 126 | // font family bi-directional 127 | fontFamily = "Miriam"; 128 | } else 129 | // charset (after font family setting): 130 | if (fontAttr.word.equals("fcharset")) { 131 | // font charset reference (with parameter) 132 | // 0 = default charset as defined in RTF header (assume ANSI, CP1252) 133 | // 2 = SYMBOL_CHARSET (CP42) 134 | if (fontAttr.parameter == 2) { 135 | // supersede font family by forcing "Symbol" font 136 | fontFamily = "Symbol"; 137 | } 138 | } 139 | // /cpgN (code page) is ignored. 
42 however would equal /fcharset2 (Symbol) 140 | } 141 | if (faElem instanceof RtfText) { 142 | // font name 143 | RtfText fontName = (RtfText) faElem; 144 | String fontNameText = fontName.text; 145 | if (!";".equals(fontNameText)) { 146 | if (fontNameText.endsWith(";")) { 147 | fontNameText = fontNameText.substring(0, fontNameText.length() - 1); 148 | } 149 | if (!fontFamily.contains(fontNameText)) { 150 | // DRY... 151 | if (fontFamily.length() > 0) { 152 | fontFamily = "," + fontFamily; 153 | } 154 | fontFamily = "'" + fontNameText + "'" + fontFamily; 155 | } 156 | } 157 | } 158 | } 159 | fonttbl.add(fontFamily); 160 | } 161 | } 162 | 163 | this.fonttbl = fonttbl; 164 | } 165 | 166 | /** 167 | * Extracts the color information available in the document and fills the 168 | * color table. 169 | * 170 | * @param colorTblGrp 171 | * list with child elements of the "colortbl" group element 172 | */ 173 | protected void extractColorTable(List colorTblGrp) { 174 | // {\colortbl;\red0\green0\blue0;} 175 | // index 0 is the "auto" color 176 | // force list to begin at index 1 177 | List colortbl = new ArrayList<>(); 178 | colortbl.add(null); 179 | 180 | int c = colorTblGrp.size(); 181 | String color = ""; 182 | 183 | for (int i = 2; i < c; i++) { 184 | if (colorTblGrp.get(i) instanceof RtfControlWord) { 185 | // Extract RGB color and convert it to hex string. 186 | int red = ((RtfControlWord) colorTblGrp.get(i)).parameter; 187 | int green = ((RtfControlWord) colorTblGrp.get(i + 1)).parameter; 188 | int blue = ((RtfControlWord) colorTblGrp.get(i + 2)).parameter; 189 | 190 | color = String.format("#%02x%02x%02x", red, green, blue); 191 | i += 2; 192 | } else if (colorTblGrp.get(i) instanceof RtfText) { 193 | // This a delimiter ";" so store the already extracted color. 194 | colortbl.add(color); 195 | } 196 | } 197 | 198 | this.colortbl = colortbl; 199 | } 200 | 201 | /** 202 | * Formats an RTF group. 
203 | * 204 | * @param group 205 | * group element to process 206 | */ 207 | protected void formatGroup(RtfGroup group) { 208 | // Can we ignore this group? 209 | // Font table extraction. 210 | if (group.getType().equals("fonttbl")) { 211 | extractFontTable(group.children); 212 | return; 213 | } 214 | // Extract color table. 215 | if (group.getType().equals("colortbl")) { 216 | extractColorTable(group.children); 217 | return; 218 | } 219 | // Stylesheet extraction not yet supported. 220 | if (group.getType().equals("stylesheet")) { 221 | return; 222 | } 223 | // Info extraction not yet supported. 224 | if (group.getType().equals("info")) { 225 | return; 226 | } 227 | // Picture extraction not yet supported. 228 | if (group.getType().length() >= 4 && group.getType().substring(0, 4).equals("pict")) { 229 | return; 230 | } 231 | // Ignore destinations. 232 | if (group.isDestination()) { 233 | return; 234 | } 235 | 236 | // Push a new state onto the stack. 237 | state = (RtfState) state.clone(); 238 | states.push(state); 239 | 240 | // Format all group children. 241 | for (RtfElement child : group.children) { 242 | if (child instanceof RtfGroup) { 243 | formatGroup((RtfGroup) child); 244 | } else if (child instanceof RtfControlWord) { 245 | formatControlWord((RtfControlWord) child); 246 | } else if (child instanceof RtfControlSymbol) { 247 | formatControlSymbol((RtfControlSymbol) child); 248 | } else if (child instanceof RtfText) { 249 | formatText((RtfText) child); 250 | } 251 | } 252 | 253 | // Pop state from stack. 254 | states.pop(); 255 | state = states.peek(); 256 | } 257 | 258 | /** 259 | * Formats an RTF control word. 
260 | * 261 | * @param rtfWord 262 | * word element to process 263 | */ 264 | protected void formatControlWord(RtfControlWord rtfWord) { 265 | if (rtfWord.word.equals("plain") || rtfWord.word.equals("pard")) { 266 | state.reset(); 267 | } else 268 | // state changers, not printed immediately: 269 | if (rtfWord.word.equals("f")) { 270 | state.font = rtfWord.parameter; 271 | } else if (rtfWord.word.equals("b")) { 272 | state.bold = rtfWord.parameter > 0; 273 | } else if (rtfWord.word.equals("i")) { 274 | state.italic = rtfWord.parameter > 0; 275 | } else if (rtfWord.word.equals("ul")) { 276 | state.underline = rtfWord.parameter > 0; 277 | } else if (rtfWord.word.equals("ulnone")) { 278 | state.underline = false; 279 | } else if (rtfWord.word.equals("strike")) { 280 | state.strike = rtfWord.parameter > 0; 281 | } else if (rtfWord.word.equals("v")) { 282 | state.hidden = rtfWord.parameter > 0; 283 | } else if (rtfWord.word.equals("fs")) { 284 | state.fontSize = (int) Math.ceil((rtfWord.parameter / 24.0) * 16.0); 285 | } else if (rtfWord.word.equals("dn")) { 286 | state.dnup = (int) Math.ceil((rtfWord.parameter / 24.0) * 16.0) * -1; 287 | } else if (rtfWord.word.equals("up")) { 288 | state.dnup = (int) Math.ceil((rtfWord.parameter / 24.0) * 16.0); 289 | } else if (rtfWord.word.equals("sub")) { 290 | state.subscript = true; 291 | state.superscript = false; 292 | } else if (rtfWord.word.equals("super")) { 293 | state.subscript = false; 294 | state.superscript = true; 295 | } else if (rtfWord.word.equals("nosupersub")) { 296 | state.subscript = false; 297 | state.superscript = false; 298 | } else if (rtfWord.word.equals("cf")) { 299 | state.textColor = rtfWord.parameter; 300 | } else if (rtfWord.word.equals("cb") || rtfWord.word.equals("chcbpat") || rtfWord.word.equals("highlight")) { 301 | state.background = rtfWord.parameter; 302 | } else 303 | // special characters, printed immediately: 304 | if (rtfWord.word.equals("lquote")) { 305 | applyStyle("‘"); 306 | } else if 
(rtfWord.word.equals("rquote")) { 307 | applyStyle("’"); 308 | } else if (rtfWord.word.equals("ldblquote")) { 309 | applyStyle("“"); 310 | } else if (rtfWord.word.equals("rdblquote")) { 311 | applyStyle("”"); 312 | } else if (rtfWord.word.equals("emdash")) { 313 | applyStyle("—"); 314 | } else if (rtfWord.word.equals("endash")) { 315 | applyStyle("–"); 316 | } else if (rtfWord.word.equals("emspace")) { 317 | applyStyle(" "); 318 | } else if (rtfWord.word.equals("enspace")) { 319 | applyStyle(" "); 320 | } else if (rtfWord.word.equals("tab")) { 321 | applyStyle("     "); 322 | } else if (rtfWord.word.equals("line")) { 323 | applyStyle("
"); 324 | } else if (rtfWord.word.equals("bullet")) { 325 | applyStyle("•"); 326 | } else if (rtfWord.word.equals("u")) { 327 | applyStyle("&#" + rtfWord.parameter + ";"); 328 | } else if (rtfWord.word.equals("par") || rtfWord.word.equals("row")) { 329 | // Close previously opened tags. 330 | closeTags(); 331 | 332 | output += "

"; 333 | openedTags.put("p", true); 334 | newRootPar = true; 335 | } 336 | } 337 | 338 | /** 339 | * Adds the new layout information using the span tag. 340 | * 341 | * @param txt 342 | * text to be formatted 343 | */ 344 | protected void applyStyle(String txt) { 345 | // Create span only when a style change occurs or a root paragraph start was just inserted. 346 | if (!state.equals(previousState) || newRootPar) { 347 | String span = ""; 348 | 349 | if (state.font >= 0) { 350 | span += "font-family:" + printFontFamily(state.font) + ";"; 351 | } 352 | if (state.bold) { 353 | span += "font-weight:bold;"; 354 | } 355 | if (state.italic) { 356 | span += "font-style:italic;"; 357 | } 358 | if (state.underline) { 359 | span += "text-decoration:underline;"; 360 | } 361 | if (state.strike) { 362 | span += "text-decoration:strikethrough;"; 363 | } 364 | if (state.hidden) { 365 | span += "display:none;"; 366 | } 367 | if (state.fontSize != 0) { 368 | span += "font-size:" + state.fontSize + "px;"; 369 | } 370 | // RTF dn/up: 371 | // By spec, RTF fs and RTF dn/up are independent of each other; 372 | // there is no documented "auto-reducing" for the font size. 373 | // In the wild, RTF dn/up often is given together with a "full" RTF fs but rendered with reduced font size. 374 | // Thus, RTF dn/up is rendered with implicit font size reduction. 375 | // This font-size setting supersedes the explicit "fs" font-size setting. 376 | if (state.dnup != 0) { 377 | span += calculateReducedFontSize() + "vertical-align:" + state.dnup + "px;"; 378 | } 379 | // RTF sub/super: 380 | // Reduced font-size and vertical-align supersede settings from fs,dn,up. 
381 | if (state.subscript) { 382 | span += calculateReducedFontSize() + "vertical-align:sub;"; 383 | } 384 | if (state.superscript) { 385 | span += calculateReducedFontSize() + "vertical-align:super;"; 386 | } 387 | if (state.textColor != 0) { 388 | span += "color:" + printColor(state.textColor) + ";"; 389 | } 390 | if (state.background != 0) { 391 | span += "background-color:" + printColor(state.background) + ";"; 392 | } 393 | 394 | // Keep track of preceding style. 395 | previousState = (RtfState) state.clone(); 396 | 397 | // Close previously opened "span" tag. 398 | closeTag("span"); 399 | 400 | output += "" + txt; 401 | openedTags.put("span", true); 402 | } else { 403 | output += txt; 404 | } 405 | newRootPar = false; 406 | } 407 | 408 | /** 409 | * Calculate reduced font size based on actual state. 410 | * If actual state defines a font size, then CSS fon-size with 2/3 of this is returned, 411 | * else "smaller" is returned. 412 | * @return CSS for reduced font size. 413 | */ 414 | protected String calculateReducedFontSize() { 415 | String css; 416 | if (state.fontSize != 0) { 417 | int reducedFontSize = (int) Math.ceil((state.fontSize / 3.0) * 2.0); 418 | css = "font-size:" + reducedFontSize + "px;"; 419 | } else { 420 | css = "font-size:smaller;"; 421 | } 422 | return css; 423 | } 424 | 425 | protected String printFontFamily(int index) { 426 | // index is 0-based 427 | if (index >= 0 && index < fonttbl.size()) { 428 | return fonttbl.get(index); 429 | } else { 430 | return ""; 431 | } 432 | } 433 | 434 | /** 435 | * Gets the color at the specified position from the color table. 
436 | * 437 | * @param index 438 | * a value greater than 0 and less than the number of list items 439 | * @return RGB hex string or an empty string if the position is invalid 440 | */ 441 | protected String printColor(int index) { 442 | if (index >= 1 && index < colortbl.size()) { 443 | return colortbl.get(index); 444 | } else { 445 | return ""; 446 | } 447 | } 448 | 449 | /** 450 | * Adds the closing tag to match the last opening tag. 451 | * 452 | * @param tag 453 | * the HTML tag name, e.g. "span" or "p" 454 | */ 455 | protected void closeTag(String tag) { 456 | if (openedTags.get(tag)) { 457 | output += ""; 458 | openedTags.put(tag, false); 459 | } 460 | } 461 | 462 | /** 463 | * Closes all opened tags. 464 | */ 465 | protected void closeTags() { 466 | for (String tag : openedTags.keySet()) { 467 | closeTag(tag); 468 | } 469 | } 470 | 471 | /** 472 | * Wraps HTML head and body tags around the output string. 473 | */ 474 | protected void wrapTags() { 475 | StringBuilder source = new StringBuilder(); 476 | source.append("\n"); 477 | source.append("\n"); 478 | source.append(" \n"); 479 | source.append(" \n"); 480 | source.append(" \n"); 481 | source.append(" \n"); 482 | source.append(output + "\n"); 483 | source.append(" \n"); 484 | source.append("\n"); 485 | output = source.toString(); 486 | } 487 | 488 | /** 489 | * Formats an RTF control symbol. 490 | * 491 | * @param rtfSymbol 492 | * symbol element to process 493 | */ 494 | protected void formatControlSymbol(RtfControlSymbol rtfSymbol) { 495 | if (rtfSymbol.symbol == '\'') { 496 | applyStyle("&#" + rtfSymbol.parameter + ";"); 497 | } 498 | if (rtfSymbol.symbol == '~') { 499 | output += " "; 500 | } 501 | } 502 | 503 | /** 504 | * Formats an RTF text. 505 | * 506 | * @param rtfText 507 | * text element to process 508 | */ 509 | protected void formatText(RtfText rtfText) { 510 | applyStyle(rtfText.text); 511 | } 512 | } --------------------------------------------------------------------------------