├── .gitignore
├── tests
└── org
│ └── rtf
│ └── test
│ ├── TestSuite.java
│ ├── TextTest.java
│ ├── ReaderTest.java
│ └── FontTest.java
├── src
└── org
│ └── rtf
│ ├── RtfParseException.java
│ ├── RtfText.java
│ ├── RtfControlWord.java
│ ├── RtfControlSymbol.java
│ ├── RtfElement.java
│ ├── RtfGroup.java
│ ├── RtfState.java
│ ├── RtfReader.java
│ └── RtfHtml.java
├── LICENSE
└── README.md
/.gitignore:
--------------------------------------------------------------------------------
1 | bin/
2 | .classpath
3 | .project
4 |
--------------------------------------------------------------------------------
/tests/org/rtf/test/TestSuite.java:
--------------------------------------------------------------------------------
1 | package org.rtf.test;
2 |
3 | import org.junit.runner.RunWith;
4 | import org.junit.runners.Suite;
5 |
/**
 * JUnit suite that bundles FontTest, ReaderTest and TextTest so that they can
 * be run together. The class body is intentionally empty; the annotations carry
 * the whole configuration.
 */
6 | @RunWith(Suite.class)
7 |
8 | @Suite.SuiteClasses({ FontTest.class, ReaderTest.class, TextTest.class })
9 |
10 | public class TestSuite {
11 | }
--------------------------------------------------------------------------------
/src/org/rtf/RtfParseException.java:
--------------------------------------------------------------------------------
1 | package org.rtf;
2 |
/**
 * This exception is thrown if errors occur when parsing RTF strings, e.g. with
 * an invalid structure.
 *
 * @author Kay Schröer
 */
public class RtfParseException extends Exception {
    private static final long serialVersionUID = 0L;

    /**
     * Creates the new exception.
     *
     * @param message
     *            error details
     */
    public RtfParseException(String message) {
        super(message);
    }

    /**
     * Creates the new exception and keeps the failure that triggered it, so
     * that the original stack trace is not lost when a lower-level error is
     * reported as a parse error.
     *
     * @param message
     *            error details
     * @param cause
     *            underlying failure, may be {@code null}
     */
    public RtfParseException(String message, Throwable cause) {
        super(message, cause);
    }
}
--------------------------------------------------------------------------------
/src/org/rtf/RtfText.java:
--------------------------------------------------------------------------------
1 | package org.rtf;
2 |
3 | /**
4 | * This class represents an RTF text element in the element tree.
5 | *
6 | * @author Kay Schröer
7 | */
8 | public class RtfText extends RtfElement {
9 | /**
10 | * Plain text carried by this element; dump(int) prints it after the "TEXT " marker.
11 | */
12 | public String text;
13 |
14 | /*
15 | * Debug output for a text element: prints a line, calls indent(level), prints "TEXT " followed by the text, and prints a closing line.
16 | *
17 | * @see org.rtf.RtfElement#dump(int)
18 | */
19 | @Override
20 | public void dump(int level) {
21 | System.out.println("
");
22 | indent(level);
23 | System.out.println("TEXT " + text);
24 | System.out.println("
");
25 | }
26 | }
--------------------------------------------------------------------------------
/src/org/rtf/RtfControlWord.java:
--------------------------------------------------------------------------------
1 | package org.rtf;
2 |
3 | /**
4 | * This class represents an RTF control word in the element tree.
5 | *
6 | * @author ");
27 | indent(level);
28 | System.out.println("WORD " + word + " (" + parameter + ")");
29 | System.out.println("
");
30 | }
31 | }
--------------------------------------------------------------------------------
/src/org/rtf/RtfControlSymbol.java:
--------------------------------------------------------------------------------
1 | package org.rtf;
2 |
3 | /**
4 | * This class represents an RTF control symbol in the element tree.
5 | *
6 | * @author ");
27 | indent(level);
28 | System.out.println("SYMBOL " + symbol + " (" + parameter + ")");
29 | System.out.println("
");
30 | }
31 | }
--------------------------------------------------------------------------------
/src/org/rtf/RtfElement.java:
--------------------------------------------------------------------------------
1 | package org.rtf;
2 |
3 | /**
4 | * This class provides the base technology for debugging and is used as
5 | * superclass for specific RTF elements like groups, control words, control
6 | * symbols and texts.
7 | *
8 | * @author children;
21 |
/**
 * Creates a new group element that has no parent and an empty, modifiable
 * list of children.
 */
public RtfGroup() {
    this.children = new ArrayList<>();
    this.parent = null;
}
29 |
30 | /**
31 | * Gets the group type.
32 | *
33 | * @return control word of the first child as type or an empty string if
34 | * there are no children or the first child is not a control word
35 | */
36 | public String getType() {
37 | // No children?
38 | if (children.isEmpty()) {
39 | return "";
40 | }
41 |
42 | // First child not a control word?
43 | RtfElement child = children.get(0);
44 | if (!(child instanceof RtfControlWord)) {
45 | return "";
46 | }
47 |
48 | return ((RtfControlWord) child).word;
49 | }
50 |
51 | /**
52 | * Checks if the group is a destination.
53 | *
54 | * @return {@code true} if a certain control word is referred
55 | */
56 | public boolean isDestination() {
57 | // No children?
58 | if (children.isEmpty()) {
59 | return false;
60 | }
61 |
62 | // First child not a control symbol?
63 | RtfElement child = children.get(0);
64 | if (!(child instanceof RtfControlSymbol)) {
65 | return false;
66 | }
67 |
68 | return ((RtfControlSymbol) child).symbol == '*';
69 | }
70 |
71 | /**
72 | * Outputs debug information for this group by delegating to dump(int) with level 0.
73 | */
74 | public void dump() {
75 | dump(0);
76 | }
77 |
78 | /*
79 | * Prints this group as an opening brace, its children and a closing brace. Each brace is written after indent(level); every printed child is dumped at level + 2.
80 | * Child groups of type fonttbl, colortbl, stylesheet or info, groups whose type starts with "pict", and destination groups are left out of the output.
81 | * @see org.rtf.RtfElement#dump(int)
82 | */
83 | @Override
84 | public void dump(int level) {
85 | System.out.println("");
86 | indent(level);
87 | System.out.println("{");
88 | System.out.println("
");
89 |
90 | for (RtfElement child : children) {
91 | if (child instanceof RtfGroup) {
92 | RtfGroup group = (RtfGroup) child;
93 |
94 | // Header tables and document info are not part of the dump.
95 | if (group.getType().equals("fonttbl")) {
96 | continue;
97 | }
98 | if (group.getType().equals("colortbl")) {
99 | continue;
100 | }
101 | if (group.getType().equals("stylesheet")) {
102 | continue;
103 | }
104 | if (group.getType().equals("info")) {
105 | continue;
106 | }
107 |
108 | // Skip any group whose type starts with "pict" and any destination group.
109 | if (group.getType().length() >= 4 && group.getType().substring(0, 4).equals("pict")) {
110 | continue;
111 | }
112 | if (group.isDestination()) {
113 | continue;
114 | }
115 | }
116 |
117 | child.dump(level + 2);
118 | }
119 |
120 | System.out.println("");
121 | indent(level);
122 | System.out.println("}");
123 | System.out.println("
");
124 | }
125 | }
--------------------------------------------------------------------------------
/src/org/rtf/RtfState.java:
--------------------------------------------------------------------------------
1 | package org.rtf;
2 |
3 | /**
4 | * This class specifies a structure of layout information used for text
5 | * formatting in the span tag and obtained from RTF control words.
6 | *
7 | * @author Kay Schröer
8 | */
9 | public class RtfState implements Cloneable {
10 | /**
11 | * Attribute that specifies that text should be written in bold
12 | */
13 | public boolean bold;
14 |
15 | /**
16 | * Attribute that specifies that text should be written in italic
17 | */
18 | public boolean italic;
19 |
20 | /**
21 | * Attribute that specifies that text should be underlined
22 | */
23 | public boolean underline;
24 |
25 | /**
26 | * Attribute that specifies that text should be striked through
27 | */
28 | public boolean strike;
29 |
30 | /**
31 | * Attribute that specifies that text should be hidden
32 | */
33 | public boolean hidden;
34 |
35 | /**
36 | * Attribute that specifies that the text should be beneath the baseline ("down", negative) or above the baseline ("up", positive) by N.
37 | *
RTF "dnN" move down N half-points; does not imply font size reduction, thus font size is given separately --> value negative from param, fontsize unchanged.
38 | *
RTF "upN" move up N half-points; does not imply font size reduction, thus font size is given separately --> value positive from param, fontsize unchanged.
39 | */
40 | public int dnup;
41 |
42 | /**
43 | * Attribute that specifies that the text should be subscript. Switchs of superscript.
44 | *
RTF "sub" denotes subscript and implies font size reduction --> true, actual fontsize is 1/2 of actual font size.
45 | *
Turned of by /nosupersub.
46 | */
47 | public boolean subscript;
48 |
49 | /**
50 | * Attribute that specifies that the text should be superscript. Switches of subscript.
51 | *
RTF "super" denotes superscript and implies font size reduction --> true, actual fontsize is 1/2 of actual font size.
52 | *
Turned of by /nosupersub.
53 | */
54 | public boolean superscript;
55 |
56 | /**
57 | * Font size in pixels
58 | */
59 | public int fontSize;
60 |
61 | /**
62 | * Font as a position in the font table
63 | */
64 | public int font;
65 |
66 | /**
67 | * Text color as a position in the color table
68 | */
69 | public int textColor;
70 |
71 | /**
72 | * Background color as a position in the color table
73 | */
74 | public int background;
75 |
76 | /**
77 | * Creates a new RTF state.
78 | */
79 | public RtfState() {
80 | reset();
81 | }
82 |
83 | /**
84 | * Clones the layout information.
85 | *
86 | * @return a copy of this object
87 | */
88 | @Override
89 | public Object clone() {
90 | RtfState newState = new RtfState();
91 | newState.bold = this.bold;
92 | newState.italic = this.italic;
93 | newState.underline = this.underline;
94 | newState.strike = this.strike;
95 | newState.hidden = this.hidden;
96 | newState.dnup = this.dnup;
97 | newState.subscript = this.subscript;
98 | newState.superscript = this.superscript;
99 | newState.fontSize = this.fontSize;
100 | newState.font = this.font;
101 | newState.textColor = this.textColor;
102 | newState.background = this.background;
103 | return newState;
104 | }
105 |
106 | /**
107 | * Compares two states for equality.
108 | *
109 | * @param obj
110 | * the object to compare with
111 | * @return {@code true} if and only if the argument is not {@code null} and
112 | * is a {@code RtfState} object that contains the same layout
113 | * information as this object
114 | */
115 | @Override
116 | public boolean equals(Object obj) {
117 | if (obj == null) {
118 | return false;
119 | }
120 | if (!(obj instanceof RtfState)) {
121 | return false;
122 | }
123 |
124 | RtfState anotherState = (RtfState) obj;
125 | return this.bold == anotherState.bold && this.italic == anotherState.italic
126 | && this.underline == anotherState.underline && this.strike == anotherState.strike
127 | && this.dnup == anotherState.dnup
128 | && this.subscript == anotherState.subscript && this.superscript == anotherState.superscript
129 | && this.hidden == anotherState.hidden && this.fontSize == anotherState.fontSize
130 | && this.font == anotherState.font
131 | && this.textColor == anotherState.textColor && this.background == anotherState.background;
132 | }
133 |
134 | /**
135 | * Sets the attributes to default values.
136 | */
137 | public void reset() {
138 | bold = false;
139 | italic = false;
140 | underline = false;
141 | strike = false;
142 | hidden = false;
143 | dnup = 0;
144 | subscript = false;
145 | superscript = false;
146 | fontSize = 0;
147 | font = 0;
148 | textColor = 0;
149 | background = 0;
150 | }
151 | }
--------------------------------------------------------------------------------
/tests/org/rtf/test/TextTest.java:
--------------------------------------------------------------------------------
1 | package org.rtf.test;
2 |
3 | import org.junit.Assert;
4 | import org.junit.Test;
5 | import org.rtf.RtfHtml;
6 | import org.rtf.RtfParseException;
7 | import org.rtf.RtfReader;
8 |
/**
 * Tests for the conversion of RTF text content to HTML. Every test builds an
 * RTF document in a StringBuilder, parses it with RtfReader, formats the
 * resulting tree with RtfHtml.format(reader.root) and compares the result with
 * an expected string.
 */
9 | public class TextTest {
// Two paragraphs, each closed by the par control word, with font sizes fs24 and fs28.
10 | @Test
11 | public void testParagraphs() throws RtfParseException {
12 | String expectedString = "This is the first line.
And this is the second one.
";
13 |
14 | StringBuilder rtfBuilder = new StringBuilder();
15 | rtfBuilder.append(
16 | "{\\rtf1\\ansi\\ansicpg1252\\deff0\\nouicompat\\deflang1031{\\fonttbl{\\f0\\fnil\\fcharset0 Calibri;}}\r\n");
17 | rtfBuilder.append("{\\*\\generator Riched20 6.3.9600}\\viewkind4\\uc1 \r\n");
18 | rtfBuilder.append("\\pard\\sa200\\sl276\\slmult1\\f0\\fs24\\lang7 This is the first line.\\par\r\n");
19 | rtfBuilder.append("\\fs28 And this is the second one.\\par\r\n");
20 | rtfBuilder.append("}\r\n");
21 | String rtfString = rtfBuilder.toString();
22 |
23 | RtfReader reader = new RtfReader();
24 | reader.parse(rtfString);
25 |
26 | RtfHtml formatter = new RtfHtml();
27 | String htmlString = formatter.format(reader.root);
28 |
29 | Assert.assertEquals(expectedString, htmlString);
30 | }
31 |
// Two paragraphs without any formatting control word between them.
32 | @Test
33 | public void testParagraphsWithUnchangedFontFormat() throws RtfParseException {
34 | String expectedString = "
This is the first line.
"
35 | + "And this is the second one with unchanged font format.
";
36 |
37 | StringBuilder rtfBuilder = new StringBuilder();
38 | rtfBuilder.append(
39 | "{\\rtf1\\ansi\\ansicpg1252\\deff0\\nouicompat\\deflang1031\r\n");
40 | rtfBuilder.append("{\\fonttbl{\\f0\\fnil\\fcharset0 Calibri;}}\r\n");
41 | rtfBuilder.append("{\\*\\generator Riched20 6.3.9600}\\viewkind4\\uc1 \r\n");
42 | rtfBuilder.append("\\pard\\sa200\\sl276\\slmult1\\f0\\fs22\\lang7 This is the first line.");
43 | rtfBuilder.append("\\par And this is the second one with unchanged font format.");
44 | rtfBuilder.append("\\par}\r\n");
45 | String rtfString = rtfBuilder.toString();
46 |
47 | RtfReader reader = new RtfReader();
48 | reader.parse(rtfString);
49 |
50 | RtfHtml formatter = new RtfHtml();
51 | String htmlString = formatter.format(reader.root);
52 |
53 | Assert.assertEquals(expectedString, htmlString);
54 | }
55 |
// Braces escaped with a backslash in the RTF must show up as literal braces in the output.
56 | @Test
57 | public void testEscapeSequences() throws RtfParseException {
58 | String expectedString = "
Hello {World}
";
59 |
60 | StringBuilder rtfBuilder = new StringBuilder();
61 | rtfBuilder.append(
62 | "{\\rtf1\\ansi\\ansicpg1252\\deff0\\nouicompat\\deflang1031{\\fonttbl{\\f0\\fnil\\fcharset0 Calibri;}}\r\n");
63 | rtfBuilder.append("{\\*\\generator Riched20 6.3.9600}\\viewkind4\\uc1 \r\n");
64 | rtfBuilder.append("\\pard\\sa200\\sl276\\slmult1\\f0\\fs22\\lang7 Hello \\{World\\}\\par\r\n");
65 | rtfBuilder.append("}\r\n");
66 | String rtfString = rtfBuilder.toString();
67 |
68 | RtfReader reader = new RtfReader();
69 | reader.parse(rtfString);
70 |
71 | RtfHtml formatter = new RtfHtml();
72 | String htmlString = formatter.format(reader.root);
73 |
74 | Assert.assertEquals(expectedString, htmlString);
75 | }
76 |
// The two-digit hex escape 'f6 in the RTF is expected to appear as the o-umlaut in the name.
77 | @Test
78 | public void testUnicodeCharacters() throws RtfParseException {
79 | String expectedString = "
Kay Schröer
";
80 |
81 | StringBuilder rtfBuilder = new StringBuilder();
82 | rtfBuilder.append(
83 | "{\\rtf1\\ansi\\ansicpg1252\\deff0\\nouicompat\\deflang1031{\\fonttbl{\\f0\\fnil\\fcharset0 Calibri;}}\r\n");
84 | rtfBuilder.append("{\\*\\generator Riched20 6.3.9600}\\viewkind4\\uc1 \r\n");
85 | rtfBuilder.append("\\pard\\sa200\\sl276\\slmult1\\f0\\fs22\\lang7 Kay Schr\\'f6er\\par\r\n");
86 | rtfBuilder.append("}\r\n");
87 | String rtfString = rtfBuilder.toString();
88 |
89 | RtfReader reader = new RtfReader();
90 | reader.parse(rtfString);
91 |
92 | RtfHtml formatter = new RtfHtml();
93 | String htmlString = formatter.format(reader.root);
94 |
95 | Assert.assertEquals(expectedString, htmlString);
96 | }
97 |
// The endash control word and the ~ control symbol appear between "Hello" and "World", inside a font switch from f0 to f1 and back.
98 | @Test
99 | public void testEntities() throws RtfParseException {
100 | String expectedString = "
Hello – World
";
101 |
102 | StringBuilder rtfBuilder = new StringBuilder();
103 | rtfBuilder.append("{\\rtf1\\ansi\\ansicpg1252\\deff0\\nouicompat\\deflang1031");
104 | rtfBuilder.append("{\\fonttbl{\\f0\\fnil\\fcharset0 Calibri;}{\\f1\\fnil Tahoma;}}\r\n");
105 | rtfBuilder.append("{\\*\\generator Riched20 6.3.9600}\\viewkind4\\uc1 \r\n");
106 | rtfBuilder.append("\\pard\\sa200\\sl276\\slmult1\\f0\\fs22\\lang7 Hello ");
107 | rtfBuilder.append("\\f1\\endash \\~ World\\f0\\par\r\n");
108 | rtfBuilder.append("}\r\n");
109 | String rtfString = rtfBuilder.toString();
110 |
111 | RtfReader reader = new RtfReader();
112 | reader.parse(rtfString);
113 |
114 | RtfHtml formatter = new RtfHtml();
115 | String htmlString = formatter.format(reader.root);
116 |
117 | Assert.assertEquals(expectedString, htmlString);
118 | }
119 | }
--------------------------------------------------------------------------------
/tests/org/rtf/test/ReaderTest.java:
--------------------------------------------------------------------------------
1 | package org.rtf.test;
2 |
3 | import java.io.ByteArrayOutputStream;
4 | import java.io.IOException;
5 | import java.io.PrintStream;
6 |
7 | import org.junit.Assert;
8 | import org.junit.Test;
9 | import org.rtf.RtfHtml;
10 | import org.rtf.RtfParseException;
11 | import org.rtf.RtfReader;
12 |
/**
 * Tests for RtfReader itself: formatting a parsed document as a complete HTML
 * page, the debug dump of the parsed element tree, and the error raised for
 * input that is not RTF.
 */
13 | public class ReaderTest {
// Formats the parsed document with format(reader.root, true) and compares it with the expected page.
14 | @Test
15 | public void testHtmlPage() throws RtfParseException {
16 | StringBuilder expectedBuilder = new StringBuilder();
17 | expectedBuilder.append("\n");
18 | expectedBuilder.append("\n");
19 | expectedBuilder.append("
\n");
20 | expectedBuilder.append(" \n");
21 | expectedBuilder.append(" \n");
22 | expectedBuilder.append(" \n");
23 | expectedBuilder.append("Hello World
\n");
24 | expectedBuilder.append(" \n");
25 | expectedBuilder.append("\n");
26 | String expectedString = expectedBuilder.toString();
27 |
28 | StringBuilder rtfBuilder = new StringBuilder();
29 | rtfBuilder.append(
30 | "{\\rtf1\\ansi\\ansicpg1252\\deff0\\nouicompat\\deflang1031{\\fonttbl{\\f0\\fnil\\fcharset0 Calibri;}}\r\n");
31 | rtfBuilder.append("{\\*\\generator Riched20 6.3.9600}\\viewkind4\\uc1 \r\n");
32 | rtfBuilder.append("\\pard\\sa200\\sl276\\slmult1\\f0\\fs22\\lang7 Hello World\\par\r\n");
33 | rtfBuilder.append("}\r\n");
34 | String rtfString = rtfBuilder.toString();
35 |
36 | RtfReader reader = new RtfReader();
37 | reader.parse(rtfString);
38 |
39 | RtfHtml formatter = new RtfHtml();
40 | String htmlString = formatter.format(reader.root, true);
41 |
42 | Assert.assertEquals(expectedString, htmlString);
43 | }
44 |
// Redirects System.out into a byte buffer and compares the output of reader.root.dump() with the expected text.
// The expected lines end with \r\n, so the comparison presumably only succeeds where println emits that separator - TODO confirm.
// NOTE(review): System.out is replaced and never restored, so later tests in the same JVM keep writing into the buffer's stream.
45 | @Test
46 | public void testFormatDump() throws IOException, RtfParseException {
47 | StringBuilder dumpBuilder = new StringBuilder();
48 | dumpBuilder.append("
\r\n");
49 | dumpBuilder.append("{\r\n");
50 | dumpBuilder.append("
\r\n");
51 | dumpBuilder.append("\r\n");
52 | dumpBuilder.append(" \r\n");
53 | dumpBuilder.append(" \r\n");
54 | dumpBuilder.append("WORD rtf (1)\r\n");
55 | dumpBuilder.append("
\r\n");
56 | dumpBuilder.append("\r\n");
57 | dumpBuilder.append(" \r\n");
58 | dumpBuilder.append(" \r\n");
59 | dumpBuilder.append("WORD ansi (1)\r\n");
60 | dumpBuilder.append("
\r\n");
61 | dumpBuilder.append("\r\n");
62 | dumpBuilder.append(" \r\n");
63 | dumpBuilder.append(" \r\n");
64 | dumpBuilder.append("WORD ansicpg (1252)\r\n");
65 | dumpBuilder.append("
\r\n");
66 | dumpBuilder.append("\r\n");
67 | dumpBuilder.append(" \r\n");
68 | dumpBuilder.append(" \r\n");
69 | dumpBuilder.append("WORD deff (0)\r\n");
70 | dumpBuilder.append("
\r\n");
71 | dumpBuilder.append("\r\n");
72 | dumpBuilder.append(" \r\n");
73 | dumpBuilder.append(" \r\n");
74 | dumpBuilder.append("WORD nouicompat (1)\r\n");
75 | dumpBuilder.append("
\r\n");
76 | dumpBuilder.append("\r\n");
77 | dumpBuilder.append(" \r\n");
78 | dumpBuilder.append(" \r\n");
79 | dumpBuilder.append("WORD deflang (1031)\r\n");
80 | dumpBuilder.append("
\r\n");
81 | dumpBuilder.append("\r\n");
82 | dumpBuilder.append(" \r\n");
83 | dumpBuilder.append(" \r\n");
84 | dumpBuilder.append("WORD viewkind (4)\r\n");
85 | dumpBuilder.append("
\r\n");
86 | dumpBuilder.append("\r\n");
87 | dumpBuilder.append(" \r\n");
88 | dumpBuilder.append(" \r\n");
89 | dumpBuilder.append("WORD uc (1)\r\n");
90 | dumpBuilder.append("
\r\n");
91 | dumpBuilder.append("\r\n");
92 | dumpBuilder.append(" \r\n");
93 | dumpBuilder.append(" \r\n");
94 | dumpBuilder.append("WORD pard (1)\r\n");
95 | dumpBuilder.append("
\r\n");
96 | dumpBuilder.append("\r\n");
97 | dumpBuilder.append(" \r\n");
98 | dumpBuilder.append(" \r\n");
99 | dumpBuilder.append("WORD sa (200)\r\n");
100 | dumpBuilder.append("
\r\n");
101 | dumpBuilder.append("\r\n");
102 | dumpBuilder.append(" \r\n");
103 | dumpBuilder.append(" \r\n");
104 | dumpBuilder.append("WORD sl (276)\r\n");
105 | dumpBuilder.append("
\r\n");
106 | dumpBuilder.append("\r\n");
107 | dumpBuilder.append(" \r\n");
108 | dumpBuilder.append(" \r\n");
109 | dumpBuilder.append("WORD slmult (1)\r\n");
110 | dumpBuilder.append("
\r\n");
111 | dumpBuilder.append("\r\n");
112 | dumpBuilder.append(" \r\n");
113 | dumpBuilder.append(" \r\n");
114 | dumpBuilder.append("WORD f (0)\r\n");
115 | dumpBuilder.append("
\r\n");
116 | dumpBuilder.append("\r\n");
117 | dumpBuilder.append(" \r\n");
118 | dumpBuilder.append(" \r\n");
119 | dumpBuilder.append("WORD fs (22)\r\n");
120 | dumpBuilder.append("
\r\n");
121 | dumpBuilder.append("\r\n");
122 | dumpBuilder.append(" \r\n");
123 | dumpBuilder.append(" \r\n");
124 | dumpBuilder.append("WORD lang (7)\r\n");
125 | dumpBuilder.append("
\r\n");
126 | dumpBuilder.append("\r\n");
127 | dumpBuilder.append(" \r\n");
128 | dumpBuilder.append(" \r\n");
129 | dumpBuilder.append("TEXT Hello World\r\n");
130 | dumpBuilder.append("
\r\n");
131 | dumpBuilder.append("\r\n");
132 | dumpBuilder.append(" \r\n");
133 | dumpBuilder.append(" \r\n");
134 | dumpBuilder.append("WORD par (1)\r\n");
135 | dumpBuilder.append("
\r\n");
136 | dumpBuilder.append("\r\n");
137 | dumpBuilder.append("}\r\n");
138 | dumpBuilder.append("
\r\n");
139 | String expectedString = dumpBuilder.toString();
140 |
141 | StringBuilder rtfBuilder = new StringBuilder();
142 | rtfBuilder.append(
143 | "{\\rtf1\\ansi\\ansicpg1252\\deff0\\nouicompat\\deflang1031{\\fonttbl{\\f0\\fnil\\fcharset0 Calibri;}}\r\n");
144 | rtfBuilder.append("{\\*\\generator Riched20 6.3.9600}\\viewkind4\\uc1 \r\n");
145 | rtfBuilder.append("\\pard\\sa200\\sl276\\slmult1\\f0\\fs22\\lang7 Hello World\\par\r\n");
146 | rtfBuilder.append("}\r\n");
147 | String rtfString = rtfBuilder.toString();
148 |
149 | RtfReader reader = new RtfReader();
150 | reader.parse(rtfString);
151 |
152 | try (ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
153 | System.setOut(new PrintStream(baos));
154 | reader.root.dump();
155 | Assert.assertEquals(expectedString, baos.toString());
156 | }
157 | }
158 |
// Plain text without any group must make parse() fail with an RtfParseException.
159 | @Test
160 | public void testParseError() {
161 | String rtfString = "This text is not a valid RTF string.";
162 | Throwable t = null;
163 |
164 | try {
165 | RtfReader reader = new RtfReader();
166 | reader.parse(rtfString);
167 | } catch (Exception e) {
168 | t = e;
169 | }
170 |
171 | Assert.assertNotNull(t);
172 | Assert.assertTrue(t instanceof RtfParseException);
173 | }
174 | }
--------------------------------------------------------------------------------
/src/org/rtf/RtfReader.java:
--------------------------------------------------------------------------------
1 | package org.rtf;
2 |
3 | import java.io.BufferedReader;
4 | import java.io.File;
5 | import java.io.FileInputStream;
6 | import java.io.IOException;
7 | import java.io.InputStream;
8 | import java.io.InputStreamReader;
9 | import java.util.stream.Collectors;
10 |
11 | /**
12 | * This class parses RTF strings and documents and provides the read RTF
13 | * structure as an element tree for further processing.
14 | *
15 | * @author Kay Schröer
16 | */
17 | public class RtfReader {
18 | private String rtf;
19 | private int pos;
20 | private int len;
21 | private char tchar;
22 | private RtfGroup group;
23 |
24 | /**
25 | * Root element of an element tree that contains the processed RTF groups
26 | */
27 | public RtfGroup root = null;
28 |
29 | /**
30 | * Reads the next character from the RTF string at a time and stores it in
31 | * global variable for later interpretation.
32 | */
33 | protected void getChar() {
34 | if (pos < rtf.length()) {
35 | tchar = rtf.charAt(pos++);
36 | }
37 | }
38 |
39 | /**
40 | * Converts a hexadecimal string to a decimal value.
41 | *
42 | * @param s
43 | * hex string, e.g. "a0"
44 | * @return number
45 | */
46 | protected int hexdec(String s) {
47 | return Integer.parseInt(s, 16);
48 | }
49 |
50 | /**
51 | * Checks if the previously read character is a digit.
52 | *
53 | * @return {@code true} if the character is one of 0-9
54 | */
55 | protected boolean isDigit() {
56 | if (tchar >= 48 && tchar <= 57) {
57 | return true;
58 | }
59 | return false;
60 | }
61 |
62 | /**
63 | * Checks if the previously read character is a letter.
64 | *
65 | * @return {@code true} if the character is one of a-z or A-Z
66 | */
67 | protected boolean isLetter() {
68 | if (tchar >= 65 && tchar <= 90) {
69 | return true;
70 | }
71 | if (tchar >= 97 && tchar <= 122) {
72 | return true;
73 | }
74 | return false;
75 | }
76 |
77 | /**
78 | * Handles the start of a group represented by an opening brace.
79 | */
80 | protected void parseStartGroup() {
81 | // Store state of document on stack.
82 | RtfGroup newGroup = new RtfGroup();
83 | if (group != null) {
84 | newGroup.parent = group;
85 | }
86 | if (root == null) {
87 | group = newGroup;
88 | root = newGroup;
89 | } else {
90 | group.children.add(newGroup);
91 | group = newGroup;
92 | }
93 | }
94 |
95 | /**
96 | * Handles the end of a group represented by a closing brace.
97 | */
98 | protected void parseEndGroup() {
99 | // Retrieve state of document from stack.
100 | group = group.parent;
101 | }
102 |
103 | /**
104 | * Gets the name and parameter of the control word and finally adds a new
105 | * word element to the current group.
106 | */
107 | protected void parseControlWord() {
108 | getChar();
109 | String word = "";
110 |
111 | while (isLetter()) {
112 | word += tchar;
113 | getChar();
114 | }
115 |
116 | // Read parameter (if any) consisting of digits.
117 | // Paramater may be negative.
118 | int parameter = -1;
119 | boolean negative = false;
120 | if (tchar == '-') {
121 | getChar();
122 | negative = true;
123 | }
124 |
125 | while (isDigit()) {
126 | if (parameter == -1) {
127 | parameter = 0;
128 | }
129 | parameter = parameter * 10 + Integer.parseInt(tchar + "");
130 | getChar();
131 | }
132 |
133 | if (parameter == -1) {
134 | parameter = 1;
135 | }
136 | if (negative) {
137 | parameter = -parameter;
138 | }
139 |
140 | // If this is u, then the parameter will be followed by a character.
141 | if (word.equals("u")) {
142 | // Ignore space delimiter.
143 | if (tchar == ' ') {
144 | getChar();
145 | }
146 |
147 | // If the replacement character is encoded as hexadecimal value \'hh
148 | // then jump over it.
149 | if (tchar == '\\' && rtf.charAt(pos) == '\'') {
150 | pos += 3;
151 | }
152 |
153 | // Convert to UTF unsigned decimal code.
154 | if (negative) {
155 | parameter += 65536;
156 | }
157 | }
158 | // If the current character is a space, then it is a delimiter. It is
159 | // consumed.
160 | // If it's not a space, then it's part of the next item in the text, so
161 | // put the character back.
162 | else {
163 | if (tchar != ' ') {
164 | pos--;
165 | }
166 | }
167 |
168 | RtfControlWord rtfWord = new RtfControlWord();
169 | rtfWord.word = word;
170 | rtfWord.parameter = parameter;
171 | group.children.add(rtfWord);
172 | }
173 |
174 | /**
175 | * Gets the name and parameter of the control symbol and finally adds a new
176 | * symbol element to the current group.
177 | */
178 | protected void parseControlSymbol() {
179 | // Read symbol (one character only).
180 | getChar();
181 | char symbol = tchar;
182 |
183 | // Symbols ordinarily have no parameter. However, if this is \', then it
184 | // is followed by a 2-digit hex-code.
185 | int parameter = 0;
186 | if (symbol == '\'') {
187 | getChar();
188 | String firstChar = tchar + "";
189 | getChar();
190 | String secondChar = tchar + "";
191 | parameter = hexdec(firstChar + secondChar);
192 | }
193 |
194 | RtfControlSymbol rtfSymbol = new RtfControlSymbol();
195 | rtfSymbol.symbol = symbol;
196 | rtfSymbol.parameter = parameter;
197 | group.children.add(rtfSymbol);
198 | }
199 |
200 | /**
201 | * Reads the next character from the string and identifies it as start of a
202 | * control word or control symbol.
203 | */
204 | protected void parseControl() {
205 | // Beginning of an RTF control word or control symbol.
206 | // Look ahead by one character to see if it starts with a letter
207 | // (control word) or another symbol (control symbol).
208 | getChar();
209 | pos--;
210 | if (isLetter()) {
211 | parseControlWord();
212 | } else {
213 | parseControlSymbol();
214 | }
215 | }
216 |
217 | /**
218 | * Iteratively reads the next characters from the string and handles them as
219 | * plain text. Finally, a new text element is added to the current group.
220 | *
221 | * @throws RtfParseException
222 | * is thrown if errors occur when parsing RTF strings
223 | */
224 | protected void parseText() throws RtfParseException {
225 | // Parse plain text up to backslash or brace, unless escaped.
226 | String text = "";
227 | boolean terminate = false;
228 |
229 | do {
230 | terminate = false;
231 |
232 | // Is this an escape?
233 | if (tchar == '\\') {
234 | // Perform lookahead to see if this is really an escape
235 | // sequence.
236 | getChar();
237 | switch (tchar) {
238 | case '\\':
239 | case '{':
240 | case '}':
241 | break;
242 | default:
243 | // Not an escape. Roll back.
244 | pos -= 2;
245 | terminate = true;
246 | break;
247 | }
248 | } else if (tchar == '{' || tchar == '}') {
249 | pos--;
250 | terminate = true;
251 | }
252 |
253 | if (!terminate) {
254 | text += tchar;
255 | getChar();
256 | }
257 | } while (!terminate && pos < len);
258 |
259 | RtfText rtfText = new RtfText();
260 | rtfText.text = text;
261 |
262 | // If group does not exist, then this is not a valid RTF file. Throw an
263 | // exception.
264 | if (group == null) {
265 | throw new RtfParseException("Invalid RTF file.");
266 | }
267 |
268 | group.children.add(rtfText);
269 | }
270 |
271 | /**
272 | * Parses RTF.
273 | *
274 | * @param rtfFile
275 | * local file containing the rich text
276 | * @throws RtfParseException
277 | * is thrown if errors occur when parsing RTF strings
278 | */
279 | public void parse(File rtfFile) throws RtfParseException {
280 | try {
281 | try (FileInputStream fis = new FileInputStream(rtfFile)) {
282 | parse(fis);
283 | }
284 | } catch (IOException e) {
285 | throw new RtfParseException(e.getMessage());
286 | }
287 | }
288 |
289 | /**
290 | * Parses RTF.
291 | *
292 | * @param rtfStream
293 | * stream containing the rich text
294 | * @throws RtfParseException
295 | * is thrown if errors occur when parsing RTF strings
296 | */
297 | public void parse(InputStream rtfStream) throws RtfParseException {
298 | String rtfSource = new BufferedReader(new InputStreamReader(rtfStream)).lines()
299 | .collect(Collectors.joining("\n"));
300 | parse(rtfSource);
301 | }
302 |
303 | /**
304 | * Parses RTF.
305 | *
306 | * @param rtfSource
307 | * string containing the rich text
308 | * @throws RtfParseException
309 | * is thrown if errors occur when parsing RTF strings
310 | */
311 | public void parse(String rtfSource) throws RtfParseException {
312 | rtf = rtfSource;
313 | pos = 0;
314 | len = rtf.length();
315 | group = null;
316 | root = null;
317 |
318 | while (pos < len) {
319 | // Read next character.
320 | getChar();
321 |
322 | // Ignore \r and \n.
323 | if (tchar == '\n' || tchar == '\r') {
324 | continue;
325 | }
326 |
327 | // What type of character is this?
328 | switch (tchar) {
329 | case '{':
330 | parseStartGroup();
331 | break;
332 | case '}':
333 | parseEndGroup();
334 | break;
335 | case '\\':
336 | parseControl();
337 | break;
338 | default:
339 | parseText();
340 | break;
341 | }
342 | }
343 | }
344 | }
--------------------------------------------------------------------------------
/tests/org/rtf/test/FontTest.java:
--------------------------------------------------------------------------------
1 | package org.rtf.test;
2 |
3 | import org.junit.Assert;
4 | import org.junit.Test;
5 | import org.rtf.RtfHtml;
6 | import org.rtf.RtfParseException;
7 | import org.rtf.RtfReader;
8 |
9 | public class FontTest { // Each test parses an RTF snippet with RtfReader, formats reader.root with RtfHtml and compares the result with expectedString. NOTE(review): the expectedString literals span lines and carry no markup in this listing; the HTML they pin looks stripped, confirm against the original file.
10 | @Test
11 | public void testFontNormal() throws RtfParseException {
12 | String expectedString = "Hello World
";
13 | // Input: font table with f0 = Calibri, one paragraph at fs22 containing "Hello World".
14 | StringBuilder rtfBuilder = new StringBuilder();
15 | rtfBuilder.append(
16 | "{\\rtf1\\ansi\\ansicpg1252\\deff0\\nouicompat\\deflang1031{\\fonttbl{\\f0\\fnil\\fcharset0 Calibri;}}\r\n");
17 | rtfBuilder.append("{\\*\\generator Riched20 6.3.9600}\\viewkind4\\uc1 \r\n");
18 | rtfBuilder.append("\\pard\\sa200\\sl276\\slmult1\\f0\\fs22\\lang7 Hello World\\par\r\n");
19 | rtfBuilder.append("}\r\n");
20 | String rtfString = rtfBuilder.toString();
21 |
22 | RtfReader reader = new RtfReader();
23 | reader.parse(rtfString);
24 |
25 | RtfHtml formatter = new RtfHtml();
26 | String htmlString = formatter.format(reader.root);
27 |
28 | Assert.assertEquals(expectedString, htmlString);
29 | }
30 |
31 | @Test
32 | public void testDifferentFontSizes() throws RtfParseException {
33 | String expectedString = "
Hello World
";
34 | // Input: one paragraph that switches the font size through fs24, fs28 and fs32, then fs22 before the paragraph end.
35 | StringBuilder rtfBuilder = new StringBuilder();
36 | rtfBuilder.append(
37 | "{\\rtf1\\ansi\\ansicpg1252\\deff0\\nouicompat\\deflang1031{\\fonttbl{\\f0\\fnil\\fcharset0 Calibri;}}\r\n");
38 | rtfBuilder.append("{\\*\\generator Riched20 6.3.9600}\\viewkind4\\uc1 \r\n");
39 | rtfBuilder.append("\\pard\\sa200\\sl276\\slmult1\\f0\\fs24\\lang7 Hello\\fs28 \\fs32 World\\fs22\\par\r\n");
40 | rtfBuilder.append("}\r\n");
41 | String rtfString = rtfBuilder.toString();
42 |
43 | RtfReader reader = new RtfReader();
44 | reader.parse(rtfString);
45 |
46 | RtfHtml formatter = new RtfHtml();
47 | String htmlString = formatter.format(reader.root);
48 |
49 | Assert.assertEquals(expectedString, htmlString);
50 | }
51 |
52 | @Test
53 | public void testSuperscriptSubscriptByRTFUpDn() throws RtfParseException {
54 | String expectedString = "
"
55 | + "Hello"
56 | + " "
57 | + "World"
58 | + "down by 4px and smaller"
59 | + "up by 4px and smaller"
60 | + "
";
61 | // Input: size changes as in the previous test, followed by two plain runs shifted with the dn5 and up5 control words.
62 | StringBuilder rtfBuilder = new StringBuilder();
63 | rtfBuilder.append("{\\rtf1\\ansi\\ansicpg1252\\deff0\\nouicompat\\deflang1031\r\n");
64 | rtfBuilder.append("{\\fonttbl{\\f0\\fnil\\fcharset0 Calibri;}}\r\n");
65 | rtfBuilder.append("{\\*\\generator Riched20 6.3.9600}\\viewkind4\\uc1 \r\n");
66 | rtfBuilder.append("\\pard\\sa200\\sl276\\slmult1");
67 | rtfBuilder.append("\\f0\\fs24\\lang7 Hello");
68 | rtfBuilder.append("\\fs28 ");
69 | rtfBuilder.append("\\fs32 World");
70 | rtfBuilder.append("\\plain\\f0\\fs22\\dn5 down by 4px and smaller");
71 | rtfBuilder.append("\\plain\\f0\\fs22\\up5 up by 4px and smaller");
72 | rtfBuilder.append("\\par\r\n");
73 | rtfBuilder.append("}\r\n");
74 | String rtfString = rtfBuilder.toString();
75 |
76 | RtfReader reader = new RtfReader();
77 | reader.parse(rtfString);
78 |
79 | RtfHtml formatter = new RtfHtml();
80 | String htmlString = formatter.format(reader.root);
81 |
82 | Assert.assertEquals(expectedString, htmlString);
83 | }
84 |
85 | @Test
86 | public void testSuperscriptSubscriptByRTFSuperSub() throws RtfParseException {
87 | String expectedString = "
"
88 | + "Hello"
89 | + " "
90 | + "World"
91 | + "down by sub"
92 | + "up by super"
93 | + "regular again"
94 | + "
";
95 | // Input: plain runs marked with the sub and super control words, then nosupersub to return to the baseline.
96 | StringBuilder rtfBuilder = new StringBuilder();
97 | rtfBuilder.append("{\\rtf1\\ansi\\ansicpg1252\\deff0\\nouicompat\\deflang1031\r\n");
98 | rtfBuilder.append("{\\fonttbl{\\f0\\fnil\\fcharset0 Calibri;}}\r\n");
99 | rtfBuilder.append("{\\*\\generator Riched20 6.3.9600}\\viewkind4\\uc1 \r\n");
100 | rtfBuilder.append("\\pard\\sa200\\sl276\\slmult1");
101 | rtfBuilder.append("\\f0\\fs24\\lang7 Hello");
102 | rtfBuilder.append("\\fs28 ");
103 | rtfBuilder.append("\\fs32 World");
104 | rtfBuilder.append("\\plain\\f0\\fs22\\sub down by sub");
105 | rtfBuilder.append("\\plain\\f0\\fs22\\super up by super");
106 | rtfBuilder.append("\\nosupersub regular again");
107 | rtfBuilder.append("\\par\r\n");
108 | rtfBuilder.append("}\r\n");
109 | String rtfString = rtfBuilder.toString();
110 |
111 | RtfReader reader = new RtfReader();
112 | reader.parse(rtfString);
113 |
114 | RtfHtml formatter = new RtfHtml();
115 | String htmlString = formatter.format(reader.root);
116 |
117 | Assert.assertEquals(expectedString, htmlString);
118 | }
119 |
120 | @Test
121 | public void testFontColor() throws RtfParseException {
122 | String expectedString = "
Hello World
";
123 | // Input: color table with one entry (red143 green176 blue140) selected through cf1.
124 | StringBuilder rtfBuilder = new StringBuilder();
125 | rtfBuilder.append(
126 | "{\\rtf1\\ansi\\ansicpg1252\\deff0\\nouicompat\\deflang1031{\\fonttbl{\\f0\\fnil\\fcharset0 Calibri;}}\r\n");
127 | rtfBuilder.append("{\\colortbl ;\\red143\\green176\\blue140;}\r\n");
128 | rtfBuilder.append("{\\*\\generator Riched20 6.3.9600}\\viewkind4\\uc1 \r\n");
129 | rtfBuilder.append("\\pard\\sa200\\sl276\\slmult1\\cf1\\f0\\fs22\\lang7 Hello World\\par\r\n");
130 | rtfBuilder.append("}\r\n");
131 | String rtfString = rtfBuilder.toString();
132 |
133 | RtfReader reader = new RtfReader();
134 | reader.parse(rtfString);
135 |
136 | RtfHtml formatter = new RtfHtml();
137 | String htmlString = formatter.format(reader.root);
138 |
139 | Assert.assertEquals(expectedString, htmlString);
140 | }
141 |
142 | @Test
143 | public void testBold() throws RtfParseException {
144 | String expectedString = "
Hello World
";
145 | // Input: paragraph with the b (bold) control word switched on.
146 | StringBuilder rtfBuilder = new StringBuilder();
147 | rtfBuilder.append(
148 | "{\\rtf1\\ansi\\ansicpg1252\\deff0\\nouicompat\\deflang1031{\\fonttbl{\\f0\\fnil\\fcharset0 Calibri;}}\r\n");
149 | rtfBuilder.append("{\\*\\generator Riched20 6.3.9600}\\viewkind4\\uc1 \r\n");
150 | rtfBuilder.append("\\pard\\sa200\\sl276\\slmult1\\b\\f0\\fs22\\lang7 Hello World\\par\r\n");
151 | rtfBuilder.append("}\r\n");
152 | String rtfString = rtfBuilder.toString();
153 |
154 | RtfReader reader = new RtfReader();
155 | reader.parse(rtfString);
156 |
157 | RtfHtml formatter = new RtfHtml();
158 | String htmlString = formatter.format(reader.root);
159 |
160 | Assert.assertEquals(expectedString, htmlString);
161 | }
162 |
163 | @Test
164 | public void testItalic() throws RtfParseException {
165 | String expectedString = "
Hello World
";
166 | // Input: paragraph with the i (italic) control word switched on.
167 | StringBuilder rtfBuilder = new StringBuilder();
168 | rtfBuilder.append(
169 | "{\\rtf1\\ansi\\ansicpg1252\\deff0\\nouicompat\\deflang1031{\\fonttbl{\\f0\\fnil\\fcharset0 Calibri;}}\r\n");
170 | rtfBuilder.append("{\\*\\generator Riched20 6.3.9600}\\viewkind4\\uc1 \r\n");
171 | rtfBuilder.append("\\pard\\sa200\\sl276\\slmult1\\i\\f0\\fs22\\lang7 Hello World\\par\r\n");
172 | rtfBuilder.append("}\r\n");
173 | String rtfString = rtfBuilder.toString();
174 |
175 | RtfReader reader = new RtfReader();
176 | reader.parse(rtfString);
177 |
178 | RtfHtml formatter = new RtfHtml();
179 | String htmlString = formatter.format(reader.root);
180 |
181 | Assert.assertEquals(expectedString, htmlString);
182 | }
183 |
184 | @Test
185 | public void testUnderline() throws RtfParseException {
186 | String expectedString = "
Hello World
";
187 | // Input: paragraph with the ul (underline) control word switched on.
188 | StringBuilder rtfBuilder = new StringBuilder();
189 | rtfBuilder.append(
190 | "{\\rtf1\\ansi\\ansicpg1252\\deff0\\nouicompat\\deflang1031{\\fonttbl{\\f0\\fnil\\fcharset0 Calibri;}}\r\n");
191 | rtfBuilder.append("{\\*\\generator Riched20 6.3.9600}\\viewkind4\\uc1 \r\n");
192 | rtfBuilder.append("\\pard\\sa200\\sl276\\slmult1\\ul\\f0\\fs22\\lang7 Hello World\\par\r\n");
193 | rtfBuilder.append("}\r\n");
194 | String rtfString = rtfBuilder.toString();
195 |
196 | RtfReader reader = new RtfReader();
197 | reader.parse(rtfString);
198 |
199 | RtfHtml formatter = new RtfHtml();
200 | String htmlString = formatter.format(reader.root);
201 |
202 | Assert.assertEquals(expectedString, htmlString);
203 | }
204 |
205 | @Test
206 | public void testStrikethrough() throws RtfParseException {
207 | String expectedString = "
Hello World
";
208 | // Input: paragraph with the strike control word switched on.
209 | StringBuilder rtfBuilder = new StringBuilder();
210 | rtfBuilder.append(
211 | "{\\rtf1\\ansi\\ansicpg1252\\deff0\\nouicompat\\deflang1031{\\fonttbl{\\f0\\fnil\\fcharset0 Calibri;}}\r\n");
212 | rtfBuilder.append("{\\*\\generator Riched20 6.3.9600}\\viewkind4\\uc1 \r\n");
213 | rtfBuilder.append("\\pard\\sa200\\sl276\\slmult1\\strike\\f0\\fs22\\lang7 Hello World\\par\r\n");
214 | rtfBuilder.append("}\r\n");
215 | String rtfString = rtfBuilder.toString();
216 |
217 | RtfReader reader = new RtfReader();
218 | reader.parse(rtfString);
219 |
220 | RtfHtml formatter = new RtfHtml();
221 | String htmlString = formatter.format(reader.root);
222 |
223 | Assert.assertEquals(expectedString, htmlString);
224 | }
225 | }
--------------------------------------------------------------------------------
/src/org/rtf/RtfHtml.java:
--------------------------------------------------------------------------------
1 | package org.rtf;
2 |
3 | import java.util.ArrayList;
4 | import java.util.LinkedHashMap;
5 | import java.util.List;
6 | import java.util.Map;
7 | import java.util.Stack;
8 |
9 | /**
10 | * This class is the HTML formatter.
11 | *
12 | * @author Kay Schröer
13 | */
14 | public class RtfHtml {
15 | private String output; // HTML generated so far
16 | private Stack<RtfState> states; // one state per group currently being formatted
17 | private RtfState state; // state in effect for the next piece of text
18 | private RtfState previousState; // state used for the most recently opened span, null before the first one
19 | private Map<String, Boolean> openedTags; // tag name -> whether that tag is currently open
20 | private List<String> fonttbl; // CSS font families by font index, assigned by extractFontTable
21 | private List<String> colortbl; // hex colors by color index (index 0 unused), assigned by extractColorTable
22 | private boolean newRootPar; // true right after a paragraph start, forces a new span
23 |
24 | /**
25 | * Transforms an RTF group with all children into an HTML snippet; shorthand for {@code format(root, false)}.
26 | *
27 | * @param root
28 | * element from which the formatting should be started
29 | * @return HTML string without the surrounding page markup
30 | */
31 | public String format(RtfGroup root) {
32 | return format(root, false);
33 | }
34 |
35 | /**
36 | * Transforms an RTF group with all children into HTML tags. All formatter fields except the font and color tables are re-initialised on every call.
37 | *
38 | * @param root
39 | * element from which the formatting should be started
40 | * @param page
41 | * defines whether a complete HTML page should be generated (the output is passed through wrapTags) or
42 | * the new tags should be returned as snippet
43 | * @return HTML string
44 | */
45 | public String format(RtfGroup root, boolean page) {
46 | // Keeping track of style modifications: no previous style yet, "span" starts closed and "p" starts open.
47 | previousState = null;
48 | openedTags = new LinkedHashMap<>();
49 | openedTags.put("span", false);
50 | openedTags.put("p", true);
51 | // NOTE(review): "p" is flagged open although output starts empty below; presumably an opening paragraph tag was lost from that literal in this listing. Confirm against the original file.
52 | // Create a stack of states and put an initial standard state onto the
53 | // stack.
54 | states = new Stack<>();
55 | state = new RtfState();
56 | states.push(state);
57 |
58 | // Do the job: format the whole tree, then optionally wrap the result into a page.
59 | output = "";
60 | newRootPar = true;
61 | formatGroup(root);
62 | if (page) {
63 | wrapTags();
64 | }
65 |
66 | return output;
67 | }
68 |
69 | /** Extracts the font information available in the document and fills the font table with one CSS font-family value per font group.
70 | * @param fontTblGrp
71 | * list with child elements of the "fonttbl" group element
72 | */
73 | protected void extractFontTable(List<RtfElement> fontTblGrp) {
74 | // {\fonttbl
75 | // {\f0\fswiss\fcharset0\fprq2 Arial;}
76 | // {\f1\froman\fcharset2\fprq2 Symbol;}
77 | // }
78 | // index 0 is the "default" font (in fact: default font is declared by \deffN in RTF header section)
79 | List<String> fonttbl = new ArrayList<>();
80 |
81 | int c = fontTblGrp.size();
82 |
83 | for (int i = 1; i < c; i++) {
84 | // assume that font table entries are present in order of their index, i. e. f0, f1, f2...
85 | if (fontTblGrp.get(i) instanceof RtfGroup) {
86 | RtfGroup fontDesc = (RtfGroup) fontTblGrp.get(i);
87 | String fontFamily = "";
88 | // process font description group
89 | List<RtfElement> fontAttrs = fontDesc.children;
90 | // assume that the font index is the first (at least) RtfElement in the font descriptor RtfGroup.
91 | // Only RtfControlWord and RtfText elements are processed here. RtfGroups are not processed.
92 | for (int fa = 1; fa < fontAttrs.size(); fa++) {
93 | RtfElement faElem = fontAttrs.get(fa);
94 | if (faElem instanceof RtfControlWord) {
95 | // font attribute
96 | RtfControlWord fontAttr = (RtfControlWord) faElem;
97 | // font family (has only one of):
98 | if (fontAttr.word.equals("fnil")) {
99 | // font family Unknown/Default -> no font name applicable so far
100 | } else
101 | if (fontAttr.word.equals("froman")) {
102 | // font family Roman (proportionally spaced, serif)
103 | fontFamily = "Times,serif";
104 | } else
105 | if (fontAttr.word.equals("fswiss")) {
106 | // font family Swiss (proportionally spaced, sans-serif)
107 | fontFamily = "Helvetica,Swiss,sans-serif";
108 | } else
109 | if (fontAttr.word.equals("fmodern")) {
110 | // font family Fixed-pitch (typewriter)
111 | fontFamily = "Courier,monospace";
112 | } else
113 | if (fontAttr.word.equals("fscript")) {
114 | // font family Script (like handwritten)
115 | fontFamily = "Cursive";
116 | } else
117 | if (fontAttr.word.equals("fdecor")) {
118 | // font family Decorative
119 | fontFamily = "'ITC Zapf Chancery'";
120 | } else
121 | if (fontAttr.word.equals("ftech")) {
122 | // font family Non-Unicode, technical, symbol
123 | fontFamily = "Symbol,Wingdings";
124 | } else
125 | if (fontAttr.word.equals("fbidi")) {
126 | // font family bi-directional
127 | fontFamily = "Miriam";
128 | } else
129 | // charset (after font family setting):
130 | if (fontAttr.word.equals("fcharset")) {
131 | // font charset reference (with parameter)
132 | // 0 = default charset as defined in RTF header (assume ANSI, CP1252)
133 | // 2 = SYMBOL_CHARSET (CP42)
134 | if (fontAttr.parameter == 2) {
135 | // supersede font family by forcing "Symbol" font
136 | fontFamily = "Symbol";
137 | }
138 | }
139 | // \cpgN (code page) is ignored. 42 however would equal \fcharset2 (Symbol)
140 | }
141 | if (faElem instanceof RtfText) {
142 | // font name, possibly terminated by the ";" entry delimiter
143 | RtfText fontName = (RtfText) faElem;
144 | String fontNameText = fontName.text;
145 | if (!";".equals(fontNameText)) {
146 | if (fontNameText.endsWith(";")) {
147 | fontNameText = fontNameText.substring(0, fontNameText.length() - 1);
148 | }
149 | if (!fontFamily.contains(fontNameText)) {
150 | // Put the quoted font name in front of the generic family, unless the family already names it.
151 | if (fontFamily.length() > 0) {
152 | fontFamily = "," + fontFamily;
153 | }
154 | fontFamily = "'" + fontNameText + "'" + fontFamily;
155 | }
156 | }
157 | }
158 | }
159 | fonttbl.add(fontFamily);
160 | }
161 | }
162 |
163 | this.fonttbl = fonttbl;
164 | }
165 |
166 | /**
167 | * Extracts the color information available in the document and fills the
168 | * color table. A color control word that is not followed by two more control words (truncated entry) is skipped instead of raising an exception.
169 | *
170 | * @param colorTblGrp
171 | * list with child elements of the "colortbl" group element
172 | */
173 | protected void extractColorTable(List<RtfElement> colorTblGrp) {
174 | // {\colortbl;\red0\green0\blue0;}
175 | // index 0 is the "auto" color
176 | // force list to begin at index 1
177 | List<String> colortbl = new ArrayList<>();
178 | colortbl.add(null);
179 |
180 | int c = colorTblGrp.size();
181 | String color = "";
182 |
183 | for (int i = 2; i < c; i++) {
184 | if (i + 2 < c && colorTblGrp.get(i) instanceof RtfControlWord
185 | && colorTblGrp.get(i + 1) instanceof RtfControlWord && colorTblGrp.get(i + 2) instanceof RtfControlWord) {
186 | int red = ((RtfControlWord) colorTblGrp.get(i)).parameter; // Extract RGB color and convert it to hex string.
187 | int green = ((RtfControlWord) colorTblGrp.get(i + 1)).parameter;
188 | int blue = ((RtfControlWord) colorTblGrp.get(i + 2)).parameter;
189 |
190 | color = String.format("#%02x%02x%02x", red, green, blue);
191 | i += 2;
192 | } else if (colorTblGrp.get(i) instanceof RtfText) {
193 | // This is a delimiter ";" so store the already extracted color.
194 | colortbl.add(color);
195 | }
196 | }
197 |
198 | this.colortbl = colortbl;
199 | }
200 |
201 | /**
202 | * Formats an RTF group: table and ignored groups are handled first; any other group has all its children formatted with a cloned state that is dropped again afterwards.
203 | *
204 | * @param group
205 | * group element to process
206 | */
207 | protected void formatGroup(RtfGroup group) {
208 | // Groups that produce no output of their own are handled (or skipped) here.
209 | // Font table extraction.
210 | if (group.getType().equals("fonttbl")) {
211 | extractFontTable(group.children);
212 | return;
213 | }
214 | // Extract color table.
215 | if (group.getType().equals("colortbl")) {
216 | extractColorTable(group.children);
217 | return;
218 | }
219 | // Stylesheet extraction not yet supported.
220 | if (group.getType().equals("stylesheet")) {
221 | return;
222 | }
223 | // Info extraction not yet supported.
224 | if (group.getType().equals("info")) {
225 | return;
226 | }
227 | // Picture extraction not yet supported (any group type beginning with "pict").
228 | if (group.getType().length() >= 4 && group.getType().substring(0, 4).equals("pict")) {
229 | return;
230 | }
231 | // Ignore destinations.
232 | if (group.isDestination()) {
233 | return;
234 | }
235 |
236 | // Push a copy of the current state onto the stack so changes made inside the group stay local to it.
237 | state = (RtfState) state.clone();
238 | states.push(state);
239 |
240 | // Format all group children, dispatching on the element type.
241 | for (RtfElement child : group.children) {
242 | if (child instanceof RtfGroup) {
243 | formatGroup((RtfGroup) child);
244 | } else if (child instanceof RtfControlWord) {
245 | formatControlWord((RtfControlWord) child);
246 | } else if (child instanceof RtfControlSymbol) {
247 | formatControlSymbol((RtfControlSymbol) child);
248 | } else if (child instanceof RtfText) {
249 | formatText((RtfText) child);
250 | }
251 | }
252 |
253 | // Pop state from stack and continue with the enclosing group's state.
254 | states.pop();
255 | state = states.peek();
256 | }
257 |
258 | /**
259 | * Formats an RTF control word: style words only update the current state, character words are printed through applyStyle, and paragraph/row ends close the open tags and start a new paragraph.
260 | *
261 | * @param rtfWord
262 | * word element to process
263 | */
264 | protected void formatControlWord(RtfControlWord rtfWord) {
265 | if (rtfWord.word.equals("plain") || rtfWord.word.equals("pard")) {
266 | state.reset();
267 | } else
268 | // state changers, not printed immediately:
269 | if (rtfWord.word.equals("f")) {
270 | state.font = rtfWord.parameter;
271 | } else if (rtfWord.word.equals("b")) {
272 | state.bold = rtfWord.parameter > 0;
273 | } else if (rtfWord.word.equals("i")) {
274 | state.italic = rtfWord.parameter > 0;
275 | } else if (rtfWord.word.equals("ul")) {
276 | state.underline = rtfWord.parameter > 0;
277 | } else if (rtfWord.word.equals("ulnone")) {
278 | state.underline = false;
279 | } else if (rtfWord.word.equals("strike")) {
280 | state.strike = rtfWord.parameter > 0;
281 | } else if (rtfWord.word.equals("v")) {
282 | state.hidden = rtfWord.parameter > 0;
283 | } else if (rtfWord.word.equals("fs")) {
284 | state.fontSize = (int) Math.ceil((rtfWord.parameter / 24.0) * 16.0);
285 | } else if (rtfWord.word.equals("dn")) {
286 | state.dnup = (int) Math.ceil((rtfWord.parameter / 24.0) * 16.0) * -1;
287 | } else if (rtfWord.word.equals("up")) {
288 | state.dnup = (int) Math.ceil((rtfWord.parameter / 24.0) * 16.0);
289 | } else if (rtfWord.word.equals("sub")) {
290 | state.subscript = true;
291 | state.superscript = false;
292 | } else if (rtfWord.word.equals("super")) {
293 | state.subscript = false;
294 | state.superscript = true;
295 | } else if (rtfWord.word.equals("nosupersub")) {
296 | state.subscript = false;
297 | state.superscript = false;
298 | } else if (rtfWord.word.equals("cf")) {
299 | state.textColor = rtfWord.parameter;
300 | } else if (rtfWord.word.equals("cb") || rtfWord.word.equals("chcbpat") || rtfWord.word.equals("highlight")) {
301 | state.background = rtfWord.parameter;
302 | } else
303 | // special characters, printed immediately:
304 | if (rtfWord.word.equals("lquote")) {
305 | applyStyle("‘");
306 | } else if (rtfWord.word.equals("rquote")) {
307 | applyStyle("’");
308 | } else if (rtfWord.word.equals("ldblquote")) {
309 | applyStyle("“");
310 | } else if (rtfWord.word.equals("rdblquote")) {
311 | applyStyle("”");
312 | } else if (rtfWord.word.equals("emdash")) {
313 | applyStyle("—");
314 | } else if (rtfWord.word.equals("endash")) {
315 | applyStyle("–");
316 | } else if (rtfWord.word.equals("emspace")) {
317 | applyStyle("&emsp;");
318 | } else if (rtfWord.word.equals("enspace")) {
319 | applyStyle("&ensp;");
320 | } else if (rtfWord.word.equals("tab")) {
321 | applyStyle(" ");
322 | } else if (rtfWord.word.equals("line")) {
323 | applyStyle("<br>");
324 | } else if (rtfWord.word.equals("bullet")) {
325 | applyStyle("•");
326 | } else if (rtfWord.word.equals("u")) {
327 | applyStyle("&#" + (rtfWord.parameter < 0 ? rtfWord.parameter + 65536 : rtfWord.parameter) + ";"); // RTF writes code points above 32767 as negative 16-bit values
328 | } else if (rtfWord.word.equals("par") || rtfWord.word.equals("row")) {
329 | // Close previously opened tags.
330 | closeTags();
331 |
332 | output += "<p>";
333 | openedTags.put("p", true);
334 | newRootPar = true;
335 | }
336 | }
337 |
338 | /**
339 | * Appends text to the output. A new span tag carrying the current state as inline CSS is opened first when the state differs from the one of the previous span or a paragraph has just been started.
340 | *
341 | * @param txt
342 | * text to be formatted; it is appended as given
343 | */
344 | protected void applyStyle(String txt) {
345 | // Create span only when a style change occurs or a root paragraph start was just inserted.
346 | if (!state.equals(previousState) || newRootPar) {
347 | String span = "";
348 |
349 | if (state.font >= 0) {
350 | span += "font-family:" + printFontFamily(state.font) + ";";
351 | }
352 | if (state.bold) {
353 | span += "font-weight:bold;";
354 | }
355 | if (state.italic) {
356 | span += "font-style:italic;";
357 | }
358 | if (state.underline || state.strike) {
359 | // One declaration for both lines: a second text-decoration would override the first, and the CSS keyword for strike is line-through.
360 | span += "text-decoration:" + (state.underline ? "underline" : "")
361 | + (state.underline && state.strike ? " " : "")
362 | + (state.strike ? "line-through" : "") + ";";
363 | }
364 | if (state.hidden) {
365 | span += "display:none;";
366 | }
367 | if (state.fontSize != 0) {
368 | span += "font-size:" + state.fontSize + "px;";
369 | }
370 | // RTF dn/up:
371 | // By spec, RTF fs and RTF dn/up are independent of each other;
372 | // there is no documented "auto-reducing" for the font size.
373 | // In the wild, RTF dn/up often is given together with a "full" RTF fs but rendered with reduced font size.
374 | // Thus, RTF dn/up is rendered with implicit font size reduction.
375 | // This font-size setting supersedes the explicit "fs" font-size setting.
376 | if (state.dnup != 0) {
377 | span += calculateReducedFontSize() + "vertical-align:" + state.dnup + "px;";
378 | }
379 | // RTF sub/super:
380 | // Reduced font-size and vertical-align supersede settings from fs,dn,up.
381 | if (state.subscript) {
382 | span += calculateReducedFontSize() + "vertical-align:sub;";
383 | }
384 | if (state.superscript) {
385 | span += calculateReducedFontSize() + "vertical-align:super;";
386 | }
387 | if (state.textColor != 0) {
388 | span += "color:" + printColor(state.textColor) + ";";
389 | }
390 | if (state.background != 0) {
391 | span += "background-color:" + printColor(state.background) + ";";
392 | }
393 |
394 | // Keep track of preceding style.
395 | previousState = (RtfState) state.clone();
396 |
397 | // Close previously opened "span" tag.
398 | closeTag("span");
399 |
400 | output += "<span style=\"" + span + "\">" + txt;
401 | openedTags.put("span", true);
402 | } else {
403 | output += txt;
404 | }
405 | newRootPar = false;
406 | }
407 |
408 | /**
409 | * Calculates a reduced font size based on the current state.
410 | * If the current state defines a font size, a CSS font-size with 2/3 of it (rounded up to whole pixels) is returned,
411 | * else "font-size:smaller;" is returned.
412 | * @return CSS declaration for the reduced font size, including the trailing semicolon.
413 | */
414 | protected String calculateReducedFontSize() {
415 | String css;
416 | if (state.fontSize != 0) {
417 | int reducedFontSize = (int) Math.ceil((state.fontSize / 3.0) * 2.0); // two thirds, rounded up
418 | css = "font-size:" + reducedFontSize + "px;";
419 | } else {
420 | css = "font-size:smaller;"; // no explicit size in effect: use the relative keyword
421 | }
422 | return css;
423 | }
424 |
425 | protected String printFontFamily(int index) {
426 | // Gets the CSS font family at the given 0-based position of the font table; an empty string is returned if the position is invalid or no font table has been extracted (the field is only assigned by extractFontTable).
427 | if (fonttbl != null && index >= 0 && index < fonttbl.size()) {
428 | return fonttbl.get(index);
429 | } else {
430 | return "";
431 | }
432 | }
433 |
434 | /**
435 | * Gets the color at the specified position from the color table.
436 | *
437 | * @param index
438 | * a value greater than 0 and less than the number of list items
439 | * @return RGB hex string or an empty string if the position is invalid or no color table has been extracted
440 | */
441 | protected String printColor(int index) {
442 | if (colortbl != null && index >= 1 && index < colortbl.size()) { // colortbl is only assigned by extractColorTable
443 | return colortbl.get(index);
444 | } else {
445 | return "";
446 | }
447 | }
448 |
449 | /**
450 | * Adds the closing tag to match the last opening tag and marks the tag as closed. Nothing happens if the tag is not currently open.
451 | *
452 | * @param tag
453 | * the HTML tag name, e.g. "span" or "p"
454 | */
455 | protected void closeTag(String tag) {
456 | if (Boolean.TRUE.equals(openedTags.get(tag))) { // a tag that was never registered counts as closed
457 | output += "</" + tag + ">";
458 | openedTags.put(tag, false);
459 | }
460 | }
461 |
462 | /**
463 | * Closes all opened tags by calling closeTag for every registered tag name, in the iteration order of the openedTags map.
464 | */
465 | protected void closeTags() {
466 | for (String tag : openedTags.keySet()) {
467 | closeTag(tag);
468 | }
469 | }
470 |
471 | /**
472 | * Wraps HTML head and body tags around the output string and stores the result back into output. NOTE(review): the appended literals contain only whitespace and line breaks in this listing; presumably their markup was stripped. Confirm against the original file.
473 | */
474 | protected void wrapTags() {
475 | StringBuilder source = new StringBuilder();
476 | source.append("\n");
477 | source.append("\n");
478 | source.append(" \n");
479 | source.append(" \n");
480 | source.append(" \n");
481 | source.append(" \n");
482 | source.append(output + "\n"); // the snippet generated so far goes between the leading and trailing lines
483 | source.append(" \n");
484 | source.append("\n");
485 | output = source.toString();
486 | }
487 |
488 | /**
489 | * Formats an RTF control symbol: an apostrophe symbol is printed as a numeric character reference built from its parameter, a tilde as a non-breaking space. Other symbols produce no output.
490 | *
491 | * @param rtfSymbol
492 | * symbol element to process
493 | */
494 | protected void formatControlSymbol(RtfControlSymbol rtfSymbol) {
495 | if (rtfSymbol.symbol == '\'') {
496 | applyStyle("&#" + rtfSymbol.parameter + ";");
497 | }
498 | if (rtfSymbol.symbol == '~') {
499 | output += "&nbsp;";
500 | }
501 | }
502 |
503 | /**
504 | * Formats an RTF text. The characters &amp;, &lt; and &gt; are replaced by their HTML entities so that document text cannot be read as markup.
505 | *
506 | * @param rtfText
507 | * text element to process
508 | */
509 | protected void formatText(RtfText rtfText) {
510 | applyStyle(rtfText.text.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")); // ampersand first, otherwise the other entities would be escaped twice
511 | }
512 | }
--------------------------------------------------------------------------------