├── README.md └── code.js /README.md: -------------------------------------------------------------------------------- 1 | ## Google Doc to clean HTML converter ## 2 | 3 | 1. Open your Google Doc and go to Tools menu, select Script Editor. You 4 | should see a new window open with a nice code editor. 5 | 2. Copy and paste the code from here: [GoogleDocs2Html][1] 6 | 3. Then from the "Select Editor" menu, choose ConvertGoogleDocToCleanHtml 7 | 4. Click the play button to run the script. 8 | 5. You will get an email containing the HTML output of 9 | the Google Doc with inline images. 10 | 6. You can easily forward that email to anyone or copy and paste in a Wordpress post. 11 | 12 | 13 | [1]: https://raw.githubusercontent.com/oazabir/GoogleDoc2Html/master/code.js 14 | -------------------------------------------------------------------------------- /code.js: -------------------------------------------------------------------------------- 1 | function ConvertGoogleDocToCleanHtml() { 2 | var body = DocumentApp.getActiveDocument().getBody(); 3 | var numChildren = body.getNumChildren(); 4 | var output = []; 5 | var images = []; 6 | var listCounters = {}; 7 | 8 | // Walk through all the child elements of the body. 9 | for (var i = 0; i < numChildren; i++) { 10 | var child = body.getChild(i); 11 | output.push(processItem(child, listCounters, images)); 12 | } 13 | 14 | var html = output.join('\r'); 15 | emailHtml(html, images); 16 | //createDocumentForHtml(html, images); 17 | } 18 | 19 | function emailHtml(html, images) { 20 | var attachments = []; 21 | for (var j=0; j): 99 | if (gt === DocumentApp.GlyphType.BULLET 100 | || gt === DocumentApp.GlyphType.HOLLOW_BULLET 101 | || gt === DocumentApp.GlyphType.SQUARE_BULLET) { 102 | prefix = '"; 105 | } 106 | else { 107 | // Ordered list (
    ): 108 | prefix = "
    1. ", suffix = "
    2. "; 109 | } 110 | } 111 | else { 112 | prefix = "
    3. "; 113 | suffix = "
    4. "; 114 | } 115 | 116 | if (item.isAtDocumentEnd() || (item.getNextSibling() && item.getNextSibling().getType() != DocumentApp.ElementType.LIST_ITEM)) { 117 | if (gt === DocumentApp.GlyphType.BULLET 118 | || gt === DocumentApp.GlyphType.HOLLOW_BULLET 119 | || gt === DocumentApp.GlyphType.SQUARE_BULLET) { 120 | suffix += ""; 121 | } 122 | else { 123 | // Ordered list (
        ): 124 | suffix += "
      "; 125 | } 126 | 127 | } 128 | 129 | counter++; 130 | listCounters[key] = counter; 131 | } 132 | 133 | output.push(prefix); 134 | 135 | if (item.getType() == DocumentApp.ElementType.TEXT) { 136 | processText(item, output); 137 | } 138 | else { 139 | 140 | 141 | if (item.getNumChildren) { 142 | var numChildren = item.getNumChildren(); 143 | 144 | // Walk through all the child elements of the doc. 145 | for (var i = 0; i < numChildren; i++) { 146 | var child = item.getChild(i); 147 | output.push(processItem(child, listCounters, images)); 148 | } 149 | } 150 | 151 | } 152 | 153 | output.push(suffix); 154 | return output.join(''); 155 | } 156 | 157 | 158 | function processText(item, output) { 159 | var text = item.getText(); 160 | var indices = item.getTextAttributeIndices(); 161 | 162 | if (indices.length <= 1) { 163 | // Assuming that a whole para fully italic is a quote 164 | if(item.isBold()) { 165 | output.push('' + text + ''); 166 | } 167 | else if(item.isItalic()) { 168 | output.push('
      ' + text + '
      '); 169 | } 170 | else if (text.trim().indexOf('http://') == 0) { 171 | output.push('' + text + ''); 172 | } 173 | else { 174 | output.push(text); 175 | } 176 | } 177 | else { 178 | 179 | for (var i=0; i < indices.length; i ++) { 180 | var partAtts = item.getAttributes(indices[i]); 181 | var startPos = indices[i]; 182 | var endPos = i+1 < indices.length ? indices[i+1]: text.length; 183 | var partText = text.substring(startPos, endPos); 184 | 185 | // Logger.log(partAtts); 186 | // Logger.log(partText); 187 | // Logger.log(partText.trim().indexOf('http://')); 188 | 189 | if (partAtts.ITALIC) { 190 | output.push(''); 191 | } 192 | if (partAtts.BOLD) { 193 | output.push(''); 194 | } 195 | if (partAtts.UNDERLINE) { 196 | output.push(''); 197 | } 198 | 199 | // If someone has written [xxx] and made this whole text some special font, like superscript 200 | // then treat it as a reference and make it superscript. 201 | // Unfortunately in Google Docs, there's no way to detect superscript 202 | if (partText.indexOf('[')==0 && partText[partText.length-1] == ']') { 203 | output.push('' + partText + ''); 204 | } 205 | /*else if (partText.trim().indexOf('http://') == 0) { 206 | output.push('' + partText + ''); 207 | }*/ 208 | else if (partAtts.LINK_URL) { 209 | output.push('' + partText + ''); 210 | } 211 | else { 212 | output.push(partText); 213 | } 214 | 215 | if (partAtts.ITALIC) { 216 | output.push(''); 217 | } 218 | if (partAtts.BOLD) { 219 | output.push(''); 220 | } 221 | if (partAtts.UNDERLINE) { 222 | output.push(''); 223 | } 224 | 225 | } 226 | } 227 | } 228 | 229 | 230 | function processImage(item, images, output) 231 | { 232 | images = images || []; 233 | var blob = item.getBlob(); 234 | var contentType = blob.getContentType(); 235 | var extension = ""; 236 | if (/\/png$/.test(contentType)) { 237 | extension = ".png"; 238 | } else if (/\/gif$/.test(contentType)) { 239 | extension = ".gif"; 240 | } else if (/\/jpe?g$/.test(contentType)) { 241 | extension = ".jpg"; 242 | } else { 243 | throw "Unsupported image type: "+contentType; 244 | } 245 | var imagePrefix = "Image_"; 246 | var imageCounter = images.length; 247 | var name = imagePrefix + imageCounter + extension; 248 | imageCounter++; 249 | output.push(''); 250 | images.push( { 251 | "blob": blob, 252 | "type": contentType, 253 | "name": name}); 254 | } 255 | --------------------------------------------------------------------------------