├── .gitignore ├── README.md ├── bin └── medium2gatsby ├── exporters └── gatsby.js ├── index.js ├── lib ├── converter.js └── markdown.js ├── package-lock.json ├── package.json ├── templates └── default.js └── yarn.lock /.gitignore: -------------------------------------------------------------------------------- 1 | NODE_MODULES 2 | generated_files 3 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # medium-to-gatsby 2 | 3 | A CLI to convert your medium exported .html files to gatsby .md files. 4 | 5 | ## Features 6 | - Converts medium .html files and outputs gatsby .md files. 7 | - Customize output via templates 8 | - Downloads post images from medium and saves locally 9 | - Handles embedded tweets 10 | - Inlines github gists 11 | - Allows default language for code blocks. 12 | - Skips over drafts and post replies. 13 | - Generates report when done. 14 | 15 | ## Installation 16 | `$ npm install -g https://github.com/jamischarles/export-medium-to-gatsby` (maybe it'll go on npm eventually) 17 | 18 | ## Steps 19 | 1. [Download your medium posts as an archive from medium](https://help.medium.com/hc/en-us/articles/115004745787-Download-your-information). 20 | 2. Install this CLI via #Installation step above 21 | 3. Save a template file (see template section below) where you'll be running 22 | your export command. 23 | 4. Customize the template.js file you downloaded to match the [frontmatter](https://jekyllrb.com/docs/front-matter/) fields your gatsby blog requires. Here you also define what folder in your blog medium images should be downloaded to. 24 | 5. Run the CLI and use the `medium-export/posts` folder as the input, and for output either directly output to your `content/posts` folder, or copy it there after generating the files. 25 | 6. Verify that the generated files are correct and looks good. 26 | 7. 
Make any CSS and styling adjustments as needed. 27 | 8. Do a happy dance. 28 | 29 | ## CLI Usage 30 | ``` 31 | Usage 32 | $ medium2gatsby 33 | 34 | Options 35 | --output, -o Destination folder for output files. Defaults to './'. 36 | --template, -t Template used to generate post files. 37 | --help, -h Shows usage instructions 38 | 39 | Examples 40 | $ medium2gatsby . -o posts -t template.js 41 | $ medium2gatsby 2018-04-02_Introducing-the-react-testing-library----e3a274307e65.html -o output -t template.js 42 | $ medium2gatsby ~/Downloads/medium-export/posts -o . -t template.js 43 | ``` 44 | 45 | ## Recommended styling 46 | ### Images and subtitles 47 | Images and subtitles will be converted to 48 | `<figure><img><figcaption>Subtitle</figcaption></figure>` same as medium used. 49 | 50 | Gatsby injects a `<p>
` in there. To fix spacing I suggest you add the following to your template's 51 | CSS:`figure > p {margin-bottom:0px !important;}`. 52 | 53 | You can use `figure figcaption {}` to style image subtitles. 54 | 55 | ### Fenced Links 56 | If you used fenced links on medium to make links stand out, those links will now 57 | show up as `<a class="fenced-link">some text.</a>` This 58 | CSS should approximate the medium fenced link style: 59 | ```css 60 | .fenced-link { 61 | background-color:#0000000d; 62 | font-family:monospace; 63 | text-decoration:underline; 64 | padding:2px; 65 | } 66 | ``` 67 | 68 | 69 | 70 | ## Customize via templates 71 | Based on which gatsby theme you're using you may need to generate different 72 | frontmatter fields. 73 | 74 | Here are some example `template.js` you can save and pass to the CLI via the `-t` flag. 75 | 76 | ### Template ex1: Different folder for each post 77 | - specifies `2018-04-16` date format in frontmatter `date` field 78 | - generates a separate folder for each post ie: `content/posts/introducing-react/index.md` 79 | - saves post images to `/images2` (relative to the post folder) 80 | - posts will show on site as `/posts/[slug-name]` 81 | - defaults all code fences to use `'js'` 82 | 83 | ```js 84 | module.exports = { 85 | render: function(data) { 86 | // data.published is Date ISO format: 2018-04-16T14:48:00.000Z 87 | var date = new Date(data.published); 88 | var prettyDate = 89 | date.getFullYear() + 90 | '-' + 91 | (date.getMonth() + 1).toString().padStart(2, 0) + 92 | '-' + 93 | date 94 | .getDate() 95 | .toString() 96 | .padStart(2, 0); //2018-04-16 97 | 98 | var template = `\ 99 | --- 100 | slug: "/posts/${data.titleForSlug}/" 101 | date: ${prettyDate} 102 | title: "${data.title}" 103 | draft: false 104 | description: "${data.description}" 105 | categories: [] 106 | keywords: [${data.tags.join(',')}] 107 | --- 108 | 109 | ${data.body} 110 | `; 111 | 112 | return template; 113 | }, 114 | getOptions: function() { 115 | return { 116 | 
folderForEachSlug: true, // separate folder for each blog post, where index.md and post images will live 117 | imagePath: '/images2', // . Used in the markdown files. 118 | defaultCodeBlockLanguage: 'js', // code fenced by default will be ``` with no lang. If most of your code blocks are in a specific lang, set this here. 119 | }; 120 | }, 121 | }; 122 | 123 | ``` 124 | 125 | ### Template ex2: Same folder for all posts 126 | - specifies `2018-04-16T14:48:00.000Z` date format (ISO, which is default) in frontmatter `date` field 127 | - saves all generated posts to same folder defined in `-o` options for CLI. Files are named via slug name from medium. 128 | - saves post images to `/Users/jacharles/dev/blog/content/posts/introducing-the-react-testing-library/images` 129 | - defaults all code fences to use `''` (no language). 130 | 131 | ```js 132 | module.exports = { 133 | render: function(data) { 134 | var template = `\ 135 | --- 136 | slug: ${data.titleForSlug} 137 | date: ${data.published} 138 | title: "${data.title}" 139 | template: "post" 140 | draft: false 141 | description: "${data.description}" 142 | category: "" 143 | tags: [${data.tags.join(',')}] 144 | --- 145 | 146 | ${data.body} 147 | `; 148 | 149 | return template; 150 | }, 151 | getOptions: function() { 152 | return { 153 | folderForEachSlug: false, // same folder for all posts 154 | imagePath: '/media', // . Used in the markdown files. 155 | // This field is ignored when folderForEachSlug:true. Should be absolute. Location where medium images will be saved. 156 | imageFolder: 157 | '/Users/jacharles/dev/blog/static/media', 158 | defaultCodeBlockLanguage: '', // code fenced by default will be ``` with no lang. If most of your code blocks are in a specific lang, set this here. 
159 | }; 160 | }, 161 | }; 162 | 163 | ``` 164 | 165 | ## TODO and Help needed 166 | I'm about ready to move on from this, but would love help with the following if 167 | anybody feels inclined: 168 | - [ ] Better progress / error messages. Should notify which articles fail for whichever reason 169 | - [ ] Error handling is very lacking in many places. Could / should be improved to be more robust especially around downloading posts / images from medium. 170 | - [ ] Adding tests (prefer something dead simple like mocha). Currently there 171 | are zero tests. 172 | - [ ] More generator targets. This repo could fairly easily be forked and expanded to include other targets like jekyll, or 173 | other static site generators. (low priority) (medium2markdown) 174 | -------------------------------------------------------------------------------- /bin/medium2gatsby: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | 3 | 4 | //executable CLI entry point into this util 5 | require('../index.js'); 6 | -------------------------------------------------------------------------------- /exporters/gatsby.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamischarles/export-medium-to-gatsby/2cc8dddfd39fb91d62870241153d74eabd375efe/exporters/gatsby.js -------------------------------------------------------------------------------- /index.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Module dependencies. 3 | */ 4 | var meow = require('meow'); 5 | 6 | /** 7 | * Local libs 8 | */ 9 | var converter = require('./lib/converter.js'); 10 | 11 | var cli = meow( 12 | ` 13 | Usage 14 | $ medium2gatsby 15 | 16 | Options 17 | --output, -o Destination folder for output files. Defaults to './'. 18 | --template, -t Template used to generate post files. 19 | --drafts, -d set flag to export drafts along with other posts. 
Default, false. 20 | --help, -h Shows usage instructions 21 | 22 | Examples 23 | $ medium2gatsby . -o posts -t template.js 24 | $ medium2gatsby 2018-04-02_Introducing-the-react-testing-library----e3a274307e65.html -o output -t template.js 25 | 26 | `, 27 | { 28 | flags: { 29 | drafts: { 30 | type: 'boolean', 31 | alias: 'd', 32 | default: false 33 | }, 34 | output: { 35 | type: 'string', 36 | alias: 'o', 37 | }, 38 | template: { 39 | type: 'string', 40 | alias: 't', 41 | }, 42 | }, 43 | }, 44 | ); 45 | /* 46 | { 47 | input: ['unicorns'], 48 | flags: {rainbow: true}, 49 | ... 50 | } 51 | */ 52 | 53 | // show help if no args passed 54 | if (cli.input.length < 1) { 55 | cli.showHelp(); 56 | } 57 | 58 | var srcPath = cli.input[0]; 59 | var destPath = cli.flags.output; 60 | var templatePath = cli.flags.template; 61 | var export_drafts = cli.flags.drafts; 62 | converter.convert(srcPath, destPath, templatePath, export_drafts); 63 | // foo(cli.input[0], cli.flags); 64 | -------------------------------------------------------------------------------- /lib/converter.js: -------------------------------------------------------------------------------- 1 | // FIXME: add file description... 2 | var fakeUa = require('fake-useragent'); 3 | var request = require('request'); 4 | var fs = require('fs'); 5 | var path = require('path'); 6 | var cheerio = require('cheerio'); 7 | var util = require('util'); 8 | var mkdirp = require('mkdirp'); 9 | 10 | var markdownUtils = require('./markdown'); 11 | 12 | var makeRequest = util.promisify(request.get); 13 | 14 | // global state. FIXME: consider localizing this more... 
15 | var report = { 16 | posts: { 17 | attempted: [], 18 | succeeded: [], 19 | failed: [], 20 | drafts: [], 21 | replies: [], 22 | }, 23 | gists: { 24 | attempted: [], 25 | succeeded: [], 26 | failed: [], 27 | }, 28 | images: { 29 | attempted: [], 30 | succeeded: [], 31 | failed: [], 32 | }, 33 | }; 34 | 35 | // handle promise errors 36 | process.on('unhandledRejection', up => { 37 | console.log('err', up); 38 | // throw up; 39 | }); 40 | 41 | function convertToSlug(Text) 42 | { 43 | return Text 44 | .toLowerCase() 45 | .replace(/ /g,'-') 46 | .replace(/[^\w-]+/g,'') 47 | ; 48 | } 49 | 50 | // primary entry point 51 | async function convertMediumFile(filePath, outputFolder, templatePath, export_drafts) { 52 | var template = require(templatePath); 53 | var options = template.getOptions(); 54 | 55 | // don't process drafts 56 | var filename = path.basename(filePath, '.html'); 57 | if (filename.startsWith('draft')) { 58 | // console.log('Skipping over draft file ', filePath); 59 | report.posts.drafts.push(filePath); 60 | // throw 'draft file'; // equivalent of promise.reject 61 | // if we don't want to export drafts then bail 62 | if (!export_drafts) return; 63 | } 64 | 65 | report.posts.attempted.push(filePath); 66 | 67 | var srcFilepath = filePath; 68 | var content = fs.readFileSync(filePath); 69 | 70 | try { 71 | var postData = await gatherPostData(content, options, srcFilepath); 72 | postData.draft = export_drafts 73 | 74 | var imageFolder = path.resolve(options.imageFolder); 75 | var output = template.render(postData); 76 | 77 | // if true, make folder for each slug, and name it '[slug]/index.md' 78 | if (options.folderForEachSlug) { 79 | outputFolder = path.join(outputFolder, postData.titleForSlug); 80 | imageFolder = path.join(outputFolder, options.imagePath); 81 | filePath = 'index'; 82 | } 83 | 84 | // make outputFolder if it doesn't exist yet 85 | mkdirp.sync(outputFolder); 86 | 87 | // console.log( 88 | // `processing: ${srcFilepath} -> 
${path.join(outputFolder, filePath)}.md`, 89 | // ); 90 | 91 | // save post images to the local image folder 92 | await saveImagesToLocal(imageFolder, postData.images); 93 | 94 | // render post file to folder 95 | writePostToFile(output, filePath, outputFolder); 96 | 97 | report.posts.succeeded.push(filePath); 98 | } catch (err) { 99 | // reject(err); 100 | // re-throw if you want it to bubble up 101 | if (err.type != 'silent') throw err; 102 | } 103 | // }); 104 | } 105 | 106 | async function gatherPostData(content, options, filePath) { 107 | var $ = cheerio.load(content); 108 | 109 | await inlineGists($); 110 | var filename = path.basename(filePath, '.html'); 111 | is_draft = filename.startsWith('draft') 112 | // TODO: add no match condition... 113 | if (!is_draft){ 114 | var canonicalLink = $('.p-canonical').attr('href'); 115 | var match = canonicalLink.match( 116 | /https:\/\/medium\.com\/.+\/(.+)-[a-z0-9]+$/i, 117 | ); 118 | var titleForSlug = match[1]; 119 | } 120 | else { 121 | // construct a canonical link 122 | var canonicalLink = $('footer > p > a').attr('href'); 123 | var blogTitle = $(".graf--leading").first().text() 124 | var titleForSlug = convertToSlug(blogTitle) 125 | } 126 | 127 | // This will get the image urls, and rewrite the src in the content 128 | var imagesToSave = getMediumImages($, options.imagePath, titleForSlug); 129 | 130 | var subtitle = $('section.p-summary').text(); 131 | 132 | // $2 is for the post on medium instead of the local file... 133 | var postBody = await scrapeMetaDetailsFromPost(canonicalLink); 134 | 135 | // check if standalone post or reply 136 | var isReplyPost = postBody.match(/inResponseToPostId":"[0-9a-z]+"/); // this is in markup for reply posts 137 | 138 | if (isReplyPost) { 139 | report.posts.replies.push(filePath); 140 | // FIXME: consider setting type of err and then ignoring it at the higher level 141 | throw new SilentError('reply post. 
Skip over this one: ' + titleForSlug); 142 | } 143 | 144 | var $2 = cheerio.load(postBody); 145 | var description = $2('meta[name=description]').attr('content'); // from page... 146 | 147 | var schemaTags = $2('script[type="application/ld+json"]'); 148 | 149 | var metaData = JSON.parse(schemaTags[0].children[0].data); 150 | 151 | var tags = getTags(metaData.keywords); 152 | 153 | var title = $('h1').text(); 154 | 155 | // FIXME: put this in fn 156 | // REMOVE h1 and avatar section 157 | $('h1') 158 | .next() 159 | .remove(); // remove div avatar domEl right after h1 160 | $('h1').remove(); 161 | 162 | // process code blocks 163 | // medium exports inline code block as and multi-line as


164 |   // We need to wrap the content of the 
 with  tags so turndown parser won't escape the codeblock content
165 |   $('pre').map(function(i, el) {
166 |     var codeBlockContent = $(this).html();
167 |     codeBlockContent = `${codeBlockContent}`;
168 | 
169 |     var newEl = $(this).html(codeBlockContent);
170 |     return newEl;
171 |   });
172 | 
173 |   // embedded tweets:
174 |   // medium returns empty  which turndown throws out before we can process it.
175 |   // add dummy link text so turndown won't discard it
176 |   $('blockquote.twitter-tweet a').text('[Embedded tweet]');
177 | 
178 |   var post = {
179 |     title: title,
180 |     description: description,
181 |     subtitle: subtitle,
182 |     published: $('time').attr('datetime'),
183 |     bodyRaw: $('.section-content').html(),
184 |     titleForSlug: titleForSlug,
185 |     tags: tags,
186 |     images: imagesToSave, // data for images from the medium post
187 |     body: convertHtmlToMarkdown($('.section-content').html(), options),
188 |   };
189 | 
190 |   return post;
191 | }
192 | 
/**
 * Extracts tag names from a post's keyword list.
 *
 * Only entries of the form 'Tag:JavaScript' are kept; the 'Tag:' prefix is
 * dropped and the rest of the entry is returned unchanged.
 *
 * @param {string[]} arr keyword strings (the caller passes the `keywords`
 *   field of the post's ld+json metadata)
 * @returns {string[]} tag names, in their original order. Empty when `arr`
 *   is missing or is not an array.
 */
function getTags(arr) {
  var prefix = 'Tag:';

  // a post without keywords metadata simply has no tags
  if (!Array.isArray(arr)) {
    return [];
  }

  return arr
    .filter(function(item) {
      return typeof item === 'string' && item.startsWith(prefix);
    })
    .map(function(item) {
      // keep everything after the prefix so a tag containing ':' stays intact
      return item.slice(prefix.length);
    });
}
206 | 
// matches the '.html' extension of an exported medium file, ignoring case
var suffix = /\.html$/i;

// FIXME: get name from date + slug
/**
 * Writes a rendered post into `outputFolder` as a markdown file.
 *
 * The output name is the base name of `oldFilePath` with its '.html'
 * extension (matched case-insensitively) replaced by '.md'. A name without
 * that extension just gets '.md' appended.
 *
 * @param {string} content rendered post to write
 * @param {string} oldFilePath path (or bare name) the output name is derived from
 * @param {string} outputFolder folder to write into; it is not created here
 */
function writePostToFile(content, oldFilePath, outputFolder) {
  // use the shared regex so 'Post.HTML' becomes 'Post.md', not 'Post.HTML.md'
  var fileName = path.basename(oldFilePath).replace(suffix, '');

  var newPath = path.resolve(path.join(outputFolder, `${fileName}.md`));

  fs.writeFileSync(newPath, content);
}
218 | 
/**
 * Converts the post body from html to markdown.
 *
 * Pure delegation: both arguments are handed to
 * markdownUtils.transformHtmlToMarkdown untouched and its result is returned
 * as-is.
 *
 * @param {string} html markup of the post body
 * @param {Object} templateOptions options object obtained from the template
 * @returns {*} whatever transformHtmlToMarkdown produces (presumably the
 *   markdown text; confirm in lib/markdown.js)
 */
function convertHtmlToMarkdown(html, templateOptions) {
  var markdown = markdownUtils.transformHtmlToMarkdown(html, templateOptions);
  return markdown;
}
223 | 
/**
 * Downloads the page at `url` and resolves with the response body.
 *
 * The request goes through `makeRequest` with a User-Agent header produced
 * by `fakeUa()`.
 *
 * @param {string} url address of the post to fetch
 * @returns {Promise<*>} the `body` of the response
 * @throws {Error} when `url` is missing or empty, e.g. because no canonical
 *   link could be read from the exported file
 */
async function scrapeMetaDetailsFromPost(url) {
  // fail early with a readable message instead of an obscure HTTP client error
  if (typeof url !== 'string' || url.length === 0) {
    throw new Error(
      'Cannot fetch post details: no canonical url was found for this post',
    );
  }

  var headers = {
    'User-Agent': fakeUa(),
  };

  // FIXME: add error handling for failed requests / unexpected status codes
  var resp = await makeRequest({url: url, headers: headers});
  return resp.body;
}
233 | 
234 | // attempts to take gist script tags, then downloads the raw content, and places in 
 tag which will be converted to
235 | // fenced block (```) by turndown
236 | async function inlineGists($) {
237 |   // get all script tags on thet page
238 |   // FIXME: can do away with promises here entirely?
239 |   var promises = [];
240 | 
241 |   $('script').each(async function(i, item) {
242 |     var prom = new Promise(async (resolve, reject) => {
243 |       var src = $(this).attr('src');
244 |       var isGist = src.includes('gist');
245 |       if (isGist) {
246 |         try {
247 |           // console.log('feching raw gist source for: ', src);
248 |           report.gists.attempted.push(src);
249 |           var rawGist = await getRawGist(src);
250 |           report.gists.succeeded.push(src);
251 | 
252 |           // replace rawGist in markup
253 |           // FIXME: just modify this in turndown?
254 |           var inlineCode = $(`
${rawGist}
`); //this turns into ``` codefence 255 | 256 | // FIXME: guard to ensure
parent is removed 257 | // Replace the
parent node with code fence 258 | $(this) 259 | .parent() 260 | .replaceWith(inlineCode); 261 | 262 | resolve(); 263 | } catch (e) { 264 | report.gists.failed.push(src); 265 | reject(e); 266 | } 267 | } 268 | }); 269 | promises.push(prom); 270 | }); 271 | 272 | return await Promise.all(promises); 273 | } 274 | 275 | // get the raw gist from github 276 | async function getRawGist(gistUrl) { 277 | var newUrl = gistUrl.replace('github.com', 'githubusercontent.com'); 278 | 279 | // remove suffix (like .js) (maybe use it for code fencing later...) 280 | // FIXME: this is hacky 281 | var gistID = newUrl.split('/')[4]; // FIXME: guard for error 282 | if (gistID.includes('.')) { 283 | var ext = path.extname(gistID); 284 | newUrl = newUrl.replace(ext, ''); // srip extension (needed for raw fetch to work) 285 | } 286 | 287 | newUrl += '/raw'; 288 | 289 | // make the call 290 | var resp = await makeRequest({url: newUrl}); 291 | if (resp.statusCode === 200) { 292 | return resp.body; 293 | } 294 | } 295 | 296 | // returns urls of images to download and re-writes post urls to point locally 297 | function getMediumImages($, imageBasePath, postSlug) { 298 | var images = []; 299 | 300 | $('img.graf-image').each(async function(i, item) { 301 | var imageName = $(this).attr('data-image-id'); 302 | var ext = path.extname(imageName); 303 | 304 | // get max resolution of image 305 | var imgUrl = `https://cdn-images-1.medium.com/max/2600/${imageName}`; 306 | 307 | var localImageName = `${postSlug}-${i}${ext}`; // some-post-name-01.jpg 308 | var localImagePath = path.join(imageBasePath, localImageName); // full path including folder 309 | 310 | var imgData = { 311 | mediumUrl: imgUrl, 312 | localName: localImageName, 313 | localPath: localImagePath, // local path including filename we'll save it as 314 | }; 315 | 316 | images.push(imgData); 317 | 318 | // rewrite img urls in post 319 | $(this).attr('src', localImagePath); 320 | }); 321 | return images; 322 | } 323 | 324 | async function 
saveImagesToLocal(imageFolder, images) { 325 | var imagePromises = images.map(function(image) { 326 | return new Promise(function(resolve, reject) { 327 | var filePath = path.join(imageFolder, image.localName); 328 | mkdirp.sync(imageFolder); // fs.writeFileSync(p, images[0].binary, 'binary'); 329 | 330 | // console.log(`Downloading image ${image.mediumUrl} -> ${filePath}`); 331 | report.images.attempted.push(image.mediumUrl); 332 | // request(image.mediumUrl).pipe(fs.createWriteStream(filePath)); // request image from medium CDN and save locally. TODO: add err handling 333 | 334 | var writer = fs.createWriteStream(filePath); 335 | 336 | request 337 | .get(image.mediumUrl) 338 | .on('complete', function(response) { 339 | // FIXME: how do we measure success / failure here? 340 | report.images.succeeded.push(`${image.mediumUrl}->${filePath}`); 341 | resolve(response); 342 | }) 343 | .on('error', function(err) { 344 | console.log(err); 345 | report.images.failed.push(`${image.mediumUrl}->${filePath}`); 346 | reject(err); 347 | }) 348 | .pipe(writer); 349 | }); 350 | }); 351 | 352 | return await Promise.all(imagePromises); 353 | } 354 | 355 | // using this allows us to stop flow execution, but not throw all the way up the chain... 
356 | class SilentError extends Error { 357 | constructor(...args) { 358 | super(...args); 359 | Error.captureStackTrace(this, SilentError); 360 | this.type = 'silent'; 361 | } 362 | } 363 | 364 | function printPrettyReport() { 365 | var postsAttempted = report.posts.attempted.length; 366 | var postsSucceeded = report.posts.succeeded.length; 367 | var postsFailed = report.posts.failed.length; 368 | var postsFailedDetail = report.posts.failed; 369 | var postDrafts = report.posts.drafts.length; 370 | var postReplies = report.posts.replies.length; 371 | 372 | var imagesAttempted = report.images.attempted.length; 373 | var imagesSucceeded = report.images.succeeded.length; 374 | var imagesFailed = report.images.failed.length; 375 | var imagesFailedDetail = report.images.failed; 376 | 377 | var gistAttempted = report.gists.attempted.length; 378 | var gistSucceeded = report.gists.succeeded.length; 379 | var gistFailed = report.gists.failed.length; 380 | var gistFailedDetail = report.gists.failed; 381 | 382 | console.log('##############################################################'); 383 | console.log('CONVERSION METRICS'); 384 | console.log('posts attempted', postsAttempted); 385 | console.log('posts succeeded', postsSucceeded); 386 | console.log('posts replies that were ignored:', postReplies); 387 | console.log('posts drafts that were not attempted:', postDrafts); 388 | console.log('posts failed', postsFailed); 389 | console.log('Failed posts:', postsFailedDetail); 390 | console.log(''); 391 | 392 | console.log('medium images attempted', imagesAttempted); 393 | console.log('images succeeded', imagesSucceeded); 394 | console.log('images failed', imagesFailed); 395 | console.log('Failed images:', imagesFailedDetail); 396 | console.log(''); 397 | 398 | console.log('gists inlining attempted', gistAttempted); 399 | console.log('gists succeeded', gistSucceeded); 400 | console.log('gists failed', gistFailed); 401 | console.log('Failed gists:', gistFailedDetail); 402 | 403 | 
console.log('##############################################################'); 404 | } 405 | 406 | function saveReportToFile(outputFolder) { 407 | fs.writeFileSync( 408 | path.join(outputFolder, 'conversion_report.json'), 409 | JSON.stringify(report), 410 | ); 411 | } 412 | 413 | // writePostFile(metaTemplate); 414 | module.exports = { 415 | convert: async function(srcPath, outputFolder = '.', templatePathStr, export_drafts) { 416 | var isDir = fs.lstatSync(srcPath).isDirectory(); 417 | var isFile = fs.lstatSync(srcPath).isFile(); 418 | 419 | var defaultTemplate = path.resolve( 420 | path.join(__dirname, '../templates/default.js'), 421 | ); 422 | 423 | var templatePath = defaultTemplate; 424 | // if template passed in, load that instead of default 425 | if (templatePathStr) { 426 | templatePath = path.resolve(templatePathStr); 427 | } 428 | 429 | var promises = []; 430 | 431 | if (isDir) { 432 | // folder was passed in, so get all html files for folders 433 | fs.readdirSync(srcPath).forEach(file => { 434 | var curFile = path.join(srcPath, file); 435 | 436 | if (file.endsWith('.html')) { 437 | promises.push(convertMediumFile(curFile, outputFolder, templatePath, export_drafts)); 438 | // } else { 439 | // promises.push(Promise.resolve('not html file')); // FIXME: is this needed? 
440 | } 441 | }); 442 | } else { 443 | var promises = [ 444 | convertMediumFile(path.resolve(srcPath), outputFolder, templatePath, export_drafts), 445 | ]; 446 | } 447 | 448 | try { 449 | var result = await Promise.all(promises); 450 | // console.log('ALL DONE', report); 451 | printPrettyReport(); 452 | saveReportToFile(outputFolder); 453 | console.log( 454 | `Medium files from "${path.resolve( 455 | srcPath, 456 | )}" have finished converting to "${path.resolve( 457 | outputFolder, 458 | )}" using the "${templatePathStr}" template.`, 459 | ); 460 | console.log( 461 | `Detailed output report named "conversion_report.json" can be found in the output folder.`, 462 | ); 463 | } catch (e) { 464 | console.log('Error during conversion!', e); 465 | } 466 | }, 467 | }; 468 | -------------------------------------------------------------------------------- /lib/markdown.js: -------------------------------------------------------------------------------- 1 | // Utils related to scraping pages and converting them to markdown 2 | 3 | // converts html to markdown 4 | var TurndownService = require('turndown'); 5 | 6 | // global placeholder allowing us to pass in options from the template... 7 | var templateOptions; 8 | 9 | var turnDownOptions = { 10 | // linkReferenceStyle: 'collapsed', 11 | codeBlockStyle: 'fenced', 12 | }; 13 | var turndownService = new TurndownService(turnDownOptions); 14 | // strip