├── README.md
└── hugodocs2json.rb


/README.md:
--------------------------------------------------------------------------------
 1 | hugodocs2json
 2 | ==
 3 | 
 4 | ## Requirements
 5 | 
 6 | * **nokogiri:** `gem install nokogiri`
 7 | 
 8 | ## Sample usage
 9 | 
10 | Clone this repository:
11 | 
12 | ```
13 | $ git clone https://github.com/matiasinsaurralde/hugodocs2json.git
14 | $ cd hugodocs2json
15 | ```
16 | 
17 | Generate your Hugo docs:
18 | 
19 | ```
20 | $ hugo -s ~/docs -d ~/docs-output
21 | ```
22 | 
23 | Run the script, using the output directory as the first argument:
24 | ```
25 | $ ruby hugodocs2json.rb ~/docs-output
26 | ```
27 | 
28 | By default the output is written to `output.json` in the current working directory; pass a file name as an optional second argument to write it elsewhere.


--------------------------------------------------------------------------------
/hugodocs2json.rb:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env ruby
 2 | require 'nokogiri'
 3 | require 'json'
 4 | 
 5 | path, output_file = ARGV[0], ARGV[1] || "output.json"
 6 | 
 7 | if ARGV.length == 0
 8 |     puts "Please specify a path!"
 9 |     exit
10 | end
11 | 
12 | files = Dir.glob(File.join(path, '**', '*.html'))
13 | puts "Found #{files.length} files."
14 | 
15 | items, skipped_files = [], 0
16 | 
17 | skipped_files =0
18 | 
19 | files.each do |f|
20 |     begin
21 |         raw_html = File.read(f)
22 |         page = Nokogiri::HTML(raw_html)
23 | 
24 |         # Remove GH link:
25 |         gh_link = page.css('.container-github')
26 |         if gh_link.length > 0
27 |             gh_link[0].remove
28 |         end
29 | 
30 |         item = {
31 |             title: page.css('title').text.strip(),
32 |             article: page.css('article').text.strip.gsub("\n", " ")
33 |         }
34 |         items << item
35 |     rescue
36 |         puts "Skipping: #{f}"
37 |         skipped_files+=1
38 |     end
39 | end
40 | 
41 | puts "Skipped files: #{skipped_files} of #{files.length}"
42 | puts "Writing output to #{output_file}..."
43 | 
44 | output_json = JSON.dump(items)
45 | File.write(output_file, output_json)
46 | 


--------------------------------------------------------------------------------