├── public ├── js │ └── the.js └── css │ └── the.css ├── .gitignore ├── Procfile ├── .DS_Store ├── controllers ├── index.js └── base_controller.js ├── package.json ├── LICENSE ├── index.js ├── routes.js ├── views ├── layouts │ └── application.jade └── base │ └── index.jade ├── services └── html_to_json.js └── README.md /public/js/the.js: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules/ 2 | -------------------------------------------------------------------------------- /Procfile: -------------------------------------------------------------------------------- 1 | web: node index.js -------------------------------------------------------------------------------- /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dinubs/jam-api/HEAD/.DS_Store -------------------------------------------------------------------------------- /controllers/index.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | Base: require('./base_controller') 3 | } -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "hm-file-structure", 3 | "version": "1.0.0", 4 | "description": "", 5 | "main": "index.js", 6 | "scripts": { 7 | "test": "echo \"Error: no test specified\" && exit 1" 8 | }, 9 | "author": "", 10 | "license": "GPL", 11 | "dependencies": { 12 | "boom": "^3.1.1", 13 | "cheerio": "^0.19.0", 14 | "hapi": "^8.8.0", 15 | "hapi-cors-headers": "^1.0.0", 16 | "hapi-locals": "^0.1.5", 17 | "jade": "^1.11.0", 18 | "mongoose": "^4.1.4", 19 | "request": "^2.67.0" 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Jam API 2 | Copyright (C) 2016 Gavin Dinubilo 3 | 4 | This program is free software: you can redistribute it and/or modify 5 | it under the terms of the GNU General Public License as published by 6 | the Free Software Foundation, either version 3 of the License, or 7 | (at your option) any later version. 8 | 9 | This program is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | GNU General Public License for more details. 13 | 14 | You should have received a copy of the GNU General Public License 15 | along with this program. If not, see . -------------------------------------------------------------------------------- /index.js: -------------------------------------------------------------------------------- 1 | var fs = require("fs"); 2 | 3 | var Path = require('path'); 4 | var Hapi = require('hapi'); 5 | 6 | var server = new Hapi.Server({ 7 | debug: { request: ['error'] }, 8 | connections: { 9 | routes: { 10 | cors: true 11 | } 12 | } 13 | }); 14 | server.connection({ 15 | port: process.env.PORT || 5000, 16 | routes: { 17 | json: { 18 | space: 4 19 | } 20 | } 21 | }); 22 | 23 | server.views({ 24 | engines: { 25 | jade: require('jade') 26 | }, 27 | relativeTo: __dirname + '/views', 28 | }); 29 | 30 | 31 | // Configure routes 32 | var routes = require('./routes')(server); 33 | 34 | server.start(function (err) { 35 | console.log(err); 36 | console.log('Server running at:', server.info.uri); 37 | }); 38 | -------------------------------------------------------------------------------- /routes.js: -------------------------------------------------------------------------------- 1 | var c = require('./controllers/index'); 2 | 3 | module.exports = function(server) { 4 | // Base routes 5 | server.route({method: 'GET', path: '/', handler: c.Base.index}); 6 | server.route({method: 'POST', path: '/', handler: c.Base.parse}); 7 | 8 | server.route({ 9 | path: '/{p*}', 10 | method: 'OPTIONS', 11 | handler: function(req, reply){ 12 | console.log('test'); 13 | reply({method: 'options'}); 14 | }, 15 | config: { 16 | auth: false, 17 | cors: true 18 | } 19 | }); 20 | 21 | // Static files 22 | server.route({ 23 | method: 'GET', 24 | path: '/css/{file}.css', 25 | handler: function (request, reply) { 26 | reply.file("./public/css/"+request.params.file+".css"); 27 | } 28 | }); 29 | server.route({ 30 | method: 'GET', 31 | path: '/js/{file}.js', 32 | handler: function (request, reply) { 33 | reply.file("./public/js/"+request.params.file+".js"); 34 | } 35 | }); 36 | }; 37 | -------------------------------------------------------------------------------- /controllers/base_controller.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | let $ = require('cheerio'); 4 | let request = require('request'); 5 | 6 | let convert = require('../services/html_to_json'); 7 | 8 | module.exports = { 9 | index: function(req, res) { 10 | res.view('base/index', {title: 'Hello'}); 11 | }, 12 | parse: function(req, res) { 13 | console.log(req.payload); 14 | request(req.payload.url, function(err, data, body) { 15 | if (err) return res(err.toString()); 16 | // This is a poor fix to the issue, but for the time being since it's 4 in the morning. 17 | try { 18 | let json_data = JSON.parse(req.payload.json_data.replace(/'/g, '"')); 19 | try { 20 | return res(convert(json_data, $.load(body))); 21 | } catch(e) { 22 | return res({'error': 'A provided CSS selector was not found on the provided '}); 23 | } 24 | } catch(e) { 25 | console.log(e); 26 | return res({'error': 'invalid JSON'}); 27 | } 28 | }); 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /public/css/the.css: -------------------------------------------------------------------------------- 1 | * { 2 | margin: 0; 3 | padding: 0; 4 | box-sizing: border-box; 5 | position: relative; 6 | } 7 | 8 | html, body { min-height: 100%; } 9 | 10 | html { 11 | font-size: 62.5%; 12 | box-sizing: border-box; 13 | padding: 25px 0; 14 | } 15 | 16 | body { 17 | font-size: 1.6rem; 18 | margin: 0; 19 | color: #333; 20 | padding: 10px; 21 | font-family: 'Trebuchet MS', 'Lucida Grande', 'Lucida Sans Unicode', 'Lucida Sans', Tahoma, sans-serif; 22 | -webkit-text-size-adjust: 100%; 23 | -ms-text-size-adjust: 100%; 24 | -moz-osx-font-smoothing: grayscale; 25 | -webkit-font-smoothing: antialiased; 26 | } 27 | 28 | .wrap { 29 | width: 100%; 30 | max-width: 48em; 31 | margin: 0 auto; 32 | } 33 | 34 | h1, h2 { 35 | margin-top: 25px; 36 | } 37 | 38 | p { 39 | line-height: 1.5; 40 | } 41 | 42 | strong { 43 | display: inline-block; 44 | margin-top: 10px; 45 | } 46 | 47 | pre { 48 | width: 100%; 49 | background-color: #F9F7F7; 50 | overflow-x: scroll; 51 | padding: 15px; 52 | border-radius: 5px; 53 | } 54 | 55 | label { 56 | margin-top: 15px; 57 | display: block; 58 | } 59 | 60 | input, textarea { 61 | width: 100%; 62 | margin: 5px 0; 63 | padding: 15px; 64 | border: 3px solid; 65 | border-color: #F9F7F7; 66 | outline: none; 67 | } 68 | 69 | input:focus, textarea:focus { 70 | border-color: #4D6DE3; 71 | } 72 | 73 | input[type="submit"] { 74 | width: 200px; 75 | background-color: #4D6DE3; 76 | color: #fff; 77 | border: none; 78 | border-radius: 5px; 79 | padding: 10px; 80 | text-transform: uppercase; 81 | font-weight: bold; 82 | cursor: pointer; 83 | } 84 | 85 | a { 86 | color: #4D6DE3; 87 | text-decoration: none; 88 | } 89 | 90 | a:hover { 91 | text-decoration: underline; 92 | } 93 | 94 | ul { 95 | padding: 10px 0; 96 | padding-left: 40px; 97 | } 98 | -------------------------------------------------------------------------------- /views/layouts/application.jade: -------------------------------------------------------------------------------- 1 | doctype html 2 | html(lang="en") 3 | head 4 | link(rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/font-awesome/4.4.0/css/font-awesome.min.css") 5 | link(rel="stylesheet", type="text/css", href="/css/the.css") 6 | meta(name="viewport" content="width=device-width, initial-scale=1.0") 7 | meta(name="description" content=app_descripttion) 8 | meta(charset="UTF-8") 9 | title Jam API 10 | body 11 | .wrap 12 | block content 13 | 14 | script(type="application/javascript", src="/js/the.js") 15 | -------------------------------------------------------------------------------- /services/html_to_json.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | let $ = require('cheerio'); 4 | 5 | function parse_object(obj, elem) { 6 | var temp_obj = {}; 7 | delete obj.elem; 8 | for (var key in obj) { 9 | if (obj[key] === 'text') { 10 | temp_obj[key] = $(elem).text().trim(); 11 | } else if (obj[key] === 'html') { 12 | temp_obj[key] = $(elem).html().trim(); 13 | } else { 14 | temp_obj[key] = elem.attribs[obj[key]]; 15 | } 16 | } 17 | return temp_obj; 18 | } 19 | 20 | function get_primary_type(elem) { 21 | var elem_type = elem.name; 22 | if (elem_type === 'img') { 23 | return elem.attribs['src']; 24 | } else if (elem_type === 'a' || elem_type === 'link') { 25 | return elem.attribs['href']; 26 | } 27 | return $(elem).text().trim(); 28 | } 29 | 30 | function array_element(obj, elems) { 31 | var temp_tags = elems; 32 | var temp_array = []; 33 | for (var i = 0; i < temp_tags.length; i++) { 34 | var temp_obj = {} 35 | if (Object.prototype.toString.call(obj) === '[object Object]') { 36 | temp_obj = parse_object(obj, temp_tags.get(i)); 37 | temp_array.push(temp_obj); 38 | } else { 39 | temp_obj = get_primary_type(temp_tags.get(i)); 40 | temp_array.push(temp_obj); 41 | } 42 | } 43 | return temp_array; 44 | } 45 | 46 | function convert(tags, $) { 47 | var converted_tags = {}; 48 | 49 | for (var key in tags) { 50 | if (tags.hasOwnProperty(key)) { 51 | if (Array.isArray(tags[key])) { 52 | if (Object.prototype.toString.call(tags[key][0]) === '[object Object]') { 53 | var elem = tags[key][0]; 54 | converted_tags[key] = array_element(elem, $(tags[key][0]['elem'])); 55 | } else { 56 | var elem = tags[key][0]; 57 | converted_tags[key] = array_element(elem, $(tags[key][0])); 58 | } 59 | } else if (Object.prototype.toString.call(tags[key]) === '[object Object]') { 60 | var elem = tags[key]; 61 | converted_tags[key] = parse_object(elem, $(tags[key]['elem']).get(0)); 62 | } else { 63 | var elem = tags[key]; 64 | converted_tags[key] = get_primary_type($(elem).get(0)); 65 | } 66 | } 67 | } 68 | 69 | return converted_tags; 70 | } 71 | 72 | module.exports = convert; 73 | -------------------------------------------------------------------------------- /views/base/index.jade: -------------------------------------------------------------------------------- 1 | extends ../layouts/application 2 | 3 | block content 4 | h1 Jam API 5 | p Jam API is a service that allows you to turn any site into a JSON accessible api using CSS selectors. To get started simply run a post request to https://www.jamapi.xyz with formdata of "url" and "json_data". 6 | p Here's an example of what your data should look like, this queries the Product Hunt jobs site and gets the company name and the job link: 7 | form(method="post" action="/") 8 | label(for='url') URL 9 | input(type='url' id="url" name='url' value='https://www.producthunt.com/jobs') 10 | label(for='json_data') JSON 11 | textarea(name='json_data' id='json_data' rows='10' cols='100') 12 | | { 13 | | "title": "title", 14 | | "companies": [{"elem": "ul.jobs li.job a", "link": "href", "name": "text"}], 15 | | "company_descriptions": [{"elem": "ul.jobs li.job .description", "name": "text"}] 16 | | } 17 | input(type="submit" value='query site') 18 | h2 Example Sites 19 | p 20 | a(href="https://www.random-medium.xyz") Random Medium 21 | | : Gets you the top Medium post for a random day from September 10, 2014. 22 | p 23 | | If you'd like to add your site, please submit a 24 | a(href="https://www.github.com/gavindinubilo/jam-api") pull request. 25 | h2 Code Examples 26 | p 27 | | Can be found on 28 | a(href="https://www.github.com/gavindinubilo/jam-api") Github 29 | | . If you'd like to add your own example please make a pull request. 30 | h2 Features 31 | ul 32 | li Passing an element that equates to an img will return the img src. 33 | li Passing an element that equates to a link will return the href of the link. 34 | li Passing any other element will return the text of that element. 35 | p When you pass an array with JSON you'll get a structure that looks as follows: 36 | pre 37 | | "companies": [ 38 | | { 39 | | "index": 0, 40 | | "value": { 41 | | "link": "https://blog.underdog.io/post/140227881612/were-hiring-a-communityrecruiting-lead", 42 | | "name": "Underdog.io" 43 | | } 44 | | }, 45 | | { 46 | | "index": 1, 47 | | "value": { 48 | | "link": "https://jobs.babbel.com/en/?gh_jid=202241", 49 | | "name": "Babbel" 50 | | } 51 | | }, 52 | | ] 53 | p All the attributes you provide as JSON will be put inside of the value property, and the index property is to be able to track what index it ocurred in the DOM. I nested JSON values into it's own so that you can still have an "index" property returned and not run into issues. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | **This project is no longer active developement, please see [CoolQLCool](https://www.github.com/dinubs/coolqlcool) for a similar more active project ([ref](https://github.com/dinubs/jam-api/issues/24))** 2 | 3 | # Jam API 4 | Jam API is a service that allows you to turn any site into a JSON accessible api using CSS selectors. To get started simply run a post request to https://www.jamapi.xyz with formdata of "url" and "json_data", here's an example of what your data should look like: 5 | ```json 6 | { 7 | "title": "title", 8 | "logo": ".nav-logo img", 9 | "paragraphs": [{ "elem": ".home-post h1", "value": "text"}], 10 | "links": [{"elem": ".home-post > a:first-of-type", "location": "href"}] 11 | } 12 | ``` 13 | Using API you can simply generate JSON data from any website. 14 | 15 | ## Code samples 16 | ### nodejs 17 | ```js 18 | const request = require('request'); 19 | request.post('https://www.jamapi.xyz/', {form: {url: 'http://www.gavin.codes/', json_data: '{"title": "title"}'}}, function(err, response, body) { 20 | console.log(body); 21 | }) 22 | ``` 23 | ### Javascript 24 | ```js 25 | fetch('https://www.jamapi.xyz', { 26 | method: 'POST', 27 | headers: { 28 | 'Accept': 'application/json', 29 | 'Content-Type': 'application/json' 30 | }, 31 | body: JSON.stringify({ 32 | url: 'http://www.gavin.codes/', 33 | json_data: '{"title": "title"}' 34 | }) 35 | }).then(function(response) { 36 | return response.json(); 37 | }).then(function(json) { 38 | document.body.innerHTML = json; 39 | }); 40 | ``` 41 | ### Ruby 42 | ```ruby 43 | require 'httparty' 44 | response = HTTParty.post("https://www.jamapi.xyz/", 45 | :body => { "url" => "http://www.gavin.codes/", "json_data" => "{'title': 'title'}"}) 46 | puts response.to_json 47 | ``` 48 | ### Python 49 | ```python 50 | import requests 51 | payload = {'url': 'http://www.gavin.codes/', 'json_data': '{"title": "title"}'} 52 | 53 | r = requests.post("https://www.jamapi.xyz", data=payload) 54 | print(r.json()) 55 | ``` 56 | ### curl 57 | ```shell 58 | curl -X POST \ 59 | -F 'url=http://www.gavin.codes/' \ 60 | -F 'json_data={"title":"title"}' \ 61 | https://www.jamapi.xyz 62 | ``` 63 | 64 | ## Features 65 | Will auto pull the img src on corresponding elements, will auto pull the href from links. If passing JSON, you must provide a "elem" property, and then the element attributes you want. When you pass an array with JSON you'll get a structure that looks as follows: 66 | ```json 67 | [ 68 | { 69 | "index": 0, 70 | "value": { 71 | "value": "Porter Robinson – Sad Machine (Cosmo’s Midnight Remix)" 72 | } 73 | }, 74 | { 75 | "index": 1, 76 | "value": { 77 | "value": "Listen to Rachel Platten’s “Stand By You”" 78 | } 79 | }] 80 | ``` 81 | All the attributes you provide as JSON will be put inside of the value property, and the index property is to be able to track what index it ocurred in the DOM. I nested JSON values into it's own so that you can still have an "index" property returned and not run into issues. 82 | 83 | ## How it works 84 | Main power of the program is in `services/html_to_json.js`. Start site with `node index` after doing `npm install`. 85 | 86 | Suggested node version is at least `4.2.2` 87 | --------------------------------------------------------------------------------