├── .github
│   ├── dependabot.yml
│   └── workflows
│       └── ci.yml
├── .gitignore
├── README.md
├── config
│   └── repos.js
├── examples
│   ├── .gitignore
│   ├── data
│   │   └── ___next_page.txt
│   ├── get_profile.js
│   ├── index.html
│   ├── list-repos.js
│   └── stars-recursive-scrape-save.js
├── index.js
├── lambda
│   ├── debug.js
│   ├── http_request.js
│   └── s3.js
├── lib
│   ├── feed.js
│   ├── followers.js
│   ├── http_request.js
│   ├── index.js
│   ├── issue.js
│   ├── issues.js
│   ├── issues_search.js
│   ├── labels.js
│   ├── milestones.js
│   ├── next_page.js
│   ├── next_page_beta.js
│   ├── org.js
│   ├── org_repos.js
│   ├── people.js
│   ├── profile.js
│   ├── profile_contribs.js
│   ├── repo.js
│   ├── repos.js
│   ├── repos_user.js
│   ├── scrapers.js
│   ├── starred.js
│   ├── stars_watchers.js
│   ├── switcher.js
│   ├── url_validator.js
│   └── utils.js
├── package-lock.json
├── package.json
└── test
    ├── e2e.test.js
    ├── feed.test.js
    ├── fixtures
    │   ├── dwyl-tudo-issue-51-api-comments.json
    │   ├── dwyl-tudo-issue-51-api.json
    │   ├── dwyl-tudo-issue-51-scrape.json
    │   └── dwyl-tudo-issue-51.html
    ├── followers.test.js
    ├── following.test.js
    ├── http_request.test.js
    ├── issue.test.js
    ├── issues.test.js
    ├── issues_search.test.js
    ├── labels.test.js
    ├── milestones.test.js
    ├── org.test.js
    ├── people.test.js
    ├── profile.test.js
    ├── repo.test.js
    ├── repos.test.js
    ├── starred.test.js
    ├── stars.test.js
    ├── switcher.test.js
    ├── url_validator.test.js
    └── utils.test.js
/.github/dependabot.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 | updates:
3 |   - package-ecosystem: npm
4 |     directory: "/"
5 |     schedule:
6 |       interval: weekly
7 |       time: "17:00"
8 |       timezone: Europe/London
9 |
--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
1 | # This workflow will do a clean install of node dependencies, cache/restore them, build the source code and run tests across different versions of node
2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-nodejs-with-github-actions
3 |
4 | name: Node.js CI
5 |
6 | on:
7 |   push:
8 |     branches: [ main ]
9 |   pull_request:
10 |     branches: [ main ]
11 |
12 | jobs:
13 |   build:
14 |
15 |     runs-on: ubuntu-latest
16 |
17 |     strategy:
18 |       matrix:
19 |         node-version: [18.x, 20.x]
20 |         # See supported Node.js release schedule at https://nodejs.org/en/about/releases/
21 |
22 |     steps:
23 |       - uses: actions/checkout@v2
24 |       - name: Use Node.js ${{ matrix.node-version }}
25 |         uses: actions/setup-node@v2
26 |         with:
27 |           node-version: ${{ matrix.node-version }}
28 |           cache: 'npm'
29 |       - run: npm ci
30 |       # - run: npm run build --if-present
31 |       - run: npm test
32 |       - name: Upload coverage to Codecov
33 |         uses: codecov/codecov-action@v4
34 |         with:
35 |           token: ${{ secrets.CODECOV_TOKEN }}
36 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Logs
2 | logs
3 | *.log
4 |
5 | # Runtime data
6 | pids
7 | *.pid
8 | *.seed
9 |
10 | # Directory for instrumented libs generated by jscoverage/JSCover
11 | lib-cov
12 |
13 | # Coverage directory used by tools like istanbul
14 | coverage
15 |
16 | # Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files)
17 | .grunt
18 |
19 | # Compiled binary addons (http://nodejs.org/api/addons.html)
20 | build/Release
21 |
22 | # Dependency directory
23 | # Commenting this out is preferred by some people, see
24 | # https://www.npmjs.org/doc/misc/npm-faq.html#should-i-check-my-node_modules-folder-into-git-
25 | node_modules
26 |
27 | # Users Environment Variables
28 | .lock-wscript
29 | .vagrant
30 | crawl.js
31 | .DS_Store
32 |
33 | .env
34 | tmp/
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | # :octocat: 🕷 🕸 GitHub Scraper
4 |
5 | Learn how to parse the DOM of a web page
6 | by using your favourite coding community as an example.
7 |
8 | [](https://github.com/nelsonic/github-scraper/actions)
9 | [](http://codecov.io/github/nelsonic/github-scraper?branch=master)
10 | [](https://github.com/nelsonic/github-scraper/issues)
11 | [](https://hits.dwyl.com/nelsonic/github-scraper)
12 | [](https://www.npmjs.com/package/github-scraper)
13 |
18 |
19 |
21 |
23 |
24 |
25 |
26 |
27 | # ⚠️ Disclaimer / Warning!
28 |
29 | This repository/project is intended for
30 | ***Educational Purposes*** **ONLY**.
31 | The project and corresponding NPM module should not
32 | be used for any purpose other than *learning*.
33 | Please do not use it for any other reason
34 | than to learn _about_ DOM parsing
35 | and _definitely_ don't _depend_ on it for anything important!
36 |
37 | The nature of DOM parsing is that when the HTML/UI changes,
38 | the parser will inevitably fail ...
39 | GitHub have every right to change/improve their UI as they see fit.
40 | When they do change their UI the scraper will _inevitably_ "_break_"!
41 | We have [GitHub Actions CI](https://github.com/nelsonic/github-scraper/actions)
42 | continuous integration running our tests precisely
43 | to _check_ that the parsers for the various pages are working as expected.
44 | You can run the tests locally too,
45 | see
46 | ["Run The Tests"](https://github.com/nelsonic/github-scraper#3-run-the-tests)
47 | section below.
48 |
49 | ## Why?
50 |
51 | Our _initial reason_ for writing this set of scrapers was to satisfy the _curiosity_ / _question_:
52 | > _How_ can we ***discover*** which are the ***interesting people and projects
53 | on GitHub***
54 | (_without **manually** checking *dozens* of GitHub profiles/repositories each day_) ?
55 |
56 | Our _second reason_ for scraping data from GitHub is so that we can show people a "*summary view*" of all their issues in our [Tudo](https://github.com/dwyl/tudo) project (which helps people track/manage/organise/prioritise their GitHub issues).
57 | See: https://github.com/dwyl/tudo/issues/51
58 |
59 | We needed a _simple_ way of systematically getting data from GitHub (_before people authenticate_) and scraping is the only way we could think of.
60 |
61 | We _tried_ using the [GitHub ***API***](https://developer.github.com/v3/)
62 | to get records from GitHub, but sadly,
63 | it has quite a few limitations (see: "_Issues with GitHub API_" section below) the biggest limitation being the [_rate-limiting_](https://developer.github.com/v3/#rate-limiting) on API requests.
64 |
65 | Thirdly we're building this project to [***scratch our own itch***](https://gettingreal.37signals.com/ch02_Whats_Your_Problem.php)
66 | ... scraping the _pages_ of GitHub has given us a _unique_ insight into the features of the platform which has leveled-up our skills.
67 |
68 | > Don't *you* want to know ***what's "Hot" right now on GitHub***...?
69 |
70 |
71 | ## What (*Problem* are we _trying_ to Solve)?
72 |
73 | Having a way of extracting the *essential* data from GitHub
74 | is a solution to a _surprisingly **wide array of problems**_, here are a few:
75 |
76 | + ***Who*** are the up-and-coming people (_worth following_) on GitHub?
77 | + ***Which*** are the ***interesting projects*** (*and why?!*)
78 | + ***What*** is the average age of an issue for a project?
79 | + Is a project's ***popularity growing*** or *plateaued*?
80 | + Are there (_already_) any ***similar projects*** to what I'm trying to build? (_reduce duplication of effort which is rampant in Open Source!!_)
81 | + How many projects get started but never finished?
82 | + ***Will*** my **Pull Request** *ever* get *merged* or is the module maintainer *too busy* and did I just [***waste 3 hours***](https://twitter.com/nelsonic/status/621984170353524736)?
83 | + _insert **your idea/problem** here_ ...
84 | + **Associative Lists** e.g: People who starred `abc` also liked `xyz`
85 |
86 |
87 | # How?
88 |
89 | This module fetches (_public_) pages from GitHub, "[_scrapes_](https://en.wikipedia.org/wiki/Web_scraping)" the html to extract raw data and returns a JSON Object.
90 |
91 | # Usage
92 |
93 | ## install from NPM
94 |
95 | install from npm and save to your `package.json`:
96 |
97 | ```sh
98 | npm install github-scraper --save
99 | ```
100 |
101 | ## Use it in your script!
102 |
103 | ```js
104 | var gs = require('github-scraper');
105 | var url = '/iteles'; // a random username
106 | gs(url, function(err, data) {
107 |   console.log(data); // or whatever you want to do with the data
108 | })
109 | ```
110 |
111 | ## Example URLs and Output
112 |
113 | ### Profile Page
114 |
115 | User profile has the following format `https://github.com/{username}`
116 | example: [https://github.com/**iteles**](https://github.com/iteles)
117 |
118 | 
119 |
120 |
121 | ```js
122 | var gs = require('github-scraper'); // require the module
123 | var url = 'iteles'; // the username from the example above
124 | gs(url, function(err, data) {
125 |   console.log(data); // or whatever you want to do with the data
126 | })
127 | ```
128 |
129 | Sample output:
130 |
131 | ```json
132 | {
133 |   "type": "profile",
134 |   "url": "/iteles",
135 |   "avatar": "https://avatars1.githubusercontent.com/u/4185328?s=400&v=4",
136 |   "name": "Ines Teles Correia",
137 |   "username": "iteles",
138 |   "bio": "Co-founder @dwyl | Head cheerleader @foundersandcoders",
139 |   "uid": 4185328,
140 |   "worksfor": "@dwyl",
141 |   "location": "London, UK",
142 |   "website": "http://www.twitter.com/iteles",
143 |   "orgs": {
144 |     "bowlingjs": "https://avatars3.githubusercontent.com/u/8825909?s=70&v=4",
145 |     "foundersandcoders": "https://avatars3.githubusercontent.com/u/9970257?s=70&v=4",
146 |     "docdis": "https://avatars0.githubusercontent.com/u/10836426?s=70&v=4",
147 |     "dwyl": "https://avatars2.githubusercontent.com/u/11708465?s=70&v=4",
148 |     "ladiesofcode": "https://avatars0.githubusercontent.com/u/16606192?s=70&v=4",
149 |     "TheScienceMuseum": "https://avatars0.githubusercontent.com/u/16609662?s=70&v=4",
150 |     "SafeLives": "https://avatars2.githubusercontent.com/u/20841400?s=70&v=4"
151 |   },
152 |   "repos": 28,
153 |   "projects": 0,
154 |   "stars": 453,
155 |   "followers": 341,
156 |   "following": 75,
157 |   "pinned": [
158 |     { "url": "/dwyl/start-here" },
159 |     { "url": "/dwyl/learn-tdd" },
160 |     { "url": "/dwyl/learn-elm-architecture-in-javascript" },
161 |     { "url": "/dwyl/tachyons-bootstrap" },
162 |     { "url": "/dwyl/learn-ab-and-multivariate-testing" },
163 |     { "url": "/dwyl/learn-elixir" }
164 |   ],
165 |   "contribs": 878,
166 |   "contrib_matrix": {
167 |     "2018-04-08": { "fill": "#c6e48b", "count": 1, "x": "13", "y": "0" },
168 |     "2018-04-09": { "fill": "#c6e48b", "count": 2, "x": "13", "y": "12" },
169 |     "2018-04-10": { "fill": "#7bc96f", "count": 3, "x": "13", "y": "24" },
170 |     ...etc...
171 |     "2019-04-11": { "fill": "#c6e48b", "count": 1, "x": "-39", "y": "48" },
172 |     "2019-04-12": { "fill": "#7bc96f", "count": 5, "x": "-39", "y": "60" }
173 |   }
174 | }
175 | ```
176 |
177 | ### Followers
178 |
179 | How many people are following a given person on Github.
180 | Url format: `https://github.com/{username}/followers`
181 | example: [https://github.com/iteles/**followers**](https://github.com/iteles/followers)
182 |
183 | ```js
184 | var gs = require('github-scraper'); // require the module
185 | var url = 'iteles/followers'; // the followers page for this user
186 | gs(url, function(err, data) {
187 |   console.log(data); // or whatever you want to do with the data
188 | })
189 | ```
190 |
191 | Sample output:
192 |
193 | ```js
194 | { entries:
195 | [ 'tunnckoCore', 'OguzhanE', 'minaorangina', 'Jasonspd', 'muntasirsyed', 'fmoliveira', 'nofootnotes',
196 | 'SimonLab', 'Danwhy', 'kbocz', 'cusspvz', 'RabeaGleissner', 'beejhuff', 'heron2014', 'joshpitzalis',
197 | 'rub1e', 'nikhilaravi', 'msmichellegar', 'anthonybrown', 'miglen', 'shterev', 'NataliaLKB',
198 | 'ricardofbarros', 'boymanjor', 'asimjaved', 'amilvasishtha', 'Subhan786', 'Neats29', 'lottie-em',
199 | 'rorysedgwick', 'izaakrogan', 'oluoluoxenfree', 'markwilliamfirth', 'bmordan', 'nodeco', 'besarthoxhaj',
200 | 'FilWisher', 'maryams', 'sofer', 'joaquimserafim', 'vs4vijay', 'intool', 'edwardcodes', 'hyprstack',
201 | 'nelsonic' ],
202 | url: 'https://github.com/iteles/followers' }
203 | ok 1 iteles/followers count: 45
204 | ```
205 |
206 | If the person has ***more than 51 followers*** they will have multiple pages of followers.
207 | The data will have a **next_page** key with a value such as:
208 | [/nelsonic/followers?**page=2**](https://github.com/nelsonic/followers?page=2)
209 | If you want to keep fetching these subsequent pages of followers,
210 | simply keep running the scraper:
211 | e.g:
212 |
213 | ```js
214 | var url = 'alanshaw/followers'; // a user with multiple pages of followers
215 | gs(url, function(err, data) {
216 |   console.log(data); // or whatever you want to do with the data
217 |   if(data.next_page) {
218 |     gs(data.next_page, function(err2, data2) {
219 |       console.log(data2); // etc.
220 |     })
221 |   }
222 | })
223 | ```
224 |
225 | ### **Following**
226 | Want to know the list of people this person is `following` that's *easy* too!
227 | The url format is: `https://github.com/{username}/following`
228 | e.g: [https://github.com/iteles/**following**](https://github.com/iteles/following) or
229 | [https://github.com/nelsonic/following?**page=2**](https://github.com/nelsonic/following?page=2)
230 | (_where the person is following more than 51 people_ ...)
231 |
232 | Usage format is *identical* to `followers` (above) so here's an example
233 | of fetching page 3 of the results:
234 |
235 | ```js
236 | var gs = require('github-scraper'); // require the module
237 | var url = 'nelsonic/following?page=3'; // page 3 of the people this user follows
238 | gs(url, function(err, data) {
239 |   console.log(data); // or whatever you want to do with the data
240 | })
241 | ```
242 |
243 | Sample output:
244 |
245 | ```js
246 | {
247 | entries:
248 | [ 'kytwb', 'dexda', 'arrival', 'jinnjuice', 'slattery', 'unixarcade', 'a-c-m', 'krosti',
249 | 'simonmcmanus', 'jupiter', 'capaj', 'cowenld', 'FilWisher', 'tsop14', 'NataliaLKB',
250 | 'izaakrogan', 'lynnaloo', 'nvcexploder', 'cwaring', 'missinglink', 'alanshaw', 'olizilla',
251 | 'tancredi', 'Ericat', 'pgte', 'hyprstack', 'iteles' ],
252 | url: 'https://github.com/nelsonic/following?page=3',
253 | next_page: 'https://github.com/nelsonic/following?page=4'
254 | }
255 | ```
256 |
257 | ### Starred Repositories
258 |
259 | The list of projects a person has *starred* is a fascinating source of insight.
260 | url format: https://github.com/stars/{username}
261 | e.g: [/stars/iteles](https://github.com/stars/iteles)
262 |
263 | ```js
264 | var gs = require('github-scraper'); // require the module
265 | var url = 'stars/iteles'; // starred repos for this user
266 | gs(url, function(err, data) {
267 |   console.log(data); // or whatever you want to do with the data
268 | })
269 | ```
270 |
271 | Sample output:
272 |
273 | ```js
274 | {
275 | entries:
276 | [ '/dwyl/repo-badges', '/nelsonic/learn-testling', '/joshpitzalis/testing', '/gmarena/gmarena.github.io',
277 | '/dwyl/alc', '/nikhilaravi/fac5-frontend', '/foundersandcoders/dossier', '/nelsonic/health', '/dwyl/alvo',
278 | '/marmelab/gremlins.js', '/docdis/learn-saucelabs', '/rogerdudler/git-guide', '/tableflip/guvnor',
279 | '/dwyl/learn-redis', '/foundersandcoders/playbook', '/MIJOTHY/FOR_FLUX_SAKE', '/NataliaLKB/learn-git-basics',
280 | '/nelsonic/liso', '/dwyl/learn-json-web-tokens', '/dwyl/hapi-auth-jwt2', '/dwyl/start-here',
281 | '/arvida/emoji-cheat-sheet.com', '/dwyl/time', '/docdis/learn-react', '/dwyl/esta', '/alanshaw/meteor-foam',
282 | '/alanshaw/stylist', '/meteor-velocity/velocity', '/0nn0/terminal-mac-cheatsheet',
283 | '/bowlingjs/bowlingjs.github.io' ],
284 | url: 'https://github.com/stars/iteles?direction=desc&page=2&sort=created',
285 | next_page: 'https://github.com/stars/iteles?direction=desc&page=3&sort=created'
286 | }
287 | ```
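
If you want _all_ pages of starred repos, keep following `next_page` until it disappears.
Here is a minimal recursive sketch (_the `fetchAllStars` helper name is ours, not part of the module;
see `examples/stars-recursive-scrape-save.js` for a fuller version of the idea_):

```js
var gs = require('github-scraper');

// follow next_page links until there are none left:
function fetchAllStars (url, entries, callback) {
  gs(url, function (err, data) {
    if (err) { return callback(err); }
    entries = entries.concat(data.entries); // accumulate the repo urls
    if (data.next_page) {                   // more pages? recurse.
      return fetchAllStars(data.next_page, entries, callback);
    }
    callback(null, entries);                // done: the full list
  });
}

fetchAllStars('stars/iteles', [], function (err, all) {
  console.log(all.length + ' starred repos');
});
```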
288 |
289 | ### Repositories
290 |
291 | The second tab on the personal profile page is "Repositories";
292 | this is a **list** of the ***personal projects*** the person is working on, e.g: https://github.com/iteles?tab=repositories
293 |
294 |
295 |
296 | We crawl this page and return an array containing the repo properties:
297 |
298 | ```js
299 | var url = 'iteles?tab=repositories';
300 | gs(url, function(err, data) {
301 |   console.log(data); // or whatever you want to do with the data
302 | })
303 | ```
304 |
305 | sample output:
306 |
307 | ```js
308 | {
309 | entries: [
310 | { url: '/iteles/learn-ab-and-multivariate-testing',
311 | name: 'learn-ab-and-multivariate-testing',
312 | lang: '',
313 | desc: 'Tutorial on A/B and multivariate testing',
314 | info: '',
315 | stars: '4',
316 | forks: '0',
317 | updated: '2015-07-08T08:36:37Z' },
318 | { url: '/iteles/learn-tdd',
319 | name: 'learn-tdd',
320 | lang: 'JavaScript',
321 | desc: 'A brief introduction to Test Driven Development (TDD) in JavaScript',
322 | info: 'forked from dwyl/learn-tdd',
323 | stars: '0',
324 | forks: '4',
325 | updated: '2015-06-29T17:24:56Z' },
326 | { url: '/iteles/practical-full-stack-testing',
327 | name: 'practical-full-stack-testing',
328 | lang: 'HTML',
329 | desc: 'A fork of @nelsonic\'s repo to allow for PRs',
330 | info: 'forked from nelsonic/practical-js-tdd',
331 | stars: '0',
332 | forks: '36',
333 | updated: '2015-06-06T14:40:43Z' },
334 | { url: '/iteles/styling-for-accessibility',
335 | name: 'styling-for-accessibility',
336 | lang: '',
337 | desc: 'A collection of \'do\'s and \'don\'t\'s of CSS to ensure accessibility',
338 | info: '',
339 | stars: '0',
340 | forks: '0',
341 | updated: '2015-05-26T11:06:28Z' },
342 | { url: '/iteles/Ultimate-guide-to-successful-meetups',
343 | name: 'Ultimate-guide-to-successful-meetups',
344 | lang: '',
345 | desc: 'The ultimate guide to organizing successful meetups',
346 | info: '',
347 | stars: '3',
348 | forks: '0',
349 | updated: '2015-05-19T09:40:39Z' },
350 | { url: '/iteles/Javascript-the-Good-Parts-notes',
351 | name: 'Javascript-the-Good-Parts-notes',
352 | lang: '',
353 | desc: 'Notes on the seminal "Javascript the Good Parts: byDouglas Crockford',
354 | info: '',
355 | stars: '41',
356 | forks: '12',
357 | updated: '2015-05-17T16:39:35Z' }
358 | ],
359 | url: 'https://github.com/iteles?tab=repositories' }
360 | ```
361 |
362 |
363 | ### Activity feed
364 |
365 | Every person on GitHub has an RSS feed for their recent activity;
366 | this is the 3rd and final tab of the person's profile page.
367 |
368 | It can be viewed online by visiting:
369 | ```sh
370 | https://github.com/{username}?tab=activity
371 | ```
372 | e.g: [/iteles?tab=activity](https://github.com/iteles?tab=activity)
373 |
374 |
375 | #### Parsing the Feed
376 |
377 | The activity feed is published as an [**.atom**](https://en.wikipedia.org/wiki/RSS)
378 | xml string which contains a list of entries.
379 |
380 | We use [**xml2js**](https://www.npmjs.com/package/xml2js)
381 | (which in turn uses the [**sax**](https://www.npmjs.com/package/sax) xml parser) to parse the xml stream. This results in an object similar to the following example:
382 |
383 | ```js
384 | { '$':
385 |     { xmlns: 'http://www.w3.org/2005/Atom',
386 |       'xmlns:media': 'http://search.yahoo.com/mrss/',
387 |       'xml:lang': 'en-US' },
388 |   id: [ 'tag:github.com,2008:/iteles' ],
389 |   link: [ { '$': [Object] }, { '$': [Object] } ],
390 |   title: [ 'iteles’s Activity' ],
391 |   updated: [ '2015-07-22T23:31:25Z' ],
392 |   entry:
393 |    [ { id: [Object],
394 |        published: [Object],
395 |        updated: [Object],
396 |        link: [Object],
397 |        title: [Object],
398 |        author: [Object],
399 |        'media:thumbnail': [Object],
400 |        content: [Object] },
401 |      { id: [Object],
402 |        published: [Object],
403 |        updated: [Object],
404 |        link: [Object],
405 |        title: [Object],
406 |        author: [Object],
407 |        'media:thumbnail': [Object],
408 |        content: [Object] }
409 |    ]
410 | }
411 | ```
412 | Each call to the atom feed returns the latest 30 entries.
413 | We're showing 2 here for illustration (_so you get the idea..._)
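
A minimal sketch of producing that object yourself with **xml2js**
(_assuming the feed is served at `https://github.com/{username}.atom`_):

```js
var https = require('https');
var xml2js = require('xml2js');

https.get('https://github.com/iteles.atom', function (res) {
  var xml = '';
  res.on('data', function (chunk) { xml += chunk; });
  res.on('end', function () {
    xml2js.parseString(xml, function (err, result) {
      if (err) { return console.error(err); }
      var feed = result.feed;         // root element of the atom document
      console.log(feed.title[0]);     // e.g: 'iteles’s Activity'
      console.log(feed.entry.length); // up to 30 entries per call
    });
  });
});
```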
414 |
415 | From this we _extract_ only the relevant info:
416 |
417 | ```sh
418 | '2015-07-22T12:33:14Z alanshaw pushed to master at alanshaw/david-www',
419 | '2015-07-22T12:33:14Z alanshaw created tag v9.4.3 at alanshaw/david-www',
420 | '2015-07-22T09:23:28Z alanshaw closed issue tableflip/i18n-browserify#6',
421 | '2015-07-21T17:08:19Z alanshaw commented on issue alanshaw/david#71',
422 | '2015-07-21T08:24:13Z alanshaw pushed to master at tableflip/score-board',
423 | '2015-07-20T17:49:59Z alanshaw deleted branch refactor-corp-events at tableflip/sow-api-client',
424 | '2015-07-20T17:49:58Z alanshaw pushed to master at tableflip/sow-api-client',
425 | '2015-07-20T17:49:58Z alanshaw merged pull request tableflip/sow-api-client#2',
426 | '2015-07-20T17:49:54Z alanshaw opened pull request tableflip/sow-api-client#2',
427 | '2015-07-18T07:30:36Z alanshaw closed issue alanshaw/md-tokenizer#1',
428 | '2015-07-18T07:30:36Z alanshaw commented on issue alanshaw/md-tokenizer#1',
429 | ```
430 | Instead of _wasting_ (_what would eventually be **Giga**_) ***Bytes*** of space on key:value pairs by storing the entries as JSON, we store the activity feed entries as strings in an array.
431 | Each item in the array can be broken down into:
432 | ```sh
433 | {date-time} {username} {action} {link}
434 | ```
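
Splitting a stored entry back into those four fields is cheap.
A sketch (_`parseEntry` is our name for the helper, not part of the module_):

```js
// split a stored feed entry back into its four fields:
function parseEntry (entry) {
  var parts = entry.split(' ');
  return {
    date: parts[0],                       // {date-time}
    username: parts[1],                   // {username}
    action: parts.slice(2, -1).join(' '), // {action} (variable length)
    link: parts[parts.length - 1]         // {link}
  };
}

parseEntry('2015-07-22T12:33:14Z alanshaw pushed to master at alanshaw/david-www');
// { date: '2015-07-22T12:33:14Z', username: 'alanshaw',
//   action: 'pushed to master at', link: 'alanshaw/david-www' }
```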
435 |
436 | As we can see from this there are several event types:
437 |
438 | + **pushed to master** at
439 | + **created tag** v9.4.3 at
440 | + **opened issue**
441 | + **commented on issue**
442 | + **closed issue**
443 | + **deleted branch**
444 | + **opened pull request**
445 | + **merged pull request**
446 | + **starred** username/repo-name
447 |
448 | For now we are *not* going to parse the event types; we simply store them in our list for later analysis.
449 |
450 | We have a good pointer for when it's time to start interpreting the data:
451 | https://developer.github.com/v3/activity/events/types/
452 |
453 | One thing worth noting is that the RSS feed is ***Not Real-Time*** ...
454 | sadly, it only gets updated periodically so we cannot rely on it to
455 | have the *latest* info.
456 |
457 |
458 | ### Organization
459 |
460 | Organization pages have the following url pattern: `https://github.com/{orgname}`
461 | example: [https://github.com/**dwyl**](https://github.com/dwyl)
462 |
463 | ```js
464 | var url = 'dwyl';
465 | gs(url, function(err, data) {
466 |   console.log(data); // or do something way more interesting with the data!
467 | });
468 | ```
469 |
470 | sample data (`entries` _truncated for brevity_):
471 | ```js
472 | {
473 | entries:
474 | [ { name: 'hapi-auth-jwt2',
475 | desc: 'Secure Hapi.js authentication plugin using JSON Web Tokens (JWT)',
476 | updated: '2015-08-04T19:30:50Z',
477 | lang: 'JavaScript',
478 | stars: '59',
479 | forks: '11' },
480 | { name: 'start-here',
481 | desc: 'A Quick-start Guide for People who want to DWYL',
482 | updated: '2015-08-03T11:04:14Z',
483 | lang: 'HTML',
484 | stars: '14',
485 | forks: '9' },
486 | { name: 'summer-2015',
487 | desc: 'Probably the best Summer Sun, Fun & Coding Experience in the World!',
488 | updated: '2015-07-31T11:02:29Z',
489 | lang: 'CSS',
490 | stars: '16',
491 | forks: '1' },
492 | ],
493 | website: 'http://dwyl.io',
494 | url: 'https://github.com/dwyl',
495 | name: 'dwyl - do what you love',
496 | desc: 'Start here: https://github.com/dwyl/start-here',
497 | location: 'Your Pocket',
498 | email: 'github@dwyl.io',
499 | pcount: 24,
500 | avatar: 'https://avatars3.githubusercontent.com/u/11708465?v=3&s=200',
501 | next_page: '/dwyl?page=2'
502 | }
503 | ```
504 | Note #1: *sadly*, this has a url format ***identical*** to a *Profile*;
505 | this gets handled by the `switcher`, which infers whether a page is an org or a profile
506 | by checking for a known element on the page.
507 |
508 | Note #2: when an organization has *multiple pages* of repositories you will see a `next_page`
509 | key/value in the `data` e.g: [/dwyl?**page=2**](/dwyl?page=2) (for the second page of repos)
510 |
511 |
512 | ### Repository Stats
513 |
514 | This is where things start getting interesting ...
515 |
516 | 
517 |
518 | example: https://github.com/nelsonic/adoro
519 |
520 | ```js
521 | var url = 'nelsonic/adoro';
522 | gs(url, function(err, data) {
523 |   console.log(data); // or do something way more interesting with the data!
524 | });
525 | ```
526 |
527 | sample data:
528 |
529 | ```js
530 | {
531 | url: 'https://github.com/nelsonic/adoro',
532 | desc: 'The little publishing tool you\'ll love using. [work-in-progress]',
533 | website: 'http://www.dwyl.io/',
534 | watchers: 3,
535 | stars: 8,
536 | forks: 1,
537 | commits: 12,
538 | branches: 1,
539 | releases: 1,
540 | langs: [ 'JavaScript 90.7%', 'CSS 9.3%' ]
541 | }
542 | ```
543 |
544 | > Annoyingly, the numbers of issues, pull requests and contributors
545 | are only rendered *after* the page has loaded (via XHR) so we do not get
546 | these three stats on page load.
547 |
548 |
549 | ### Issues
550 |
551 | Clicking on the issues icon/link in any repository takes us to the list of all the issues.
552 |
553 | A project with more than a page's worth of issues has pagination at the bottom of the page:
554 |
555 | 
556 |
557 | Which has a link to: https://github.com/dwyl/tudo/issues?page=2&q=is%3Aissue+is%3Aopen
558 |
559 | 
560 |
561 | List of issues for a repository:
562 |
563 | ```js
564 | var gs = require('github-scraper');
565 | var url = '/dwyl/tudo/issues';
566 | gs(url, function (err, data) {
567 |   console.log(data); // use the data however you like
568 | });
569 | ```
570 |
571 | sample output:
572 |
573 | ```js
574 | { entries:
575 | [
576 | {
577 | url: '/dwyl/tudo/issues/46',
578 | title: 'discuss components',
579 | created: '2015-07-21T15:34:22Z',
580 | author: 'benjaminlees',
581 | comments: 3,
582 | assignee: 'izaakrogan',
583 | milestone: 'I don\'t know what I\'m doing',
584 | labels: [ 'enhancement', 'help wanted', 'question' ]
585 | },
586 | {
587 | url: '/dwyl/tudo/issues/45',
588 | title: 'Create riot components from HTML structure files',
589 | created: '2015-07-21T15:24:58Z',
590 | author: 'msmichellegar',
591 | comments: 2,
592 | assignee: 'msmichellegar',
593 | labels: [ 'question' ]
594 | }
595 | ], // truncated for brevity
596 | open: 30,
597 | closed: 20,
598 | next: '/dwyl/tudo/issues?page=2&q=is%3Aissue+is%3Aopen',
599 | url: '/dwyl/tudo/issues'
600 | }
601 | ```
602 |
603 | Each issue in the list would create an entry in the crawler (worker) queue:
604 |
605 | ```sh
606 | 2015-07-22T12:33:14Z issue /dwyl/tudo/issues/77
607 | ```
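
A sketch of how those entries could be derived from the scraped list
(_the `queue` array is a stand-in; a real crawler might use Redis or similar_):

```js
var gs = require('github-scraper');

var queue = []; // stand-in for a real worker queue
gs('/dwyl/tudo/issues', function (err, data) {
  if (err) { return console.error(err); }
  data.entries.forEach(function (issue) {
    queue.push(issue.created + ' issue ' + issue.url);
  });
  console.log(queue[0]); // '2015-07-21T15:34:22Z issue /dwyl/tudo/issues/46'
});
```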
608 |
609 | > Should we include the "all issues by this author" link?
610 | + **created_by** https://github.com/dwyl/tudo/issues/created_by/iteles
611 | + **assignee** (assigned to): https://github.com/dwyl/tudo/issues?q=assignee%3Aiteles+is%3Aopen
612 |
613 |
614 | ### Issue (_individual_)
615 |
616 | The result of scraping https://github.com/dwyl/tudo/issues/51
617 |
618 | ```js
619 | var gs = require('github-scraper');
620 | var url = '/dwyl/tudo/issues/51';
621 | gs(url, function (err, data) {
622 |   console.log(data); // use the data however you like
623 | });
624 | ```
625 |
626 | sample output:
627 |
628 | ```js
629 | { entries:
630 | [ { id: 'issue-96442793',
631 | author: 'nelsonic',
632 | created: '2015-07-22T00:00:45Z',
633 | body: 'instead of waiting for people to perform the steps to authorise Tudo (to access their GitHub orgs/issues we could request their GitHub username on the login page and initiate the retrieval of their issues while they are authenticating... That way, by the time they get back to Tudo their issues dashboard is already pre-rendered and loaded! This is a wow-factor people won\'t be expecting and thus our app immediately delivers on our first promise!\n\nThoughts?' },
634 | { id: 'issuecomment-123807796',
635 | author: 'iteles',
636 | created: '2015-07-22T17:54:12Z',
637 | body: 'I\'d love to test this out, this will be an amazing selling point if we can get the performance to work like we expect!' },
638 | { id: 'issuecomment-124048121',
639 | author: 'nelsonic',
640 | created: '2015-07-23T10:20:15Z',
641 | body: '@iteles have you watched the Foundation Episode featuring Kevin Systrom (instagram) ?\n\n\nhttps://www.youtube.com/watch?v=nld8B9l1aRE\n\n\nWhat were the USPs that contributed to instagram\'s success (considering how many photo-related-apps were in the app store at the time) ?\n\ncc: @besarthoxhaj' },
642 | { id: 'issuecomment-124075792',
643 | author: 'besarthoxhaj',
644 | created: '2015-07-23T11:59:31Z',
645 | body: '@nelsonic love the idea! Let\'s do it!' } ],
646 | labels: [ 'enhancement', 'help wanted', 'question' ],
647 | participants: [ 'nelsonic', 'iteles', 'besarthoxhaj' ],
648 | url: '/dwyl/tudo/issues/51',
649 | title: 'Pre-fetch people\'s issues while they are authenticating with GitHub',
650 | state: 'Open',
651 | author: 'nelsonic',
652 | created: '2015-07-22T00:00:45Z',
653 | milestone: 'Minimal Usable Product',
654 | assignee: 'besarthoxhaj' }
655 | ```
656 |
657 | By contrast using the GitHub API to fetch this issue
658 | see: https://developer.github.com/v3/issues/#get-a-single-issue
659 |
660 | format:
661 | ```sh
662 | /repos/:owner/:repo/issues/:number
663 | ```
664 |
665 | ```sh
666 | curl https://api.github.com/repos/dwyl/tudo/issues/51
667 | ```
668 |
669 | ### Milestones
670 |
671 | Milestones are used to group issues into logical units.
672 |
673 | 
674 |
675 | ```js
676 | var gs = require('github-scraper');
677 | var url = '/dwyl/tudo/milestones';
678 | gs(url, function (err, data) {
679 |   console.log(data); // use the data however you like
680 | });
681 | ```
682 |
683 | Sample output:
684 |
685 | ```js
686 | { entries:
687 | [ { name: 'Test Milestone - Please Don\'t Close!',
688 | due: 'Past due by 16 days',
689 | updated: 'Last updated 5 days ago',
690 | desc: 'This Milestone in used in our e2e tests to check for an over-due milestone, so please don\'t close it!',
691 | progress: '0%',
692 | open: 1,
693 | closed: 0 },
694 | { name: 'Minimal Usable Product',
695 | due: 'Due by July 5, 2016',
696 | updated: 'Last updated 2 days ago',
697 | desc: 'What is the absolute minimum we can do to deliver value to people using the app?\n(and thus make them want to come back and use it!)',
698 | progress: '0%',
699 | open: 5,
700 | closed: 0 } ],
701 | url: 'https://github.com/dwyl/tudo/milestones',
702 | open: 2,
703 | closed: 1 }
704 | ```
705 |
706 | ### Labels (for a repository)
707 |
708 | All repositories have a set of standard labels (built-in to GitHub)
709 | e.g: https://github.com/dwyl/tudo/labels is (_currently_) only using the "*standard*" labels.
710 |
711 |
712 |
713 | Whereas the RethinkDB (which uses GitHub for all their project tracking) uses _several **custom labels**_:
714 | https://github.com/rethinkdb/rethinkdb/labels
715 |
716 |
717 |
718 | We need to crawl these for each repo.
719 |
720 | ```js
721 | var gs = require('github-scraper');
722 | var url = '/dwyl/time/labels';
723 | gs(url, function (err, data) {
724 |   console.log(data); // use the data however you like
725 | });
726 | ```
727 |
728 | Here's the extraction of the standard labels:
729 | ```js
730 | [
731 | { name: 'bug',
732 | style: 'background-color: #fc2929; color: #fff;',
733 | link: '/dwyl/tudo/labels/bug',
734 | count: 3 },
735 | { name: 'duplicate',
736 | style: 'background-color: #cccccc; color: #333333;',
737 | link: '/dwyl/tudo/labels/duplicate',
738 | count: 0 },
739 | { name: 'enhancement',
740 | style: 'background-color: #84b6eb; color: #1c2733;',
741 | link: '/dwyl/tudo/labels/enhancement',
742 | count: 11 },
743 | { name: 'help wanted',
744 | style: 'background-color: #159818; color: #fff;',
745 | link: '/dwyl/tudo/labels/help%20wanted',
746 | count: 21 },
747 | { name: 'invalid',
748 | style: 'background-color: #e6e6e6; color: #333333;',
749 | link: '/dwyl/tudo/labels/invalid',
750 | count: 1 },
751 | { name: 'question',
752 | style: 'background-color: #cc317c; color: #fff;',
753 | link: '/dwyl/tudo/labels/question',
754 | count: 10 }
755 | ]
756 | ```
757 |
758 | or a repo that has ***custom labels***:
759 |
760 | ```js
761 | { entries:
762 | [ { name: '[alpha]',
763 | style: 'background-color: #79CDCD; color: #1e3333;',
764 | link: '/dwyl/time/labels/%5Balpha%5D',
765 | count: 2 },
766 | { name: 'API',
767 | style: 'background-color: #006b75; color: #fff;',
768 | link: '/dwyl/time/labels/API',
769 | count: 11 },
770 | { name: 'bug',
771 | style: 'background-color: #fc2929; color: #fff;',
772 | link: '/dwyl/time/labels/bug',
773 | count: 5 },
774 | { name: 'chore',
775 | style: 'background-color: #e11d21; color: #fff;',
776 | link: '/dwyl/time/labels/chore',
777 | count: 9 },
778 | { name: 'discuss',
779 | style: 'background-color: #bfe5bf; color: #2a332a;',
780 | link: '/dwyl/time/labels/discuss',
781 | count: 43 },
782 | { name: 'Documentation',
783 | style: 'background-color: #eb6420; color: #fff;',
784 | link: '/dwyl/time/labels/Documentation',
785 | count: 2 },
786 | { name: 'duplicate',
787 | style: 'background-color: #cccccc; color: #333333;',
788 | link: '/dwyl/time/labels/duplicate',
789 | count: 0 },
790 | { name: 'enhancement',
791 | style: 'background-color: #84b6eb; color: #1c2733;',
792 | link: '/dwyl/time/labels/enhancement',
793 | count: 27 },
794 | { name: 'external dependency',
795 | style: 'background-color: #D1EEEE; color: #2c3333;',
796 | link: '/dwyl/time/labels/external%20dependency',
797 | count: 1 },
798 | { name: 'FrontEnd',
799 | style: 'background-color: #f7c6c7; color: #332829;',
800 | link: '/dwyl/time/labels/FrontEnd',
801 | count: 26 },
802 | { name: 'help wanted',
803 | style: 'background-color: #009800; color: #fff;',
804 | link: '/dwyl/time/labels/help%20wanted',
805 | count: 42 },
806 | { name: 'invalid',
807 | style: 'background-color: #e6e6e6; color: #333333;',
808 | link: '/dwyl/time/labels/invalid',
809 | count: 0 },
810 | { name: 'investigate',
811 | style: 'background-color: #fbca04; color: #332900;',
812 | link: '/dwyl/time/labels/investigate',
813 | count: 18 },
814 | { name: 'MVP',
815 | style: 'background-color: #207de5; color: #fff;',
816 | link: '/dwyl/time/labels/MVP',
817 | count: 27 },
818 | { name: 'NiceToHave',
819 | style: 'background-color: #fbca04; color: #332900;',
820 | link: '/dwyl/time/labels/NiceToHave',
821 | count: 7 },
822 | { name: 'Post MVP',
823 | style: 'background-color: #fef2c0; color: #333026;',
824 | link: '/dwyl/time/labels/Post%20MVP',
825 | count: 24 },
826 | { name: 'question',
827 | style: 'background-color: #cc317c; color: #fff;',
828 | link: '/dwyl/time/labels/question',
829 | count: 25 },
830 | { name: 'UI',
831 | style: 'background-color: #bfdadc; color: #2c3233;',
832 | link: '/dwyl/time/labels/UI',
833 | count: 13 } ],
834 | url: 'https://github.com/dwyl/time/labels' }
835 | ```
836 |
837 | ### Issues > *Search* (*Bonus Feature*)
838 |
839 | A ***much*** more *effective* way of collating all the issues relevant to a person is to search for them!
840 |
841 | example:
842 | https://github.com/search?type=Issues&q=author%3Aiteles&state=open&o=desc&s=created
843 |
844 | ```js
845 | {
846 | entries:
847 | [
848 | { title: 'Remove flexbox from CSS',
849 | url: '/dwyl/dwyl.github.io/issues/29',
850 | desc: 'To ensure the site works across all devices, particularly Kindle/e-readers.',
851 | author: 'iteles',
852 | created: '2015-07-25T22:57:20Z',
853 | comments: 2 },
854 | { title: 'CSS | Add indentation back into main.css (disappeared from master)',
855 | url: '/dwyl/tudo/issues/77',
856 | desc: 'All indentation has been removed from main.css in the latest commit. \n\nThis needs to be put back in as originally written by @msmichellegar and @iteles.',
857 | author: 'iteles',
858 | created: '2015-07-25T16:27:59Z' },
859 | { title: 'CSS | Investigate styling of issue label colours',
860 | url: '/dwyl/tudo/issues/72',
861 | desc: 'Labels can be given any colour so there is no predictable set that we can code into the CSS file.\n\nWe need to investigate what the best way to ensure we can provide the right colour of background to the ...',
862 | author: 'iteles',
863 | created: '2015-07-23T17:49:02Z',
864 | comments: 4 }
865 | ],
866 | next: '/search?o=desc&p=2&q=author%3Aiteles&s=created&state=open&type=Issues'
867 | }
868 | ```
869 |
870 |
871 | #### Owner
872 |
873 | For the issues created across all their *personal* repositories
874 | use a search query of the form:
875 | ```sh
876 | https://github.com/search?q=user%3A{username|org}
877 | &state={state}
878 | &type=Issues&s={relevance}
879 | &o={order}
880 | ```
881 | e.g:
882 | https://github.com/search?q=user%3Aiteles&state=open&type=Issues&s=updated&o=asc
883 |
884 | #### Author (_created by_)
885 |
886 | Or to find ***all*** the issues where the person is the ***author***
887 | use a query of the following format:
888 |
889 | ```sh
890 | https://github.com/search?q=author%3A{username|org}
891 | &state={state}
892 | &type=Issues&s={relevance}
893 | &o={order}
894 | ```
895 |
896 | #### Assignee (_issues assigned to this person_)
897 |
898 | Or to find ***all*** the issues *assigned* to the person use a query of the following format:
899 |
900 | ```sh
901 | https://github.com/search?q=assignee%3A{username|org}
902 | &state={state}
903 | &type=Issues&s={relevance}
904 | &o={order}
905 | &s={filter}
906 | ```
907 |
908 | #### Mentions
909 |
910 | We can use a ***mentions*** (search) query to discover all the
911 | issues where a given person (_username_) was mentioned:
912 |
913 | ```sh
914 | https://github.com/search?q=mentions%3A{username}&type=Issues&state={state}
915 | ```
916 |
917 | e.g: https://github.com/search?q=mentions%3Aiteles&type=Issues&state=open
918 |
919 | This _could_ be more than the issues in the person's (_own_) repos *or* the repos the person has access to (_via org_). e.g:
920 | if [_Sally_](http://www.imdb.com/title/tt1483013/quotes?item=qt1905812)
921 | asks a clarifying question on a project she has not yet contributed to,
922 | the issue will not appear when we crawl the repos on her profile or orgs she has access to ...
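
All four of these query types share the same shape,
so a small helper can build them
(_a sketch; `issueSearchUrl` is our name, not part of the module_):

```js
// build a GitHub issues-search path from the parts shown above:
function issueSearchUrl (qualifier, username, state, sort, order) {
  return '/search?q=' + encodeURIComponent(qualifier + ':' + username) +
    '&type=Issues' +
    (state ? '&state=' + state : '') +
    (sort ? '&s=' + sort : '') +
    (order ? '&o=' + order : '');
}

issueSearchUrl('author', 'iteles', 'open', 'created', 'desc');
// => '/search?q=author%3Aiteles&type=Issues&state=open&s=created&o=desc'
```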
923 |
924 | #### Issues Filters
925 |
926 | There are *many* filters we can use to find issues; here are a few:
927 |
928 | + **created** https://github.com/search?q=author%3Aiteles&s=created&type=Issues&o=desc&state=open
929 | + **updated**: https://github.com/search?q=author%3Aiteles&s=updated&type=Issues&o=desc&state=open
930 | + **date range**: https://github.com/dwyl/time/issues?q=is%3Aissue+is%3Aopen+updated%3A%3C2015-06-28
931 |
932 | ##### Further Reading on Searching+Filters
933 |
934 | For *way* more details on searching & filters see:
935 |
936 | + https://help.github.com/articles/searching-issues/
937 | + https://help.github.com/articles/searching-github/#types-of-searches
938 | + https://help.github.com/articles/search-syntax/
939 |
940 |
941 |
942 |
943 | ## Want More Examples?
944 |
945 | If you want ***even more*** examples of the pages you can scrape,
946 | take a look at our end-to-end tests where we *test* all the scrapers!
947 |
948 |
949 |
950 | ## Future Features / Road Map ?
951 |
952 |
953 | ### Crawl the List of commits
954 |
955 | Would it be interesting to see/track:
956 | + **who** makes the most commits to the project
957 | + **when** (***what time*** of day/night) people do their work
958 | + **what** did the person contribute? (docs, code improvement, tests, typo, dependency update?)
959 |
960 | Show your interest in this feature: https://github.com/nelsonic/github-scraper/issues/17
961 |
962 |
963 |
964 | # Contributing?
965 |
966 | Contributions are _always_ welcome!
967 | We have a backlog of features (_many pages we want to parse_)
968 | please see: https://github.com/nelsonic/github-scraper/issues
969 | If anything interests you, please leave a comment on the issue.
970 |
971 | Your first step to _contributing_ to this project
972 | is to run it on your **`localhost`**.
973 |
974 | ### 1. Clone the Repository
975 |
976 | In your terminal, clone the repository from GitHub:
977 |
978 | ```sh
979 | git clone https://github.com/nelsonic/github-scraper.git && cd github-scraper
980 | ```
981 |
982 | ### 2. Install the Dependencies
983 |
984 | Ensure you have Node.js installed (see: https://nodejs.org).
985 | Then run the following command to install the project dependencies:
986 |
987 | ```sh
988 | npm install
989 | ```
990 |
991 | You should see output in your terminal similar to the following:
992 |
993 | ```
994 | added 162 packages from 177 contributors and audited 265 packages in 4.121s
995 | ```
996 |
997 | That tells you that the dependencies were successfully installed.
998 |
999 |
1000 | ### 3. Run the Tests
1001 |
1002 | In your terminal execute the following command:
1003 |
1004 | ```sh
1005 | npm test
1006 | ```
1007 |
1008 |
1009 | You should see output similar to the following:
1010 |
1011 | ```
1012 | > github-scraper@6.7.1 test /Users/n/code/github-scraper
1013 | > istanbul cover ./node_modules/tape/bin/tape ./test/*.js | node_modules/tap-spec/bin/cmd.js
1014 |
1015 |
1016 | read list of followers for @jupiter (single page of followers)
1017 |
1018 | - - - GitHub Scraper >> /jupiter/followers >> followers - - -
1019 | ✔ jupiter/followers data.type: followers
1020 | ✔ @jupiter/followers has 34 followers
1021 | ✔ Nelson in jupiter/followers
1022 | ✔ @jupiter/followers only has 1 page of followers
1023 |
1024 | read list of followers for @iteles (multi-page)
1025 |
1026 | - - - GitHub Scraper >> /iteles/followers >> followers - - -
1027 | ✔ "followers": 51 on page 1
1028 | ✔ iteles/followers multi-page followers
1029 |
1030 |
1031 | ... etc ...
1032 |
1033 | =============================================================================
1034 | Writing coverage object [/Users/n/code/github-scraper/coverage/coverage.json]
1035 | Writing coverage reports at [/Users/n/code/github-scraper/coverage]
1036 | =============================================================================
1037 | =============================== Coverage summary ===============================
1038 | Statements : 100% ( 192/192 )
1039 | Branches : 100% ( 63/63 )
1040 | Functions : 100% ( 22/22 )
1041 | Lines : 100% ( 192/192 )
1042 | ================================================================================
1043 |
1044 |
1045 | total: 102
1046 | passing: 102
1047 | duration: 31.6s
1048 | ```
1049 |
1050 | The tests take around 30 seconds to run on _my_ `localhost`,
1051 | but your test execution time will vary depending on your location
1052 | (_the further you are from GitHub's servers the slower the tests will run..._).
1053 |
1054 | Don't panic if you see some red in your terminal while the tests are running.
1055 | We have to simulate failure `404` and `403` errors
1056 | to ensure that we can handle them.
1057 | Pages sometimes disappear,
1058 | e.g: a user leaves GitHub or deletes a project,
1059 | and our script needs to not freak out when that happens.
1060 | This is good practice in DOM parsing; the web changes a _lot_!
1061 |
1062 | When the tests _pass_ on your `localhost`,
1063 | you know everything is working as expected.
1064 | Time to move on to the fun bit!
1065 |
1066 | > **Note**: This project follows Test Driven Development (TDD)
1067 | because it's the only way we can maintain our sanity ...
1068 | If we didn't have tests it would be _chaos_
1069 | and _everything_ would "break" all the time.
1070 | If you are contributing to the project,
1071 | please be aware that tests are required
1072 | and any Pull Requests without tests will not be considered.
1073 | (_please don't take it personally, it's just a rule we have_).
1074 |
1075 | If you are new to TDD, please see:
1076 | [github.com/dwyl/**learn-tdd**](https://github.com/dwyl/learn-tdd)
1077 |
1078 |
1079 |
1080 | ### 4. Pick an Issue and Write Some Code!
1081 |
1082 | Once you have the project running on your `localhost`,
1083 | it's time to pick a page to parse!
1084 |
1085 | There are a bunch of features in the backlog. See:
1086 | https://github.com/nelsonic/github-scraper/issues
1087 |
1088 | Pick one that interests you
1089 | and write a comment on it
1090 | to _show_ your interest in contributing.
1091 |
1092 |
1093 | ### Continuous Integration?
1094 |
1095 | We use GitHub Actions for Continuous Integration (CI)
1096 | to ensure that our code works and all tests _pass_
1097 | whenever a change is made to the code (see: `.github/workflows/ci.yml`).
1098 | This is _essential_ in _any_ project and even more so in a DOM parsing one.
1099 |
1100 | If you are new to Continuous Integration, please see:
1101 | [github.com/dwyl/**learn-travis**](https://github.com/dwyl/learn-travis)
1102 |
1103 | ### Pre-Commit Hook?
1104 |
1105 | When you attempt to commit code on your `localhost`,
1106 | the tests will run **`before`** your commit registers.
1107 | This is a precaution to ensure that the code we write is _always tested_.
1108 | There is no point writing code that is not being tested
1109 | as it will "break" almost immediately and be unmaintainable.
1110 |
1111 | Simply wait a few seconds for the tests to pass
1112 | and then push your work to GitHub.
1113 |
1114 | If you are new to pre-commit hooks, please see:
1115 | [github.com/dwyl/**learn-pre-commit**](https://github.com/dwyl/learn-pre-commit)
1116 |
1117 |
1118 |
1119 |
1120 | ## tl;dr
1121 |
1122 | If you are the kind of person that likes to *understand* how something works,
1123 | this is *your* section.
1124 |
1125 | ### Inferring Which Scraper to use from the URL
1126 |
1127 | `lib/switcher.js` handles the inference.
1128 | We wanted to use a `switch > case` construct but ended up using `if/else`
1129 | because there are two types of checks we need to do, so `if/else` seemed simpler.
1130 |
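In outline, the inference looks something like this
(_a simplified sketch, **not** the actual `lib/switcher.js` code_):

```js
// simplified sketch of the two kinds of checks:
function inferScraper (url) {
  // 1. checks we can make on the url alone:
  if (url.indexOf('/followers') !== -1) { return 'followers'; }
  if (url.indexOf('/issues') !== -1) { return 'issues'; }
  if (url.indexOf('?tab=repositories') !== -1) { return 'repos'; }
  // 2. a profile and an org share the same url format,
  // so that check has to happen against the fetched page itself:
  return 'profile-or-org';
}
```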
1131 |
1132 | ## Interesting Facts
1133 |
1134 | + GitHub has 10.3 Million users (_at last count_),
1135 | yet the most followed person [Linus Torvalds](https://github.com/torvalds)
1136 | "_only_" has **28k followers** (_so it's a **highly distributed network**_)
1137 | + https://www.githubarchive.org/ attempts to archive all of GitHub
1138 | + http://octoboard.com/ shows stats for the past 24h
1139 |
1140 |
1141 | ## Research
1142 |
1143 | > Must read up about http://en.wikipedia.org/wiki/Inverted_index
1144 | > so I understand how to use: https://www.npmjs.org/package/level-inverted-index
1145 |
1146 | - GitHub stats (node module): https://github.com/apiengine/ghstats
1147 | (no tests or recent work/activity, but interesting functionality)
1148 |
1149 | - Hard Drive reliability stats:
1150 | https://www.backblaze.com/blog/hard-drive-reliability-update-september-2014
1151 | (useful when selecting which drives to use in the storage array -
1152 | Clear Winner is Hitachi 3TB)
1153 | - RAID explained in layman's terms:
1154 | http://uk.pcmag.com/storage-devices-reviews/7917/feature/raid-levels-explained
1155 | - RAID Calculator:
1156 | https://www.synology.com/en-global/support/RAID_calculator
1157 | (if you don't already know how much space you get)
1158 | - SQLite limits: https://www.sqlite.org/limits.html
1159 |
1160 | ## Useful Links
1161 |
1162 | - Summary of ***Most Active*** GitHub users: http://git.io/top
1163 | - Intro to web-scraping with cheerio:
1164 | https://www.digitalocean.com/community/tutorials/how-to-use-node-js-request-and-cheerio-to-set-up-simple-web-scraping
1165 | - GitHub background info: http://en.wikipedia.org/wiki/GitHub
1166 | + GitHub Event Types:
1167 | https://developer.github.com/v3/activity/events/types/
1168 |
1169 | ### GitHub Stats API
1170 |
1171 | - Github Stats API: https://developer.github.com/v3/repos/statistics/
1172 | - GitHub Followers API: https://developer.github.com/v3/users/followers/
1173 |
1174 | Example:
1175 |
1176 | ```sh
1177 | curl -v https://api.github.com/users/pgte/followers
1178 | ```
1179 |
1180 | ```js
1181 | [
1182 | {
1183 | "login": "methodmissing",
1184 | "id": 379,
1185 | "avatar_url": "https://avatars.githubusercontent.com/u/379?v=2",
1186 | "gravatar_id": "",
1187 | "url": "https://api.github.com/users/methodmissing",
1188 | "html_url": "https://github.com/methodmissing",
1189 | "followers_url": "https://api.github.com/users/methodmissing/followers",
1190 | "following_url": "https://api.github.com/users/methodmissing/following{/other_user}",
1191 | "gists_url": "https://api.github.com/users/methodmissing/gists{/gist_id}",
1192 | "starred_url": "https://api.github.com/users/methodmissing/starred{/owner}{/repo}",
1193 | "subscriptions_url": "https://api.github.com/users/methodmissing/subscriptions",
1194 | "organizations_url": "https://api.github.com/users/methodmissing/orgs",
1195 | "repos_url": "https://api.github.com/users/methodmissing/repos",
1196 | "events_url": "https://api.github.com/users/methodmissing/events{/privacy}",
1197 | "received_events_url": "https://api.github.com/users/methodmissing/received_events",
1198 | "type": "User",
1199 | "site_admin": false
1200 | },
1201 |
1202 | etc...]
1203 | ```
1204 |
1205 | #### Issues with using the GitHub API:
1206 |
1207 | - The API only returns 30 results per query.
1208 | - **X-RateLimit-Limit**: **60** (we can only make 60 requests per hour) ...
1209 | 1440 queries per day (60 per hour x 24 hours) sounds *ample* on the surface.
1210 | But if we assume the average person has at least 2 pages worth of followers (more than 30),
1211 | it means on a single instance/server we can only track 720 people.
1212 | Not really enough to do any sort of trend analysis. :disappointed:
1213 | And if we are tracking people with hundreds of followers (and *growing fast*),
1214 | e.g. 300+ followers, the number of users we can track comes down to
1215 | 1440 / 10 = 144 people
1216 | (10 requests to fetch the complete list of followers); we burn through 1440 requests
1217 | pretty quickly.
1218 | - There's no guarantee which order the followers will be in
1219 | (e.g. most recent first?)
1220 | - **Results** are ***Cached*** so they are not real-time like they are on the
1221 | web. (Seems daft, but it's true.) Ideally they would have a ***Streaming API***,
1222 | but sadly, [GitHub is built in Ruby-on-Rails](http://builtwith.com/github.com)
1223 | which is "**RESTful**" (***not real-time***).
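
The arithmetic behind those numbers, as a quick sketch (_variable names are ours_):

```js
// back-of-envelope: how many people can one unauthenticated
// instance track per day at 60 requests per hour?
var requestsPerDay = 60 * 24;                    // 1440
var pagesPerUser = 2;                            // ~2 pages of followers each
console.log(requestsPerDay / pagesPerUser);      // 720 people
var pagesPerHeavyUser = 10;                      // 300+ followers => ~10 pages
console.log(requestsPerDay / pagesPerHeavyUser); // 144 people
```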
1224 |
1225 | #### *But*...
1226 |
1227 | Once we know _who_ we *should* be following, we can use
1228 |
1229 | - https://developer.github.com/v3/users/followers/#follow-a-user
1230 | - https://developer.github.com/v3/users/followers/#check-if-one-user-follows-another
1231 |
1232 | e.g:
1233 | ```sh
1234 | curl -v https://api.github.com/users/pgte/following/visionmedia
1235 | ```
1236 |
1237 |
1238 |
1239 | # FAQ?
1240 |
1241 | ## Is *Crawling* a Website *Legal*...?
1242 |
1243 | The fact that scraping or "crawling" is Google's Business Model suggests that scraping is at least "OK" ...
1244 |
1245 | I started typing this question into Google and saw:
1246 |
1247 |
1248 | I read a few articles and was not able to locate a definitive answer ...
1249 |
1250 | + Legal Issues: https://en.wikipedia.org/wiki/Web_scraping#Legal_issues
1251 | + It depends: http://resources.distilnetworks.com/h/i/53822104-is-web-scraping-illegal-depends-on-what-the-meaning-of-the-word-is-is/181642
1252 | + Screen scraping: How to profit from your rival's data:
1253 | http://www.bbc.com/news/technology-23988890
1254 | + Web Scraping For Fun and Profit: https://blog.hartleybrody.com/web-scraping/
1255 |
--------------------------------------------------------------------------------
/config/repos.js:
--------------------------------------------------------------------------------
1 |
2 | // CSS selectors used by the scrapers; if GitHub changes its UI, update these first.
3 | const SELECTORS = {
4 |   COMMIT: ".Box-header--blue strong",
5 |   LANGUAGES: ".BorderGrid--spacious .BorderGrid-row",
6 |   FORKED_FROM: 'a[data-hovercard-type="repository"]',
7 |   FOLLOWERS: '.Layout-main .d-table',
8 |   TOPIC_TAG: ".topic-tag",
9 |   PROFILE: 'div[itemtype="http://schema.org/Person"]'
10 | }
11 |
12 | module.exports = SELECTORS;
--------------------------------------------------------------------------------
/examples/.gitignore:
--------------------------------------------------------------------------------
1 | dwyl
2 | *.json
3 |
--------------------------------------------------------------------------------
/examples/data/___next_page.txt:
--------------------------------------------------------------------------------
1 | /dwyl?page=2
2 | https://github.com/dwyl/aws-lambda-deploy/stargazers?after=Y3Vyc29yOnYyOpO0MjAxNi0wOC0yOVQwNDo0ODozNloAzgP5isg%3D
3 | https://github.com/dwyl/learn-nightwatch/stargazers?after=Y3Vyc29yOnYyOpO0MjAxOS0wOC0yMFQxNjoyOTowMFoAzgrqe7Q%3D
4 | https://github.com/dwyl/english-words/stargazers?after=Y3Vyc29yOnYyOpO0MjAyMC0wMy0yOFQyMToyOTozOVoAzgymXrc%3D
5 | https://github.com/dwyl/learn-elm/stargazers?after=Y3Vyc29yOnYyOpO0MjAyMC0wMS0yOFQxMDozMzozOVoAzgwqTrg%3D
6 | /dwyl/learn-to-send-email-via-google-script-html-no-server/watchers?page=2
7 | https://github.com/dwyl/phoenix-liveview-counter-tutorial/stargazers?after=Y3Vyc29yOnYyOpO0MjAxOS0wNi0wN1QyMDo1Mjo1MFoAzgpVseM%3D
8 | https://github.com/dwyl/learn-aws-lambda/stargazers?after=Y3Vyc29yOnYyOpO0MjAxOS0xMi0wOFQxMTo1MTo0OVoAzgvJ_MM%3D
9 | https://github.com/dwyl/aws-sdk-mock/stargazers?after=Y3Vyc29yOnYyOpO0MjAyMC0wMy0wMlQwOTo1NTozMVoAzgxtXqE%3D
10 | /dwyl/learn-elm/watchers?page=2
11 | https://github.com/dwyl/hapi-auth-jwt2/stargazers?after=Y3Vyc29yOnYyOpO0MjAxOS0xMS0yMFQxMjowNDo0NVoAzgulaBM%3D
12 | https://github.com/dwyl/phoenix-ecto-encryption-example/stargazers?after=Y3Vyc29yOnYyOpO0MjAxOS0xMC0xNlQxMTo0MDoyNFoAzgtdNa8%3D
13 | https://github.com/dwyl/learn-to-send-email-via-google-script-html-no-server/stargazers?after=Y3Vyc29yOnYyOpO0MjAyMC0wMy0xNVQwMDoxNzo0MVoAzgyIV5U%3D
14 | https://github.com/dwyl/learn-phoenix-framework/stargazers?after=Y3Vyc29yOnYyOpO0MjAxOS0xMS0yMlQwMzowMDo0MloAzgupDGk%3D
15 | https://github.com/dwyl/phoenix-chat-example/stargazers?after=Y3Vyc29yOnYyOpO0MjAyMC0wMS0wM1QwMzozMTowMloAzgv6dNQ%3D
16 | /dwyl/english-words/watchers?page=2
17 | /dwyl/learn-aws-lambda/watchers?page=2
18 | /dwyl/learn-nightwatch/watchers?page=2
19 |
--------------------------------------------------------------------------------
/examples/get_profile.js:
--------------------------------------------------------------------------------
1 | const fs = require("fs")
2 | const gs = require("../lib/switcher");
3 | const url = "andrew" // "iteles" // a random username
4 | gs(url, function(err, data) {
5 |
6 | fs.writeFileSync(__dirname + "/" + url + ".json", JSON.stringify(data, null, 2))
7 | console.log(data); // or what ever you want to do with the data
8 | })
9 |
--------------------------------------------------------------------------------
/examples/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
89 |
90 |