├── __init__.py
├── templates
├── robots.txt
├── feed.xml
├── index.html
├── base.html
├── summary_compare.html
└── summary.html
├── requirements.txt
├── luigi.cfg
├── schema.sql
├── queue_tasks.py
├── dnflow.cfg.template
├── static
├── css
│ └── style.css
└── js
│ └── index.js
├── .gitignore
├── LICENSE
├── json2csv.py
├── README.md
├── ui.py
└── summarize.py
/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/templates/robots.txt:
--------------------------------------------------------------------------------
1 | User-agent: *
2 | Disallow: /
3 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | Flask
2 | flask-oauthlib
3 | imagehash
4 | Jinja2
5 | luigi
6 | networkx
7 | pandas
8 | redis
9 | rq
10 | sqlalchemy
11 | twarc
12 | tweepy
13 |
--------------------------------------------------------------------------------
/luigi.cfg:
--------------------------------------------------------------------------------
1 | [core]
2 | parallel-scheduling = True
3 |
4 | [scheduler]
5 | record_task_history = True
6 | state_path = luigi-state.pickle
7 |
8 | [task_history]
9 | db_connection = sqlite:///history.sqlite.db
10 |
--------------------------------------------------------------------------------
/schema.sql:
--------------------------------------------------------------------------------
1 | DROP TABLE IF EXISTS searches;
2 | CREATE TABLE searches (
3 | id INTEGER PRIMARY KEY AUTOINCREMENT,
4 | text TEXT NOT NULL,
5 | date_path TEXT NOT NULL,
6 | user TEXT NOT NULL,
7 | status TEXT,
8 | created DATETIME DEFAULT CURRENT_TIMESTAMP,
9 | published DATETIME
10 | );
11 |
--------------------------------------------------------------------------------
/queue_tasks.py:
--------------------------------------------------------------------------------
1 | import subprocess
2 |
3 |
4 | def run_flow(text, job_id, count, token, secret):  # Launch the luigi `RunFlow` task from the `summarize` module in a child process.
5 | subprocess.run([  # argv is passed as a list; the result is discarded, so the exit status is not checked and nothing is returned
6 | 'python',
7 | '-m',  # run luigi as a module: `python -m luigi`
8 | 'luigi',
9 | '--module',
10 | 'summarize',
11 | 'RunFlow',
12 | '--term',
13 | text,  # passed through as-is (not wrapped in str() like the other values)
14 | '--jobid',
15 | str(job_id),  # remaining values are converted with str() before being placed in argv
16 | '--count',
17 | str(count),
18 | '--token',
19 | str(token),
20 | '--secret',
21 | str(secret)
22 | ])
23 |
--------------------------------------------------------------------------------
/dnflow.cfg.template:
--------------------------------------------------------------------------------
1 | HOSTNAME = 'localhost'
2 | DEBUG = True
3 | DATABASE = 'db.sqlite3'
4 | SECRET_KEY = 'a super secret key'
5 | STATIC_URL_PATH = '/static'
6 | DATA_DIR = 'data'
7 | REDIS_HOST = 'localhost'
8 | REDIS_PORT = 6379
9 | REDIS_DB = 4
10 | TWITTER_CONSUMER_KEY = 'YOUR_TWITTER_CONSUMER_KEY_HERE'
11 | TWITTER_CONSUMER_SECRET = 'YOUR_TWITTER_CONSUMER_SECRET_HERE'
12 | MAX_TIMEOUT = 24 * 60 * 60
13 |
14 | # set the following two variables to non-empty values to add
15 | # basic auth for PUT updates on /job
16 | HTTP_BASICAUTH_USER = ''
17 | HTTP_BASICAUTH_PASS = ''
18 |
--------------------------------------------------------------------------------
/templates/feed.xml:
--------------------------------------------------------------------------------
1 |
2 |
12 | This is a design prototype for the Documenting the Now project. Please understand that data created here can disappear at any time. By using this application you are agreeing to our code of conduct. If you have any questions please let us know what you think in our Slack channel, or by emailing info@docnow.io 13 |
14 | 15 |