├── .dockerignore
├── .gitignore
├── CODE_OF_CONDUCT.md
├── Dockerfile-full
├── Dockerfile-lite
├── HOW-TO-DOWNLOAD-MANY-TWEETS.md
├── LICENSE
├── README.md
├── bin
├── build.sh
├── devel.sh
├── pull.sh
├── push.sh
├── run.sh
├── splunk-start.sh
└── splunk-stop.sh
├── entrypoint.sh
├── img
├── splunk-twint-dmuth-sentiment.png
├── splunk-twint-dmuth.png
├── splunk-twint-septa-social-tag-cloud.png
└── splunk-twint-septa-social.png
├── python-scripts
├── get-user-following.py
├── get-user-info.py
├── get-user-tweets.py
└── hello.py
├── splunk-app
├── appserver
│ └── static
│ │ └── dashboard.css
├── default
│ ├── app.conf
│ ├── data
│ │ ├── models
│ │ │ └── tweets.json
│ │ └── ui
│ │ │ ├── nav
│ │ │ └── default.xml
│ │ │ └── views
│ │ │ ├── search_user_tweets.xml
│ │ │ ├── sentiment.xml
│ │ │ ├── tag_cloud.xml
│ │ │ ├── tweet_breakdown_data_model.xml
│ │ │ ├── user_twitter_report.xml
│ │ │ └── user_twitter_report_base_search.xml
│ ├── datamodels.conf
│ └── props.conf
├── local
├── metadata
│ └── local.meta
└── static
│ ├── appIcon.png
│ ├── appIconAlt.png
│ ├── appIconAlt_2x.png
│ ├── appIcon_2x.png
│ └── twitter-icon.png
├── twint
├── twint-geo
├── twint-user
├── twint-user-by-year
└── user-prefs.conf
/.dockerignore:
--------------------------------------------------------------------------------
1 |
2 | # Logs
3 | logs/
4 |
5 | # Splunk data
6 | splunk-data/
7 |
8 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 |
2 | # Steve Jobs
3 | .DS_Store
4 |
5 | # Vim
6 | *.swp
7 | *~
8 |
9 | # Stuff that we download
10 | logs/
11 |
12 | # Resume files
13 | resume-*
14 |
15 | # Splunk Data
16 | splunk-data/
17 |
18 |
--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 | # Contributor Covenant Code of Conduct
2 |
3 | ## Our Pledge
4 |
5 | In the interest of fostering an open and welcoming environment, we as
6 | contributors and maintainers pledge to making participation in our project and
7 | our community a harassment-free experience for everyone, regardless of age, body
8 | size, disability, ethnicity, sex characteristics, gender identity and expression,
9 | level of experience, education, socio-economic status, nationality, personal
10 | appearance, race, religion, or sexual identity and orientation.
11 |
12 | ## Our Standards
13 |
14 | Examples of behavior that contributes to creating a positive environment
15 | include:
16 |
17 | * Using welcoming and inclusive language
18 | * Being respectful of differing viewpoints and experiences
19 | * Gracefully accepting constructive criticism
20 | * Focusing on what is best for the community
21 | * Showing empathy towards other community members
22 |
23 | Examples of unacceptable behavior by participants include:
24 |
25 | * The use of sexualized language or imagery and unwelcome sexual attention or
26 | advances
27 | * Trolling, insulting/derogatory comments, and personal or political attacks
28 | * Public or private harassment
29 | * Publishing others' private information, such as a physical or electronic
30 | address, without explicit permission
31 | * Other conduct which could reasonably be considered inappropriate in a
32 | professional setting
33 |
34 | ## Our Responsibilities
35 |
36 | Project maintainers are responsible for clarifying the standards of acceptable
37 | behavior and are expected to take appropriate and fair corrective action in
38 | response to any instances of unacceptable behavior.
39 |
40 | Project maintainers have the right and responsibility to remove, edit, or
41 | reject comments, commits, code, wiki edits, issues, and other contributions
42 | that are not aligned to this Code of Conduct, or to ban temporarily or
43 | permanently any contributor for other behaviors that they deem inappropriate,
44 | threatening, offensive, or harmful.
45 |
46 | ## Scope
47 |
48 | This Code of Conduct applies both within project spaces and in public spaces
49 | when an individual is representing the project or its community. Examples of
50 | representing a project or community include using an official project e-mail
51 | address, posting via an official social media account, or acting as an appointed
52 | representative at an online or offline event. Representation of a project may be
53 | further defined and clarified by project maintainers.
54 |
55 | ## Enforcement
56 |
57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be
58 | reported by contacting the project team at dmuth AT dmuth DOT org. All
59 | complaints will be reviewed and investigated and will result in a response that
60 | is deemed necessary and appropriate to the circumstances. The project team is
61 | obligated to maintain confidentiality with regard to the reporter of an incident.
62 | Further details of specific enforcement policies may be posted separately.
63 |
64 | Project maintainers who do not follow or enforce the Code of Conduct in good
65 | faith may face temporary or permanent repercussions as determined by other
66 | members of the project's leadership.
67 |
68 | ## Attribution
69 |
70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
71 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html
72 |
73 | [homepage]: https://www.contributor-covenant.org
74 |
75 | For answers to common questions about this code of conduct, see
76 | https://www.contributor-covenant.org/faq
77 |
--------------------------------------------------------------------------------
/Dockerfile-full:
--------------------------------------------------------------------------------
1 | # Full Twint image: installs the stock `twint` package with pip3,
2 | # plus SQLAlchemy, and runs everything through /entrypoint.sh.
3 | FROM alpine
4 |
5 | RUN apk add python3 bash git gcc g++ python3-dev libffi-dev \
6 | && pip3 install --upgrade pip
7 |
8 | RUN pip3 install twint
9 |
10 | #
11 | # Install SQLAlchemy for running any Python scripts within the container
12 | #
13 | RUN pip3 install sqlalchemy
14 |
15 | COPY entrypoint.sh /
16 | ENTRYPOINT ["/entrypoint.sh"]
17 |
18 |
--------------------------------------------------------------------------------
/Dockerfile-lite:
--------------------------------------------------------------------------------
1 | # Lite Twint image: Twint is cloned from GitHub, patched with sed to drop its
2 | # Pandas references and to read its timeout from TWINT_TIMEOUT, then installed.
3 | FROM alpine
4 |
5 | RUN apk add python3 bash git gcc g++ python3-dev libffi-dev \
6 | && pip3 install --upgrade pip
7 |
8 | RUN \
9 | #
10 | # Clone the Twint source code so that we can modify it to
11 | # remove references to Pandas.
12 | #
13 | git clone https://github.com/twintproject/twint.git \
14 | #
15 | # Now remove references to Pandas.
16 | # This may break some functionality, but it also reduces install time on
17 | # my 4-core i7 from like 15 minutes to more like 30 seconds, and since
18 | # my usecases don't currently touch Pandas, that's a win.
19 | #
20 | && sed -i -e "s/'pandas', //" /twint/setup.py \
21 | && sed -i -e "s/Pandas_au = True/Pandas_au = False/" /twint/twint/config.py \
22 | && sed -i -e "s/import pandas as pd/pd = None/" /twint/twint/storage/panda.py \
23 | #
24 | # Allow the user to adjust the timeout with the TWINT_TIMEOUT parameter, as
25 | # having 120 seconds by default sometimes causes performance issues, and I'd
26 | # rather the user make their own decisions about when they should have timeouts.
27 | #
28 | # NOTE(review): the double quotes around 120 below are not escaped, so they
29 | # close and reopen the shell string; the default presumably reaches get.py as
30 | # the bare integer 120, which int() also accepts -- confirm this is intended.
31 | #
32 | && sed -i -e "s/with timeout(120):/\
33 | import os\n\
34 | my_timeout = int(os.environ.get('TWINT_TIMEOUT', "120"))\n\
35 | logme.warn(\"Timeout: {} secs\".format(my_timeout))\n\
36 | with timeout(my_timeout):\n\
37 | /g" /twint/twint/get.py \
38 | #
39 | # Now install Twint.
40 | #
41 | && pip3 install -e /twint
42 |
43 |
44 | #
45 | # Install SQLAlchemy for running any Python scripts within the container
46 | #
47 | RUN pip3 install sqlalchemy
48 |
49 | COPY entrypoint.sh /
50 | ENTRYPOINT ["/entrypoint.sh"]
51 |
52 |
53 |
--------------------------------------------------------------------------------
/HOW-TO-DOWNLOAD-MANY-TWEETS.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | # How To Download Many Tweets
4 |
5 | This tool is fine for occasional one-off use, but because Twint is CPU-intensive,
6 | if you want to do extended crawling of a user's Twitter timeline or even
7 | crawl multiple timelines at the same time, I recommend spinning up a virtual
8 | machine and running Twint there. This document will show you how.
9 |
10 |
11 | ## Prerequisites
12 |
13 | - An account on Digital Ocean
14 | - Installing the doctl app
15 | - Uploading an SSH public key to your Digital Ocean account.
16 |
17 |
18 | ## Creating a Droplet and Fetching Tweets
19 |
20 | - Create a droplet: `doctl compute droplet create twint --size s-3vcpu-1gb --image ubuntu-18-04-x64 --region nyc1 --ssh-keys $( doctl compute ssh-key list --no-header | head -n1 | awk '{print $3}' )`
21 | - As of this writing, the above command will create a Droplet with 3 CPUs running Ubuntu 18, and will cost $15/mo or 2.2 cents/hour.
22 | - Next, copy up your tmux configuration files. You are using tmux, right? I recommend running it on your droplet so that you can disconnect and let long-running instances of Twint continue to run:
23 | - `scp ~/.tmux* root@$(doctl compute droplet list twint --format PublicIPv4 --no-header):.`
24 | - Now, SSH in, install Docker, and clone this app:
25 | - `doctl compute ssh twint`
26 | - `apt-get update`
27 | - `apt-get install -y docker.io pv`
28 | - `docker run hello-world`
29 | - `git clone git@github.com:dmuth/twint-splunk.git`
30 | - `cd twint-splunk`
31 | - Back on your machine, if you have run Twint locally and wish to copy up your `logs/` directory, do so with:
32 | - `rsync -avz logs/ root@$(doctl compute droplet list twint --format PublicIPv4 --no-header):twint-splunk/logs`
33 | - On the Droplet again run `tmux` and then grab a user's tweets with this command:
34 | - `./twint-user-by-year USERNAME 2005 2019 | pv -l > /dev/null`
35 | - Back on your machine, you can download all tweets and destroy the Droplet when you're done:
36 | - `rsync -avz root@$(doctl compute droplet list twint --format PublicIPv4 --no-header):twint-splunk/logs/ logs/`
37 | - `doctl compute droplet delete twint -f`
38 | - Make a backup of the tweets: ` tar cfvz ~/Dropbox/tweets.tgz logs/`
39 | - Finally, start up Splunk with `SPLUNK_START_ARGS=--accept-license ./bin/splunk-start.sh` (add `--devel` for an interactive shell) and go to https://localhost:8000/
40 |
41 |
42 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 |
2 | MIT License
3 |
4 | Copyright (c) 2019 TWINT Project
5 |
6 | Permission is hereby granted, free of charge, to any person obtaining a copy
7 | of this software and associated documentation files (the "Software"), to deal
8 | in the Software without restriction, including without limitation the rights
9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | copies of the Software, and to permit persons to whom the Software is
11 | furnished to do so, subject to the following conditions:
12 |
13 | The above copyright notice and this permission notice shall be included in all
14 | copies or substantial portions of the Software.
15 |
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 | SOFTWARE.
23 |
24 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 | # Splunk Twint
3 |
4 | Depending how you count, this repo is a few different things:
5 |
6 | - A Dockerized version of Twint
7 | - A series of shell scripts which wrap the Dockerized version of Twint to allow user timelines to be downloaded with checkpoints.
8 | - A Dockerized version of Splunk to ingest downloaded tweets
9 | and search through Twitter timelines as well as display some dashboards.
10 |
11 | Screenshots:
12 |
13 |
14 |
15 |
16 |
17 |
18 | ## Quick and Dirty Usage
19 |
20 | You don't even need to clone the repo for this one:
21 |
22 | - `bash <(curl -s https://raw.githubusercontent.com/twintproject/twint-splunk/master/twint ) -u dmuth --year 2020 --since 2019-01-01`
23 | - All tweets I made from 2019.
24 |
25 | For these, you'll need to clone the repo with `git clone https://github.com/dmuth/splunk-twint.git` and then run these on the command line:
26 |
27 | - `./twint-user dmuth --year 2020 --since 2019-01-01`
28 | - Uses the `twint-user` helper script and does the same as above, except tweets will be written in JSON format to `logs/user/dmuth/` and a resume file will automatically be used:
29 | - When the command completes a file with the suffix `.done` will be written next to the log so that if the command is re-run with the same parameters, the download will be skipped.
30 | - `./twint-user-by-year dmuth 2010 2014 -o tweets.csv`
31 | - Download 5 years of tweets and write them to `logs/user/dmuth/` in JSON format.
32 | - If interrupted, downloads will resume where they left off and previous years will not be re-downloaded.
33 | - `./twint-geo 40.4442902 -79.9948067 1mi anthrocon-2019 --since 2019-07-01 --until 2019-07-10`
34 | - Download tweets within a mile of Anthrocon 2019 and write them to the file `logs/location/anthrocon-2019.json`.
35 | - If interrupted, downloads will resume where they left off and previous runs will not be re-downloaded.
36 |
37 |
38 | ### Advanced Usage
39 |
40 | If you want to download multiple Twitter timelines or a very busy user's timeline,
41 | please check out more detailed instructions in HOW-TO-DOWNLOAD-MANY-TWEETS.md.
42 |
43 |
44 | #### "Twint-lite"
45 |
46 | In a few places, the term "twint-lite" is referenced. If you look in
47 | `Dockerfile-lite` around line 20, you'll see that I wrote
48 | a series of `sed` commands to remove references to the `pandas` module. I have no issues
49 | with that module per se, but its presence was causing builds to take upwards of 10-15 minutes
50 | on my machine. Removing references to it sped up builds of the Docker image to be around 30 seconds,
51 | without breaking the core functionality of pulling Twitter timelines.
52 |
53 | Or, to paraphrase Adam Savage, I rejected the reality and substituted my own. :-)
54 |
55 |
56 | #### Running Python scripts in Splunk
57 |
58 | The Twint CLI is just a wrapper for the Twint Python module. The module can be
59 | accessed directly from Python scripts. The syntax for running a Python script is as follows:
60 |
61 | `twint --run-python-script /path/to/python-script [args for Python script]`
62 |
63 | The following directories will be exported to the Docker container:
64 |
65 | - `/python-scripts/` - Where the script resides. Make sure all required libraries are local to that script.
66 | - `/mnt/` - The directory the `twint` wrapper is executed from.
67 |
68 | If the Python scripts write files, they should do so to either of those two directories.
69 |
70 | Example scripts that you can run with this app:
71 |
72 | - `./twint --run-python-script ./python-scripts/hello.py` - Hello World!
73 | - `./twint --run-python-script ./python-scripts/get-user-info.py dmuth` - Get user info for `dmuth`
74 | - `./twint --run-python-script ./python-scripts/get-user-tweets.py dmuth` - Get 20 recent tweets for `dmuth`
75 | - `TWINT_TIMEOUT=10 ./twint --run-python-script ./python-scripts/get-user-following.py dmuth`
76 | - Get 20 users `dmuth` is following and set a timeout of 10 seconds for each fetch. This does not mean 10 seconds for the entire script, but rather a timeout for _any_ read of Twitter. This is helpful for when Twitter is slow and you'd rather your script time out sooner rather than later.
77 |
78 | Configuration options for the twint module can be found at https://github.com/twintproject/twint/wiki/Configuration
79 |
80 | The SQLAlchemy module is installed for advanced interaction with SQLite databases that are created via Twint.
81 |
82 |
83 | ## Data Analytics in Splunk
84 |
85 | Splunk is a fantastic data analytics platform which just happens to
86 | ship with a free tier! You can ingest up to 500 MB of data per day into Splunk free of charge.
87 | To that end, I wrote a Dockerized version of Splunk called
88 | Splunk Lab, which can be used to quickly spin up
89 | an instance of Splunk and ingest data.
90 |
91 | Here's how to get started:
92 | - `SPLUNK_START_ARGS=--accept-license ./bin/splunk-start.sh` - Start an instance of Splunk Lab in a Docker container (the script exits unless `SPLUNK_START_ARGS` is set to `--accept-license`). The welcome screen will display the default login and password, and they can be modified before continuing. Once Splunk Lab is started, go to https://localhost:8000/, log in, and there will be some dashboards readily available.
93 | - `./bin/splunk-stop.sh` - Kill the Splunk instance
94 |
95 | By default, data will be read from the directory `logs/`, so all tweets should be written under that directory.
96 |
97 |
98 | ## Development
99 |
100 | - Twint Docker Management:
101 | - `./bin/build.sh [ full ]` - Build Docker image. For all scripts where `full` is available, if it is specified as the first argument, the full (with Pandas) version will be built. Otherwise, the Lite version will be built.
102 | - `./bin/devel.sh [ full ]` - Build Docker image and spawn interactive shell.
103 | - `./bin/push.sh [ full ]` - Push Docker image to Docker Hub.
104 | - `./bin/pull.sh [ full ]` - Pull Docker image from Docker Hub.
105 | - `./bin/run.sh [ full ] args` - Run for production use. Additional args should be passed in on the command line.
106 | - Splunk Management:
107 | - `./bin/splunk-start.sh --devel` - Start Splunk Lab in an interactive shell. Exiting the shell will terminate the Splunk Lab container.
108 |
109 |
110 | ## Bugs/TODO
111 |
112 | - If you try writing a file to a directory that is not under the current directory, Docker will likely have path issues.
113 | - Support downloads on a monthly basis once the bug I found in Twint is fixed.
114 | - Write a script to allow tweets within a set of coordinates.
115 | - Build a dashboard to display tweets on a map.
116 | - Use the Sentiment Analysis module to gauge sentiment over time and build tag clouds of positive/negative tweets.
117 |
118 |
119 | ## Credits
120 |
121 | - Twint - The ability to download an entire timeline without having to use Twitter's overly restrictive API is amazing!
122 | - Splunk, for having such a fantastic product which is also a great example of Operational Excellence!
123 | - This text to ASCII art generator, for the logo I used in the script.
124 |
125 |
126 | # Contact
127 |
128 | My email is doug.muth@gmail.com. I am also @dmuth on Twitter
129 | and Facebook!
130 |
131 |
--------------------------------------------------------------------------------
/bin/build.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # Build our Docker container and tag it for Docker Hub.
4 | #
5 | # Usage: build.sh [ full ]
6 | #
7 | # With "full" as the first argument, Dockerfile-full is built as "twint-full".
8 | # Otherwise Dockerfile-lite is built as "twint-lite", which is additionally
9 | # tagged as dmuth1/twint.
10 | #
11 |
12 | # Errors are fatal
13 | set -e
14 |
15 | #
16 | # Are we building the full version? (Lite version by default)
17 | #
18 | NAME="twint-lite"
19 | DOCKERFILE="Dockerfile-lite"
20 | if test "$1" == "full"
21 | then
22 |     NAME="twint-full"
23 |     DOCKERFILE="Dockerfile-full"
24 | fi
25 |
26 | #
27 | # Change to the parent of this script.
28 | # Quoted so that a checkout path containing spaces is not word-split.
29 | #
30 | pushd "$(dirname "$0")" > /dev/null
31 | cd ..
32 |
33 | echo "# "
34 | echo "# Building Docker container '${NAME}'..."
35 | echo "# "
36 | docker build . -f "${DOCKERFILE}" -t "${NAME}"
37 |
38 | echo "# "
39 | echo "# Tagging Docker container ${NAME}..."
40 | echo "# "
41 | docker tag "${NAME}" "dmuth1/${NAME}"
42 | if test "$NAME" == "twint-lite"
43 | then
44 |     # The lite image is also published under the short name.
45 |     docker tag twint-lite dmuth1/twint
46 | fi
47 |
48 | echo "# Done!"
49 |
50 |
--------------------------------------------------------------------------------
/bin/devel.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # Build and run the Docker container in dev mode (with an interactive shell).
4 | #
5 | # Usage: devel.sh [ full ]
6 | #
7 | # All arguments are forwarded to bin/build.sh. The repository root is mounted
8 | # at /mnt and ./python-scripts at /python-scripts inside the container.
9 | #
10 |
11 | # Errors are fatal
12 | set -e
13 |
14 | #
15 | # Are we building/running the full version? (Lite version by default)
16 | #
17 | NAME="twint-lite"
18 | if test "$1" == "full"
19 | then
20 |     NAME="twint-full"
21 | fi
22 |
23 |
24 | #
25 | # Change to the parent of this script.
26 | # Quoted so that a checkout path containing spaces is not word-split.
27 | #
28 | pushd "$(dirname "$0")" > /dev/null
29 | cd ..
30 |
31 | # "$@" keeps each original argument intact when handing it to build.sh.
32 | ./bin/build.sh "$@"
33 |
34 | echo "# "
35 | echo "# Running Docker container '${NAME}' with interactive bash shell..."
36 | echo "# "
37 | # The volume specs are quoted so a working directory with spaces still mounts.
38 | docker run -v "$(pwd):/mnt" -v "$(pwd)/python-scripts:/python-scripts" -it "${NAME}" bash
39 |
40 |
--------------------------------------------------------------------------------
/bin/pull.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # Pull container(s) from Docker Hub.
4 | #
5 | # Usage: pull.sh [ full ]
6 | #
7 | # Pulls dmuth1/twint by default, or dmuth1/twint-full when "full" is the first
8 | # argument, then tags the pulled image with its short local name.
9 | #
10 |
11 | # Errors are fatal
12 | set -e
13 |
14 | #
15 | # Are we pulling the full version? (Lite version by default)
16 | #
17 | NAME="twint"
18 | if test "$1" == "full"
19 | then
20 |     NAME="twint-full"
21 | fi
22 |
23 | #
24 | # Change to the parent of this script.
25 | # Quoted so that a checkout path containing spaces is not word-split.
26 | #
27 | pushd "$(dirname "$0")" > /dev/null
28 | cd ..
29 |
30 | echo "# "
31 | echo "# Pulling container '${NAME}' from Docker Hub..."
32 | echo "# "
33 | docker pull "dmuth1/${NAME}"
34 |
35 | echo "# "
36 | echo "# Tagging container '${NAME}'..."
37 | echo "# "
38 | docker tag "dmuth1/${NAME}" "${NAME}"
39 |
40 |
41 | echo "# Done!"
42 |
43 |
--------------------------------------------------------------------------------
/bin/push.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # Push our Docker container(s) to Docker Hub.
4 | #
5 | # Usage: push.sh [ full ]
6 | #
7 | # Pushes dmuth1/twint-lite and dmuth1/twint by default, or only
8 | # dmuth1/twint-full when "full" is the first argument.
9 | #
10 |
11 | # Errors are fatal
12 | set -e
13 |
14 | #
15 | # Are we pushing the full version? (Lite version by default)
16 | #
17 | NAME="twint-lite"
18 | if test "$1" == "full"
19 | then
20 |     NAME="twint-full"
21 | fi
22 |
23 | #
24 | # Change to the parent of this script.
25 | # Quoted so that a checkout path containing spaces is not word-split.
26 | #
27 | pushd "$(dirname "$0")" > /dev/null
28 | cd ..
29 |
30 | echo "# "
31 | echo "# Pushing container '${NAME}' to Docker Hub..."
32 | echo "# "
33 | docker push "dmuth1/${NAME}"
34 | if test "$NAME" == "twint-lite"
35 | then
36 |     # The lite image is also published under the short name.
37 |     docker push dmuth1/twint
38 | fi
39 |
40 |
41 | echo "# Done!"
42 |
43 |
--------------------------------------------------------------------------------
/bin/run.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # Wrapper to run our Twint container.
4 | #
5 | # Usage: run.sh [ full ] args...
6 | #
7 | # An optional leading "full" selects the twint-full image; every remaining
8 | # argument is handed to the container unchanged. The image is (re)built first
9 | # and the repository root is mounted at /mnt.
10 | #
11 |
12 | # Errors are fatal
13 | set -e
14 |
15 | #
16 | # Are we running the full version? (Lite version by default)
17 | #
18 | NAME="twint-lite"
19 | FULL=""
20 | if test "$1" == "full"
21 | then
22 |     NAME="twint-full"
23 |     FULL="full"
24 |     shift
25 | fi
26 |
27 | #
28 | # Change to the parent of this script.
29 | # Quoted so that a checkout path containing spaces is not word-split.
30 | #
31 | pushd "$(dirname "$0")" > /dev/null
32 | cd ..
33 |
34 | # Deliberately unquoted: an empty FULL must expand to no argument at all.
35 | ./bin/build.sh ${FULL}
36 |
37 | echo "# "
38 | echo "# Running Docker container..."
39 | echo "# "
40 | echo "# Args: $*"
41 | echo "# "
42 | # "$@" preserves arguments containing spaces or glob characters,
43 | # which an unquoted $@ would split or expand.
44 | docker run -v "$(pwd):/mnt" "dmuth1/${NAME}" "$@"
45 |
46 |
--------------------------------------------------------------------------------
/bin/splunk-start.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # This shell script starts up Splunk and ingests JSONified Tweets made by twint.
4 | #
5 | # Usage: SPLUNK_START_ARGS=--accept-license splunk-start.sh [ --devel ]
6 | #
7 | # Without --devel the container is started detached; with --devel it is
8 | # started with an interactive bash shell instead.
9 | #
10 |
11 | # Errors are fatal
12 | set -e
13 |
14 | #
15 | # Things the user can override
16 | #
17 | SPLUNK_PORT=${SPLUNK_PORT:-8000}
18 | SPLUNK_PASSWORD=${SPLUNK_PASSWORD:-password1}
19 | SPLUNK_DATA=${SPLUNK_DATA:-splunk-data}
20 |
21 | DEVEL_SPLUNK=""
22 |
23 | # An empty or unset value also fails this comparison, so one test is enough.
24 | if test "$SPLUNK_START_ARGS" != "--accept-license"
25 | then
26 |     echo "! "
27 |     echo "! You need to accept the Splunk License in order to continue."
28 |     echo "! "
29 |     echo "! Please restart this container with SPLUNK_START_ARGS set to \"--accept-license\""
30 |     echo "! as follows:"
31 |     echo "! "
32 |     echo "! SPLUNK_START_ARGS=--accept-license"
33 |     echo "! "
34 |     exit 1
35 | fi
36 |
37 | PASSWORD_LEN=${#SPLUNK_PASSWORD}
38 | if test "$PASSWORD_LEN" -lt 8
39 | then
40 |     echo "! "
41 |     echo "! "
42 |     echo "! Admin password needs to be at least 8 characters!"
43 |     echo "! "
44 |     echo "! Password specified: ${SPLUNK_PASSWORD}"
45 |     echo "! "
46 |     echo "! "
47 |     exit 1
48 | fi
49 |
50 |
51 | if test "$1" == "--devel"
52 | then
53 |     DEVEL_SPLUNK=1
54 | fi
55 |
56 |
57 | #
58 | # Change to the parent of this script, like the other scripts in bin/, so that
59 | # the $(pwd)-based mounts below point at the repository no matter where the
60 | # script was invoked from.
61 | #
62 | pushd "$(dirname "$0")" > /dev/null
63 | cd ..
64 |
65 |
66 | #
67 | # Create our Docker command line.
68 | # An array is used so that paths or passwords containing spaces stay intact.
69 | #
70 | DOCKER_ARGS=(
71 |     --rm
72 |     --name splunk-twint
73 |     -p "${SPLUNK_PORT}:8000"
74 |     -v "$(pwd)/logs:/logs"
75 |     -v "$(pwd)/${SPLUNK_DATA}:/data"
76 |     -v "$(pwd)/user-prefs.conf:/opt/splunk/etc/users/admin/user-prefs/local/user-prefs.conf"
77 |     -e "SPLUNK_START_ARGS=${SPLUNK_START_ARGS}"
78 |     -e "SPLUNK_PASSWORD=${SPLUNK_PASSWORD}"
79 |     -v "$(pwd)/splunk-app:/opt/splunk/etc/apps/splunk-twint"
80 | )
81 |
82 |
83 | echo " ____ _ _ _____ _ _ "
84 | echo " / ___| _ __ | | _ _ _ __ | | __ |_ _| __ __ (_) _ __ | |_ "
85 | echo " \___ \ | '_ \ | | | | | | | '_ \ | |/ / | | \ \ /\ / / | | | '_ \ | __|"
86 | echo " ___) | | |_) | | | | |_| | | | | | | < | | \ V V / | | | | | | | |_ "
87 | echo " |____/ | .__/ |_| \__,_| |_| |_| |_|\_\ |_| \_/\_/ |_| |_| |_| \__|"
88 | echo " |_| "
89 |
90 | echo
91 |
92 |
93 | echo "# "
94 | echo "# About to run Splunk Twint!"
95 | echo "# "
96 | echo "# Before we do, please confirm these settings:"
97 | echo "# "
98 | echo "# URL: https://localhost:${SPLUNK_PORT}/ (Set with \$SPLUNK_PORT)"
99 | echo "# Login/Password: admin/${SPLUNK_PASSWORD} (Set with \$SPLUNK_PASSWORD)"
100 | echo "# Splunk Data Directory: ${SPLUNK_DATA} (Set with \$SPLUNK_DATA)"
101 | echo "# "
102 |
103 | if test "$SPLUNK_PASSWORD" == "password1"
104 | then
105 |     echo "# "
106 |     echo "# PLEASE NOTE THAT YOU USED THE DEFAULT PASSWORD"
107 |     echo "# "
108 |     echo "# If you are testing this on localhost, you are probably fine."
109 |     echo "# If you are not, then PLEASE use a different password for safety."
110 |     echo "# If you have trouble coming up with a password, I have a utility "
111 |     echo "# at https://diceware.dmuth.org/ which will help you pick a password "
112 |     echo "# that can be remembered."
113 |     echo "# "
114 | fi
115 |
116 |
117 | echo "> "
118 | echo "> Press ENTER to run Splunk Twint with the above settings, or ctrl-C to abort..."
119 | echo "> "
120 | read
121 |
122 | #
123 | # DOCKER_V_MNT is never assigned in this script; it expands to nothing unless
124 | # it is present in the environment, and is left unquoted for that reason.
125 | #
126 | if test ! "$DEVEL_SPLUNK"
127 | then
128 |     ID=$(docker run "${DOCKER_ARGS[@]}" ${DOCKER_V_MNT} -d dmuth1/splunk-lab-ml)
129 |     echo "# "
130 |     echo "# Splunk Twint launched with Docker ID: "
131 |     echo "# "
132 |     echo "# ${ID} "
133 |     echo "# "
134 |     echo "# To check the logs for Splunk Twint: docker logs splunk-twint"
135 |     echo "# "
136 |     echo "# To kill Splunk Twint: docker kill splunk-twint"
137 |     echo "# "
138 |
139 | else
140 |     docker run "${DOCKER_ARGS[@]}" ${DOCKER_V_MNT} -it dmuth1/splunk-lab-ml bash
141 |
142 | fi
143 |
144 |
145 |
146 |
--------------------------------------------------------------------------------
/bin/splunk-stop.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # Stop Splunk Twint by killing its Docker container.
4 | #
5 |
6 | CONTAINER="splunk-twint"
7 |
8 | printf '%s\n' "# Killing container '${CONTAINER}'..."
9 | docker kill "${CONTAINER}"
10 | printf '%s\n' "# Done!"
11 |
12 |
--------------------------------------------------------------------------------
/entrypoint.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # Our entrypoint script.
4 | #
5 | # Modes, selected by the first argument:
6 | #   bash                            - interactive shell in /mnt
7 | #   --run-python-script FILE [args] - execute FILE from /python-scripts
8 | #   anything else                   - all arguments are passed to /usr/bin/twint
9 | #
10 |
11 | # Errors are fatal
12 | set -e
13 |
14 | ARG=$1
15 |
16 | if test "$ARG" == "bash"
17 | then
18 |     echo "# "
19 |     echo "# Spawning an interactive bash shell in /mnt..."
20 |     echo "# "
21 |
22 |     cd /mnt
23 |     exec /bin/bash
24 |
25 | elif test "$ARG" == "--run-python-script"
26 | then
27 |
28 |     if test ! "$2"
29 |     then
30 |         echo "! "
31 |         echo "! --run-python-script specified, but no file specified as arg!"
32 |         echo "! "
33 |         exit 1
34 |
35 |     elif test ! -f "$2"
36 |     then
37 |         echo "! "
38 |         echo "! --run-python-script specified, but file $2 does not exist!"
39 |         echo "! "
40 |         exit 1
41 |     fi
42 |
43 |     SCRIPT=$2
44 |     shift 2
45 |
46 |     #
47 |     # The existence check above ran in the current directory, so pin a
48 |     # relative path to that directory before we change into /python-scripts.
49 |     # Otherwise the file we exec may not be the one we just checked.
50 |     #
51 |     case "$SCRIPT" in
52 |         /*) ;;
53 |         *) SCRIPT="$(pwd)/${SCRIPT}" ;;
54 |     esac
55 |
56 |     #
57 |     # Change into our directory so that scripts are run from here.
58 |     #
59 |     cd /python-scripts
60 |
61 |     echo "# "
62 |     echo "# Executing Python script ${SCRIPT} with these args: $*"
63 |     echo "# "
64 |     # Quoted so that arguments containing spaces reach the script intact.
65 |     exec "$SCRIPT" "$@"
66 |
67 | fi
68 |
69 | #
70 | # We're running twint directly!
71 | # Change to /mnt since that should be linked to the host directory, then run.
72 | #
73 | cd /mnt
74 | exec /usr/bin/twint "$@"
75 |
76 |
--------------------------------------------------------------------------------
/img/splunk-twint-dmuth-sentiment.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/twintproject/twint-splunk/b27a6ce23b01258c640a63ec3efcfa691ca59ba3/img/splunk-twint-dmuth-sentiment.png
--------------------------------------------------------------------------------
/img/splunk-twint-dmuth.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/twintproject/twint-splunk/b27a6ce23b01258c640a63ec3efcfa691ca59ba3/img/splunk-twint-dmuth.png
--------------------------------------------------------------------------------
/img/splunk-twint-septa-social-tag-cloud.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/twintproject/twint-splunk/b27a6ce23b01258c640a63ec3efcfa691ca59ba3/img/splunk-twint-septa-social-tag-cloud.png
--------------------------------------------------------------------------------
/img/splunk-twint-septa-social.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/twintproject/twint-splunk/b27a6ce23b01258c640a63ec3efcfa691ca59ba3/img/splunk-twint-septa-social.png
--------------------------------------------------------------------------------
/python-scripts/get-user-following.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python3
2 |
3 | import sys
4 |
5 | import twint
6 |
7 | c = twint.Config()
8 |
9 | user = "dmuth"
10 | if len(sys.argv) > 1:
11 | user = sys.argv[1]
12 |
13 | # Must be an increment of 20, according to Twint
14 | num_followers = 20
15 |
16 | print("# Looking up {} followers of a single user: {}".format(num_followers, user), flush=True)
17 |
18 | c.Username = user
19 | c.Limit = num_followers
20 |
21 | twint.run.Following(c)
22 |
23 |
24 |
--------------------------------------------------------------------------------
/python-scripts/get-user-info.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python3
2 |
3 | import sys
4 |
5 | import twint
6 |
7 | c = twint.Config()
8 |
9 | user = "dmuth"
10 | if len(sys.argv) > 1:
11 | user = sys.argv[1]
12 |
13 | print("# Looking up user info of a single user: {}".format(user))
14 |
15 | c.Username = user
16 |
17 | twint.run.Lookup(c)
18 |
19 |
20 |
--------------------------------------------------------------------------------
/python-scripts/get-user-tweets.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python3
2 |
3 | import sys
4 |
5 | import twint
6 |
7 | c = twint.Config()
8 |
9 | user = "dmuth"
10 | if len(sys.argv) > 1:
11 | user = sys.argv[1]
12 |
13 | # Must be an increment of 20, according to Twint
14 | num_tweets = 20
15 |
16 | print("# Looking up {} recent tweets of a single user: {}".format(num_tweets, user), flush=True)
17 |
18 | c.Username = user
19 | c.Limit = num_tweets
20 |
21 | twint.run.Profile(c)
22 |
23 |
24 |
--------------------------------------------------------------------------------
/python-scripts/hello.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # Smallest example script: prints a fixed greeting and exits.
3 | print("Hello World!")
4 |
5 |
--------------------------------------------------------------------------------
/splunk-app/appserver/static/dashboard.css:
--------------------------------------------------------------------------------
1 |
2 | /**
3 | * Sets the background colour of the app bar element
4 | * (the div whose data-view is "views/shared/appbar/Master").
5 | *
6 | * NOTE(review): the selector repeats `.splunk-header.splunk-view` twice,
7 | * presumably to raise its specificity above Splunk's own rules -- confirm.
8 | *
9 | * Splunk needs to be restarted for these changes to be applied.
10 | */
11 | .splunk-header.splunk-view.splunk-header.splunk-view > div[data-view="views/shared/appbar/Master"] {
12 | background-color: #2B98D7;
13 | }
14 |
15 |
--------------------------------------------------------------------------------
/splunk-app/default/app.conf:
--------------------------------------------------------------------------------
1 | #
2 | # Splunk app configuration file
3 | #
4 |
5 | [install]
6 | is_configured = 0
7 |
8 | [ui]
9 | is_visible = 1
10 | label = Splunk Twint
11 |
12 | [launcher]
13 | author = Douglas Muth
14 | description = Splunk Twint: Splunk your Tweets!
15 | version = 1.0.0
16 |
17 |
--------------------------------------------------------------------------------
/splunk-app/default/data/models/tweets.json:
--------------------------------------------------------------------------------
1 | {
2 | "modelName": "tweets",
3 | "displayName": "tweets",
4 | "description": "Tweets",
5 | "objectSummary": {
6 | "Event-Based": 0,
7 | "Transaction-Based": 0,
8 | "Search-Based": 1
9 | },
10 | "objects": [
11 | {
12 | "objectName": "tweets",
13 | "displayName": "tweets",
14 | "parentName": "BaseSearch",
15 | "comment": "",
16 | "fields": [
17 | {
18 | "fieldName": "username",
19 | "owner": "tweets",
20 | "type": "string",
21 | "fieldSearch": "username=*",
22 | "required": true,
23 | "multivalue": false,
24 | "hidden": false,
25 | "editable": true,
26 | "displayName": "username",
27 | "comment": ""
28 | }
29 | ],
30 | "calculations": [],
31 | "constraints": [],
32 | "lineage": "tweets",
33 | "baseSearch": "index=main username=*"
34 | }
35 | ],
36 | "objectNameList": [
37 | "tweets"
38 | ]
39 | }
40 |
--------------------------------------------------------------------------------
/splunk-app/default/data/ui/nav/default.xml:
--------------------------------------------------------------------------------
1 |
20 |
--------------------------------------------------------------------------------
/splunk-app/default/data/ui/views/search_user_tweets.xml:
--------------------------------------------------------------------------------
1 |