├── .github
    └── workflows
    │   └── codeql.yml
├── .gitignore
├── Dockerfile
├── LICENSE
├── README.md
├── documentation
    └── fulldoc.md
├── requirements.txt
├── samples
    ├── sample_.env
    ├── sample_key.json
    └── sample_s3_config.cfg
├── scraper
    ├── detector.py
    ├── requirements.txt
    └── sns.py
└── webapp
    ├── app.py
    ├── form.py
    ├── readme.md
    ├── requirements.txt
    ├── setup.sh
    ├── sqlquery.py
    ├── static
        ├── Chart.min.js
        ├── banner4.jpeg
        ├── customCharts.js
        ├── logo-full.png
        ├── logo-transp.png
        ├── styles.css
        ├── table.css
        ├── userCharts.js
        └── utils.js
    ├── templates
        ├── _formhelpers.html
        ├── base.html
        ├── chart.html
        ├── custom.html
        ├── head.html
        ├── index.html
        ├── result.html
        └── scrap.html
    └── utils.py


/.github/workflows/codeql.yml:
--------------------------------------------------------------------------------
 1 | # For most projects, this workflow file will not need changing; you simply need
 2 | # to commit it to your repository.
 3 | #
 4 | # You may wish to alter this file to override the set of languages analyzed,
 5 | # or to provide custom queries or build logic.
 6 | #
 7 | # ******** NOTE ********
 8 | # We have attempted to detect the languages in your repository. Please check
 9 | # the `language` matrix defined below to confirm you have the correct set of
10 | # supported CodeQL languages.
11 | #
12 | name: "CodeQL"
13 | 
14 | on:
15 |   push:
16 |     branches: [ master ]
17 |   pull_request:
18 |     # The branches below must be a subset of the branches above
19 |     branches: [ master ]
20 |   schedule:
21 |     - cron: '35 23 * * 3'
22 | 
23 | jobs:
24 |   analyze:
25 |     name: Analyze
26 |     runs-on: ubuntu-latest
27 |     permissions:
28 |       actions: read
29 |       contents: read
30 |       security-events: write
31 | 
32 |     strategy:
33 |       fail-fast: false
34 |       matrix:
35 |         language: [ 'javascript', 'python' ]
36 |         # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ]
37 |         # Learn more about CodeQL language support at https://aka.ms/codeql-docs/language-support
38 | 
39 |     steps:
40 |     - name: Checkout repository
41 |       uses: actions/checkout@v3
42 | 
43 |     # Initializes the CodeQL tools for scanning.
44 |     - name: Initialize CodeQL
45 |       uses: github/codeql-action/init@v2
46 |       with:
47 |         languages: ${{ matrix.language }}
48 |         # If you wish to specify custom queries, you can do so here or in a config file.
49 |         # By default, queries listed here will override any specified in a config file.
50 |         # Prefix the list here with "+" to use these queries and those in the config file.
51 |         
52 |         # Details on CodeQL's query packs refer to : https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs
53 |         # queries: security-extended,security-and-quality
54 | 
55 |         
56 |     # Autobuild attempts to build any compiled languages  (C/C++, C#, or Java).
57 |     # If this step fails, then you should remove it and run the build manually (see below)
58 |     - name: Autobuild
59 |       uses: github/codeql-action/autobuild@v2
60 | 
61 |     # ℹ️ Command-line programs to run using the OS shell.
62 |     # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun
63 | 
64 |     #   If the Autobuild fails above, remove it and uncomment the following three lines. 
65 |     #   modify them (or add more) to build your code if your project, please refer to the EXAMPLE below for guidance.
66 | 
67 |     # - run: |
68 |     #   echo "Run, Build Application using script"
69 |     #   ./location_of_script_within_repo/buildscript.sh
70 | 
71 |     - name: Perform CodeQL Analysis
72 |       uses: github/codeql-action/analyze@v2
73 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | encrypted_conf
2 | video*
3 | key.json
4 | .DS*
5 | \#*
6 | .env
7 | s3_config.cfg
8 | 


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM python:3
 2 | 
 3 | COPY ./scraper /app/scraper
 4 | COPY ./webapp /app/webapp
 5 | COPY ./key.json /app/key.json
 6 | COPY ./requirements.txt /app/requirements.txt
 7 | 
 8 | EXPOSE 5000
 9 | 
10 | RUN apt update
11 | # Install node
12 | RUN apt install curl -y
13 | RUN curl -sL https://deb.nodesource.com/setup_14.x | bash -
14 | RUN apt -y install nodejs
15 | # Install npm and the scraper
16 | RUN curl -qL https://www.npmjs.com/install.sh | sh
17 | RUN npm install -g tiktok-scraper
18 | # Install other python requirements
19 | RUN python3 -m pip install -r /app/requirements.txt
20 | 
21 | ENV FLASK_APP=/app/webapp/app.py
22 | 
23 | ENTRYPOINT ["flask"]
24 | 
25 | CMD ["run", "--host=0.0.0.0"]
26 | 
27 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2021 Victor L.
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # Social Net SCrap
  2 | 
  3 | A powerful tool that collect data from videos on social media.
  4 | 
  5 | **Possible improvments :**
  6 | 
  7 | - Add a graphic cool visualisation ffor data in the DB
  8 | - Automate db creation
  9 | - Delete video after usage or send it to s3 
 10 | - Reduce video quality before sending it to Google with ffmpeg
 11 | - Make google API calls optional
 12 | - Add other scrapper (insta / snapchat .. ? )
 13 | - Include tiktok-scrapper in this project 
 14 | -------
 15 | ## Run the program
 16 | 
 17 | Once you have setup the project with the instructions bellow, you can run the project.
 18 | 
 19 | The program takes some time to run (download video then send it to google for inspection). 
 20 | 
 21 | ```python
 22 | usage: sns.py [-h] [-u USER | --hashtag HASHTAG] [-n NUMBER]
 23 | 
 24 | optional arguments:
 25 |   -h, --help                    show this help message and exit
 26 |   -u USER, --user USER          the username of the account you want to scrap.
 27 |   --hashtag HASHTAG             the hashtag you want to scrap (without #).
 28 |   -n NUMBER, --number NUMBER    the number of videos to scrap (default: 10).
 29 | ```
 30 | 
 31 | Examples : 
 32 | ```bash
 33 | python3 ./sns -u <username> -n 3        # will download 3 videos of <username>
 34 | python3 ./sns --hashtag <hashtag> -n 3  # will download 3 videos from #hashtag
 35 | ```
 36 | 
 37 | ## Setup to run the program
 38 | 
 39 | In order to run this program you will need to : 
 40 | * Get your google credentials (optional : needed to call google API)
 41 | * Do not forget to enable the API [(see this link)](https://console.developers.google.com/apis/api/videointelligence.googleapis.com/overview)
 42 | * Setup Python3 and install the requirements
 43 | * Install s3cmd if you want to use with --s3
 44 | * Install tiktok-scrapper 
 45 | * Install mysql and setup the DB
 46 | 
 47 | ### Get Google credentials json file
 48 | 
 49 | You need to get the json file in the GCP console and add it at the root of this repository under the name `key.json`.
 50 | 
 51 | ### Setup Python
 52 | 
 53 | Env is very useful to avoid a mess on your host when installing lot of strange python packets.
 54 | 
 55 | ```bash
 56 | apt update
 57 | apt install virtualenv
 58 | virtualenv env -p python3
 59 | # to activate
 60 | source env/bin/activate
 61 | # to check versions
 62 | ls env/lib/
 63 | # to deactivate
 64 | deactivate
 65 | ```
 66 | 
 67 | Then install the requirements : 
 68 | 
 69 | ```bash
 70 | # In your virtual env
 71 | python3 -m pip install ./requirements.txt
 72 | ```
 73 | 
 74 | 
 75 | ### Install tiktok-scrapper
 76 | 
 77 | The tiktok scrapper is a npm command you need to install : 
 78 | ```bash
 79 | npm install -g tiktok-scraper
 80 | ```
 81 | 
 82 | If it is not working try the solution at this link :
 83 | https://github.com/drawrowfly/tiktok-scraper/pull/563#issuecomment-852264427
 84 | 
 85 | 
 86 | ### Install mysql if not done already
 87 | 
 88 | ```bash
 89 | # install mysql
 90 | apt install mysql
 91 | 
 92 | # Login as root (no password, just press 'enter')
 93 | mysql -u root -p 
 94 | ```
 95 | 
 96 | To create the DB : 
 97 | ```sql
 98 | CREATE DATABASE sns;
 99 | use sns;
100 | ```
101 | 
102 | You will need to store the db information in a `.env` file : 
103 | ```bash
104 | SNS_DB_HOST="127.0.0.1"
105 | SNS_DB_USER="sns"
106 | SNS_DB_PASS="yourpass"
107 | SNS_DB_NAME="sns"
108 | SNS_DB_PORT=3306
109 | ```
110 | 
111 | Then you will need to create all the tables : 
112 | 
113 | #### Create user table : 
114 | ```sql
115 | -- User
116 | CREATE TABLE user (id VARCHAR(255) PRIMARY KEY, nickname VARCHAR(255), avatar VARCHAR(1000), name VARCHAR(255), tikid VARCHAR(255), fans INT, secuid VARCHAR(255), signature VARCHAR(1000), digg INT, verified TINYINT, video INT, heart INT, following INT);
117 | 
118 | ALTER TABLE user CONVERT TO CHARACTER SET utf8mb4;
119 | 
120 | --  Music
121 | CREATE TABLE music (id VARCHAR(50) PRIMARY KEY, musicName VARCHAR(255), duration INT, playUrl VARCHAR(500), musicOriginal TINYINT, coverUrl VARCHAR(500), musicAlbum VARCHAR(255), musicAuthor VARCHAR(255));
122 | 
123 | ALTER TABLE music CONVERT TO CHARACTER SET utf8mb4;
124 | 
125 | -- Video
126 | CREATE TABLE video (id VARCHAR(50) PRIMARY KEY, userId VARCHAR(255), shareCount INT, commentCount INT, playCount INT, videoUrl VARCHAR(1000), text VARCHAR(1000), coverDynamic VARCHAR(1000), createTime VARCHAR(255), secretID VARCHAR(255), webVideoUrl VARCHAR(1000), diggCount INT, height INT, width INT, duration INT);
127 | 
128 | ALTER TABLE video CONVERT TO CHARACTER SET utf8mb4;
129 | 
130 | -- Mention
131 | CREATE TABLE mention (id INT PRIMARY KEY AUTO_INCREMENT, id_video VARCHAR(255), username VARCHAR(255));
132 | 
133 | ALTER TABLE mention CONVERT TO CHARACTER SET utf8mb4;
134 | 
135 | -- Hashtag
136 | CREATE TABLE hashtag (id INT PRIMARY KEY AUTO_INCREMENT, id_video VARCHAR(255), name VARCHAR(255), title VARCHAR(255), cover VARCHAR(1000));
137 | 
138 | ALTER TABLE hashtag CONVERT TO CHARACTER SET utf8mb4;
139 | 
140 | -- Brand
141 | CREATE TABLE brand (id_video VARCHAR(255) PRIMARY KEY, name VARCHAR(255));
142 | ALTER TABLE brand CONVERT TO CHARACTER SET utf8mb4;
143 | 
144 | -- Theme
145 | CREATE TABLE theme (id_video VARCHAR(255) PRIMARY KEY, name VARCHAR(255));
146 | 
147 | ALTER TABLE theme CONVERT TO CHARACTER SET utf8mb4;
148 | 
149 | -- explicit
150 | CREATE TABLE explicit (id_video VARCHAR(255) PRIMARY KEY, explicit VARCHAR(250));
151 | 
152 | ALTER TABLE explicit CONVERT TO CHARACTER SET utf8mb4;
153 | ```
154 | 


--------------------------------------------------------------------------------
/documentation/fulldoc.md:
--------------------------------------------------------------------------------
  1 | # Social Net SCrap
  2 | 
  3 | Social Net SCrap (later called SNS) is an Open Source Intelligence (OSINT) tool, that collect data and metada from videos on social medias from users' profile.
  4 | 
  5 | This document is here to list the full documentation of the project!
  6 | 
  7 | ## Summary
  8 | 
  9 | - I. Global overview
 10 |     1. Why this project  
 11 |     2. Global architechture  
 12 |     3. How to install  
 13 |     4. Open Source  
 14 |   
 15 | - II. The scrapper  
 16 |     1. How it works  
 17 |     2. MySQL setup  
 18 |     3. S3 setup  
 19 |     4. Google Cloud setup  
 20 |     5. Adding more analysis
 21 |     6. Usage example  
 22 |   
 23 | - III. The web interface  
 24 |     1. How it works  
 25 |     2. Research part  
 26 |     3. Scrap part  
 27 |     4. Adding more graphs  
 28 |   
 29 | - IV. To go further  
 30 |     1. Add new scrappers   
 31 |     2. Things to improve
 32 | 
 33 | ## I. Global Overview
 34 | ### 1. Why this project
 35 | 
 36 | We decided to create this tool because we think that social media are underestimated and underused to collect personnal data on a specific user or to make links between a group of people. 
 37 | 
 38 | Moreover the data usually collected and analyzed is mostly textual or from pictures but it's rare to find tools that analyze the videos. Nowadays, videos are becoming mainstream, as we can see through the increase of video "stories", apps like Vine or TikTok or platform such as Youtube and Twitch.  
 39 | Being able to analyze these videos and detect what kind of person is behond, what kind of intentions or ideas are propagated are now a matter. 
 40 | 
 41 | SNS is the first step of a bigger project. Where here we scrap only one platform, we want to be able to implement more and more platform. Where here we collect only some types of data and use AI with moderation, we want to collect much more data using AI. And finally where we only display some graphs to present these data, we want to perform a complete deep-analysis of links between people as well as there content and who they are.
 42 | 
 43 | ### 2. Global architecture
 44 | 
 45 | To create SNS, we tried to make each part independent. So the scrapp is working standalone and the web interface too. Also, we packed the project in a container to it is easier to use. Finally we wanted a precise secret management, so we are using environement variable in the container to manage secrets. 
 46 | 
 47 | We decided to let the users choose wheter or not they want to store the data in a local or remote database, as well as videos in a object storage or if they wanted to delete them after analysis. 
 48 | 
 49 | We also wanted to make SNS usable from the CLI as well as from the web interface. 
 50 | 
 51 | Here you can find a schema of the architecture of the project : 
 52 | 
 53 | ------------------- put schema here -------------------
 54 | 
 55 | ### 3. How to install
 56 | 
 57 | The installation is pretty straightforward ! 
 58 | 
 59 | ```bash
 60 | docker pull sns:latest
 61 | 
 62 | docker run -d -p 5000:5000 sns:latest
 63 | 
 64 | # You can also add env variable like this : 
 65 | docker run -d -p 5000:5000 sns:latest # ------------------- Add env variable command -------------------
 66 | ```
 67 | Then you will be able to access the web interface on `127.0.0.1:5000`.  
 68 | 
 69 | Otherwise if you want to use the CLI : 
 70 | ```bash
 71 | docker pull sns:latest
 72 | 
 73 | docker run -t -i sns:latest /bin/bash
 74 | cd /app/scrapper
 75 | 
 76 | # Then you will be in the contaier and you can simply run : 
 77 | python3 sns.py --help
 78 | ```
 79 | 
 80 | ### 4. Open Source
 81 | 
 82 | We wanted to make this project opensource, as we think this should be edited and improved by everyone. Also the size of the project is too big for a small team like us.  
 83 | Finally we wanted everyone to be able to use our tool. 
 84 | 
 85 | 
 86 | ## II. The scrapper
 87 | ### 1. How it works
 88 | 
 89 | ------------------- Add the last part -------------------
 90 | - explain the process 
 91 | - talk about the dependancy
 92 | 
 93 | ### 2. MySQL setup
 94 | 
 95 | ------------------- Add the last part -------------------
 96 | - scw account
 97 | - create database
 98 | - create a user
 99 | - connect to database
100 | - set the tables
101 | - set utf8
102 | - setup the env credentials
103 | 
104 | 
105 | ### 3. S3 setup
106 | 
107 | We decided to allow users to save or not the scrapped videos into an s3 bucket. We think that some people might want to reuse the videos later in a custom process. 
108 | In this documentation we will be using Scaleway Object Storage, so you need an account to connect to [the console of Scaleway](http://console.scaleway.com/).
109 | 
110 | ------------------- Add the last part -------------------
111 | - download s3cmd
112 | - create a bucket on scw
113 | - get your credentials
114 | - configure s3 cmd
115 | - set the config file at the right place
116 | - verify everything works
117 | 
118 | 
119 | ### 4. Google Cloud setup
120 | 
121 | ------------------- Add the last part -------------------
122 | - create a google cloud account
123 | - add an IAM role
124 | - get the credentials json file
125 | - set the creds at the right place
126 | - talk abour the GOOGLE_APPLICATION_CREDENTIALS env variable
127 | - authorized the use of the API to the
128 | 
129 | ### 5. Adding more analysis
130 | 
131 | ------------------- Add the last part -------------------
132 | - talk about google full potential
133 | - talk about other providers 
134 | - how to implement in the code 
135 | 
136 | ### 6. Usage example
137 | 
138 | ------------------- Add the last part -------------------
139 | - using cli only
140 | - saving to s3
141 | - using hashtag
142 | - using user
143 | 
144 | 
145 | ## III. The web interface  
146 | ### 1. How it works  
147 | 
148 | ------------------- Add the last part -------------------
149 | - explain flask
150 | - explain templates and jinja
151 | - explain the 2 parts search / scrap
152 | 
153 | ### 2. Research part  
154 | 
155 | ------------------- Add the last part -------------------
156 | - how to search
157 | - explain the search engine
158 | 
159 | ### 3. Scrap part  
160 | 
161 | ------------------- Add the last part -------------------
162 | - how to scrap
163 | - what if i do not see the user i scraped
164 | 
165 | ### 4. Adding more graphs  
166 |   
167 | ------------------- Add the last part -------------------
168 | - how graph works
169 | - how to add some
170 | 
171 | 
172 | ## IV. To go further  
173 | ### 1. Add new scrappers   
174 | 
175 | ------------------- Add the last part -------------------
176 | - link to insta scraper
177 | - how to implement new scraper
178 | 
179 | 
180 | 
181 | ### 2. Things to improve
182 | 
183 | ------------------- Add the last part -------------------
184 | - add ssl certificates to web interface
185 | - create an API
186 | - add more options to the scraper (like music etc)
187 | - add more relevant graphs
188 | - add more scraper
189 | - make the front better
190 | 
191 | 
192 | 
193 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | python-dotenv
2 | flask-wtf
3 | flask
4 | google-cloud-videointelligence==2.0.0
5 | mysql-connector


--------------------------------------------------------------------------------
/samples/sample_.env:
--------------------------------------------------------------------------------
1 | SNS_DB_HOST="127.0.0.1"
2 | SNS_DB_USER="your_db_username"
3 | SNS_DB_PASS="your_db_password"
4 | SNS_DB_NAME="your_db_name"
5 | SNS_DB_PORT=3630
6 | 
7 | GOOGLE_APPLICATION_CREDENTIALS=key.json 
8 | 
9 | S3_BUCKET_NAME=sns


--------------------------------------------------------------------------------
/samples/sample_key.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "type": "service_account",
 3 |   "project_id": "xxxx",
 4 |   "private_key_id": "xxxx",
 5 |   "private_key": "xxxx",
 6 |   "client_email": "xxxx",
 7 |   "client_id": "xxxx",
 8 |   "auth_uri": "https://accounts.google.com/o/oauth2/auth",
 9 |   "token_uri": "https://oauth2.googleapis.com/token",
10 |   "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
11 |   "client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/xxxx"
12 | }
13 | 


--------------------------------------------------------------------------------
/samples/sample_s3_config.cfg:
--------------------------------------------------------------------------------
 1 | [default]
 2 | # Object Storage Region NL-AMS
 3 | host_base = s3.nl-ams.scw.cloud
 4 | host_bucket = %(bucket)s.s3.nl-ams.scw.cloud
 5 | bucket_location = nl-ams
 6 | use_https = True
 7 | 
 8 | # Login credentials
 9 | access_key = XXXX
10 | secret_key = XXXX


--------------------------------------------------------------------------------
/scraper/detector.py:
--------------------------------------------------------------------------------
 1 | from google.cloud import videointelligence_v1 as videointelligence
 2 | 
 3 | ##########################################
 4 | ######## person
 5 | ##########################################
 6 | def person(annotation_result):
 7 |     for annotation in annotation_result.person_detection_annotations:
 8 |         print("Person detected:")
 9 |         for track in annotation.tracks:
10 |             # Attributes include unique pieces of clothing,
11 |             # poses, or hair color.
12 |             print("Attributes:")
13 |             for attribute in timestamped_object.attributes:
14 |                 print(
15 |                     "\t{}:{} {}".format(
16 |                         attribute.name, attribute.value, attribute.confidence
17 |                     )
18 |                 )
19 | 
20 |             # Landmarks in person detection include body parts such as
21 |             # left_shoulder, right_ear, and right_ankle
22 |             print("Landmarks:")
23 |             for landmark in timestamped_object.landmarks:
24 |                 print(
25 |                     "\t{}: {} (x={}, y={})".format(
26 |                         landmark.name,
27 |                         landmark.confidence,
28 |                         landmark.point.x,  # Normalized vertex
29 |                         landmark.point.y,  # Normalized vertex
30 |                     )
31 |                 )
32 | 
33 | ##########################################
34 | ######## FACE
35 | ##########################################
36 | def visage(annotation_result):
37 |     for annotation in annotation_result.face_detection_annotations:
38 |         print("Face detected:")
39 |         for track in annotation.tracks:
40 |             # Attributes include glasses, headwear, smiling, direction of gaze
41 |             print("Attributes:")
42 |             for attribute in timestamped_object.attributes:
43 |                 print(
44 |                     "\t{}:{} {}".format(
45 |                         attribute.name, attribute.value, attribute.confidence
46 |                     )
47 |                 )
48 | 
49 | ##########################################
50 | ######## theme
51 | ##########################################
52 | def theme(annotation_result):
53 |     # Process video/segment level label annotations
54 |     videos_desc = []
55 |     segment_labels = annotation_result.segment_label_annotations
56 |     for i, segment_label in enumerate(segment_labels):
57 |         for i, segment in enumerate(segment_label.segments):
58 |             confidence = segment.confidence
59 |             if confidence >= 0.7 :
60 |                 videos_desc.append(segment_label.entity.description)
61 |     
62 |     return videos_desc
63 | 
64 | ##########################################
65 | ######## explicit
66 | ##########################################
67 | def explicit(annotation_result):
68 |     full = []
69 |     for frame in annotation_result.explicit_annotation.frames:
70 |         likelihood = videointelligence.Likelihood(frame.pornography_likelihood)
71 |         frame_time = frame.time_offset.seconds + frame.time_offset.microseconds / 1e6
72 |         full.append(likelihood.name)
73 |             
74 |     if "VERY_LIKELY" in full:
75 |         return "VERY_LIKELY"
76 |     if "LIKELY" in full:
77 |         return "LIKELY"
78 |     if "POSSIBLE" in full:
79 |         return "POSSIBLE"
80 | 
81 | 
82 | ##########################################
83 | ######## logo
84 | ##########################################
85 | def logo(annotation_result):
86 |     logos = []
87 |     # Annotations for list of logos detected, tracked and recognized in video.
88 |     for logo_recognition_annotation in annotation_result.logo_recognition_annotations:
89 |         entity = logo_recognition_annotation.entity
90 |         for track in logo_recognition_annotation.tracks:
91 |             confidence = track.confidence
92 |             if confidence > 0.92:
93 |                 logos.append(entity.description)
94 |     return logos


--------------------------------------------------------------------------------
/scraper/requirements.txt:
--------------------------------------------------------------------------------
1 | google-cloud-videointelligence==2.0.0
2 | mysql-connector
3 | python-dotenv
4 | flask
5 | flask-wtf


--------------------------------------------------------------------------------
/scraper/sns.py:
--------------------------------------------------------------------------------
  1 | #!/bin/python3
  2 | 
  3 | # Before runnig make sure you have these tables ready :
  4 | # Also after creating table run :
  5 | # ALTER TABLE theme CONVERT TO CHARACTER SET utf8mb4;
  6 | #
  7 | # user
  8 | # user (id VARCHAR(255) PRIMARY KEY, nickname VARCHAR(255), avatar VARCHAR(1000), name VARCHAR(255), tikid VARCHAR(255), fans INT, 
  9 | # secuid VARCHAR(255), signature VARCHAR(1000), digg INT, verified TINYINT, video INT, heart INT, following INT)
 10 | # 
 11 | # music
 12 | # TABLE music (id VARCHAR(50) PRIMARY KEY, musicName VARCHAR(255), duration INT, playUrl VARCHAR(500), 
 13 | # musicOriginal TINYINT, coverUrl VARCHAR(500), musicAlbum VARCHAR(255), musicAuthor VARCHAR(255))
 14 | #
 15 | # video
 16 | # video (id VARCHAR(50) PRIMARY KEY, userId VARCHAR(255), shareCount INT, commentCount INT, playCount INT, 
 17 | # videoUrl VARCHAR(1000), text VARCHAR(1000), coverDynamic VARCHAR(1000), createTime VARCHAR(255), secretID VARCHAR(255), 
 18 | # webVideoUrl VARCHAR(1000), diggCount INT, height INT, width INT, duration INT)
 19 | #
 20 | # mention
 21 | # mention (id INT PRIMARY KEY AUTO_INCREMENT, id_video VARCHAR(255), username VARCHAR(255))
 22 | #
 23 | # hashtag
 24 | # hashtag (id INT PRIMARY KEY AUTO_INCREMENT, id_video VARCHAR(255), name VARCHAR(255), title VARCHAR(255), cover VARCHAR(1000))
 25 | #
 26 | # brand
 27 | # brand (id_video VARCHAR(255) PRIMARY KEY, name VARCHAR(255))
 28 | #
 29 | # theme
 30 | # theme (id_video VARCHAR(255) PRIMARY KEY, name VARCHAR(255))
 31 | #
 32 | # explicit
 33 | # explicit (id_video VARCHAR(255) PRIMARY KEY, explicit VARCHAR(250))
 34 | 
 35 | # - username / hashtag / number
 36 | 
 37 | import os 
 38 | import sys 
 39 | import argparse
 40 | # To retreive files from extensions :
 41 | import glob
 42 | import json
 43 | import io
 44 | import mysql.connector
 45 | import asyncio
 46 | from google.cloud import videointelligence_v1 as videointelligence
 47 | 
 48 | # Local import
 49 | import detector
 50 | 
 51 | 
 52 | def fill_args(args):
 53 |     nb = 10
 54 |     user = ""
 55 |     hashtag = ""
 56 |     is_user = True
 57 |     delete = False
 58 |     save = False
 59 | 
 60 | 
 61 |     if args.number: nb = args.number
 62 |     
 63 |     if args.user and args.user != "":
 64 |         user = args.user
 65 |         is_user = True
 66 | 
 67 |     if args.hashtag and args.hashtag != "":
 68 |         hashtag = args.hashtag
 69 |         is_user = False
 70 | 
 71 |     if args.delete == 1: 
 72 |         delete = True
 73 | 
 74 |     if args.s3 == 1:
 75 |         save = True
 76 | 
 77 |     return {"number":nb, "user":user, "hashtag":hashtag, "is_user":is_user, "delete":delete, "save":save}
 78 | 
 79 | def dl_videos(p):
 80 |     one = ("user " if p["user"] != "" else "hashtag ")
 81 |     two = (p["user"] if p["user"] != "" else p["hashtag"])
 82 |     three = str(p["number"])
 83 |     cmd = "tiktok-scraper " + one + two + " -n " + three + " -d -t json"
 84 |     print(cmd)
 85 |     os.system(cmd)
 86 |     
 87 | def get_files(p):
 88 |     dirname = (p["user"] if p["user"] != "" else "/#"+p["hashtag"])
 89 |     pwd = "./" + dirname
 90 |     search_for_videos = pwd + "/*.mp4"
 91 |     search_for_json = pwd + "/*.json"
 92 |     videos = glob.glob(search_for_videos)
 93 |     json = glob.glob(search_for_json)
 94 |     return (videos, json)
 95 | 
 96 | 
 97 | def parse_json(jsons, mycursor, mydb):
 98 |     if len(jsons) == 0:
 99 |         print("No video found... Quitting")
100 |         exit(84)
101 |     file = jsons[0]
102 |     with open(file, 'r') as f:
103 |         data = json.load(f)
104 |     
105 |     for elem in data:
106 |         # Insert or Update 
107 |         ### User info ###
108 |         meta = elem["authorMeta"] 
109 |         sql = """
110 |                 INSERT INTO user (id, name, nickname, avatar, tikid, fans, secuid, signature, digg, verified, video, heart, following) 
111 |                 VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s) ON DUPLICATE KEY UPDATE 
112 |                 name=%s, nickname=%s, avatar=%s, tikid=%s, fans=%s, secuid=%s, 
113 |                 signature=%s, digg=%s, verified=%s, video=%s, heart=%s, following=%s ;
114 |             """
115 |         val = (meta["id"], meta["name"], meta["nickName"], meta["avatar"], meta["id"], 
116 |                 meta["fans"], meta["secUid"], meta["signature"], meta["digg"], 
117 |                 (1 if meta["verified"] == True else 0), meta["video"], meta["heart"], meta["following"],
118 |                 # from here it's for the update part could not find something simpler
119 |                 meta["name"], meta["nickName"], meta["avatar"], meta["id"], 
120 |                 meta["fans"], meta["secUid"], meta["signature"], meta["digg"], 
121 |                 (1 if meta["verified"] == True else 0), meta["video"], meta["heart"], meta["following"])
122 | 
123 |         mycursor.execute(sql, val)
124 |         mydb.commit()
125 |             
126 |         ### Music info ###
127 |         meta = elem["musicMeta"] 
128 |         sql = """
129 |                 INSERT INTO music (id, musicName, duration, playUrl, musicOriginal, coverUrl, musicAlbum, musicAuthor)
130 |                 VALUES (%s, %s, %s, %s, %s, %s, %s, %s) ON DUPLICATE KEY UPDATE 
131 |                 musicName=%s, duration=%s, playUrl=%s, musicOriginal=%s, coverUrl=%s, musicAlbum=%s, musicAuthor=%s;
132 |             """
133 |         val = (meta["musicId"], meta["musicName"], meta["duration"], meta["playUrl"], (1 if meta["musicOriginal"] == True else 0), 
134 |                 meta["coverLarge"], meta["musicAlbum"], meta["musicAuthor"],
135 |                 # from here it's for the update part could not find something simpler
136 |                 meta["musicName"], meta["duration"], meta["playUrl"], (1 if meta["musicOriginal"] == True else 0), 
137 |                 meta["coverLarge"], meta["musicAlbum"], meta["musicAuthor"])
138 | 
139 |         mycursor.execute(sql, val)
140 |         mydb.commit()
141 | 
142 |         ### Video info ###
143 |         meta = elem
144 |         sql = """
145 |                 INSERT INTO video (id, userId, shareCount, commentCount, playCount, videoUrl, text, coverDynamic, createTime, 
146 |                 secretID, webVideoUrl, diggCount, height, width, duration)
147 |                 VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s) ON DUPLICATE KEY UPDATE 
148 |                 userId=%s, shareCount=%s, commentCount=%s, playCount=%s, videoUrl=%s, text=%s, coverDynamic=%s,
149 |                 createTime=%s, secretID=%s, webVideoUrl=%s, diggCount=%s, height=%s, width=%s, duration=%s;
150 |             """
151 |         val = (meta["id"], meta["authorMeta"]["id"], meta["shareCount"], meta["commentCount"], meta["playCount"], meta["videoUrl"],
152 |                 meta["text"], meta["covers"]["dynamic"], meta["createTime"], meta["secretID"], meta["webVideoUrl"], meta["createTime"],
153 |                 meta["videoMeta"]["height"], meta["videoMeta"]["width"], meta["videoMeta"]["duration"],
154 |                 # from here it's for the update part could not find something simpler
155 |                 meta["authorMeta"]["id"], meta["shareCount"], meta["commentCount"], meta["playCount"], meta["videoUrl"],
156 |                 meta["text"], meta["covers"]["dynamic"], meta["createTime"], meta["secretID"], meta["webVideoUrl"], meta["createTime"],
157 |                 meta["videoMeta"]["height"], meta["videoMeta"]["width"], meta["videoMeta"]["duration"])
158 | 
159 |         mycursor.execute(sql, val)
160 |         mydb.commit()
161 | 
162 |         ### Mentions info ###
163 |         for mention in meta["mentions"]:
164 |             username = mention[1:] # This remove the @ before a mention 
165 |             sql = """
166 |                 INSERT INTO mention (id_video, username) 
167 |                 VALUES (%s, %s) 
168 |                 """
169 |             val = (meta["id"], mention)
170 |             mycursor.execute(sql, val)
171 |             mydb.commit()
172 |         
173 |         # As we insert anyway here, we need to remove duplicates now : 
174 |         # This keeps the highest id
175 |         sql = """
176 |             DELETE t1 FROM mention t1
177 |             INNER JOIN mention t2 
178 |             WHERE 
179 |                 t1.id < t2.id AND 
180 |                 t1.id_video = t2.id_video AND 
181 |                 t1.username = t2.username;
182 |             """
183 |         mycursor.execute(sql)
184 |         mydb.commit()
185 | 
186 |         ### Hashtags info ###
187 |         for hashtag in meta["hashtags"]:
188 |             sql = """
189 |                 INSERT INTO hashtag (id_video, name, title, cover) 
190 |                 VALUES (%s, %s, %s, %s)
191 |                 """
192 |             val = (meta["id"], hashtag["name"], hashtag["title"], hashtag["cover"])
193 |             mycursor.execute(sql, val)
194 |             mydb.commit()
195 |         
196 |         # As we insert anyway here, we need to remove duplicates now : 
197 |         # This keeps the highest id
198 |         sql = """
199 |             DELETE t1 FROM hashtag t1
200 |             INNER JOIN hashtag t2 
201 |             WHERE 
202 |                 t1.id < t2.id AND 
203 |                 t1.id_video = t2.id_video AND 
204 |                 t1.name = t2.name;
205 |             """
206 |         mycursor.execute(sql)
207 |         mydb.commit()
208 | 
209 | 
210 | def get_video_id(video):
211 |     # Get video id
212 |     tmp = video
213 |     p = tmp.find('/')
214 |     while p != -1:
215 |         tmp = tmp[p+1:]
216 |         p = tmp.find('/')
217 |     idv = tmp[:len(tmp) - 4]
218 |     return idv
219 | 
220 | 
221 | def delete_video(video):
222 |     print("Deleting : " + video)
223 |     cmd = "rm " + video
224 |     os.system(cmd)
225 |     
226 | 
227 | 
228 | def google_single_video(video, mycursor, mydb):
229 |     client = videointelligence.VideoIntelligenceServiceClient()
230 |     config = videointelligence.types.PersonDetectionConfig(
231 |         include_bounding_boxes=True,
232 |         include_attributes=True,
233 |         include_pose_landmarks=True,
234 |     )
235 |     context = videointelligence.types.VideoContext(person_detection_config=config)
236 | 
237 |     # Open video
238 |     with io.open(video, "rb") as f:
239 |         input_content = f.read()
240 |     # Start the asynchronous request
241 |     print("Sending video " + video + " for analysis...")
242 | 
243 |     # Maybe here we can send all the videos at the same time
244 |     operation = client.annotate_video(
245 |         request={
246 |             "features": [videointelligence.Feature.LABEL_DETECTION, videointelligence.Feature.LOGO_RECOGNITION, videointelligence.Feature.LABEL_DETECTION, videointelligence.Feature.PERSON_DETECTION, videointelligence.Feature.FACE_DETECTION, videointelligence.Feature.EXPLICIT_CONTENT_DETECTION],
247 |             "input_content": input_content,
248 |             "video_context": context,
249 |         }
250 |     )
251 |     result = operation.result(timeout=90)
252 | 
253 |     # Retrieve the first result, because a single video was processed.
254 |     annotation_result = result.annotation_results[0]
255 |             
256 |     print("Searching for explicit content...")
257 |     explicit = detector.explicit(annotation_result)
258 |     
259 |     print("Searching for logo...")
260 |     logos = detector.logo(annotation_result)
261 | 
262 |     print("Searching for theme...")
263 |     themes = detector.theme(annotation_result)
264 | 
265 |     # Saving to DB
266 |     ## Explicit content
267 |     id = get_video_id(video)
268 |     sql = """
269 |             INSERT INTO explicit (id_video, explicit) 
270 |             VALUES (%s, %s) ON DUPLICATE KEY UPDATE
271 |             explicit=%s;
272 |             """
273 |     val = (id, explicit, explicit)
274 |     mycursor.execute(sql, val)
275 |     mydb.commit()
276 | 
277 |     ## Logos
278 |     for brand in logos:
279 |         sql = """
280 |             INSERT INTO brand (id_video, name) 
281 |             VALUES (%s, %s) ON DUPLICATE KEY UPDATE
282 |             name=%s;
283 |             """
284 |         val = (id, brand, brand)
285 |         mycursor.execute(sql, val)
286 |         mydb.commit()
287 | 
288 |     ## Theme
289 |     for theme in themes:
290 |         sql = """
291 |             INSERT INTO theme (id_video, name) 
292 |             VALUES (%s, %s) ON DUPLICATE KEY UPDATE
293 |             name=%s;
294 |             """
295 |         val = (id, theme, theme)
296 |         mycursor.execute(sql, val)
297 |         mydb.commit()
298 |     
299 |     delete_video(video)
300 | 
301 | def google_call(videos, mycursor, mydb, should_delete):
302 |     for video in videos:
303 |         google_single_video(video, mycursor, mydb)
304 | 
305 | def setupDB():
306 |     db_host = os.getenv('SNS_DB_HOST', '127.0.0.1')
307 |     db_port = os.getenv('SNS_DB_PORT', 3630) 
308 |     db_user = os.getenv('SNS_DB_USER', 'toto')
309 |     db_pass = os.getenv('SNS_DB_PASS', 'toto')
310 |     db_name = os.getenv('SNS_DB_NAME', 'toto')
311 | 
312 |     print(db_host)
313 | 
314 |     mydb = mysql.connector.connect(
315 |         host = db_host,
316 |         user = db_user,
317 |         port = db_port,
318 |         password = db_pass,
319 |         database = db_name,
320 |         charset = 'utf8mb4'
321 |     )
322 |     return mydb
323 | 
324 | 
325 | def delete_jsons(jsons):
326 |     print("Deleting : ")
327 |     for file in jsons:
328 |         print("- " + file)
329 |         cmd = "rm " + file
330 |         os.system(cmd)
331 | 
332 | def save_videos(videos):
333 |     all_videos = " ".join(videos)
334 |     s3_bucket_name = "s3://" + os.getenv("S3_BUCKET_NAME", "sns")
335 |     cmd = "s3cmd -c ./s3_config.cfg put " + all_videos + " " + s3_bucket_name
336 |     print("sending videos to s3 : " + s3_bucket_name)
337 |     os.system(cmd)
338 | 
339 | def main():
340 |     parser = argparse.ArgumentParser()
341 |     group = parser.add_mutually_exclusive_group()
342 |     group.add_argument("-u", "--user", help="the username of the account you want to scrap.")
343 |     group.add_argument("--hashtag", help="the hashtag you want to scrap (without #).")
344 |     parser.add_argument("-n", "--number", help="the number of videos to scrap (default: 10).", type=int)
345 |     parser.add_argument("-d", "--delete", help="present if you want the videos to be deleted after usage (default: not deleted)", default=0, action="count")
346 |     parser.add_argument("-s", "--s3", help="if present, save to an s3, need a s3_config to be present (default: not saving", default=0, action="count")
347 |     args = parser.parse_args()
348 | 
349 |     # Get arguments 
350 |     params = fill_args(args)   
351 | 
352 |     # Dl videos with tiktok-scraper
353 |     dl_videos(params)
354 | 
355 |     # retreive videos and json filesn in tabs
356 |     (videos, jsons) = get_files(params)
357 | 
358 |     # Save videos if needed : 
359 |     if params["save"]:
360 |         save_videos(videos)
361 | 
362 |     # parse json/csv file and store the result in DB
363 |     parse_json(jsons, mycursor, mydb)
364 | 
365 |     # Add a step here to reduce the video quality ? 
366 |     # call google api and store result in DB
367 |     google_call(videos, mycursor, mydb, params["delete"])
368 | 
369 |     if params["delete"] :
370 |         delete_jsons(jsons)
371 | 
372 | 
373 | # Setp db from env conf
374 | # env config can be in a .env file
375 | from dotenv import load_dotenv, find_dotenv
376 | load_dotenv(find_dotenv()) 
377 | 
378 | mydb = setupDB()
379 | mycursor = mydb.cursor()
380 | 
381 | # start the program
382 | main()


--------------------------------------------------------------------------------
/webapp/app.py:
--------------------------------------------------------------------------------
  1 | from flask import Flask
  2 | from flask import request, redirect, url_for
  3 | from flask import render_template
  4 | from flask import jsonify
  5 | import os
  6 | import mysql.connector
  7 | 
  8 | import sqlquery
  9 | import utils 
 10 | 
 11 | from form import searchform, scrapform
 12 | 
 13 | from dotenv import load_dotenv
 14 | 
 15 | app = Flask(__name__)
 16 | SECRET_KEY = os.urandom(32)
 17 | app.config['SECRET_KEY'] = SECRET_KEY
 18 | 
 19 | load_dotenv()
 20 | 
 21 | db_host = os.getenv('SNS_DB_HOST', '127.0.0.1')
 22 | db_port = os.getenv('SNS_DB_PORT', 3630)
 23 | db_user = os.getenv('SNS_DB_USER', 'toto')
 24 | db_pass = os.getenv('SNS_DB_PASS', 'toto')
 25 | db_name = os.getenv('SNS_DB_NAME', 'toto')
 26 | 
 27 | mydb = mysql.connector.connect(
 28 |     host = db_host,
 29 |     user = db_user,
 30 |     port = db_port,
 31 |     password = db_pass,
 32 |     database = db_name, 
 33 |     charset = 'utf8mb4'
 34 | )
 35 | 
 36 | cursor = mydb.cursor()
 37 | 
 38 | 
 39 | 
 40 | @app.route("/", methods=['GET', 'POST'])
 41 | def chart():
 42 |     searchF = searchform()
 43 |     
 44 |     if searchF.validate():
 45 |         return redirect(url_for('search', item=request.form.get('search')))
 46 | 
 47 |     ## get top 10 users
 48 |     names, videosNb = sqlquery.getTop10UsersByVideoCount(cursor)
 49 |     charts = {}
 50 |     charts["top10User"] = {'title': "Top 10 users with the most number of videos", \
 51 |                         'labels':names, \
 52 |                         'values':videosNb, \
 53 |                         'legend':'Number of video'}
 54 |     
 55 |     ## get top 10 brand
 56 |     brands, brandsCount = sqlquery.getTop10BrandByCount(cursor)
 57 |     charts["top10Brand"] = {'title': "Top 10 brands detected", \
 58 |                         'labels':brands, \
 59 |                         'values':brandsCount, \
 60 |                         'legend':'Number of times detected'}
 61 | 
 62 |     ## get explicit
 63 |     categories, count = sqlquery.getExplicitCountByCategory(cursor)
 64 |     charts["explicitCount"] = {'title': "Number of explicit per category", \
 65 |                         'labels':categories, \
 66 |                         'values':count, \
 67 |                         'legend':'Number of video per category'}
 68 | 
 69 |     return render_template('base.html', charts=charts, title="SNS", form=searchF)
 70 |  
 71 | 
 72 | @app.route('/search/<item>')
 73 | def search(item="N/A"):
 74 |     searchF = searchform()
 75 |     print(item)
 76 |     users = sqlquery.searchFromBaseUsername(cursor, item)
 77 |     return render_template('result.html', form=searchF,  item=item, users=users)
 78 | 
 79 | 
 80 | @app.route('/custom/<name>')
 81 | def custom(name="N/A"):
 82 |     searchF = searchform()
 83 |     userInfo = sqlquery.getUserInfo(cursor, name)
 84 |     videos = sqlquery.getUserVideos(cursor, name)
 85 |     videosInfo = utils.computeVideosInfo(videos)
 86 |     explicits = sqlquery.getExplicitVideoUrlFromUser(cursor, name)
 87 |     hashtags = sqlquery.getHashtagsCountForUser(cursor, name)
 88 |     mentions = sqlquery.getMentionsFromUser(cursor, name)
 89 |     brands = sqlquery.getBrandsCountForUser(cursor, name)
 90 |     return render_template('custom.html', form=searchF, info=userInfo, vidinfo=videosInfo, \
 91 |                             explicits=explicits, hashtags=hashtags, mentions=mentions, brands=brands)
 92 | 
 93 | 
 94 | @app.route('/scrap', methods=['GET', 'POST'])
 95 | def scrap():
 96 |     searchF = searchform()
 97 |     scrapF =scrapform()
 98 | 
 99 |     if scrapF.validate_on_submit():
100 |         scrapData = {'radio':request.form.get('radio'), \
101 |                     'data':request.form.get('data'), \
102 |                     'number':request.form.get('number')}
103 |         utils.launchScrapper(scrapData)
104 |         return redirect(url_for('chart'))
105 | 
106 |     return render_template('scrap.html', form=searchF, scrapForm=scrapF)
107 | 
108 | 


--------------------------------------------------------------------------------
/webapp/form.py:
--------------------------------------------------------------------------------
 1 | from flask_wtf import FlaskForm
 2 | from wtforms import StringField, validators, RadioField, IntegerField, DecimalField
 3 | from wtforms.validators import NumberRange
 4 | 
 5 | 
 6 | class searchform(FlaskForm):
 7 |     search = StringField('Search', [validators.Length(min=0, max=50), validators.DataRequired()])
 8 | 
 9 | 
10 | class scrapform(FlaskForm):
11 |     radio = RadioField('radio', choices=[('user','Username'),('hashtag','Hashtag')], default='user')
12 |     data = StringField('data', [validators.Length(min=0, max=100), validators.DataRequired()])
13 |     number = IntegerField('number', validators=[
14 |                 validators.Required(),
15 |                 validators.NumberRange(min=1, max=20)
16 |             ])
17 | 


--------------------------------------------------------------------------------
/webapp/readme.md:
--------------------------------------------------------------------------------
1 | # yo
2 | 
3 | Ne pas oublier
4 | 
5 | ```bash
6 | source env/bin/activate
7 | export FLASK_APP=ffl/app.py
8 | ```


--------------------------------------------------------------------------------
/webapp/requirements.txt:
--------------------------------------------------------------------------------
1 | python-dotenv
2 | flask-wtf
3 | flask


--------------------------------------------------------------------------------
/webapp/setup.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | source env/bin/activate
4 | pip install -r ./requirements.txt
5 | export FLASK_APP=webapp/app.py


--------------------------------------------------------------------------------
/webapp/sqlquery.py:
--------------------------------------------------------------------------------
  1 | # Recupérer les utilisateurs avec un contenu possiblement explicit
  2 | # SELECT user.nickname FROM user INNER JOIN video ON video.userId = user.id INNER JOIN explicit ON explicit.id_video = video.id;
  3 | 
  4 | # Get url of possible explicit videos
  5 | # SELECT webVideoUrl from  video INNER JOIN explicit ON explicit.id_video = video.id limit 3;
  6 | 
  7 | # convert time in seconds to date format : 
  8 | # import datetime
  9 | # datetime.datetime.fromtimestamp(1549634152).strftime('%c')
 10 | 
 11 | # Get user specific by item
 12 | # SELECT name FROM user INNER JOIN video INNER JOIN explicit WHERE explicit.explicit='LIKELY' AND explicit.id_video=video.id AND video.userId=user.id;
 13 | 
 14 | # Get top 10 brand and count
 15 | def getTop10BrandByCount(cursor):
 16 |     cursor.execute("select name, count(*) from brand group by name limit 10;")
 17 |     result = cursor.fetchall()
 18 |     return [str(name) for name, count in result], [count for name, count in result]
 19 | 
 20 | # get explicit count 
 21 | def getExplicitCountByCategory(cursor):
 22 |     cursor.execute("select explicit, count(*) from explicit group by explicit;")
 23 |     result = cursor.fetchall()
 24 |     return [str(name) for name, count in result], [count for name, count in result]
 25 | 
 26 | # get top10 nickname by video count desc
 27 | def getTop10UsersByVideoCount(cursor):
 28 |     cursor.execute("select nickname, video from user order by video desc limit 10;")
 29 |     users = cursor.fetchall()
 30 |     return [names for names, nb in users], [nb for names, nb in users]
 31 | 
 32 | 
 33 | # search engine from username
 34 | def searchFromBaseUsername(cursor, username):
 35 |     username = '%' + username + '%'
 36 |     cursor.execute("select nickname, name from user where nickname like %s or name like %s", (username,username))
 37 |     users = cursor.fetchall()
 38 |     result = [{k:v} for k, v in users]
 39 |     return result
 40 | 
 41 | 
 42 | # get user basic informations
 43 | def getUserInfo(cursor, name):
 44 |     toSelect = ["nickname", "name", "fans", "signature", "verified", "video", "heart", "following"]
 45 |     cursor.execute("select nickname, name, fans, signature, verified, video, heart, following from user where name=%s;", (name,))
 46 |     userInfo = cursor.fetchall()
 47 |     zipped = zip(toSelect, userInfo[0])
 48 |     dicted = dict(zipped)
 49 |     return dicted
 50 | 
 51 | # get all videos data from a user
 52 | def getUserVideos(cursor, name):
 53 |     cursor.execute("select * from video inner join user where video.userId = user.id and user.name = %s;", (name,))
 54 |     results = cursor.fetchall()
 55 |     num_fields = len(cursor.description)
 56 |     field_names = [i[0] for i in cursor.description]
 57 |     final_res = []
 58 |     for item in results:
 59 |         zipped = zip(field_names, item)
 60 |         final_res.append(dict(zipped))
 61 |     return final_res
 62 | 
 63 | # Get all the explicit videos for a specific user
 64 | def getExplicitVideoUrlFromUser(cursor, name):
 65 |     cursor.execute("""  \
 66 |                     select webVideoUrl, explicit from video join explicit inner join user  \
 67 |                     WHERE user.name=%s and video.userId=user.id and video.id=explicit.id_video \
 68 |                     and (explicit.explicit='VERY_LIKELY' or explicit.explicit='LIKELY' or explicit.explicit='POSSIBLE');
 69 |                     """, (name,)) # or explicit.explicit='POSSIBLE'
 70 |     results = cursor.fetchall()
 71 |     results = dict(results)
 72 |     if results == {}:
 73 |         return {'N/A':'N/A'}
 74 |     return results
 75 | 
 76 | # Get all the hashtag and the count for a specific user
 77 | def getHashtagsCountForUser(cursor, name):
 78 |     cursor.execute("""  \
 79 |                     select hashtag.name, count(*) from hashtag inner join video inner join user where \
 80 |                     user.id=video.userId and hashtag.id_video=video.id and user.name=%s \
 81 |                     group by hashtag.name; \
 82 |                     """, (name,))
 83 |     results = cursor.fetchall()
 84 |     results = dict(results)
 85 |     if results == {}:
 86 |         return {'N/A':'N/A'}
 87 |     return results
 88 | 
 89 | # Get all mentions count for a specific user
 90 | def getMentionsFromUser(cursor, name):
 91 |     cursor.execute("""\
 92 |                     select username, count(*) from mention inner join video inner join user \
 93 |                     where user.id=video.userId and mention.id_video=video.id and user.name=%s \
 94 |                     group by username; \
 95 |                     """, (name,))
 96 |     results = cursor.fetchall()
 97 |     results = dict(results)
 98 |     if results == {}:
 99 |         return {'N/A':'N/A'}
100 |     return results
101 | 
102 | 
103 | # get all brand count that appears for a users
104 | def getBrandsCountForUser(cursor, name):
105 |     cursor.execute("""\
106 |                     select brand.name, count(*) from brand inner join video inner join user \
107 |                     where user.id=video.userId and brand.id_video=video.id and user.name=%s \
108 |                     group by brand.name; \
109 |                     """, (name,))
110 | 
111 |     results = cursor.fetchall()
112 |     results = dict(results)
113 |     if results == {}:
114 |         return {'N/A':'N/A'}
115 |     return results


--------------------------------------------------------------------------------
/webapp/static/banner4.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/VictorLuc4/Social-Net-SCrap/c6b481e4473ee0e878026080e20fa32ca28245d2/webapp/static/banner4.jpeg


--------------------------------------------------------------------------------
/webapp/static/customCharts.js:
--------------------------------------------------------------------------------
  1 | 
  2 | 
  3 | // ----------------------------------------------
  4 | // explicit Count By Category
  5 | // ----------------------------------------------
  6 | categoryColors = getColorsForTab(explicitCountData.values, 0.35)
  7 | 
  8 | var explicitCountByCategory = document.getElementById("explicitCountByCategory").getContext("2d");
  9 |     Chart.defaults.global.responsive = false;
 10 | 
 11 | var explicitCountByCategoryChart = new Chart(explicitCountByCategory, {
 12 |     type: 'doughnut',
 13 |     data: {
 14 |         labels : explicitCountData.labels,
 15 |         datasets: [{
 16 |             label: explicitCountData.legend,
 17 |             fill: true,
 18 |             lineTension: 0.1,
 19 |             backgroundColor: categoryColors,
 20 |             borderColor: categoryColors,
 21 |             data : explicitCountData.values,
 22 |         }]
 23 |     },
 24 |     options: {
 25 |         title: {
 26 |             display: true,
 27 |             text: explicitCountData.title
 28 |         },
 29 | },
 30 | });
 31 | 
 32 | // ----------------------------------------------
 33 | // Top 10 users by video number :
 34 | // ----------------------------------------------
 35 | top10colors = getColorsForTab(top10User.values, 0.16)
 36 | 
 37 | var top10UsersByVideoCount = document.getElementById("top10UsersByVideoCount").getContext("2d");
 38 |     //Chart.defaults.global.responsive = false;
 39 | 
 40 | var top10UsersByVideoCountChart = new Chart(top10UsersByVideoCount, {
 41 |     type: 'bar',
 42 |     data: {
 43 |             labels : top10User.labels,
 44 |             datasets: [{
 45 |                 label: top10User.legend,
 46 |                 fill: true,
 47 |                 lineTension: 0.1,
 48 |                 backgroundColor: top10colors,
 49 |                 borderColor: top10colors,
 50 |                 borderCapStyle: 'butt',
 51 |                 borderDash: [],
 52 |                 borderDashOffset: 0.0,
 53 |                 borderJoinStyle: 'miter',
 54 |                 pointBorderColor: top10colors,
 55 |                 pointBackgroundColor: "#fff",
 56 |                 pointBorderWidth: 1,
 57 |                 pointHoverRadius: 5,
 58 |                 pointHoverBackgroundColor: top10colors,
 59 |                 pointHoverBorderColor: "rgba(220,220,220,1)",
 60 |                 pointHoverBorderWidth: 2,
 61 |                 pointRadius: 1,
 62 |                 pointHitRadius: 10,
 63 |                 data : top10User.values,
 64 |                 spanGaps: false
 65 |             }]
 66 |         },
 67 |         options: {
 68 |             title: {
 69 |                 display: true,
 70 |                 text: top10User.title
 71 |             },
 72 |     },
 73 | });
 74 | // ----------------------------------------------
 75 | 
 76 | // ----------------------------------------------
 77 | // Top 10 users by video number :
 78 | // ----------------------------------------------
 79 | top10colors = getColorsForTab(top10Brand.values, 0.16)
 80 | 
 81 | var top10BrandByCount = document.getElementById("top10BrandByCount").getContext("2d");
 82 |     //Chart.defaults.global.responsive = false;
 83 | 
 84 | var top10BrandByCountChart = new Chart(top10BrandByCount, {
 85 |     type: 'bar',
 86 |     data: {
 87 |             labels : top10Brand.labels,
 88 |             datasets: [{
 89 |                 label: top10Brand.legend,
 90 |                 fill: true,
 91 |                 lineTension: 0.1,
 92 |                 backgroundColor: top10colors,
 93 |                 borderColor: top10colors,
 94 |                 borderCapStyle: 'butt',
 95 |                 borderDash: [],
 96 |                 borderDashOffset: 0.0,
 97 |                 borderJoinStyle: 'miter',
 98 |                 pointBorderColor: top10colors,
 99 |                 pointBackgroundColor: "#fff",
100 |                 pointBorderWidth: 1,
101 |                 pointHoverRadius: 5,
102 |                 pointHoverBackgroundColor: top10colors,
103 |                 pointHoverBorderColor: "rgba(220,220,220,1)",
104 |                 pointHoverBorderWidth: 2,
105 |                 pointRadius: 1,
106 |                 pointHitRadius: 10,
107 |                 data : top10Brand.values,
108 |                 spanGaps: false
109 |             }]
110 |         },
111 |         options: {
112 |             title: {
113 |                 display: true,
114 |                 text: top10Brand.title
115 |             },
116 |     },
117 | });
118 | // ----------------------------------------------


--------------------------------------------------------------------------------
/webapp/static/logo-full.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/VictorLuc4/Social-Net-SCrap/c6b481e4473ee0e878026080e20fa32ca28245d2/webapp/static/logo-full.png


--------------------------------------------------------------------------------
/webapp/static/logo-transp.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/VictorLuc4/Social-Net-SCrap/c6b481e4473ee0e878026080e20fa32ca28245d2/webapp/static/logo-transp.png


--------------------------------------------------------------------------------
/webapp/static/styles.css:
--------------------------------------------------------------------------------
  1 | .wrapper
  2 | {
  3 |     width: 100%;
  4 |     height: auto;
  5 |     display: grid;
  6 |     grid-template-columns: 50%;
  7 |     grid-template-areas:
  8 |         "c c"
  9 |         "c c"
 10 |         "c c"
 11 |         "c c";
 12 | }
 13 | 
 14 | .general {
 15 |     padding: 10px;
 16 | }
 17 | 
 18 | canvas {
 19 |     grid-area: c;
 20 |     position: relative;
 21 | }
 22 | 
 23 | body {
 24 |     font-family: Arial;
 25 | }
 26 | 
 27 | body, html {
 28 |     height: 100%;
 29 |     margin: 0;
 30 |     font-family: Arial, Helvetica, sans-serif;
 31 |   }
 32 | 
 33 | * {
 34 |     box-sizing: border-box;
 35 | }
 36 | 
 37 | /* The hero image */
 38 | .hero-image {
 39 |     /* Use "linear-gradient" to add a darken background effect to the image (photographer.jpg). This will make the text easier to read */
 40 |     background-image: linear-gradient(rgba(0, 0, 0, 0.0), rgba(0, 0, 0, 0.0)), url("/static/banner4.jpeg");
 41 |   
 42 |     /* Set a specific height */
 43 |     height: 100%;
 44 |   
 45 |     /* Position and center the image to scale nicely on all screens */
 46 |     background-position: center;
 47 |     background-repeat: no-repeat;
 48 |     background-size: cover;
 49 |     position: relative;
 50 |   }
 51 | 
 52 |   /* The hero image header */
 53 | .hero-image-head {
 54 |     /* Use "linear-gradient" to add a darken background effect to the image (photographer.jpg). This will make the text easier to read */
 55 |     background-image: linear-gradient(rgba(0, 0, 0, 0.0), rgba(0, 0, 0, 0.0)), url("/static/banner4.jpeg");
 56 |   
 57 |     /* Set a specific height */
 58 |     height: 20%;
 59 |   
 60 |     /* Position and center the image to scale nicely on all screens */
 61 |     background-position: center;
 62 |     background-repeat: no-repeat;
 63 |     background-size: cover;
 64 |     position: relative;
 65 |   }
 66 |   
 67 |   /* Place text in the middle of the image */
 68 |   .hero-text {
 69 |     text-align: center;
 70 |     position: absolute;
 71 |     top: 50%;
 72 |     left: 50%;
 73 |     transform: translate(-50%, -50%);
 74 |     color: black;
 75 |   }
 76 | 
 77 | /* ----- Search Form ----- */
 78 | 
 79 | form.example input[type=text] {
 80 |     padding: 10px;
 81 |     font-size: 17px;
 82 |     border: 1px solid grey;
 83 |     float: left;
 84 |     width: 80%;
 85 |     height:40px;
 86 |     background: #f1f1f1;
 87 |   }
 88 | 
 89 | form.example button {
 90 |     float: left;
 91 |     width: 20%;
 92 |     padding: 10px;
 93 |     background: #86C4ECA2;
 94 |     color: white;
 95 |     font-size: 20px;
 96 |     height: 40px;
 97 |     border: 1px solid grey;
 98 |     border-left: none;
 99 |     cursor: pointer;
100 |   }
101 | 
102 |   form.example button:hover {
103 |     background: #86C4ECFF;
104 |   }
105 |   
106 |   form.example::after {
107 |     content: "";
108 |     clear: both;
109 |     display: table;
110 |   }
111 | 
112 | /* ----- Scraper Form ----- */
113 | 
114 |   form.scrapform input[type=text] {
115 |     padding: 10px;
116 |     font-size: 17px;
117 |     border: 1px solid grey;
118 |     float: center;
119 |     width: 100%;
120 |     background: #f1f1f1;
121 |   }
122 | 
123 |   form.scrapform button {
124 |     float: center;
125 |     width: 100%;
126 |     padding: 10px;
127 |     background: #86C4ECA2;
128 |     color: white;
129 |     font-size: 20px;
130 |     border: 1px solid grey;
131 |     border-left: none;
132 |     cursor: pointer;
133 |   }
134 |   
135 |   form.scrapform button:hover {
136 |     background: #86C4ECFF;
137 |   }
138 |   
139 |   form.scrapform::after {
140 |     content: "";
141 |     clear: both;
142 |     display: table;
143 |   }
144 | 
145 | 
146 | /* ----- Radio button ----- */
147 | 
148 | ul {
149 |   list-style-type: none;
150 |   margin: 0;
151 |   padding: 0;
152 | }
153 | 
154 | .radiobox {
155 |   float: center;
156 |   
157 |   width: 50em;
158 |   padding: 2em;
159 |   background: #f6f6f6;
160 | }
161 | 
162 | input[type=radio] {
163 |   position: absolute;
164 |   opacity: 0;
165 | }
166 | 
167 | input[type=radio] + label {
168 |   display: inline-block;
169 | }
170 | 
171 | input[type=radio] + label:before {
172 |   content: "";
173 |   display: inline-block;
174 |   vertical-align: -0.2em;
175 |   width: 1em;
176 |   height: 1em;
177 |   border: 0.15em solid #86C4ECFF;
178 |   border-radius: 0.2em;
179 |   margin-right: 0.3em;
180 |   background-color: white;
181 | }
182 | input[type=radio] + label:before {
183 |   border-radius: 50%;
184 | }
185 | input[type=radio]:checked + label:before {
186 |   background-color: #86C4ECFF;
187 |   box-shadow: inset 0 0 0 0.15em white;
188 | }
189 | input[type=radio]:focus + label:before {
190 |   outline: dotted 1px;
191 | }
192 | 
193 | /* ----- Progress bar ----- */
194 | 


--------------------------------------------------------------------------------
/webapp/static/table.css:
--------------------------------------------------------------------------------
 1 | .styled-table {
 2 |     border-collapse: collapse;
 3 |     margin: 10 0;
 4 |     font-size: 0.9em;
 5 |     font-family: sans-serif;
 6 |     width: 60%;
 7 |     box-shadow: 0 0 20px rgba(0, 0, 0, 0.15);
 8 | }
 9 | 
10 | .styled-table thead tr {
11 |     background-color: #86C4ECFF;
12 |     color: #ffffff;
13 |     text-align: left;
14 | }
15 | 
16 | .styled-table th,
17 | .styled-table td {
18 |     padding: 12px 15px;
19 | }
20 | 
21 | tbody {
22 |     display: block;
23 |     overflow: auto;
24 |     max-height: 200px;
25 | }
26 | 
27 | thead, tbody tr {
28 |     display: table;
29 |     width: 100%;
30 |     table-layout: fixed;
31 | }
32 | 
33 | .styled-table tbody tr {
34 |     border-bottom: 1px solid #8fedbf30;
35 | }
36 | 
37 | .styled-table tbody tr:nth-of-type(even) {
38 |     background-color: #86C4EC30;
39 | }
40 | 
41 | .styled-table tbody tr:last-of-type {
42 |     border-bottom: 2px solid #38a2e0a2;
43 | }
44 | 
45 | /* ---- results ----*/
46 | 
47 | .styled-res {
48 |     border-collapse: collapse;
49 |     margin: 10 0;
50 |     padding-left: 40px;
51 |     padding-right: 40px;
52 |     font-size: 0.9em;
53 |     font-family: sans-serif;
54 |     height: auto;
55 |     width: auto;
56 |     box-shadow: 0 0 20px rgba(0, 0, 0, 0.15);
57 | }
58 | 
59 | .styled-res thead tr {
60 |     background-color: #86C4ECFF;
61 |     color: #000000;
62 |     text-align: center;
63 | }
64 | 
65 | .styled-res th,
66 | .styled-res td {
67 |     padding: 12px 15px;
68 | }
69 | 
70 | tbody {
71 |     display: block;
72 |     overflow: auto;
73 |     max-height: 100%;
74 | }
75 | 
76 | thead, tbody tr {
77 |     display: table;
78 |     width: 100%;
79 |     table-layout: fixed;
80 | }
81 | 
82 | .styled-res tbody tr {
83 |     border-bottom: 1px solid #dddddd;
84 | }
85 | 
86 | .styled-res tbody tr:nth-of-type(even) {
87 |     background-color: #86C4EC30;
88 | }
89 | 
90 | 
91 | .styled-res tbody tr:last-of-type {
92 |     border-bottom: 2px solid #38a2e0a2;
93 | }
94 | 


--------------------------------------------------------------------------------
/webapp/static/userCharts.js:
--------------------------------------------------------------------------------
 1 | // ----------------------------------------------
 2 | // Preferred Time for posts
 3 | // ----------------------------------------------
 4 | //categoryColors = getColorsForTab(explicitCountData.values, 0.35)
 5 | 
 6 | var prefposttime = document.getElementById("prefposttime").getContext("2d");
 7 |     Chart.defaults.global.responsive = false;
 8 | 
 9 | var prefposttimeChart = new Chart(prefposttime, {
10 |     type: 'bubble',
11 |     data: {
12 |         datasets: [
13 |             {
14 |                 label: 'Posts',
15 |                 data: videoData.bubble,
16 |                 backgroundColor: "rgba(134,196,236, 0.6)"
17 |             },
18 |         ]
19 |     },
20 |     options: {
21 |         title: {
22 |             display: true,
23 |             text: "Posts posted times"
24 |         },
25 |         scales: {
26 |             yAxes: [{
27 |                 ticks: {
28 |                     max: 6,
29 |                     min: 0,
30 |                     stepSize: 1,
31 |                     callback: function(value, index, values) {
32 |                         days = {0:'Monday', 1:'Tuesday', 2:'Wednesday', 3:'Thursday', 4:'Friday', 5:'Saturday', 6:'Sunday'};
33 |                         return days[index]
34 |                       }
35 |                 }
36 |             }],
37 |             xAxes: [{
38 |                 ticks: {
39 |                     max: 24,
40 |                     min: 0,
41 |                     stepSize: 1,
42 |                     callback: function(value, index, values) {
43 |                         days = {0:'midnight', 1:'1am', 2:'2am', 3:'3am', 4:'4am', 5:'5am', 6:'6am', 
44 |                         7:'7am', 8:'8am', 9:'9am', 10:'10am', 11:'11am', 12:'Lunch Time', 
45 |                         13:'1pm', 14:'2pm', 15:'3pm', 16:'4pm', 17:'5pm', 18:'6pm', 19:'7pm', 
46 |                         20:'8pm', 21:'9pm', 22:'10pm', 23:'11pm', 24:'midnight too'}
47 | 
48 |                         return days[index]
49 |                       }
50 |                 }
51 |             }]
52 |         },
53 | },
54 | });
55 | 
56 | 


--------------------------------------------------------------------------------
/webapp/static/utils.js:
--------------------------------------------------------------------------------
 1 | 
 2 | function getRandomColor() {
 3 |     var letters = '0123456789ABCDEF';
 4 |     var color = '#';
 5 |     for (var i = 0; i < 6; i++) {
 6 |       color += letters[Math.floor(Math.random() * 16)];
 7 |     }
 8 |     return color;
 9 |   }
10 | 
11 |   function approximateColor1ToColor2ByPercent(color1, color2, percent) {
12 |     var red1 = parseInt(color1[1] + color1[2], 16);
13 |     var green1 = parseInt(color1[3] + color1[4], 16);
14 |     var blue1 = parseInt(color1[5] + color1[6], 16);
15 |   
16 |     var red2 = parseInt(color2[1] + color2[2], 16);
17 |     var green2 = parseInt(color2[3] + color2[4], 16);
18 |     var blue2 = parseInt(color2[5] + color2[6], 16);
19 |   
20 |     var red = Math.round(mix(red1, red2, percent));
21 |     var green = Math.round(mix(green1, green2, percent));
22 |     var blue = Math.round(mix(blue1, blue2, percent));
23 |   
24 |     return generateHex(red, green, blue);
25 |   }
26 |   
27 |   function generateHex(r, g, b) {
28 |     r = r.toString(16);
29 |     g = g.toString(16);
30 |     b = b.toString(16);
31 |   
32 |     // to address problem mentioned by Alexis Wilke:
33 |     while (r.length < 2) { r = "0" + r; }
34 |     while (g.length < 2) { g = "0" + g; }
35 |     while (b.length < 2) { b = "0" + b; }
36 |   
37 |     return "#" + r + g + b;
38 |   }
39 |   
40 |   function mix(start, end, percent) {
41 |       return start + ((percent) * (end - start));
42 |   }
43 | 
44 |   function getColorsForTab(tab, force){
45 |     cols = []
46 |     first = getRandomColor()
47 |     sec = '#FFFFFF'
48 |     for (i = 0; i < tab.length; i++){
49 |         newcol = approximateColor1ToColor2ByPercent(first, sec, force)
50 |         first = newcol
51 |         cols.push(newcol)
52 |     }
53 |     return cols
54 | }


--------------------------------------------------------------------------------
/webapp/templates/_formhelpers.html:
--------------------------------------------------------------------------------
 1 | {% macro render_field(field) %}
 2 |     <dt>{{ field.label }}
 3 |     <dd>{{ field(**kwargs)|safe }}
 4 |     {% if field.errors %}
 5 |         <ul class=errors>
 6 |         {% for error in field.errors %}
 7 |             <li>{{ error }}</li>
 8 |         {% endfor %}
 9 |         </ul>
10 |     {% endif %}
11 |     </dd>
12 | {% endmacro %}


--------------------------------------------------------------------------------
/webapp/templates/base.html:
--------------------------------------------------------------------------------
 1 | <html>
 2 |     <head>
 3 |       {% if title %}
 4 |       <title>{{ title }}</title>
 5 |       {% else %}
 6 |       <title>Welcome to SNS</title>
 7 |       {% endif %}
 8 |       <script src='/static/Chart.min.js'></script>
 9 |       <script src="/static/utils.js"></script>
10 | 
11 |       <meta name="viewport" content="width=device-width, initial-scale=1">
12 | 
13 |       <link rel="icon" href="/static/logo-transp.png">
14 |       <link rel="stylesheet" href="/static/styles.css">
15 |       <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.7.0/css/font-awesome.min.css">
16 | 
17 |     </head>
18 |     <body >
19 |         <div class="hero-image">
20 |             <div class="hero-text">
21 |                 <img src="/static/logo-full.png" width="400" height="auto">
22 |                 <h2><a href="/" style="color:#000000;"> Research </a> | <a href="/scrap" style="color:#000000;"> Scrap </a></h2>
23 |                 <p>Not just another OSINT tool</p>
24 |                 <form method="POST" action="/" class="example" style="margin:auto;max-width:500px">
25 |                     {{ form.csrf_token }}
26 |                     {{ form.search(placeholder="Search user..") }}
27 |                     <button type="submit"><i class="fa fa-search"></i></button>
28 |                 </form>
29 |             </div>
30 |           </div>
31 |         {% block content %}{% endblock %}
32 |     </body>
33 | </html>


--------------------------------------------------------------------------------
/webapp/templates/chart.html:
--------------------------------------------------------------------------------
 1 | {% extends "base.html" %}
 2 | 
 3 | {% block content %}
 4 | <div class="general">
 5 |     <link rel="stylesheet" href="/static/styles.css">
 6 | 
 7 |     <h1>General Overview</h1>
 8 |     <!-- bar chart canvas element -->
 9 |     <div class="wrapper">
10 |         <div id="content">
11 |             <canvas id="top10UsersByVideoCount" width="300" height="300"></canvas>
12 |         </div>
13 | 
14 |         <div id="content">
15 |         <canvas id="explicitCountByCategory" width="300" height="300"></canvas>
16 |         </div>
17 | 
18 |         <div id="content">
19 |         <canvas id="top10BrandByCount" width="300" height="300"></canvas>
20 |         </div>
21 |     </div>
22 | 
23 |     <script>
24 | 
25 |         top10User = {{ charts.top10User|tojson }}
26 |         explicitCountData = {{ charts.explicitCount|tojson }}
27 |         top10Brand = {{ charts.top10Brand|tojson }}
28 | 
29 |     </script>
30 |     <script src="static/customCharts.js"></script>
31 | </div>
32 | {% endblock %}


--------------------------------------------------------------------------------
/webapp/templates/custom.html:
--------------------------------------------------------------------------------
  1 | {% extends "head.html" %}
  2 | 
  3 | {% block content %}
  4 | <link rel="stylesheet" href="/static/table.css">
  5 | 
  6 | <div class="wrapper">
  7 |     <div>
  8 |         <h1>{{ info.nickname }} </h1>
  9 |         <b>{{ info.name }}</b> 
 10 |         {% if info.verified == 1 %}
 11 |         <i class="fa fa-check-circle" aria-hidden="true"></i>
 12 |         {% else %}
 13 |         <i class="fa fa-times-circle" aria-hidden="true"></i>
 14 |         {% endif %}
 15 |         </br>
 16 |         <i>{{ info.signature }}</i>
 17 |     </div>
 18 |     <div>
 19 |         <h3>From the video analized we found the informations below :</h3>
 20 |         <p>
 21 |             A video duration average of {{ vidinfo.duration_av }} seconds </br>
 22 |             An average of {{ vidinfo.share_av }} shares, {{ vidinfo.comments_av}} comments and {{vidinfo.plays_av }} plays per video. </br>
 23 |             The average size of the description is {{ vidinfo.descSize_av }}
 24 |         </p>
 25 |     </div>
 26 |     <div>
 27 |         <p>
 28 |         Following {{ info.following }} people </br>
 29 |         Has {{ info.video }} videos but only {{ vidinfo.videoNum }} were scrapped and analyzed</br>
 30 |         Has {{ info.fans }} fans </br>
 31 |         Has a total of {{ info.heart }} hearts </br>
 32 |         </p>
 33 |     </div>
 34 | 
 35 |     <div>
 36 |         <h3>Brands that appears in the videos :</h3>
 37 |         <table class="styled-table">
 38 |             <thead>
 39 |                 <tr>
 40 |                     <th>Brands</th>
 41 |                     <th>Number of time</th>
 42 |                 </tr>
 43 |             </thead>
 44 |             <tbody>
 45 |                 {% for key, value in brands.items() %}
 46 |                 <tr>
 47 |                     <td>{{key}}</td>
 48 |                     <td>{{value}}</td>
 49 |                 </tr>
 50 |                 {% endfor %}
 51 |             </tbody>
 52 |         </table>
 53 |     </div>
 54 |     <div>
 55 |         <h3>People mentionned in the videos : </h3>
 56 |         <table class="styled-table">
 57 |             <thead>
 58 |                 <tr>
 59 |                     <th>Mentionned</th>
 60 |                     <th>Number of time</th>
 61 |                 </tr>
 62 |             </thead>
 63 |             <tbody>
 64 |                 {% for key, value in mentions.items() %}
 65 |                 <tr>
 66 |                     {% if value == 'N/A' %}
 67 |                     <td>{{key}}</td>
 68 |                     {% else %}
 69 |                     <td><a href="https://www.tiktok.com/{{key}}" target="_blank" rel="noopener noreferrer">{{key}}</a></td>
 70 |                     {% endif %}
 71 |                     <td>{{value}}</td>
 72 |                 </tr>
 73 |                 {% endfor %}
 74 |             </tbody>
 75 |         </table>
 76 |     </div>
 77 |     <div>
 78 |         <h3>Hashtags used in the videos : </h3>
 79 |         <table class="styled-table">
 80 |             <thead>
 81 |                 <tr>
 82 |                     <th>Hashtags</th>
 83 |                     <th>Number of time used</th>
 84 |                 </tr>
 85 |             </thead>
 86 |             <tbody>
 87 |                 {% for key, value in hashtags.items() %}
 88 |                 <tr>
 89 |                     <td>{{key}}</td>
 90 |                     <td>{{ value }}</td>
 91 |                 </tr>
 92 |                 {% endfor %}
 93 |             </tbody>
 94 |         </table>
 95 |     </div>
 96 | </div>
 97 | <div>
 98 |     <div>
 99 |         <h3>Videos with possible explicit content :</h3>
100 |         <table class="styled-table">
101 |             <thead>
102 |                 <tr>
103 |                     <th>Video Url</th>
104 |                     <th>Explicit Video</th>
105 |                 </tr>
106 |             </thead>
107 |             <tbody>
108 |                 {% for key, value in explicits.items() %}
109 |                 <tr>
110 |                     {% if value == 'N/A' %}
111 |                         <td>{{key}}</td>
112 |                     {% else %}
113 |                         <td><a href="{{key}}" target="_blank" rel="noopener noreferrer">{{key}}</a></td>
114 |                     {% endif %}
115 |                     <td>{{ value }}</td>
116 |                 </tr>
117 |                 {% endfor %}
118 |             </tbody>
119 |         </table>
120 |     </div>
121 |     <div>
122 |         <h3>Posts map regarding the days and the hours : </h3>
123 |         <canvas id="prefposttime" width="800" height="500"></canvas></br>
124 |     </div>
125 | </div>
126 | 
127 | <script>
128 |     videoData = {{ vidinfo|tojson }}
129 | </script>
130 | <script src="/static/userCharts.js"></script>
131 | {% endblock %}


--------------------------------------------------------------------------------
/webapp/templates/head.html:
--------------------------------------------------------------------------------
 1 | <html>
 2 |     <head>
 3 |       {% if title %}
 4 |       <title>{{ title }}</title>
 5 |       {% else %}
 6 |       <title>Welcome to SNS</title>
 7 |       {% endif %}
 8 |       <script src='/static/Chart.min.js'></script>
 9 |       <script src="/static/utils.js"></script>
10 | 
11 |       <meta name="viewport" content="width=device-width, initial-scale=1">
12 | 
13 |       <link rel="icon" href="/static/logo-transp.png">
14 |       <link rel="stylesheet" href="/static/styles.css">
15 |       <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.7.0/css/font-awesome.min.css">
16 | 
17 |     </head>
18 |     <body >
19 |         <div class="hero-image-head">
20 |             <div class="hero-text">
21 |                 <h2><a href="/" style="color:#000000;"> Research </a> | <a href="/scrap" style="color:#000000;"> Scrap </a></h2>
22 |                 <p>Not just another OSINT tool</p>
23 |                 <form method="POST" action="/" class="example" style="margin:auto;max-width:300px">
24 |                     {{ form.csrf_token }}
25 |                     {{ form.search(placeholder="Search user..") }}
26 |                     <button type="submit"><i class="fa fa-search"></i></button>
27 |                 </form>
28 |             </div>
29 |           </div>
30 |         <div class="general">
31 |             {% block content %}{% endblock %}
32 |         </div>
33 |     </body>
34 | </html>


--------------------------------------------------------------------------------
/webapp/templates/index.html:
--------------------------------------------------------------------------------
1 | {% extends "base.html" %}
2 | 
3 | {% block content %}
4 |     <h1>Hi, {{ user.name }}!</h1>
5 |     {% for post in posts %}
6 |     <div><p>{{ post.author.username }} says: <b>{{ post.body }}</b></p></div>
7 |     {% endfor %}
8 | {% endblock %}


--------------------------------------------------------------------------------
/webapp/templates/result.html:
--------------------------------------------------------------------------------
 1 | {% extends "head.html" %}
 2 | 
 3 | {% block content %}
 4 | <div style="float:center">
 5 | <h1>Results found for "{{ item }}" </h1>
 6 | 
 7 |     <link rel="stylesheet" href="/static/table.css">
 8 | 
 9 |     <table class="styled-res" >
10 |         <thead>
11 |             <tr>
12 |                 <th>Nickname</th>
13 |                 <th>Real Name</th>
14 |             </tr>
15 |         </thead>
16 |         <tbody>
17 |             {% for dict_item in users %}
18 |             <tr>
19 |                 {% for key, value in dict_item.items() %}
20 |                 <td style="text-align:center;"><a href="/custom/{{value}}">{{key}}</a></td>
21 |                 <td style="text-align:center;">{{value}}</td>
22 |                 {% endfor %}
23 |             </tr>
24 |             {% endfor %}
25 |         </tbody>
26 |     </table>
27 | </div>
28 | 
29 | {% endblock %}


--------------------------------------------------------------------------------
/webapp/templates/scrap.html:
--------------------------------------------------------------------------------
 1 | {% extends "head.html" %}
 2 | 
 3 | {% block content %}
 4 | 
 5 | <form method="POST" class="scrapform" action="/scrap" style="margin:auto;max-width:500px">
 6 |     {{ scrapForm.csrf_token }}
 7 |     <h1>Let'scrap some people...</h1>
 8 |     </br>
 9 |     Scrap a user or a hashtag ? </br>
10 |     {{ scrapForm.radio }} </br>
11 |     Username or hashtag to scrap :</br>
12 |     {{ scrapForm.data(placeholder="usershtag") }} </br></br>
13 |     Number of video to scrap from 1 to 20 : </br>
14 |     {{ scrapForm.number(placeholder="3") }}</br></br>
15 |     <button type="submit" ><i class="fa fa-paper-plane" aria-hidden="true"></i></button>
16 |     <div id="breakline" hidden="true">
17 |         Please wait for the page to reload. It can take up to 5 minutes. Take a break, and a coffee !
18 |     </div>
19 | </form>
20 | 
21 | {% endblock %}


--------------------------------------------------------------------------------
/webapp/utils.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | def launchScrapper(scrapData):
 4 |     print(scrapData)
 5 |     cmd = "python3 scraper/sns.py --" + scrapData['radio'] + " " + scrapData['data'] +"  -n " + scrapData['number']
 6 |     print(cmd)
 7 |     os.system(cmd)
 8 |     return 
 9 | 
10 | 
11 | def computeVideosInfo(videos):
12 |     import datetime
13 | 
14 |     durations = []
15 |     shares = []
16 |     comments = []
17 |     plays = []
18 |     descSize = []
19 |     webUrls = []
20 |     created = []
21 |     weekdays = []
22 |     hours = []
23 | 
24 |     for video in videos: 
25 |         durations.append(video["duration"])
26 |         shares.append(video["shareCount"])
27 |         comments.append(video["commentCount"])
28 |         plays.append(video["playCount"])
29 |         descSize.append(len(video["text"]))
30 |         webUrls.append(video["webVideoUrl"])
31 |         created_date = datetime.datetime.fromtimestamp(int(video["createTime"]))
32 |         fulldate = created_date.strftime('%c')
33 |         day = created_date.strftime('%A')
34 |         hour = created_date.strftime('%H')
35 |         created.append(fulldate)
36 |         weekdays.append(day)
37 |         hours.append(hour)
38 | 
39 |     bubble = getBubbleFromDaysAndHours(weekdays, hours)
40 |     videoNum = len(videos)
41 |     infos = {'videoNum':videoNum, \
42 |             'duration_av':int(sum(durations)/videoNum), \
43 |             'share_av': int(sum(shares)/videoNum), \
44 |             'plays_av': int(sum(plays)/videoNum), \
45 |             'comments_av': int(sum(comments)/videoNum), \
46 |             'descSize_av': int(sum(descSize)/videoNum), \
47 |             'web_urls' : webUrls, \
48 |             'created_date': created, \
49 |             'weekdays' : weekdays, \
50 |             'hours' : hours, \
51 |             'bubble': bubble }
52 | 
53 |     return infos
54 | 
55 | def getBubbleFromDaysAndHours(days, hours):
56 |     #days = ['Tuesday', 'Tuesday', 'Tuesday', 'Wednesday', 'Wednesday']
57 |     #hours = [2, 8, 8, 2, 2]
58 |     daysDict = {'Monday':0, 'Tuesday':1, 'Wednesday':2, 'Thursday':3, 'Friday':4, 'Saturday':5, 'Sunday':6}
59 |     infos = {}
60 |     for i in range(0, len(days)):
61 |         dnum = daysDict[days[i]]
62 | 
63 |         if dnum in infos.keys():
64 |             # the day exist, so we need to check for the hour now
65 |             if hours[i] in infos[dnum].keys():
66 |                 # hour for the day exists so we increase the occurence
67 |                 infos[dnum][hours[i]] += 1
68 |             else:
69 |                 # hour doesn't exist so we add it
70 |                 infos[dnum][hours[i]] = 1
71 |         else:
72 |             # the day doesn't exist so we add it with the hour 
73 |             infos[dnum] = {hours[i]: 1}
74 | 
75 |     # Then we need to transform in in  a x, y, r dictionary
76 |     bub = []
77 |     for y, val in infos.items():
78 |         tmp = {}
79 |         for x, z in val.items():
80 |             bub.append({'x':x, 'y':y, 'r':z*5})
81 | 
82 |     return bub


--------------------------------------------------------------------------------