4 |
5 |
6 |
--------------------------------------------------------------------------------
/check_vid_django/check_vid_src/Report/__pycache__/urls.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JesseKanter/checkVid/HEAD/check_vid_django/check_vid_src/Report/__pycache__/urls.cpython-37.pyc
--------------------------------------------------------------------------------
/check_vid_django/check_vid_src/Report/__pycache__/views.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JesseKanter/checkVid/HEAD/check_vid_django/check_vid_src/Report/__pycache__/views.cpython-37.pyc
--------------------------------------------------------------------------------
/check_vid_django/check_vid_src/Words/__pycache__/views.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JesseKanter/checkVid/HEAD/check_vid_django/check_vid_src/Words/__pycache__/views.cpython-37.pyc
--------------------------------------------------------------------------------
/check_vid_django/check_vid_src/Report/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JesseKanter/checkVid/HEAD/check_vid_django/check_vid_src/Report/__pycache__/__init__.cpython-37.pyc
--------------------------------------------------------------------------------
/check_vid_django/check_vid_src/Words/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JesseKanter/checkVid/HEAD/check_vid_django/check_vid_src/Words/__pycache__/__init__.cpython-37.pyc
--------------------------------------------------------------------------------
/check_vid_django/check_vid_src/extractor/__pycache__/views.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JesseKanter/checkVid/HEAD/check_vid_django/check_vid_src/extractor/__pycache__/views.cpython-37.pyc
--------------------------------------------------------------------------------
/check_vid_django/check_vid_src/check_vid_src/__pycache__/urls.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JesseKanter/checkVid/HEAD/check_vid_django/check_vid_src/check_vid_src/__pycache__/urls.cpython-37.pyc
--------------------------------------------------------------------------------
/check_vid_django/check_vid_src/check_vid_src/__pycache__/wsgi.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JesseKanter/checkVid/HEAD/check_vid_django/check_vid_src/check_vid_src/__pycache__/wsgi.cpython-37.pyc
--------------------------------------------------------------------------------
/check_vid_django/check_vid_src/extractor/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JesseKanter/checkVid/HEAD/check_vid_django/check_vid_src/extractor/__pycache__/__init__.cpython-37.pyc
--------------------------------------------------------------------------------
/check_vid_django/check_vid_src/check_vid_src/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JesseKanter/checkVid/HEAD/check_vid_django/check_vid_src/check_vid_src/__pycache__/__init__.cpython-37.pyc
--------------------------------------------------------------------------------
/check_vid_django/check_vid_src/check_vid_src/__pycache__/settings.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JesseKanter/checkVid/HEAD/check_vid_django/check_vid_src/check_vid_src/__pycache__/settings.cpython-37.pyc
--------------------------------------------------------------------------------
/check_vid_django/check_vid_src/Report/urls.py:
--------------------------------------------------------------------------------
1 | from django.conf.urls import url
2 | from Report.views import Report
3 |
4 |
# URL routes of the Report app (mounted by the project URLconf under
# 'submission/').
urlpatterns = [
    # url() treats its first argument as a regular expression. The original
    # empty pattern '' matched every remaining path, so any address below the
    # mount point reached the view. Anchoring with ^$ limits the route to the
    # mount point itself.
    # NOTE(review): django.conf.urls.url() is deprecated in newer Django
    # releases; consider django.urls.path('') when the import is updated.
    url(r'^$', Report, name='submission'),
]
8 |
9 |
10 |
11 | ##from django.urls import path
12 | ##
13 | ##from . import views
14 | ##
15 | ##urlpatterns = [
16 | ## path('', views.index, name='index'),
17 | ##]
18 |
--------------------------------------------------------------------------------
/check_vid_django/check_vid_src/check_vid_src/asgi.py:
--------------------------------------------------------------------------------
1 | """
2 | ASGI config for check_vid_src project.
3 |
4 | It exposes the ASGI callable as a module-level variable named ``application``.
5 |
6 | For more information on this file, see
7 | https://docs.djangoproject.com/en/3.0/howto/deployment/asgi/
8 | """
9 |
10 | import os
11 |
12 | from django.core.asgi import get_asgi_application
13 |
14 | os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'check_vid_src.settings')  # setdefault leaves an already-exported DJANGO_SETTINGS_MODULE untouched
15 |
16 | application = get_asgi_application()  # the module-level ASGI callable named in the docstring above
17 |
--------------------------------------------------------------------------------
/check_vid_django/check_vid_src/check_vid_src/wsgi.py:
--------------------------------------------------------------------------------
1 | """
2 | WSGI config for check_vid_src project.
3 |
4 | It exposes the WSGI callable as a module-level variable named ``application``.
5 |
6 | For more information on this file, see
7 | https://docs.djangoproject.com/en/3.0/howto/deployment/wsgi/
8 | """
9 |
10 | import os
11 |
12 | from django.core.wsgi import get_wsgi_application
13 |
14 | os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'check_vid_src.settings')  # setdefault leaves an already-exported DJANGO_SETTINGS_MODULE untouched
15 |
16 | application = get_wsgi_application()  # the module-level WSGI callable named in the docstring above; settings.WSGI_APPLICATION points at it
17 |
--------------------------------------------------------------------------------
/check_vid_chromeext/src/bg/background.js:
--------------------------------------------------------------------------------
1 |
function reset() {
  // Show the greyscale logo on the toolbar button.
  var greyIcon = { path: "../../icons/Logo_greyscale.png" };
  chrome.browserAction.setIcon(greyIcon);

  // Empty chrome.storage.local and log once the clear has completed.
  var onCleared = function () {
    console.log("Report reset");
  };
  chrome.storage.local.clear(onCleared);
}
12 |
13 | reset();
14 |
15 |
16 |
17 |
// Add a listener that runs whenever a page's DOM has finished loading.
chrome.webNavigation.onDOMContentLoaded.addListener(function (details) {

  // The event also fires for every sub-frame (embedded players, ads, ...).
  // Only a top-level navigation (frameId 0) should wipe the stored report,
  // otherwise a frame loading inside the current page clears a report the
  // user has just requested.
  if (details.frameId !== 0) {
    return;
  }

  // reset when the page is (re)loaded
  reset();

});
26 |
--------------------------------------------------------------------------------
/check_vid_chromeext/src/page_action/popup.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
10 |
11 |
Put in the Start Time (hour:minute:second) and the Duration you are interested in finding relevant comments for. Click the "Submit" button and wait for the comments to come up (the more comments, the longer the wait).
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
--------------------------------------------------------------------------------
/check_vid_django/check_vid_src/manage.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | """Django's command-line utility for administrative tasks."""
3 | import os
4 | import sys
5 |
6 |
def main():
    """Select this project's settings module and run the command in sys.argv."""
    os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'check_vid_src.settings')

    # Django is imported lazily so that a missing installation produces the
    # explanatory message below instead of a bare ImportError.
    try:
        from django.core.management import execute_from_command_line as run_command
    except ImportError as exc:
        hint = (
            "Couldn't import Django. Are you sure it's installed and "
            "available on your PYTHONPATH environment variable? Did you "
            "forget to activate a virtual environment?"
        )
        raise ImportError(hint) from exc

    run_command(sys.argv)
18 |
19 |
20 | if __name__ == '__main__':
21 | main()
22 |
--------------------------------------------------------------------------------
/check_vid_django/check_vid_src/extractor/views.py:
--------------------------------------------------------------------------------
1 | from django.shortcuts import render
2 |
3 | import pandas as pd
4 |
5 | from tqdm import tqdm
6 |
7 | import time
8 | import os
9 | import string
10 |
11 |
12 |
13 | from youtube_transcript_api import YouTubeTranscriptApi
14 |
15 | # Create your views here.
16 |
def get_transcript_df(video_id_input):
    """Fetch the transcript of a video and return it as a DataFrame.

    The transcript is requested through ``YouTubeTranscriptApi.get_transcript``;
    each returned entry is read for its 'text', 'start' and 'duration' keys.

    Args:
        video_id_input: video id handed to the transcript API.

    Returns:
        pandas.DataFrame with the columns 'text', 'start' and 'duration',
        one row per transcript entry, in the order the API returned them.
    """
    segments = list(YouTubeTranscriptApi.get_transcript(video_id_input))

    # One column per field, built in the same order the DataFrame exposes them.
    columns_data = {
        'text': [segment['text'] for segment in segments],
        'start': [segment['start'] for segment in segments],
        'duration': [segment['duration'] for segment in segments],
    }
    return pd.DataFrame(columns_data, columns=columns_data.keys())
34 |
35 |
--------------------------------------------------------------------------------
/check_vid_django/check_vid_src/check_vid_src/urls.py:
--------------------------------------------------------------------------------
1 | """check_vid_src URL Configuration
2 |
3 | The `urlpatterns` list routes URLs to views. For more information please see:
4 | https://docs.djangoproject.com/en/3.0/topics/http/urls/
5 | Examples:
6 | Function views
7 | 1. Add an import: from my_app import views
8 | 2. Add a URL to urlpatterns: path('', views.home, name='home')
9 | Class-based views
10 | 1. Add an import: from other_app.views import Home
11 | 2. Add a URL to urlpatterns: path('', Home.as_view(), name='home')
12 | Including another URLconf
13 | 1. Import the include() function: from django.urls import include, path
14 | 2. Add a URL to urlpatterns: path('blog/', include('blog.urls'))
15 | """
16 | from django.contrib import admin
17 | from django.urls import path
18 | from django.conf.urls import url, include
19 | from Report.views import Report
20 |
# Project-level routes: the Django admin plus the Report app.
urlpatterns = [
    path('admin/', admin.site.urls),
    # path() matches 'submission/' only as the leading segment of the request
    # path. The previous url('submission/', ...) was an unanchored regular
    # expression, so it also matched any path that merely contained
    # 'submission/' somewhere.
    path('submission/', include('Report.urls')),
]
29 |
--------------------------------------------------------------------------------
/check_vid_chromeext/src/browser_action/style.css:
--------------------------------------------------------------------------------
1 | body {
2 | margin: 0px 0px 0px 0px !important;
3 | min-width: 400px; /* your desired width */
4 | max-width: 100%;
5 | }
6 |
7 | .Report_container {
8 | /*display: none;*/
9 | /*display: block;*/
10 | /*left: 0;*/
11 | /*position: absolute;*/
12 | /*top: 34px;*/
13 | /*width: 210px; */
14 | height: 50px;
15 | width: 400px;
16 | font-size:18px;
17 | /*box-shadow: 0 3px 10px hsl(0, 0%, 30%);*/
18 | font-family: Helvetica, Arial, sans-serif;
19 | /*font-family: Georgia, Helvetica, Arial, sans-serif;*/
20 | /*box-sizing: border-box;*/
21 | }
22 |
23 | h4 {
24 | padding: 16px 16px 16px 16px;
25 | font-size: 15px;
26 | color: hsl(0, 0%, 50%);
27 | font-family: Helvetica, Arial, sans-serif;
28 | }
29 |
30 | .events {
31 | list-style: none;
32 | padding: 0px 0px 0px 0px;
33 | }
34 |
35 | .events a {
36 | font-size: 15px;
37 | background-color: hsl(0, 0%, 100%, 0);
38 | color: hsl(0, 0%, 10%);
39 | display: block;
40 | text-decoration: none;
41 | }
42 |
43 | .events li {
44 | /*font-size: 14px;*/
45 | background-color: hsl(0, 0%, 100%);
46 | color: hsl(0, 0%, 20%);
47 | display: block;
48 | padding: 8px 20px 8px 20px;
49 | text-decoration: none;
50 | border-bottom: 1px solid hsl(0, 0%, 80%);
51 | }
52 |
53 | .events li:hover {
54 | background-color: hsl(27, 84%, 77%, 1);
55 | text-decoration: none;
56 | }
57 |
58 | .events ul {
59 | /*font-size: 14px;*/
60 | background-color: hsl(0, 0%, 100%);
61 | color: hsl(0, 0%, 20%);
62 | display: block;
63 | padding: 8px 20px 8px 20px;
64 | text-decoration: none;
65 | /* border-bottom: 1px solid hsl(0, 0%, 80%);
66 | */}
67 |
68 | .events ul:hover {
69 | background-color: hsl(27, 84%, 77%, 1);
70 | text-decoration: none;
71 | }
72 |
.venue {
  font-size: 12px;
  /* `bold` is a font-weight value; `font-style: bold` is invalid CSS and was
     ignored by the browser, so the text was never rendered bold. */
  font-weight: bold;
  background-color: hsl(0, 0%, 100%, 0);
  color: hsl(0, 0%, 40%);
  display: block;
  padding: 0px 8px 4px 0px;
  text-decoration: none;
}
82 |
83 | /*******************************/
84 | .dropbtn {
85 | background-color: #3498DB;
86 | color: white;
87 | padding: 16px;
88 | font-size: 16px;
89 | border: none;
90 | cursor: pointer;
91 | }
92 |
93 | .dropbtn:hover, .dropbtn:focus {
94 | background-color: #2980B9;
95 | }
96 |
97 | .dropdown {
98 | position: relative;
99 | display: inline-block;
100 | }
101 |
102 | .dropdown-content {
103 | display: none;
104 | position: absolute;
105 | background-color: #f1f1f1;
106 | min-width: 160px;
107 | overflow: auto;
108 | box-shadow: 0px 8px 16px 0px rgba(0,0,0,0.2);
109 | z-index: 1;
110 | }
111 |
112 | .dropdown-content a {
113 | color: black;
114 | padding: 12px 16px;
115 | text-decoration: none;
116 | display: block;
117 | }
118 |
119 | .dropdown a:hover {background-color: #ddd;}
120 |
121 | .show {display: block;}
122 |
123 | /**********************/
124 | /*
125 | .Fs8au {
126 | border-top: 1px solid hsl(0, 0%, 90%);
127 | }
128 |
129 | .SWhEv {
130 | border-bottom: 1px solid hsl(0, 0%, 90%);
131 | }*/
132 |
--------------------------------------------------------------------------------
/check_vid_django/check_vid_src/check_vid_src/settings.py:
--------------------------------------------------------------------------------
1 | """
2 | Django settings for check_vid_src project.
3 |
4 | Generated by 'django-admin startproject' using Django 3.0.2.
5 |
6 | For more information on this file, see
7 | https://docs.djangoproject.com/en/3.0/topics/settings/
8 |
9 | For the full list of settings and their values, see
10 | https://docs.djangoproject.com/en/3.0/ref/settings/
11 | """
12 |
13 | import os
14 |
15 | # Build paths inside the project like this: os.path.join(BASE_DIR, ...)
16 | BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
17 |
18 |
19 | # Quick-start development settings - unsuitable for production
20 | # See https://docs.djangoproject.com/en/3.0/howto/deployment/checklist/
21 |
# SECURITY WARNING: keep the secret key used in production secret!
# The key can be supplied through the DJANGO_SECRET_KEY environment variable.
# The literal below is only the fallback that keeps existing deployments
# working; it is committed to the repository and must be treated as public.
SECRET_KEY = os.environ.get(
    'DJANGO_SECRET_KEY',
    '@(a^an3h9fk(w5kiq4cep(j=3%r@%g9zx+w65dcxheg+$*f&@x',
)

# SECURITY WARNING: don't run with debug turned on in production!
# Debug mode stays on by default (previous behaviour); export DJANGO_DEBUG=False
# on the server to switch it off without editing this file.
DEBUG = os.environ.get('DJANGO_DEBUG', 'True').strip().lower() in ('1', 'true', 'yes', 'on')

# Host header values Django will answer to.
ALLOWED_HOSTS = ['52.90.31.24']
29 |
30 |
31 | # Application definition
32 |
33 | INSTALLED_APPS = [
34 | 'django.contrib.admin',
35 | 'django.contrib.auth',
36 | 'django.contrib.contenttypes',
37 | 'django.contrib.sessions',
38 | 'django.contrib.messages',
39 | 'django.contrib.staticfiles',
40 | 'corsheaders',
41 | ]
42 |
# Request/response middleware, applied top-down on the way in.
MIDDLEWARE = [
    'django.middleware.security.SecurityMiddleware',
    'django.contrib.sessions.middleware.SessionMiddleware',
    # CorsMiddleware has to run before any middleware that can generate a
    # response (CommonMiddleware in particular), otherwise those responses are
    # sent without the CORS headers the Chrome extension's fetch() needs.
    'corsheaders.middleware.CorsMiddleware',
    # Listed once; the original list contained CommonMiddleware twice.
    'django.middleware.common.CommonMiddleware',
    'django.middleware.csrf.CsrfViewMiddleware',
    'django.contrib.auth.middleware.AuthenticationMiddleware',
    'django.contrib.messages.middleware.MessageMiddleware',
    'django.middleware.clickjacking.XFrameOptionsMiddleware',
]
54 |
55 |
56 | CORS_ORIGIN_ALLOW_ALL = True
57 |
58 | ROOT_URLCONF = 'check_vid_src.urls'
59 |
60 | TEMPLATES = [
61 | {
62 | 'BACKEND': 'django.template.backends.django.DjangoTemplates',
63 | 'DIRS': [],
64 | 'APP_DIRS': True,
65 | 'OPTIONS': {
66 | 'context_processors': [
67 | 'django.template.context_processors.debug',
68 | 'django.template.context_processors.request',
69 | 'django.contrib.auth.context_processors.auth',
70 | 'django.contrib.messages.context_processors.messages',
71 | ],
72 | },
73 | },
74 | ]
75 |
76 | WSGI_APPLICATION = 'check_vid_src.wsgi.application'
77 |
78 |
79 | # Database
80 | # https://docs.djangoproject.com/en/3.0/ref/settings/#databases
81 |
82 | DATABASES = {
83 | 'default': {
84 | 'ENGINE': 'django.db.backends.sqlite3',
85 | 'NAME': os.path.join(BASE_DIR, 'db.sqlite3'),
86 | }
87 | }
88 |
89 |
90 | # Password validation
91 | # https://docs.djangoproject.com/en/3.0/ref/settings/#auth-password-validators
92 |
93 | AUTH_PASSWORD_VALIDATORS = [
94 | {
95 | 'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator',
96 | },
97 | {
98 | 'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator',
99 | },
100 | {
101 | 'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator',
102 | },
103 | {
104 | 'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator',
105 | },
106 | ]
107 |
108 |
109 | # Internationalization
110 | # https://docs.djangoproject.com/en/3.0/topics/i18n/
111 |
112 | LANGUAGE_CODE = 'en-us'
113 |
114 | TIME_ZONE = 'UTC'
115 |
116 | USE_I18N = True
117 |
118 | USE_L10N = True
119 |
120 | USE_TZ = True
121 |
122 |
123 | # Static files (CSS, JavaScript, Images)
124 | # https://docs.djangoproject.com/en/3.0/howto/static-files/
125 |
126 | STATIC_URL = '/static/'
127 | STATIC_ROOT = os.path.join(BASE_DIR, "static/")
128 |
--------------------------------------------------------------------------------
/check_vid_chromeext/_locales/en/messages.json:
--------------------------------------------------------------------------------
1 | {
2 | "l10nTabName": {
3 | "message":"Localization"
4 | ,"description":"name of the localization tab"
5 | }
6 | ,"l10nHeader": {
7 | "message":"It does localization too! (this whole tab is, actually)"
8 | ,"description":"Header text for the localization section"
9 | }
10 | ,"l10nIntro": {
11 | "message":"'L10n' refers to 'Localization' - 'L' and 'n' are obvious, and 10 comes from the number of letters between those two. It is the process/whatever of displaying something in the language of choice. It uses 'I18n', 'Internationalization', which refers to the tools / framework supporting L10n. I.e., something is internationalized if it has I18n support, and can be localized. Something is localized for you if it is in your language / dialect."
12 | ,"description":"introduce the basic idea."
13 | }
14 | ,"l10nProd": {
15 | "message":"You are planning to allow localization, right? You have no idea who will be using your extension! You have no idea who will be translating it! At least support the basics, it's not hard, and having the framework in place will let you transition much more easily later on."
16 | ,"description":"drive the point home. It's good for you."
17 | }
18 | ,"l10nFirstParagraph": {
19 | "message":"When the options page loads, elements decorated with data-l10n will automatically be localized!"
20 | ,"description":"inform that elements will be localized on load"
21 | }
22 | ,"l10nSecondParagraph": {
23 | "message":"If you need more complex localization, you can also define data-l10n-args. This should contain $containerType$ filled with $dataType$, which will be passed into Chrome's i18n API as $functionArgs$. In fact, this paragraph does just that, and wraps the args in mono-space font. Easy!"
24 | ,"description":"introduce the data-l10n-args attribute. End on a lame note."
25 | ,"placeholders": {
26 | "containerType": {
27 | "content":"$1"
28 | ,"example":"'array', 'list', or something similar"
29 | ,"description":"type of the args container"
30 | }
31 | ,"dataType": {
32 | "content":"$2"
33 | ,"example":"string"
34 | ,"description":"type of data in each array index"
35 | }
36 | ,"functionArgs": {
37 | "content":"$3"
38 | ,"example":"arguments"
39 | ,"description":"whatever you call what you pass into a function/method. args, params, etc."
40 | }
41 | }
42 | }
43 | ,"l10nThirdParagraph": {
44 | "message":"Message contents are passed right into innerHTML without processing - include any tags (or even scripts) that you feel like. If you have an input field, the placeholder will be set instead, and buttons will have the value attribute set."
45 | ,"description":"inform that we handle placeholders, buttons, and direct HTML input"
46 | }
47 | ,"l10nButtonsBefore": {
48 | "message":"Different types of buttons are handled as well. <button> elements have their html set:"
49 | }
50 | ,"l10nButton": {
51 | "message":"in a button"
52 | }
53 | ,"l10nButtonsBetween": {
54 | "message":"while <input type='submit'> and <input type='button'> get their 'value' set (note: no HTML):"
55 | }
56 | ,"l10nSubmit": {
57 | "message":"a submit value"
58 | }
59 | ,"l10nButtonsAfter": {
60 | "message":"Awesome, no?"
61 | }
62 | ,"l10nExtras": {
63 | "message":"You can even set data-l10n on things like the <title> tag, which lets you have translatable page titles, or fieldset <legend> tags, or anywhere else - the default Boil.localize() behavior will check every tag in the document, not just the body."
64 | ,"description":"inform about places which may not be obvious, like <title>, etc"
65 | }
66 | }
67 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # checkvid
2 |
3 | The overall format of this chrome extension is based on [repo](https://github.com/jiananarthurli/insight_chrome_extension)
4 |
5 | YouTube makes an effort to provide strict parental controls and a kid-friendly ‘YouTube Kids’ website. However, 500 hours of video are uploaded every minute, and creators of inappropriate content are constantly trying to push their videos past the screening algorithms, which cannot be made too strict without severely limiting the market. I used YouTube transcripts to rate videos and mark red flags in a Chrome extension that parents can review quickly. I used a Word2Vec model to see how close a video's text is to a list of ‘bad’ words, as a measure of how non-kid-friendly the text is. This narrows the problem from YouTube having to screen millions of videos exactly right to a parent screening a single video effectively and quickly. It also helps YouTube lose fewer subscribers, who would otherwise leave YouTube after their children watched traumatic content.
6 |
7 | # The Background Model Code
8 | The files for the background model code are in the [check_vid_django/check_vid_src/](./check_vid_django/check_vid_src/) folder. These files are deployed on an EC2 instance, where a Django server listens for a fetch() call from the Chrome extension.
9 |
10 | There are three main folders for the code
11 | 1. Extractor - has the code to run an api that retrieves Youtube transcripts and convert them to a pandas dataframe.
12 | 2. Words - has a list of bad words I search for in videos
13 | 3. Report - uses the above folders to search for inappropriate material in the transcripts and returns a scoring that is converted into red flags of the top 5 most problematic portions of the video. The score is made using a Word2Vec model to compare the transcripts with the list of bad words. The weights for the Word2Vec model are from the well known [GoogleNews-vectors-negative300](https://code.google.com/archive/p/word2vec/)
14 |
15 | # The Chrome Extension
16 | The files for the Chrome extension are in the [check_vid_chromeext](./check_vid_chromeext) folder.
17 |
18 | The extension takes the video's id as input. It then sends the video id to an EC2 instance and retrieves the red flags for inappropriate language in the video. The extension shows the red flags as a list in the display window.
19 |
20 |
21 |
22 | ```bash
23 | .
24 | ├── README.md
25 | ├── _locales
26 | │ └── en
27 | │ └── messages.json
28 | ├── icons
29 | │ ├── Logo.png
30 | │ └── Logo_greyscale.png
31 | ├── manifest.json
32 | └── src
33 | ├── bg
34 | │ ├── background.html
35 | │ └── background.js
36 | └── browser_action
37 | ├── browser_action.html
38 | ├── browser_action.js
39 | └── style.css
40 | ```
41 |
42 | There are four components in the Chrome extension: manifest (```./manifest.json```), background scripts (```./src/bg/background.js```), pop-up window (```./src/browser_action```) and icons (```./icons```).
43 |
44 | Manifest is the starting point of the extension. The basic info (extension name, description, etc), location of resources, and permissions are all stored in [```./manifest.json```](./manifest.json).
45 |
46 | The [```./src/bg/background.js```](./src/bg/background.js) script is solely used to blank the data and output of the Chrome extension when the URL is reloaded.
47 |
48 | The functions that do most of the work are stored in [```./src/browser_action/browser_action.js```](./src/browser_action/browser_action.js).
49 | A function that listens for the click of the "submit" button is added (```document.getElementById('check').onclick```) so that the user starts the process when they are ready. The video id is extracted from the URL and sent to an EC2 instance on AWS, which hosts a long-running Django process. The Django process passes the video id to the background model as input and receives the report of red flags in the video as output. The report is returned as a JSON string and then stored locally (```chrome.storage.local.set()```). The icon is changed (```chrome.browserAction.setIcon()```) to notify the user. The red flags are then retrieved from the browser's local storage space (```chrome.storage.local.get()```). If the space is not empty, a red flag listing page is constructed, using the styles defined in [```./src/browser_action/style.css```](./src/browser_action/style.css). If the process fails, the initial display will not change.
50 |
51 | The locale information is stored in [```./_locales```](./_locales).
52 |
53 |
54 | Google has a great tutorial for Chrome extensions:
55 |
56 | https://developer.chrome.com/extensions/getstarted
57 |
58 | All the Chrome API functions can be found in the official docs:
59 |
60 | https://developer.chrome.com/extensions/api_index
61 |
62 | The Chrome API functions are async functions. More info can be found here:
63 |
64 | https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Statements/async_function
65 |
66 | This is a great site for templates, if you need fancier features in your extension:
67 |
68 | https://extensionizr.com
69 |
--------------------------------------------------------------------------------
/check_vid_chromeext/src/browser_action/browser_action.js:
--------------------------------------------------------------------------------
1 |
function load() {
  // Renders the report kept in chrome.storage.local into the popup: a header
  // line followed by one <li> per red flag (start time + transcript text).
  // Does nothing when no report is stored.

  // Container that receives the header and the list entries.
  var report_text = document.getElementById("report_text");

  // Retrieve data from local storage.
  chrome.storage.local.get("Report", function (data) {

    // Nothing stored (no run yet, or storage was reset): leave the popup as is.
    if (typeof data.Report === "undefined") {
      return;
    }

    // Assigning textContent drops whatever an earlier render left in the
    // container and writes the header as plain text.
    // NOTE(review): the original assigned this header through innerHTML and may
    // have carried markup that is not recoverable from this view - confirm.
    report_text.textContent = 'Below are times and text of the more questionable parts of this video';

    // One list entry per red flag.
    data.Report.forEach(function (report) {
      var entry = document.createElement("li");

      // Start time of the flagged passage.
      var heading = document.createElement("a");
      // The values come from the remote server's response; textContent keeps
      // them from being parsed as HTML inside the extension popup.
      heading.textContent = 'At ' + report.start;
      heading.setAttribute("class", "flag _description");

      // Transcript text of the flagged passage, shown in quotes.
      var quote = document.createElement("p");
      quote.textContent = '"' + report.text + '"';

      // Put the pieces together and append the entry to the list.
      entry.appendChild(heading);
      entry.appendChild(quote);
      report_text.appendChild(entry);
    });
  });
}
82 |
// Render any stored report as soon as the pop-up's DOM is ready.
document.addEventListener('DOMContentLoaded', () => {
  load();
});
89 |
90 | //#####################
91 |
92 |
93 |
function reset() {
  // Show the greyscale logo on the toolbar button.
  const greyIcon = { path: "../../icons/Logo_greyscale.png" };
  chrome.browserAction.setIcon(greyIcon);

  // Empty chrome.storage.local and log once the clear has completed.
  chrome.storage.local.clear(() => {
    console.log("Report reset");
  });
}
104 |
function get_report(url, personal) {
  // Fetches the red flags for the YouTube video shown at `url`, stores them in
  // chrome.storage.local, switches the extension icon to colour and calls
  // load() to render them in the popup.
  //
  // url      - address of the current tab; anything that is not an
  //            https://www.youtube.com/watch?v=... page is ignored.
  // personal - currently unused; kept so existing callers keep working.
  var api_server = "http://52.90.31.24:8000/"; //"http://127.0.0.1:8000/"; // the second address is for local use and testing

  // Parse the address instead of stripping a prefix from the string: this
  // yields only the `v` parameter (no trailing "&t=...", "&list=..."), works
  // when `v` is not the first parameter, and does not match pages that merely
  // contain a YouTube link somewhere in their own address.
  var video_id = null;
  try {
    var parsed = new URL(url);
    if (parsed.protocol === "https:" &&
        parsed.hostname === "www.youtube.com" &&
        parsed.pathname === "/watch") {
      video_id = parsed.searchParams.get("v");
    }
  } catch (err) {
    // `url` was missing or not an absolute URL, so it is not a watch page.
    video_id = null;
  }

  // Not at a YouTube video: nothing to do.
  if (!video_id) {
    return;
  }

  // URL for the http request
  var req_url = api_server + "submission/?video_id=" + encodeURIComponent(video_id);

  // Send the http request
  fetch(req_url)
    .then(function (response) {
      if (!response.ok) {
        throw new Error("Report request failed with HTTP status " + response.status);
      }
      return response.json();
    })
    .then(function (result_json) {
      if (!result_json.found) {
        return;
      }
      // Store the fetched data in local storage for display
      chrome.storage.local.set({Report: result_json.Report, rating: result_json.rating}, function () {
        console.log("Found Report");
        // Change to colored icon
        chrome.browserAction.setIcon({path : "../../icons/Logo.png"});
        load();
      });
    })
    .catch(function (err) {
      // Network errors, non-2xx answers and malformed JSON end up here; the
      // popup is left unchanged and the failure is only logged.
      console.error("Could not fetch report:", err);
    });
}
141 |
142 |
143 |
144 |
document.getElementById('check').onclick = function () {
  // When the button with id "check" is clicked, run the entire process of
  // getting the red flags and entering them into the popup's html.

  //var personal = document.getElementById('personal').value; this may be used at a later date

  // clear the current storage, in case results from a previous run are still stored
  reset();

  // get the current url, and use the video id in that url to fetch red flags
  chrome.tabs.query({'active': true, 'lastFocusedWindow': true}, function (tabs) {
    // The query can return no tab, and a tab may not expose its url; in
    // both cases there is nothing to look up.
    if (!tabs || tabs.length === 0 || !tabs[0].url) {
      return;
    }
    get_report(tabs[0].url); //, personal)
  });

};
162 |
163 |
164 |
165 |
166 |
167 |
168 |
--------------------------------------------------------------------------------
/check_vid_django/check_vid_src/Report/views.py:
--------------------------------------------------------------------------------
1 | from django.shortcuts import render
2 |
3 | import numpy as np
4 | import pandas as pd
5 |
6 | from extractor.views import get_transcript_df
7 | from Words.views import swear_words, sex_words, stop_words #swear_words_model,sex_words_model
8 |
9 | from django.http import HttpResponse
10 | import json
11 | from time import time
12 |
13 | from gensim.models import Word2Vec, KeyedVectors
14 | from gensim.test.utils import get_tmpfile
15 | import string
16 | #<<<<<<< HEAD
17 | #<<<<<<< HEAD
18 | #=======
19 | #from nltk.corpus import stopwords
20 | #>>>>>>> d157a4af14bac5133c0b210e3c62bee1086f3222
21 | #=======
22 | #>>>>>>> 6194741edea21d9add625bb85e3a748d4f43ece4
23 | import nltk
24 | from nltk.corpus import stopwords
25 | #from nltk.corpus import stopwords
26 |
# Shared WordNet lemmatizer instance; text_process() calls lemma.lemmatize() on each word.
lemma=nltk.stem.WordNetLemmatizer()#from nltk.stem import WordNetLemmatizer as lemma




#from fuzzywuzzy import fuzz

# Create your views here.
# Word vectors in word2vec binary format, loaded once when this module is
# imported and used by text_score() for similarity lookups. `limit=100000`
# caps how many vectors gensim reads from the file.
# NOTE(review): the path is relative, so loading presumably depends on the
# process working directory - confirm.
model=KeyedVectors.load_word2vec_format('Report/GoogleNews-vectors-negative300.bin',binary=True, limit=100000)
36 |
def text_process(mess):
    """Clean a piece of text so it can be scored.

    Takes in a string of text, then performs the following:
        1. Remove all punctuation (every character in ``string.punctuation``).
        2. Remove all stop words (compared case-insensitively).
        3. Lower-case and lemmatize each remaining word.

    Args:
        mess: The raw text string.

    Returns:
        The cleaned words joined by single spaces, as one string.
    """
    # Drop punctuation character by character, then rebuild the string.
    nopunc = ''.join(char for char in mess if char not in string.punctuation)

    # Build the stop-word set once; the list used to be rebuilt and scanned
    # linearly for every single word.
    stops = set(stop_words())
    words = [word.lower() for word in nopunc.split() if word.lower() not in stops]

    # The previous version padded 'ass' to 'asss' before lemmatizing,
    # presumably so the lemmatizer would hand back 'ass' instead of a
    # shortened form. That was a case-sensitive substring replace on the whole
    # text, so it also rewrote words such as 'passes' and missed 'Ass'.
    # Protect only the exact word instead.
    lemmatized = [w if w == 'ass' else lemma.lemmatize(w) for w in words]

    return ' '.join(lemmatized)
def text_score(text,badwords):#,badwords_model):#
    """Score how closely a cleaned text matches a list of bad words.

    Args:
        text: Space-separated words (as produced by text_process).
        badwords: List of words to compare against.

    Returns:
        1 when the text contains one of the hard-coded swear stems as a
        substring, or any entry of ``badwords`` as a whole word. Otherwise the
        highest ``model.similarity`` found between a word of the text and a
        bad word (both restricted to the model vocabulary), or 0 when no such
        pair exists or no similarity is positive.
    """
    # Exact hits always score 1, so check them first and skip the
    # similarity search entirely when one is found.
    for stem in ['fuc','shit','crap','bitch']:
        if stem in text:
            return 1
    text_words = set(text.split())
    for word in badwords:
        if word in text_words:
            return 1

    # Only words the model knows can be compared; filter both sides once
    # instead of re-checking the vocabulary inside the nested loop.
    # NOTE(review): `model.vocab` looks like the gensim 3.x API - confirm
    # before upgrading gensim.
    known_badwords = [word for word in badwords if word in model.vocab]

    text_score_all = 0
    for E in only_model_words(text,model).split():
        for word in known_badwords:
            similarity = model.similarity(E,word)
            if similarity > text_score_all:
                text_score_all = similarity
    return text_score_all
83 |
def only_model_words(text,model):
    """Keep only the words of ``text`` that appear in ``model.vocab``.

    Args:
        text: Whitespace-separated words.
        model: Object exposing a ``vocab`` container that supports ``in``.

    Returns:
        The surviving words, in their original order, joined by single spaces.
    """
    known = [token for token in text.split() if token in model.vocab]
    return ' '.join(known)
90 |
def number_to_warning(n):
    """Translate a numeric score into a warning label.

    Args:
        n: The score to classify.

    Returns:
        'probably bad' when n >= 0.9, 'possibly bad' when 0.6 < n < 0.9,
        'probably fine' when n <= 0.6, and None when n satisfies none of the
        comparisons (for example NaN).
    """
    upper = 0.9
    lower = 0.6
    if n >= upper:
        return 'probably bad'
    # n < upper is already known here; the middle band only needs the lower
    # bound. (The old condition was the undefined name `nlower`.)
    if n > lower:
        return 'possibly bad'
    if n <= lower:
        return 'probably fine'
    # Reached only when every comparison is false, e.g. for NaN.
    return None
100 |
def rate_pd(tran_pd,rate_with=5):
    """Score every row of a transcript DataFrame and derive an overall rating.

    Adds two columns to ``tran_pd`` in place: ``score`` (text_score of the
    cleaned ``text`` column) and ``warning`` (the label number_to_warning
    gives for that score).

    Args:
        tran_pd: DataFrame with a ``text`` column.
        rate_with: How many of the highest scores are averaged for the rating.

    Returns:
        A two-item list ``[tran_pd, rating]`` where ``rating`` is the warning
        label for the mean of the ``rate_with`` highest scores.
    """
    #, badwords_model=swear_words_model() + sex_words_model()
    # Build the combined word list once rather than once per row.
    badwords = swear_words() + sex_words()
    tran_pd['score'] = tran_pd.text.apply(lambda t: text_score(text_process(t), badwords=badwords))
    tran_pd['warning'] = tran_pd.score.apply(number_to_warning)
    top_scores = tran_pd.score.sort_values(ascending=False).head(rate_with)
    rating = number_to_warning(top_scores.mean())
    return [tran_pd,rating]
107 |
def sec_to_clock(t):
    """Format a number of seconds as ``hours:minutes:seconds``.

    The value is truncated with ``int`` first; the parts are not zero-padded.
    """
    whole_seconds = int(t)
    hours, leftover = divmod(whole_seconds, 3600)
    minutes, seconds = divmod(leftover, 60)
    return ':'.join(str(part) for part in (hours, minutes, seconds))
111 |
def replace_swears_stars(text):
    """Censor a fixed set of swear words by starring out all but the first letter.

    Matching is by substring and ignores case, so 'Fuck' becomes 'F***'; the
    first character keeps the case it had in the input.

    Args:
        text: The text to censor.

    Returns:
        The text with every occurrence of the listed words censored.
    """
    import re  # local import: only this function needs it

    for word in ['fuck','shit','bitch','crap','ass']:
        stars = '*' * (len(word) - 1)
        # Keep the matched first character (not word[0]) so the original
        # capitalisation survives.
        text = re.sub(re.escape(word), lambda hit: hit.group(0)[0] + stars, text, flags=re.IGNORECASE)

    return text
118 |
119 |
120 |
def Report(request): #request
    """Django view: build the red-flag report for one video and return it as JSON.

    Reads ``video_id`` from the query string, fetches the transcript via
    get_transcript_df, scores it with rate_pd and returns the five
    highest-scoring rows.

    Args:
        request: The incoming request; ``request.GET['video_id']`` is required.

    Returns:
        HttpResponse whose body is a JSON object with the keys ``found``
        (whether any rows were reported), ``Report`` (list of dicts with
        ``text``, ``score``, ``start`` and ``duration``, all as strings) and
        ``rating`` (the overall label from rate_pd).
    """
    # Start the clock before any work is done; it used to be started right
    # before being read, so the printed time lapse was always ~0.
    tic = time()

    # get inputs from request:
    # get id and isolate it from extra info
    video_id = request.GET['video_id']
    video_id = video_id.split('&')[0]
    video_id = video_id.split('=')[0]

    # get the transcript of the video
    tran_pd = get_transcript_df(video_id)

    # score every transcript row and get the overall rating
    Report_df, rating = rate_pd(tran_pd, rate_with=5)

    # keep the five highest-scoring rows; copy so that adding a column below
    # works on an independent frame rather than a slice
    Report_df = Report_df.sort_values(by='score',ascending=False).head(5).copy()
    Report_df['censored_text'] = Report_df.text.apply(replace_swears_stars)

    # build one response entry per reported row
    Report_list = []
    for _, row in Report_df.iterrows():
        Report_list.append({
            'text' : str(row.censored_text),
            'score' : str(row.warning),
            'start' : str(sec_to_clock(row.start)),
            'duration' : str(row.duration),
        })
    found = len(Report_list) != 0

    Report_dict = {'found' : found, 'Report' : Report_list, 'rating' : rating}

    # convert to json
    response = json.dumps(Report_dict)

    # print the elapsed time in the terminal
    print("Time lapse {}".format(time() - tic))

    # return the response that has the relevant rows; this is passed to the chrome extension
    return HttpResponse(response, content_type='application/json')
172 |
173 |
174 |
175 |
--------------------------------------------------------------------------------
/check_vid_django/check_vid_src/Words/views.py:
--------------------------------------------------------------------------------
1 | from django.shortcuts import render
2 |
3 | # Create your views here.
def swear_words():
    """Return the list of swear words used for scoring (a new list on every call)."""
    words = (
        'anal', 'anus', 'arse', 'ass', 'ballsack', 'balls', 'bastard',
        'bitch', 'biatch', 'bloody', 'blowjob', 'blow job', 'bollock',
        'bollok', 'boner', 'boob', 'bugger', 'bum', 'butt', 'buttplug',
        'clitoris', 'cock', 'coon', 'crap', 'cunt', 'damn', 'dick', 'dildo',
        'dyke', 'fag', 'feck', 'fellate', 'fellatio', 'felching', 'fuck',
        'f u c k', 'fudgepacker', 'fudge packer', 'flange', 'Goddamn',
        'God damn', 'hell', 'homo', 'jerk', 'jizz', 'knobend', 'knob end',
        'labia', 'lmao', 'lmfao', 'muff', 'nigger', 'nigga', 'omg', 'penis',
        'piss', 'poop', 'prick', 'pube', 'pussy', 'queer', 'scrotum', 'sex',
        'shit', 's hit', 'sh1t', 'slut', 'smegma', 'spunk', 'tit', 'tosser',
        'turd', 'twat', 'vagina', 'wank', 'whore', 'wtf',
    )
    return list(words)
71 | # def swear_words_model():
72 | # return ['anal',
73 | # 'anus',
74 | # 'ass',
75 | # 'balls',
76 | # 'bastard',
77 | # 'bitch',
78 | # 'bloody',
79 | # 'boob',
80 | # 'bugger',
81 | # 'bum',
82 | # 'butt',
83 | # 'cock',
84 | # 'crap',
85 | # 'damn',
86 | # 'dick',
87 | # 'dyke',
88 | # 'fag',
89 | # 'fuck',
90 | # 'hell',
91 | # 'jerk',
92 | # 'penis',
93 | # 'piss',
94 | # 'poop',
95 | # 'prick',
96 | # 'pussy',
97 | # 'queer',
98 | # 'sex',
99 | # 'shit',
100 | # 'slut',
101 | # 'spunk',
102 | # 'tit',
103 | # 'vagina',
104 | # 'whore']
def sex_words():
    """Return the list of sex-related words used for scoring (a new list on every call)."""
    words = (
        'genitals', 'penises', 'genitalia', 'nipples', 'crotch', 'breasts',
        'vagina', 'dick', 'testicles', 'boobs', 'buttocks', 'oral_sex',
        'anal_sex', 'intercourse', 'boob', 'sexual', 'masturbation', 'ass',
        'genital', 'anus', 'pussy', 'sexual_intercourse', 'nipple',
        'masturbate', 'testicle', 'anal', 'buttock', 'porn', 'orgasm',
        'sexually', 'lovemaking', 'porno', 'sexuality', 'condom',
        'masturbating', 'sexual_encounters', 'nude', 'tits', 'bestiality',
        'fondling', 'kinky', 'libido', 'horny', 'unprotected_sex', 'vaginal',
        'thong', 'consensual_sex', 'orgasms', 'naked', 'heterosexual', 'lewd',
        'panties', 'homosexual', 'nudity', 'butts', 'randy', 'rectum', 'bra',
        'topless', 'cleavage', 'Sex', 'slut', 'prostitution', 'privates',
        'prostitutes', 'prostitute', 'erotic', 'posterior', 'herpes',
        'virginity', 'thongs', 'raunchy', 'fetish', 'bisexual', 'penis',
        'risque', 'risqué', 'sex', 'erotica', 'sexy', 'striptease',
    )
    return list(words)
187 |
188 | # def sex_words_model():
189 | # return ['genitals',
190 | # 'penises',
191 | # 'genitalia',
192 | # 'nipples',
193 | # 'crotch',
194 | # 'breasts',
195 | # 'vagina',
196 | # 'dick',
197 | # 'testicles',
198 | # 'boobs',
199 | # 'buttocks',
200 | # 'oral_sex',
201 | # 'anal_sex',
202 | # 'intercourse',
203 | # 'boob',
204 | # 'sexual',
205 | # 'masturbation',
206 | # 'ass',
207 | # 'genital',
208 | # 'anus',
209 | # 'pussy',
210 | # 'sexual_intercourse',
211 | # 'nipple',
212 | # 'masturbate',
213 | # 'testicle',
214 | # 'anal',
215 | # 'buttock',
216 | # 'porn',
217 | # 'orgasm',
218 | # 'sexually',
219 | # 'lovemaking',
220 | # 'porno',
221 | # 'sexuality',
222 | # 'condom',
223 | # 'masturbating',
224 | # 'sexual_encounters',
225 | # 'nude',
226 | # 'tits',
227 | # 'bestiality',
228 | # 'fondling',
229 | # 'kinky',
230 | # 'libido',
231 | # 'horny',
232 | # 'unprotected_sex',
233 | # 'vaginal',
234 | # 'thong',
235 | # 'consensual_sex',
236 | # 'orgasms',
237 | # 'naked',
238 | # 'heterosexual',
239 | # 'lewd',
240 | # 'panties',
241 | # 'homosexual',
242 | # 'nudity',
243 | # 'butts',
244 | # 'randy',
245 | # 'rectum',
246 | # 'bra',
247 | # 'topless',
248 | # 'cleavage',
249 | # 'Sex',
250 | # 'slut',
251 | # 'prostitution',
252 | # 'privates',
253 | # 'prostitutes',
254 | # 'prostitute',
255 | # 'erotic',
256 | # 'posterior',
257 | # 'herpes',
258 | # 'virginity',
259 | # 'thongs',
260 | # 'raunchy',
261 | # 'fetish',
262 | # 'bisexual',
263 | # 'penis',
264 | # 'risque',
265 | # 'risqué',
266 | # 'sex',
267 | # 'erotica',
268 | # 'sexy',
269 | # 'striptease']
270 |
def stop_words():
    """Return the list of English stop words removed before scoring (a new list on every call)."""
    words = (
        'i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves',
        'you', "you're", "you've", "you'll", "you'd", 'your', 'yours',
        'yourself', 'yourselves',
        'he', 'him', 'his', 'himself',
        'she', "she's", 'her', 'hers', 'herself',
        'it', "it's", 'its', 'itself',
        'they', 'them', 'their', 'theirs', 'themselves',
        'what', 'which', 'who', 'whom',
        'this', 'that', "that'll", 'these', 'those',
        'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being',
        'have', 'has', 'had', 'having',
        'do', 'does', 'did', 'doing',
        'a', 'an', 'the',
        'and', 'but', 'if', 'or', 'because', 'as', 'until', 'while',
        'of', 'at', 'by', 'for', 'with', 'about', 'against', 'between',
        'into', 'through', 'during', 'before', 'after', 'above', 'below',
        'to', 'from', 'up', 'down', 'in', 'out', 'on', 'off', 'over', 'under',
        'again', 'further', 'then', 'once',
        'here', 'there', 'when', 'where', 'why', 'how',
        'all', 'any', 'both', 'each', 'few', 'more', 'most', 'other', 'some',
        'such',
        'no', 'nor', 'not', 'only', 'own', 'same', 'so', 'than', 'too', 'very',
        's', 't', 'can', 'will', 'just', 'don', "don't", 'should',
        "should've", 'now',
        'd', 'll', 'm', 'o', 're', 've', 'y',
        'ain', 'aren', "aren't", 'couldn', "couldn't", 'didn', "didn't",
        'doesn', "doesn't", 'hadn', "hadn't", 'hasn', "hasn't", 'haven',
        "haven't", 'isn', "isn't",
        'ma',
        'mightn', "mightn't", 'mustn', "mustn't", 'needn', "needn't",
        'shan', "shan't", 'shouldn', "shouldn't", 'wasn', "wasn't",
        'weren', "weren't", 'won', "won't", 'wouldn', "wouldn't",
    )
    return list(words)
451 |
--------------------------------------------------------------------------------