├── check_vid_django └── check_vid_src │ ├── Words │ ├── __init__.py │ ├── migrations │ │ └── __init__.py │ ├── models.py │ ├── tests.py │ ├── admin.py │ ├── apps.py │ ├── __pycache__ │ │ ├── views.cpython-37.pyc │ │ └── __init__.cpython-37.pyc │ └── views.py │ ├── Report │ ├── __init__.py │ ├── migrations │ │ └── __init__.py │ ├── models.py │ ├── tests.py │ ├── admin.py │ ├── apps.py │ ├── .gitignore │ ├── __pycache__ │ │ ├── urls.cpython-37.pyc │ │ ├── views.cpython-37.pyc │ │ └── __init__.cpython-37.pyc │ ├── urls.py │ └── views.py │ ├── extractor │ ├── __init__.py │ ├── migrations │ │ └── __init__.py │ ├── models.py │ ├── tests.py │ ├── admin.py │ ├── apps.py │ ├── __pycache__ │ │ ├── views.cpython-37.pyc │ │ └── __init__.cpython-37.pyc │ └── views.py │ ├── check_vid_src │ ├── __init__.py │ ├── __pycache__ │ │ ├── urls.cpython-37.pyc │ │ ├── wsgi.cpython-37.pyc │ │ ├── __init__.cpython-37.pyc │ │ └── settings.cpython-37.pyc │ ├── asgi.py │ ├── wsgi.py │ ├── urls.py │ └── settings.py │ ├── db.sqlite3 │ └── manage.py ├── check_vid_chromeext ├── README.md ├── icons │ ├── Logo.png │ └── Logo_greyscale.png ├── src │ ├── bg │ │ ├── background.html │ │ └── background.js │ ├── page_action │ │ └── popup.html │ └── browser_action │ │ ├── browser_action.html │ │ ├── style.css │ │ └── browser_action.js ├── manifest.json └── _locales │ └── en │ └── messages.json └── README.md /check_vid_django/check_vid_src/Words/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /check_vid_django/check_vid_src/Report/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /check_vid_django/check_vid_src/extractor/__init__.py: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /check_vid_django/check_vid_src/check_vid_src/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /check_vid_django/check_vid_src/Report/migrations/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /check_vid_django/check_vid_src/Words/migrations/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /check_vid_django/check_vid_src/extractor/migrations/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /check_vid_chromeext/README.md: -------------------------------------------------------------------------------- 1 | see parent folder for explanation 2 | -------------------------------------------------------------------------------- /check_vid_django/check_vid_src/Report/models.py: -------------------------------------------------------------------------------- 1 | from django.db import models 2 | 3 | # Create your models here. 4 | -------------------------------------------------------------------------------- /check_vid_django/check_vid_src/Report/tests.py: -------------------------------------------------------------------------------- 1 | from django.test import TestCase 2 | 3 | # Create your tests here. 4 | -------------------------------------------------------------------------------- /check_vid_django/check_vid_src/Words/models.py: -------------------------------------------------------------------------------- 1 | from django.db import models 2 | 3 | # Create your models here. 
4 | -------------------------------------------------------------------------------- /check_vid_django/check_vid_src/Words/tests.py: -------------------------------------------------------------------------------- 1 | from django.test import TestCase 2 | 3 | # Create your tests here. 4 | -------------------------------------------------------------------------------- /check_vid_django/check_vid_src/Report/admin.py: -------------------------------------------------------------------------------- 1 | from django.contrib import admin 2 | 3 | # Register your models here. 4 | -------------------------------------------------------------------------------- /check_vid_django/check_vid_src/Words/admin.py: -------------------------------------------------------------------------------- 1 | from django.contrib import admin 2 | 3 | # Register your models here. 4 | -------------------------------------------------------------------------------- /check_vid_django/check_vid_src/extractor/models.py: -------------------------------------------------------------------------------- 1 | from django.db import models 2 | 3 | # Create your models here. 4 | -------------------------------------------------------------------------------- /check_vid_django/check_vid_src/extractor/tests.py: -------------------------------------------------------------------------------- 1 | from django.test import TestCase 2 | 3 | # Create your tests here. 
4 | -------------------------------------------------------------------------------- /check_vid_chromeext/icons/Logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JesseKanter/checkVid/HEAD/check_vid_chromeext/icons/Logo.png -------------------------------------------------------------------------------- /check_vid_django/check_vid_src/extractor/admin.py: -------------------------------------------------------------------------------- 1 | from django.contrib import admin 2 | 3 | # Register your models here. 4 | -------------------------------------------------------------------------------- /check_vid_django/check_vid_src/db.sqlite3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JesseKanter/checkVid/HEAD/check_vid_django/check_vid_src/db.sqlite3 -------------------------------------------------------------------------------- /check_vid_chromeext/icons/Logo_greyscale.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JesseKanter/checkVid/HEAD/check_vid_chromeext/icons/Logo_greyscale.png -------------------------------------------------------------------------------- /check_vid_django/check_vid_src/Report/apps.py: -------------------------------------------------------------------------------- 1 | from django.apps import AppConfig 2 | 3 | 4 | class ReportConfig(AppConfig): 5 | name = 'Report' 6 | -------------------------------------------------------------------------------- /check_vid_django/check_vid_src/Words/apps.py: -------------------------------------------------------------------------------- 1 | from django.apps import AppConfig 2 | 3 | 4 | class WordsConfig(AppConfig): 5 | name = 'Words' 6 | -------------------------------------------------------------------------------- /check_vid_django/check_vid_src/Report/.gitignore: 
-------------------------------------------------------------------------------- 1 | /Users/jessekanter/Insight/check_vid/check_vid_django/check_vid_src/Report/GoogleNews-vectors-negative300.bin 2 | -------------------------------------------------------------------------------- /check_vid_django/check_vid_src/extractor/apps.py: -------------------------------------------------------------------------------- 1 | from django.apps import AppConfig 2 | 3 | 4 | class ExtractorConfig(AppConfig): 5 | name = 'extractor' 6 | -------------------------------------------------------------------------------- /check_vid_chromeext/src/bg/background.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /check_vid_django/check_vid_src/Report/__pycache__/urls.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JesseKanter/checkVid/HEAD/check_vid_django/check_vid_src/Report/__pycache__/urls.cpython-37.pyc -------------------------------------------------------------------------------- /check_vid_django/check_vid_src/Report/__pycache__/views.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JesseKanter/checkVid/HEAD/check_vid_django/check_vid_src/Report/__pycache__/views.cpython-37.pyc -------------------------------------------------------------------------------- /check_vid_django/check_vid_src/Words/__pycache__/views.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JesseKanter/checkVid/HEAD/check_vid_django/check_vid_src/Words/__pycache__/views.cpython-37.pyc -------------------------------------------------------------------------------- 
/check_vid_django/check_vid_src/Report/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JesseKanter/checkVid/HEAD/check_vid_django/check_vid_src/Report/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /check_vid_django/check_vid_src/Words/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JesseKanter/checkVid/HEAD/check_vid_django/check_vid_src/Words/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /check_vid_django/check_vid_src/extractor/__pycache__/views.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JesseKanter/checkVid/HEAD/check_vid_django/check_vid_src/extractor/__pycache__/views.cpython-37.pyc -------------------------------------------------------------------------------- /check_vid_django/check_vid_src/check_vid_src/__pycache__/urls.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JesseKanter/checkVid/HEAD/check_vid_django/check_vid_src/check_vid_src/__pycache__/urls.cpython-37.pyc -------------------------------------------------------------------------------- /check_vid_django/check_vid_src/check_vid_src/__pycache__/wsgi.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JesseKanter/checkVid/HEAD/check_vid_django/check_vid_src/check_vid_src/__pycache__/wsgi.cpython-37.pyc -------------------------------------------------------------------------------- /check_vid_django/check_vid_src/extractor/__pycache__/__init__.cpython-37.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/JesseKanter/checkVid/HEAD/check_vid_django/check_vid_src/extractor/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /check_vid_django/check_vid_src/check_vid_src/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JesseKanter/checkVid/HEAD/check_vid_django/check_vid_src/check_vid_src/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /check_vid_django/check_vid_src/check_vid_src/__pycache__/settings.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JesseKanter/checkVid/HEAD/check_vid_django/check_vid_src/check_vid_src/__pycache__/settings.cpython-37.pyc -------------------------------------------------------------------------------- /check_vid_django/check_vid_src/Report/urls.py: -------------------------------------------------------------------------------- 1 | from django.conf.urls import url 2 | from Report.views import Report 3 | 4 | 5 | urlpatterns = [ 6 | url('', Report, name='submission'), 7 | ] 8 | 9 | 10 | 11 | ##from django.urls import path 12 | ## 13 | ##from . import views 14 | ## 15 | ##urlpatterns = [ 16 | ## path('', views.index, name='index'), 17 | ##] 18 | -------------------------------------------------------------------------------- /check_vid_django/check_vid_src/check_vid_src/asgi.py: -------------------------------------------------------------------------------- 1 | """ 2 | ASGI config for check_vid_src project. 3 | 4 | It exposes the ASGI callable as a module-level variable named ``application``. 
5 | 6 | For more information on this file, see 7 | https://docs.djangoproject.com/en/3.0/howto/deployment/asgi/ 8 | """ 9 | 10 | import os 11 | 12 | from django.core.asgi import get_asgi_application 13 | 14 | os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'check_vid_src.settings') 15 | 16 | application = get_asgi_application() 17 | -------------------------------------------------------------------------------- /check_vid_django/check_vid_src/check_vid_src/wsgi.py: -------------------------------------------------------------------------------- 1 | """ 2 | WSGI config for check_vid_src project. 3 | 4 | It exposes the WSGI callable as a module-level variable named ``application``. 5 | 6 | For more information on this file, see 7 | https://docs.djangoproject.com/en/3.0/howto/deployment/wsgi/ 8 | """ 9 | 10 | import os 11 | 12 | from django.core.wsgi import get_wsgi_application 13 | 14 | os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'check_vid_src.settings') 15 | 16 | application = get_wsgi_application() 17 | -------------------------------------------------------------------------------- /check_vid_chromeext/src/bg/background.js: -------------------------------------------------------------------------------- 1 | 2 | function reset () { 3 | 4 | // set the icon to greyscale 5 | chrome.browserAction.setIcon({path : "../../icons/Logo_greyscale.png"}); 6 | 7 | // clean the local storage 8 | chrome.storage.local.clear(function () { 9 | console.log("Report reset"); 10 | }); 11 | } 12 | 13 | reset(); 14 | 15 | 16 | 17 | 18 | // // Add a listenser when DOM is loaded. 19 | chrome.webNavigation.onDOMContentLoaded.addListener(function (details) { 20 | 21 | //reset when reload 22 | reset(); 23 | 24 | 25 | }); 26 | -------------------------------------------------------------------------------- /check_vid_chromeext/src/page_action/popup.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 |
6 | Start Time:
7 | Duration:
8 | 9 |
10 | 11 |

Put in the Start Time (hour:minute:second) and the Duration you are interested in finding relevant comments for. Click the "Submit" button and wait for comments to come up (the more comments, the longer the wait).

12 | 13 | 14 | -------------------------------------------------------------------------------- /check_vid_chromeext/manifest.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "CheckVid", 3 | "version": "0.0.2", 4 | "manifest_version": 2, 5 | "description": "augmenting checking youtube videos for parents", 6 | "icons": { 7 | "128": "icons/Logo.png" 8 | }, 9 | "background": { 10 | "scripts": ["src/bg/background.js"], 11 | "persistent": false 12 | }, 13 | "default_locale": "en", 14 | "browser_action": { 15 | "default_icon": "icons/Logo_greyscale.png", 16 | "default_title": "check_vid", 17 | "default_popup": "src/browser_action/browser_action.html" 18 | }, 19 | "permissions": [ 20 | "storage", 21 | "tabs", 22 | "webNavigation", 23 | "background" 24 | ] 25 | } -------------------------------------------------------------------------------- /check_vid_chromeext/src/browser_action/browser_action.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 |
10 | 19 |
20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /check_vid_django/check_vid_src/manage.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """Django's command-line utility for administrative tasks.""" 3 | import os 4 | import sys 5 | 6 | 7 | def main(): 8 | os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'check_vid_src.settings') 9 | try: 10 | from django.core.management import execute_from_command_line 11 | except ImportError as exc: 12 | raise ImportError( 13 | "Couldn't import Django. Are you sure it's installed and " 14 | "available on your PYTHONPATH environment variable? Did you " 15 | "forget to activate a virtual environment?" 16 | ) from exc 17 | execute_from_command_line(sys.argv) 18 | 19 | 20 | if __name__ == '__main__': 21 | main() 22 | -------------------------------------------------------------------------------- /check_vid_django/check_vid_src/extractor/views.py: -------------------------------------------------------------------------------- 1 | from django.shortcuts import render 2 | 3 | import pandas as pd 4 | 5 | from tqdm import tqdm 6 | 7 | import time 8 | import os 9 | import string 10 | 11 | 12 | 13 | from youtube_transcript_api import YouTubeTranscriptApi 14 | 15 | # Create your views here. 
16 | 17 | def get_transcript_df(video_id_input): 18 | text=[] 19 | start=[] 20 | duration=[] 21 | 22 | for dic in YouTubeTranscriptApi.get_transcript(video_id_input): 23 | text+=[dic['text']] 24 | start+=[dic['start']] 25 | duration+=[dic['duration']] 26 | 27 | 28 | output_dict_tran = { 29 | 'text': text, 30 | 'start': start, 31 | 'duration': duration, 32 | } 33 | return pd.DataFrame(output_dict_tran, columns = output_dict_tran.keys()) 34 | 35 | -------------------------------------------------------------------------------- /check_vid_django/check_vid_src/check_vid_src/urls.py: -------------------------------------------------------------------------------- 1 | """check_vid_src URL Configuration 2 | 3 | The `urlpatterns` list routes URLs to views. For more information please see: 4 | https://docs.djangoproject.com/en/3.0/topics/http/urls/ 5 | Examples: 6 | Function views 7 | 1. Add an import: from my_app import views 8 | 2. Add a URL to urlpatterns: path('', views.home, name='home') 9 | Class-based views 10 | 1. Add an import: from other_app.views import Home 11 | 2. Add a URL to urlpatterns: path('', Home.as_view(), name='home') 12 | Including another URLconf 13 | 1. Import the include() function: from django.urls import include, path 14 | 2. 
Add a URL to urlpatterns: path('blog/', include('blog.urls')) 15 | """ 16 | from django.contrib import admin 17 | from django.urls import path 18 | from django.conf.urls import url, include 19 | from Report.views import Report 20 | 21 | urlpatterns = [ 22 | path('admin/', admin.site.urls), 23 | ] 24 | 25 | 26 | urlpatterns += [ 27 | url('submission/', include('Report.urls')), 28 | ] 29 | -------------------------------------------------------------------------------- /check_vid_chromeext/src/browser_action/style.css: -------------------------------------------------------------------------------- 1 | body { 2 | margin: 0px 0px 0px 0px !important; 3 | min-width: 400px; /* your desired width */ 4 | max-width: 100%; 5 | } 6 | 7 | .Report_container { 8 | /*display: none;*/ 9 | /*display: block;*/ 10 | /*left: 0;*/ 11 | /*position: absolute;*/ 12 | /*top: 34px;*/ 13 | /*width: 210px; */ 14 | height: 50px; 15 | width: 400px; 16 | font-size:18px; 17 | /*box-shadow: 0 3px 10px hsl(0, 0%, 30%);*/ 18 | font-family: Helvetica, Arial, sans-serif; 19 | /*font-family: Georgia, Helvetica, Arial, sans-serif;*/ 20 | /*box-sizing: border-box;*/ 21 | } 22 | 23 | h4 { 24 | padding: 16px 16px 16px 16px; 25 | font-size: 15px; 26 | color: hsl(0, 0%, 50%); 27 | font-family: Helvetica, Arial, sans-serif; 28 | } 29 | 30 | .events { 31 | list-style: none; 32 | padding: 0px 0px 0px 0px; 33 | } 34 | 35 | .events a { 36 | font-size: 15px; 37 | background-color: hsl(0, 0%, 100%, 0); 38 | color: hsl(0, 0%, 10%); 39 | display: block; 40 | text-decoration: none; 41 | } 42 | 43 | .events li { 44 | /*font-size: 14px;*/ 45 | background-color: hsl(0, 0%, 100%); 46 | color: hsl(0, 0%, 20%); 47 | display: block; 48 | padding: 8px 20px 8px 20px; 49 | text-decoration: none; 50 | border-bottom: 1px solid hsl(0, 0%, 80%); 51 | } 52 | 53 | .events li:hover { 54 | background-color: hsl(27, 84%, 77%, 1); 55 | text-decoration: none; 56 | } 57 | 58 | .events ul { 59 | /*font-size: 14px;*/ 60 | background-color: 
hsl(0, 0%, 100%); 61 | color: hsl(0, 0%, 20%); 62 | display: block; 63 | padding: 8px 20px 8px 20px; 64 | text-decoration: none; 65 | /* border-bottom: 1px solid hsl(0, 0%, 80%); 66 | */} 67 | 68 | .events ul:hover { 69 | background-color: hsl(27, 84%, 77%, 1); 70 | text-decoration: none; 71 | } 72 | 73 | .venue { 74 | font-size: 12px; 75 | font-style: bold; 76 | background-color: hsl(0, 0%, 100%, 0); 77 | color: hsl(0, 0%, 40%); 78 | display: block; 79 | padding: 0px 8px 4px 0px; 80 | text-decoration: none; 81 | } 82 | 83 | /*******************************/ 84 | .dropbtn { 85 | background-color: #3498DB; 86 | color: white; 87 | padding: 16px; 88 | font-size: 16px; 89 | border: none; 90 | cursor: pointer; 91 | } 92 | 93 | .dropbtn:hover, .dropbtn:focus { 94 | background-color: #2980B9; 95 | } 96 | 97 | .dropdown { 98 | position: relative; 99 | display: inline-block; 100 | } 101 | 102 | .dropdown-content { 103 | display: none; 104 | position: absolute; 105 | background-color: #f1f1f1; 106 | min-width: 160px; 107 | overflow: auto; 108 | box-shadow: 0px 8px 16px 0px rgba(0,0,0,0.2); 109 | z-index: 1; 110 | } 111 | 112 | .dropdown-content a { 113 | color: black; 114 | padding: 12px 16px; 115 | text-decoration: none; 116 | display: block; 117 | } 118 | 119 | .dropdown a:hover {background-color: #ddd;} 120 | 121 | .show {display: block;} 122 | 123 | /**********************/ 124 | /* 125 | .Fs8au { 126 | border-top: 1px solid hsl(0, 0%, 90%); 127 | } 128 | 129 | .SWhEv { 130 | border-bottom: 1px solid hsl(0, 0%, 90%); 131 | }*/ 132 | -------------------------------------------------------------------------------- /check_vid_django/check_vid_src/check_vid_src/settings.py: -------------------------------------------------------------------------------- 1 | """ 2 | Django settings for check_vid_src project. 3 | 4 | Generated by 'django-admin startproject' using Django 3.0.2. 
5 | 6 | For more information on this file, see 7 | https://docs.djangoproject.com/en/3.0/topics/settings/ 8 | 9 | For the full list of settings and their values, see 10 | https://docs.djangoproject.com/en/3.0/ref/settings/ 11 | """ 12 | 13 | import os 14 | 15 | # Build paths inside the project like this: os.path.join(BASE_DIR, ...) 16 | BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) 17 | 18 | 19 | # Quick-start development settings - unsuitable for production 20 | # See https://docs.djangoproject.com/en/3.0/howto/deployment/checklist/ 21 | 22 | # SECURITY WARNING: keep the secret key used in production secret! 23 | SECRET_KEY = '@(a^an3h9fk(w5kiq4cep(j=3%r@%g9zx+w65dcxheg+$*f&@x' 24 | 25 | # SECURITY WARNING: don't run with debug turned on in production! 26 | DEBUG = True 27 | 28 | ALLOWED_HOSTS = ['52.90.31.24'] 29 | 30 | 31 | # Application definition 32 | 33 | INSTALLED_APPS = [ 34 | 'django.contrib.admin', 35 | 'django.contrib.auth', 36 | 'django.contrib.contenttypes', 37 | 'django.contrib.sessions', 38 | 'django.contrib.messages', 39 | 'django.contrib.staticfiles', 40 | 'corsheaders', 41 | ] 42 | 43 | MIDDLEWARE = [ 44 | 'django.middleware.security.SecurityMiddleware', 45 | 'django.contrib.sessions.middleware.SessionMiddleware', 46 | 'django.middleware.common.CommonMiddleware', 47 | 'django.middleware.csrf.CsrfViewMiddleware', 48 | 'django.contrib.auth.middleware.AuthenticationMiddleware', 49 | 'django.contrib.messages.middleware.MessageMiddleware', 50 | 'django.middleware.clickjacking.XFrameOptionsMiddleware', 51 | 'corsheaders.middleware.CorsMiddleware', 52 | 'django.middleware.common.CommonMiddleware', 53 | ] 54 | 55 | 56 | CORS_ORIGIN_ALLOW_ALL = True 57 | 58 | ROOT_URLCONF = 'check_vid_src.urls' 59 | 60 | TEMPLATES = [ 61 | { 62 | 'BACKEND': 'django.template.backends.django.DjangoTemplates', 63 | 'DIRS': [], 64 | 'APP_DIRS': True, 65 | 'OPTIONS': { 66 | 'context_processors': [ 67 | 'django.template.context_processors.debug', 68 | 
'django.template.context_processors.request', 69 | 'django.contrib.auth.context_processors.auth', 70 | 'django.contrib.messages.context_processors.messages', 71 | ], 72 | }, 73 | }, 74 | ] 75 | 76 | WSGI_APPLICATION = 'check_vid_src.wsgi.application' 77 | 78 | 79 | # Database 80 | # https://docs.djangoproject.com/en/3.0/ref/settings/#databases 81 | 82 | DATABASES = { 83 | 'default': { 84 | 'ENGINE': 'django.db.backends.sqlite3', 85 | 'NAME': os.path.join(BASE_DIR, 'db.sqlite3'), 86 | } 87 | } 88 | 89 | 90 | # Password validation 91 | # https://docs.djangoproject.com/en/3.0/ref/settings/#auth-password-validators 92 | 93 | AUTH_PASSWORD_VALIDATORS = [ 94 | { 95 | 'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator', 96 | }, 97 | { 98 | 'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator', 99 | }, 100 | { 101 | 'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator', 102 | }, 103 | { 104 | 'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator', 105 | }, 106 | ] 107 | 108 | 109 | # Internationalization 110 | # https://docs.djangoproject.com/en/3.0/topics/i18n/ 111 | 112 | LANGUAGE_CODE = 'en-us' 113 | 114 | TIME_ZONE = 'UTC' 115 | 116 | USE_I18N = True 117 | 118 | USE_L10N = True 119 | 120 | USE_TZ = True 121 | 122 | 123 | # Static files (CSS, JavaScript, Images) 124 | # https://docs.djangoproject.com/en/3.0/howto/static-files/ 125 | 126 | STATIC_URL = '/static/' 127 | STATIC_ROOT = os.path.join(BASE_DIR, "static/") 128 | -------------------------------------------------------------------------------- /check_vid_chromeext/_locales/en/messages.json: -------------------------------------------------------------------------------- 1 | { 2 | "l10nTabName": { 3 | "message":"Localization" 4 | ,"description":"name of the localization tab" 5 | } 6 | ,"l10nHeader": { 7 | "message":"It does localization too! 
(this whole tab is, actually)" 8 | ,"description":"Header text for the localization section" 9 | } 10 | ,"l10nIntro": { 11 | "message":"'L10n' refers to 'Localization' - 'L' an 'n' are obvious, and 10 comes from the number of letters between those two. It is the process/whatever of displaying something in the language of choice. It uses 'I18n', 'Internationalization', which refers to the tools / framework supporting L10n. I.e., something is internationalized if it has I18n support, and can be localized. Something is localized for you if it is in your language / dialect." 12 | ,"description":"introduce the basic idea." 13 | } 14 | ,"l10nProd": { 15 | "message":"You are planning to allow localization, right? You have no idea who will be using your extension! You have no idea who will be translating it! At least support the basics, it's not hard, and having the framework in place will let you transition much more easily later on." 16 | ,"description":"drive the point home. It's good for you." 17 | } 18 | ,"l10nFirstParagraph": { 19 | "message":"When the options page loads, elements decorated with data-l10n will automatically be localized!" 20 | ,"description":"inform that elements will be localized on load" 21 | } 22 | ,"l10nSecondParagraph": { 23 | "message":"If you need more complex localization, you can also define data-l10n-args. This should contain $containerType$ filled with $dataType$, which will be passed into Chrome's i18n API as $functionArgs$. In fact, this paragraph does just that, and wraps the args in mono-space font. Easy!" 24 | ,"description":"introduce the data-l10n-args attribute. End on a lame note." 
25 | ,"placeholders": { 26 | "containerType": { 27 | "content":"$1" 28 | ,"example":"'array', 'list', or something similar" 29 | ,"description":"type of the args container" 30 | } 31 | ,"dataType": { 32 | "content":"$2" 33 | ,"example":"string" 34 | ,"description":"type of data in each array index" 35 | } 36 | ,"functionArgs": { 37 | "content":"$3" 38 | ,"example":"arguments" 39 | ,"description":"whatever you call what you pass into a function/method. args, params, etc." 40 | } 41 | } 42 | } 43 | ,"l10nThirdParagraph": { 44 | "message":"Message contents are passed right into innerHTML without processing - include any tags (or even scripts) that you feel like. If you have an input field, the placeholder will be set instead, and buttons will have the value attribute set." 45 | ,"description":"inform that we handle placeholders, buttons, and direct HTML input" 46 | } 47 | ,"l10nButtonsBefore": { 48 | "message":"Different types of buttons are handled as well. <button> elements have their html set:" 49 | } 50 | ,"l10nButton": { 51 | "message":"in a button" 52 | } 53 | ,"l10nButtonsBetween": { 54 | "message":"while <input type='submit'> and <input type='button'> get their 'value' set (note: no HTML):" 55 | } 56 | ,"l10nSubmit": { 57 | "message":"a submit value" 58 | } 59 | ,"l10nButtonsAfter": { 60 | "message":"Awesome, no?" 61 | } 62 | ,"l10nExtras": { 63 | "message":"You can even set data-l10n on things like the <title> tag, which lets you have translatable page titles, or fieldset <legend> tags, or anywhere else - the default Boil.localize() behavior will check every tag in the document, not just the body." 
64 | ,"description":"inform about places which may not be obvious, like , etc" 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # checkvid 2 | 3 | The overall format of this Chrome extension is based on [repo](https://github.com/jiananarthurli/insight_chrome_extension) 4 | 5 | YouTube makes efforts to have strict parental controls and a kid-friendly ‘YouTube Kids’ website. However, 500 hours of video are uploaded every minute, and inappropriate creators are constantly trying to push their videos past the algorithms, which cannot be too strict at the risk of severely limiting the market. I used YouTube transcripts to rate videos and mark red flags in a Chrome extension that parents can review quickly. I used a Word2Vec model to see how close a video's text is to a list of ‘bad’ words as a measure of how non-kid-friendly the text is. This narrows the problem from millions of videos for YouTube to screen exactly right, to just the parent screening single videos effectively and quickly. This also helps YouTube to lose fewer subscribers, who would otherwise leave YouTube due to traumatic child viewership. 6 | 7 | # The Background Model Code 8 | The files for the background model code are in the [check_vid_django/check_vid_src/](./check_vid_django/check_vid_src/) folder. These files are on an EC2 instance where Django is running to listen for a fetch() call from the Chrome extension. 9 | 10 | There are three main folders for the code: 11 | 1. Extractor - has the code to run an API that retrieves YouTube transcripts and converts them to a pandas DataFrame. 12 | 2. Words - has a list of bad words I search for in videos 13 | 3. Report - uses the above folders to search for inappropriate material in the transcripts and returns a scoring that is converted into red flags of the top 5 most problematic portions of the video. 
The score is made using a Word2Vec model to compare the transcripts with the list of bad words. The weights for the Word2Vec model are from the well-known [GoogleNews-vectors-negative300](https://code.google.com/archive/p/word2vec/) 14 | 15 | # The Chrome Extension 16 | The files for the chrome extension are in the [check_vid_chromeext](./check_vid_chromeext) folder. 17 | 18 | The extension takes the video's id as its input. It then sends the video id to an EC2 instance and retrieves the red flags of inappropriate language in the video. The extension returns the red flags as a list in the display window. 19 | 20 | 21 | 22 | ```bash 23 | . 24 | ├── README.md 25 | ├── _locales 26 | │   └── en 27 | │   └── messages.json 28 | ├── icons 29 | │   ├── Logo.png 30 | │   └── Logo_greyscale.png 31 | ├── manifest.json 32 | └── src 33 | ├── bg 34 | │   ├── background.html 35 | │   └── background.js 36 | └── browser_action 37 | ├── browser_action.html 38 | ├── browser_action.js 39 | └── style.css 40 | ``` 41 | 42 | There are four components in the Chrome extension: manifest (```./manifest.json```), background scripts (```./src/bg/background.js```), pop-up window (```./src/browser_action```) and icons (```./icons```). 43 | 44 | Manifest is the starting point of the extension. The basic info (extension name, description, etc), location of resources, and permissions are all stored in [```./manifest.json```](./manifest.json). 45 | 46 | The [```./src/bg/background.js```](./src/bg/background.js) is solely used to blank the data and output of the chrome extension when the url is reloaded. 47 | 48 | The functions that do most of the work are stored in [```./src/browser_action/browser_action.js```](./src/browser_action/browser_action.js). 49 | A function that listens for the click of the button with id "check" is added (document.getElementById('check').onclick) so that the user will start the process when they are ready. 
The video id is extracted from the URL, and sent to an EC2 instance on AWS, which hosts an ongoing Django process. The Django process is used to pass the video_id as an input to the background model and to receive the report of red flags in the video as an output. The report is returned as a JSON string and then stored locally (```chrome.storage.local.set()```). The icon will be changed (```chrome.browserAction.setIcon()```) to notify the user. The red flags will then be retrieved from the browser's local storage space (```chrome.storage.local.get()```). If the space is not empty, a red flag listing page will be constructed, using the styles defined in [```./src/browser_action/style.css```](./src/browser_action/style.css). If the process fails, the initial display will not change. 50 | 51 | The locale information is stored in [```./_locales```](./_locales). 52 | 53 | 54 | Google has a great tutorial for Chrome extensions: 55 | 56 | https://developer.chrome.com/extensions/getstarted 57 | 58 | All the Chrome API functions can be found in the official docs: 59 | 60 | https://developer.chrome.com/extensions/api_index 61 | 62 | The Chrome API functions are async functions. 
// Pop-up logic for the checkvid extension: asks the report server for the
// red flags of the YouTube video in the active tab, caches them in
// chrome.storage.local and renders them into the pop-up.

/**
 * Render the report cached in chrome.storage.local into #report_text.
 * Leaves the pop-up untouched when no report is stored.
 */
function load() {

    // Element that receives the heading and one <li> per red flag.
    var report_text = document.getElementById("report_text");

    // Retrieve data from local storage.
    chrome.storage.local.get("Report", function(data) {

        // Nothing stored (or storage was just cleared): keep the initial display.
        if (!Array.isArray(data.Report)) {
            return;
        }

        // Static markup only, so innerHTML is safe here; assigning it also
        // discards the entries of any previous render.
        report_text.innerHTML = 'Below are times and text of the more questionable parts of this video<br><br>';

        // Generate one list entry per red flag.
        data.Report.forEach(function(report) {

            var elmnt = document.createElement("li");

            // Start time of the flagged transcript segment.
            var div = document.createElement("a");
            div.textContent = 'At ' + report.start;
            div.setAttribute("class", "flag _description");

            // Transcript text of the segment. It originates from the server
            // (and ultimately from the video), so it is inserted as text,
            // never as HTML.
            var p = document.createElement("p");
            p.textContent = '"' + report.text + '"';

            // Put all elements together and append the entry to the list.
            elmnt.appendChild(div);
            elmnt.appendChild(p);
            report_text.appendChild(elmnt);
        });
    });
}

// Trigger the function when DOM of the pop-up is loaded.
document.addEventListener('DOMContentLoaded', function() {
    load();
});

/**
 * Switch the toolbar icon back to greyscale and drop any cached report.
 */
function reset() {

    // set the icon to greyscale
    chrome.browserAction.setIcon({path : "../../icons/Logo_greyscale.png"});

    // clean the local storage
    chrome.storage.local.clear(function () {
        console.log("Report reset");
    });
}

/**
 * Return the video id of a https://www.youtube.com/watch?v=... URL,
 * or null when the URL is not a YouTube watch page.
 */
function extract_video_id(url) {
    var parsed;
    try {
        parsed = new URL(url);
    } catch (e) {
        // Not a parseable absolute URL.
        return null;
    }

    if (parsed.protocol !== "https:" ||
            parsed.hostname !== "www.youtube.com" ||
            parsed.pathname !== "/watch") {
        return null;
    }

    // searchParams isolates the id from trailing parameters such as &t= or &list=.
    return parsed.searchParams.get("v") || null;
}

/**
 * Fetch the red flags of the video at `url`, store them in local storage,
 * switch the extension icon to colour and call load() to render them.
 * Does nothing when `url` is not a YouTube watch page.
 *
 * @param {string} url       URL of the active tab.
 * @param {*}      personal  Unused; reserved for a later feature.
 */
function get_report(url, personal) {
    // NOTE(review): plain-HTTP endpoint on a fixed IP. Use "http://127.0.0.1:8000/"
    // for local use and testing.
    var api_server = "http://52.90.31.24:8000/";

    // check to see if user is at a youtube video
    var video_id = extract_video_id(url);
    if (video_id === null) {
        return;
    }

    // URL for the http request
    var req_url = api_server + "submission/?video_id=" + encodeURIComponent(video_id);

    fetch(req_url)
        .then(function(r) {
            if (!r.ok) {
                throw new Error("Report server answered HTTP " + r.status);
            }
            return r.json();
        })
        .then(function(result_json) {
            if (!result_json.found) {
                return;
            }
            // Store the fetched data into local storage for display
            chrome.storage.local.set({Report: result_json.Report, rating: result_json.rating}, function() {
                console.log("Found Report");
                // Change to colored icon
                chrome.browserAction.setIcon({path : "../../icons/Logo.png"});
                load();
            });
        })
        .catch(function(err) {
            // Network failure, HTTP error or malformed JSON: the initial
            // display stays as it is, but the cause is no longer swallowed.
            console.error("Could not fetch report:", err);
        });
}

// When the "check" button is clicked, run the entire process of getting the
// red flags and entering them into the pop-up.
document.getElementById('check').onclick = function() {

    // clear the current storage, in case red flags from a previous run are still in storage
    reset();

    // get current url, and use the video id in that url to fetch red flags
    chrome.tabs.query({'active': true, 'lastFocusedWindow': true}, function (tabs) {
        // The query can come back empty, or without a readable url.
        if (!tabs || tabs.length === 0 || !tabs[0].url) {
            return;
        }
        get_report(tabs[0].url);
    });
};
from django.shortcuts import render

import numpy as np
import pandas as pd

from extractor.views import get_transcript_df
from Words.views import swear_words, sex_words, stop_words

from django.http import HttpResponse
import json
import re
from time import time

from gensim.models import Word2Vec, KeyedVectors
from gensim.test.utils import get_tmpfile
import string
import nltk
from nltk.corpus import stopwords

# Shared lemmatizer used by text_process().
lemma = nltk.stem.WordNetLemmatizer()

# Pre-trained GoogleNews word vectors; `limit` caps how many vectors are read.
model = KeyedVectors.load_word2vec_format('Report/GoogleNews-vectors-negative300.bin', binary=True, limit=100000)

# Words masked by replace_swears_stars(). 'crap' and 'ass' are only matched at
# the start of a word ('ass' additionally only as 'ass', 'asses' or 'asshole...')
# so that innocent words such as 'scrap', 'class' or 'assume' stay readable.
_CENSOR_PATTERN = re.compile(r'fuck|shit|bitch|\bcrap|\bass(?=es\b|hole|\b)', re.IGNORECASE)


def text_process(mess):
    """
    Normalise a piece of transcript text for scoring.

    Takes in a string of text, then performs the following:
    1. Remove all punctuation
    2. Remove all stopwords
    3. Lowercase and lemmatize
    4. Return the cleaned words joined by single spaces
    """
    # Drop every punctuation character.
    nopunc = ''.join(char for char in mess if char not in string.punctuation)

    # Build the stop-word lookup once per call instead of once per word.
    stop_set = set(stop_words())
    nostop = ' '.join(word for word in nopunc.split() if word.lower() not in stop_set)

    # NOTE(review): substring hack kept byte-for-byte from the original.
    # Presumably it stops the lemmatizer from shortening 'ass' - confirm.
    # It also alters any other word containing 'ass' and runs before
    # lowercasing, so capitalised forms are not covered.
    nostop = nostop.replace('ass', 'asss')

    lemmatized = [lemma.lemmatize(w) for w in nostop.lower().split()]
    return ' '.join(lemmatized)


def text_score(text, badwords):
    """
    Score how close `text` is to the words in `badwords`.

    Returns 1 when the text contains one of a few hard-coded swear stems or
    any entry of `badwords` as a whole word. Otherwise returns the highest
    Word2Vec similarity between a word of the text and a bad word, counting
    only words known to the model, or 0 when there is nothing to compare.
    """
    # Hard overrides first: they decide the result regardless of similarity.
    # NOTE(review): these are substring checks, so e.g. 'scrap' also matches.
    if any(stem in text for stem in ['fuc', 'shit', 'crap', 'bitch']):
        return 1
    tokens = text.split()
    if any(word in tokens for word in badwords):
        return 1

    known_tokens = only_model_words(text, model).split()
    known_badwords = [word for word in badwords if word in model.vocab]

    text_score_all = 0
    for token in known_tokens:
        for word in known_badwords:
            similarity = model.similarity(token, word)
            if similarity > text_score_all:
                text_score_all = similarity
    return text_score_all


def only_model_words(text, model):
    """Return `text` reduced to the words present in `model.vocab`."""
    return ' '.join(word for word in text.split() if word in model.vocab)


def number_to_warning(n):
    """
    Translate a numeric score into a warning label.

    >= 0.9 is 'probably bad', above 0.6 is 'possibly bad', anything else is
    'probably fine'. A NaN score compares false everywhere and yields None.
    """
    upper = 0.9
    lower = 0.6
    if n >= upper:
        return 'probably bad'
    if n > lower:
        return 'possibly bad'
    if n <= lower:
        return 'probably fine'
    return None


def rate_pd(tran_pd, rate_with=5):
    """
    Add 'score' and 'warning' columns to `tran_pd` (in place) and rate the video.

    The rating is the warning label of the mean of the `rate_with` highest
    scores. Returns [tran_pd, rating].
    """
    # Build the bad-word list once instead of once per transcript row.
    badwords = swear_words() + sex_words()
    tran_pd['score'] = tran_pd.text.apply(lambda t: text_score(text_process(t), badwords=badwords))
    tran_pd['warning'] = tran_pd.score.apply(number_to_warning)
    top_mean = tran_pd.score.sort_values(ascending=False).head(rate_with).mean()
    return [tran_pd, number_to_warning(top_mean)]


def sec_to_clock(t):
    """Format a number of seconds as H:MM:SS, e.g. 65 -> '0:01:05'."""
    hours, remainder = divmod(int(t), 3600)
    minutes, seconds = divmod(remainder, 60)
    return '{}:{:02d}:{:02d}'.format(hours, minutes, seconds)


def replace_swears_stars(text):
    """
    Mask swear words in `text`, keeping the first letter and starring the rest.

    Matching is case-insensitive, e.g. 'Shit' -> 'S***'.
    """
    def mask(match):
        word = match.group(0)
        return word[0] + '*' * (len(word) - 1)

    return _CENSOR_PATTERN.sub(mask, text)


def Report(request):
    """
    Django view: return the red-flag report for a video as a JSON response.

    Reads `video_id` from the query string, scores the transcript and returns
    the 5 highest-scoring segments (censored text, warning label, start time,
    duration) together with the overall rating. Responds with HTTP 400 and an
    empty report when `video_id` is missing.
    """
    # Start the timer before the work so the logged time lapse is meaningful.
    tic = time()

    video_id = request.GET.get('video_id')
    if not video_id:
        empty = json.dumps({'found': False, 'Report': [], 'rating': None})
        return HttpResponse(empty, content_type='application/json', status=400)

    # Isolate the id from any extra info that was sent along with it.
    video_id = video_id.split('&')[0]
    video_id = video_id.split('=')[0]

    # Get the transcript of the video and score every segment.
    tran_pd = get_transcript_df(video_id)
    [Report_df, rating] = rate_pd(tran_pd, rate_with=5)

    # Keep the 5 worst segments; copy so the new column is not set on a slice.
    Report_df = Report_df.sort_values(by='score', ascending=False).head(5).copy()
    Report_df['censored_text'] = Report_df.text.apply(replace_swears_stars)

    # Build the response entries from the selected segments.
    Report_list = [
        {
            'text': str(row.censored_text),
            'score': str(row.warning),
            'start': str(sec_to_clock(row.start)),
            'duration': str(row.duration),
        }
        for _, row in Report_df.iterrows()
    ]

    Report_dict = {'found': bool(Report_list), 'Report': Report_list, 'rating': rating}
    response = json.dumps(Report_dict)

    # Print the processing time in the terminal.
    print("Time lapse {}".format(time() - tic))

    # This response is passed to the chrome extension.
    return HttpResponse(response, content_type='application/json')
from django.shortcuts import render

# Word lists used to score video transcripts. Each accessor returns a fresh
# list, so callers may concatenate or modify the result freely.

_SWEAR_WORDS = (
    'anal', 'anus', 'arse', 'ass', 'ballsack', 'balls', 'bastard', 'bitch',
    'biatch', 'bloody', 'blowjob', 'blow job', 'bollock', 'bollok', 'boner',
    'boob', 'bugger', 'bum', 'butt', 'buttplug', 'clitoris', 'cock', 'coon',
    'crap', 'cunt', 'damn', 'dick', 'dildo', 'dyke', 'fag', 'feck',
    'fellate', 'fellatio', 'felching', 'fuck', 'f u c k', 'fudgepacker',
    'fudge packer', 'flange', 'Goddamn', 'God damn', 'hell', 'homo', 'jerk',
    'jizz', 'knobend', 'knob end', 'labia', 'lmao', 'lmfao', 'muff',
    'nigger', 'nigga', 'omg', 'penis', 'piss', 'poop', 'prick', 'pube',
    'pussy', 'queer', 'scrotum', 'sex', 'shit', 's hit', 'sh1t', 'slut',
    'smegma', 'spunk', 'tit', 'tosser', 'turd', 'twat', 'vagina', 'wank',
    'whore', 'wtf',
)

_SEX_WORDS = (
    'genitals', 'penises', 'genitalia', 'nipples', 'crotch', 'breasts',
    'vagina', 'dick', 'testicles', 'boobs', 'buttocks', 'oral_sex',
    'anal_sex', 'intercourse', 'boob', 'sexual', 'masturbation', 'ass',
    'genital', 'anus', 'pussy', 'sexual_intercourse', 'nipple',
    'masturbate', 'testicle', 'anal', 'buttock', 'porn', 'orgasm',
    'sexually', 'lovemaking', 'porno', 'sexuality', 'condom',
    'masturbating', 'sexual_encounters', 'nude', 'tits', 'bestiality',
    'fondling', 'kinky', 'libido', 'horny', 'unprotected_sex', 'vaginal',
    'thong', 'consensual_sex', 'orgasms', 'naked', 'heterosexual', 'lewd',
    'panties', 'homosexual', 'nudity', 'butts', 'randy', 'rectum', 'bra',
    'topless', 'cleavage', 'Sex', 'slut', 'prostitution', 'privates',
    'prostitutes', 'prostitute', 'erotic', 'posterior', 'herpes',
    'virginity', 'thongs', 'raunchy', 'fetish', 'bisexual', 'penis',
    'risque', 'risqué', 'sex', 'erotica', 'sexy', 'striptease',
)

_STOP_WORDS = (
    'i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you',
    "you're", "you've", "you'll", "you'd", 'your', 'yours', 'yourself',
    'yourselves', 'he', 'him', 'his', 'himself', 'she', "she's", 'her',
    'hers', 'herself', 'it', "it's", 'its', 'itself', 'they', 'them',
    'their', 'theirs', 'themselves', 'what', 'which', 'who', 'whom', 'this',
    'that', "that'll", 'these', 'those', 'am', 'is', 'are', 'was', 'were',
    'be', 'been', 'being', 'have', 'has', 'had', 'having', 'do', 'does',
    'did', 'doing', 'a', 'an', 'the', 'and', 'but', 'if', 'or', 'because',
    'as', 'until', 'while', 'of', 'at', 'by', 'for', 'with', 'about',
    'against', 'between', 'into', 'through', 'during', 'before', 'after',
    'above', 'below', 'to', 'from', 'up', 'down', 'in', 'out', 'on', 'off',
    'over', 'under', 'again', 'further', 'then', 'once', 'here', 'there',
    'when', 'where', 'why', 'how', 'all', 'any', 'both', 'each', 'few',
    'more', 'most', 'other', 'some', 'such', 'no', 'nor', 'not', 'only',
    'own', 'same', 'so', 'than', 'too', 'very', 's', 't', 'can', 'will',
    'just', 'don', "don't", 'should', "should've", 'now', 'd', 'll', 'm',
    'o', 're', 've', 'y', 'ain', 'aren', "aren't", 'couldn', "couldn't",
    'didn', "didn't", 'doesn', "doesn't", 'hadn', "hadn't", 'hasn',
    "hasn't", 'haven', "haven't", 'isn', "isn't", 'ma', 'mightn',
    "mightn't", 'mustn', "mustn't", 'needn', "needn't", 'shan', "shan't",
    'shouldn', "shouldn't", 'wasn', "wasn't", 'weren', "weren't", 'won',
    "won't", 'wouldn', "wouldn't",
)


def swear_words():
    """Return the list of swear words searched for in transcripts."""
    return list(_SWEAR_WORDS)


def sex_words():
    """Return the list of sex-related words searched for in transcripts."""
    return list(_SEX_WORDS)


def stop_words():
    """Return the list of English stop words removed before scoring."""
    return list(_STOP_WORDS)