├── .gitattributes ├── .gitconfig ├── .gitignore ├── .gitmodules ├── .style.yapf ├── .travis.yml ├── LICENSE ├── README.md ├── app ├── .deps │ ├── .dummy │ ├── 00-load-django.py │ ├── Makefile.config │ ├── beakerx.json │ ├── common.sh │ ├── esper-ipython │ ├── esper-run │ ├── ipython_config.py │ ├── nbconfig │ │ ├── edit.json │ │ ├── notebook.json │ │ └── tree.json │ └── supervisord.conf ├── .dockerignore ├── .gitignore ├── Dockerfile.app ├── Dockerfile.base ├── app │ ├── __init__.py │ ├── settings.py │ ├── urls.py │ └── wsgi.py ├── assets │ ├── css │ │ ├── bootstrap.scss │ │ ├── main.scss │ │ ├── search-input.scss │ │ └── sidebar.scss │ ├── favicon-loading.ico │ ├── favicon.ico │ ├── images │ │ ├── dan_spinning.gif │ │ ├── haotian_spinning.gif │ │ ├── james_spinning.gif │ │ ├── kayvon_spinning.gif │ │ ├── maneesh_spinning.gif │ │ ├── spinner.gif │ │ └── will_spinning.gif │ └── js │ │ ├── Consumer.jsx │ │ ├── Provider.jsx │ │ ├── SearchInput.jsx │ │ ├── Sidebar.jsx │ │ ├── contexts.jsx │ │ └── web.jsx ├── deps │ ├── README.md │ ├── install-rust.sh │ ├── install.old.sh │ └── install.sh ├── esper │ ├── __init__.py │ ├── blackframes.py │ ├── blurriness.py │ ├── captions.py │ ├── clothing_detection.py │ ├── commercial_detection.py │ ├── commercial_detection_rekall.py │ ├── download │ │ ├── Dockerfile │ │ ├── count.sh │ │ ├── deploy.yml │ │ ├── start-cluster.sh │ │ ├── submit.py │ │ └── tasks.py │ ├── embed_google_images.py │ ├── face_detection.py │ ├── face_embedding_scanner.py │ ├── face_embeddings.py │ ├── frame_export.py │ ├── gender_detection.py │ ├── hairstyle_detection.py │ ├── hairstyle_prepare.py │ ├── identity.py │ ├── identity_clusters.py │ ├── identity_detect.py │ ├── ingest.py │ ├── ingest_interview_annotations.py │ ├── ingest_test.py │ ├── ingest_tvnews.py │ ├── interview.py │ ├── kube.py │ ├── load_aligned_transcript.py │ ├── major_canonical_shows.py │ ├── migrate.py │ ├── object_detection.py │ ├── plot_timeline.py │ ├── plot_util.py │ ├── pose_detect.py 
│ ├── prelude.py │ ├── queries │ │ ├── __init__.py │ │ ├── all_faces.py │ │ ├── all_videos.py │ │ ├── interview_with_person_x.py │ │ ├── other_queries.py │ │ ├── panels_rekall.py │ │ ├── panels_sql.py │ │ ├── queries.py │ │ ├── sandbox_labels.py │ │ └── unused_queries.py │ ├── rekall.py │ ├── rekall_parallel.py │ ├── rekall_query.py │ ├── rekall_vgrid.py │ ├── report.py │ ├── scanner_bench.py │ ├── scannerutil.py │ ├── shot_detection.py │ ├── spacy_tokenize.py │ ├── spark.py │ ├── spark_identity.py │ ├── spark_util.py │ ├── sql_dump.py │ ├── supercut.py │ ├── topics.py │ ├── transcript_alignment.py │ ├── update_show.py │ ├── update_tracks.py │ ├── validation.py │ ├── widget.py │ └── write_video.py ├── gunicorn_conf.py ├── manage.py ├── notebooks │ ├── alignment.ipynb │ ├── animatedness_scratch-Copy1.ipynb │ ├── animatedness_scratch.ipynb │ ├── attribute_labeling.ipynb │ ├── cluster_faces_and_export.ipynb │ ├── commercial.ipynb │ ├── corrupted_videos.ipynb │ ├── db_perf.ipynb │ ├── diarization.ipynb │ ├── document_figures.ipynb │ ├── download_and_embed_faces.ipynb │ ├── dump_stories_data.ipynb │ ├── duplicate_videos.ipynb │ ├── esper-tutorial.ipynb │ ├── example.ipynb │ ├── export_face_bboxes_for_aws.ipynb │ ├── face_attribute_visual_validation.ipynb │ ├── face_labeling.ipynb │ ├── face_search_eval.ipynb │ ├── facenet.py │ ├── gender.ipynb │ ├── gender_knn_classifier.ipynb │ ├── gender_labeling.ipynb │ ├── get_canonical_shows.ipynb │ ├── hairstyle.ipynb │ ├── identity_comparison_template.ipynb │ ├── identity_label_import.ipynb │ ├── identity_labeling.ipynb │ ├── identity_labeling_l2_only.ipynb │ ├── identity_labeling_uncommon.ipynb │ ├── import_aws_identities.ipynb │ ├── import_canonical_shows.ipynb │ ├── ingest.ipynb │ ├── interview.ipynb │ ├── introduction_detector.ipynb │ ├── labeled_identities │ │ ├── barack_obama.ipynb │ │ ├── bernie_sanders.ipynb │ │ ├── donald_trump.ipynb │ │ ├── donald_trump_on_hardball.ipynb │ │ ├── hillary_clinton.ipynb │ │ ├── hosts │ │ │ 
├── abby_huntsman.ipynb │ │ │ ├── ainsley_earhardt.ipynb │ │ │ ├── alex_witt.ipynb │ │ │ ├── ali_velshi.ipynb │ │ │ ├── alisyn_camerota.ipynb │ │ │ ├── ana_cabrera.ipynb │ │ │ ├── anderson_cooper.ipynb │ │ │ ├── andrea_mitchell.ipynb │ │ │ ├── ari_melber.ipynb │ │ │ ├── arthel_neville.ipynb │ │ │ ├── ayman_mohyeldin.ipynb │ │ │ ├── bill_hemmer.ipynb │ │ │ ├── bill_o_reilly.ipynb │ │ │ ├── bret_baier.ipynb │ │ │ ├── brian_kilmeade.ipynb │ │ │ ├── brian_llenas.ipynb │ │ │ ├── brian_williams.ipynb │ │ │ ├── brit_hume.ipynb │ │ │ ├── brooke_baldwin.ipynb │ │ │ ├── carol_costello.ipynb │ │ │ ├── chris_cuomo.ipynb │ │ │ ├── chris_hayes.ipynb │ │ │ ├── chris_jansing.ipynb │ │ │ ├── chris_matthews.ipynb │ │ │ ├── christi_paul.ipynb │ │ │ ├── christine_romans.ipynb │ │ │ ├── chuck_todd.ipynb │ │ │ ├── craig_melvin.ipynb │ │ │ ├── dana_perino.ipynb │ │ │ ├── dave_briggs.ipynb │ │ │ ├── don_lemon.ipynb │ │ │ ├── doug_mckelway.ipynb │ │ │ ├── elizabeth_prann.ipynb │ │ │ ├── eric_shawn.ipynb │ │ │ ├── erin_burnett.ipynb │ │ │ ├── frances_rivera.ipynb │ │ │ ├── fredricka_whitfield.ipynb │ │ │ ├── greg_gutfeld.ipynb │ │ │ ├── greta_van_susteren.ipynb │ │ │ ├── griff_jenkins.ipynb │ │ │ ├── hallie_jackson.ipynb │ │ │ ├── harris_faulkner.ipynb │ │ │ ├── jake_tapper.ipynb │ │ │ ├── janice_dean.ipynb │ │ │ ├── jeanine_pirro.ipynb │ │ │ ├── jesse_watters.ipynb │ │ │ ├── joe_scarborough.ipynb │ │ │ ├── john_berman.ipynb │ │ │ ├── john_scott.ipynb │ │ │ ├── juan_williams.ipynb │ │ │ ├── julie_banderas.ipynb │ │ │ ├── kate_bolduan.ipynb │ │ │ ├── kate_snow.ipynb │ │ │ ├── katy_tur.ipynb │ │ │ ├── kelly_wright.ipynb │ │ │ ├── kimberly_guilfoyle.ipynb │ │ │ ├── lawrence_o_donnell.ipynb │ │ │ ├── leland_vittert.ipynb │ │ │ ├── louis_burgdorf.ipynb │ │ │ ├── martha_maccallum.ipynb │ │ │ ├── megyn_kelly.ipynb │ │ │ ├── melissa_francis.ipynb │ │ │ ├── michael_cohen.ipynb │ │ │ ├── mika_brzezinski.ipynb │ │ │ ├── molly_henneberg.ipynb │ │ │ ├── neil_cavuto.ipynb │ │ │ ├── pete_hegseth.ipynb │ │ 
│ ├── poppy_harlow.ipynb │ │ │ ├── rachel_maddow.ipynb │ │ │ ├── richard_lui.ipynb │ │ │ ├── sandra_smith.ipynb │ │ │ ├── sean_hannity.ipynb │ │ │ ├── shannon_bream.ipynb │ │ │ ├── shepard_smith.ipynb │ │ │ ├── stephanie_ruhle.ipynb │ │ │ ├── steve_doocy.ipynb │ │ │ ├── steve_kornacki.ipynb │ │ │ ├── tamron_hall.ipynb │ │ │ ├── thomas_roberts.ipynb │ │ │ ├── tom_shillue.ipynb │ │ │ ├── tucker_carlson.ipynb │ │ │ ├── uma_pemmaraju.ipynb │ │ │ ├── victor_blackwell.ipynb │ │ │ ├── walter_scott.ipynb │ │ │ ├── willie_geist.ipynb │ │ │ ├── wolf_blitzer.ipynb │ │ │ └── yasmin_vossoughian.ipynb │ │ ├── ivanka_trump.ipynb │ │ ├── jared_kushner.ipynb │ │ ├── john_mccain.ipynb │ │ ├── kellyanne_conway.ipynb │ │ ├── paul_manafort.ipynb │ │ ├── robert_mueller.ipynb │ │ ├── sarah_huckabee_sanders.ipynb │ │ ├── sean_spicer.ipynb │ │ ├── shooters │ │ │ ├── chris_harper_mercer.ipynb │ │ │ ├── devin_patrick_kelley.ipynb │ │ │ ├── dylann_roof.ipynb │ │ │ ├── esteban_santiago_ruiz.ipynb │ │ │ ├── gavin_eugene_long.ipynb │ │ │ ├── micah_xavier_johnson.ipynb │ │ │ ├── muhammad_youssef_abdulazeez.ipynb │ │ │ ├── omar_mateen.ipynb │ │ │ ├── robert_lewis_dear.ipynb │ │ │ ├── stephen_paddock.ipynb │ │ │ ├── syed_rizwan_farook.ipynb │ │ │ ├── tashfeen_malik.ipynb │ │ │ └── willie_corey_godbolt.ipynb │ │ └── vladimir_putin.ipynb │ ├── michaela_notebooks │ │ ├── Analysis of Shooters and Victims │ │ │ ├── Al Franken-export.txt │ │ │ ├── Al Franken.ipynb │ │ │ ├── Al Franken.txt │ │ │ ├── Alton Sterling (Victim).ipynb │ │ │ ├── Bill Cosby (Celebrity).ipynb │ │ │ ├── Bill Cosby-export.txt │ │ │ ├── Bill Cosby.txt │ │ │ ├── Chris Harper-Mercer (Shooter).ipynb │ │ │ ├── Chris Harper-Mercer.txt │ │ │ ├── Devin Patrick Kelley (Shooter).ipynb │ │ │ ├── Devin Patrick Kelley.txt │ │ │ ├── Dylann Roof (Shooter).ipynb │ │ │ ├── Dylann Roof.txt │ │ │ ├── Eric Garner (Victim).ipynb │ │ │ ├── Eric Garner.txt │ │ │ ├── Esteban Santiago-Ruiz (Shooter).ipynb │ │ │ ├── Esteban Santiago-Ruiz.txt │ │ │ ├── Gavin 
Eugene Long (Shooter).ipynb │ │ │ ├── Gavin Eugene Long.txt │ │ │ ├── George Zimmerman (Shooter).ipynb │ │ │ ├── George Zimmerman-export.txt │ │ │ ├── George Zimmerman.txt │ │ │ ├── Info.txt │ │ │ ├── Kanye West.ipynb │ │ │ ├── Kanye West.txt │ │ │ ├── Laquan McDonald (Victim).ipynb │ │ │ ├── Micah Xavier Johnson (Shooter).ipynb │ │ │ ├── Micah Xavier Johnson.txt │ │ │ ├── Michael Brown (Victim).ipynb │ │ │ ├── Omar Mateen (Shooter).ipynb │ │ │ ├── Omar Mateen.txt │ │ │ ├── Philando Castile (Victim).ipynb │ │ │ ├── Robert Lewis Dear Jr (Shooter).ipynb │ │ │ ├── Robert Lewis Dear Jr.txt │ │ │ ├── Roger Ailes (Celebrity).ipynb │ │ │ ├── Roger Ailes.txt │ │ │ ├── Roger Ailes_export.txt │ │ │ ├── Stephen Paddock (Shooter).ipynb │ │ │ ├── Stephen Paddock.txt │ │ │ ├── Syed Rizwan Farook (Shooter).ipynb │ │ │ ├── Syed Rizwan Farook One.txt │ │ │ ├── Syed Rizwan Farook Two.txt │ │ │ ├── Syed Rizwan Farook-export.txt │ │ │ ├── Syed Rizwan Farook.txt │ │ │ ├── Syed Rizwan Farook_export.txt │ │ │ ├── Tamir Rice (Victim).ipynb │ │ │ ├── Tashfeen Malik (Shooter).ipynb │ │ │ ├── Tashfeen Malik.txt │ │ │ ├── Trayvon Martin (Victim).ipynb │ │ │ ├── Trayvon Martin Final Test.txt │ │ │ ├── Trayvon Martin-export.txt │ │ │ ├── Trayvon Martin.txt │ │ │ ├── Trayvon Martin_export.txt │ │ │ ├── Walter Scott (Victim).ipynb │ │ │ ├── Willie Corey Godbolt (Shooter).ipynb │ │ │ ├── Willie Corey Godbolt.txt │ │ │ ├── roger-ailes-photos │ │ │ │ ├── 123703025.jpg │ │ │ │ ├── 251405426.jpg │ │ │ │ ├── 251529606.jpg │ │ │ │ ├── 251626619.jpg │ │ │ │ ├── 261346406.jpg │ │ │ │ ├── 262510755.jpg │ │ │ │ ├── 268732111.jpg │ │ │ │ ├── 314770412.jpg │ │ │ │ └── 7665011.jpg │ │ │ └── test1.html │ │ ├── Tashfeen Malik.txt │ │ ├── data.txt │ │ ├── do_stills_of_shooters_vary_by_channel.ipynb │ │ ├── do_stills_of_victims_vary_by_channel.ipynb │ │ ├── identity_labeling_uncommon.ipynb │ │ ├── ipywidgets Testing.ipynb │ │ ├── ove_cat.jpg │ │ └── scratch.ipynb │ ├── model_cross_validation.ipynb │ ├── perf.ipynb 
│ ├── problang │ │ ├── custom_mlp.py │ │ ├── transcript_heuristic.ipynb │ │ ├── transcript_labeling.ipynb │ │ ├── transcript_regression.ipynb │ │ ├── transcript_utils.py │ │ └── transcript_vae.ipynb │ ├── propagate_aws_labels.ipynb │ ├── queries.ipynb │ ├── queries_different_languages.ipynb │ ├── queries_v2.ipynb │ ├── queries_v3.ipynb │ ├── questions │ │ ├── are_big_faces_more_male (face height).ipynb │ │ ├── did_visual_depictions_of_comey_change_by_scandal.ipynb │ │ ├── do_hosts_have_big_heads.ipynb │ │ ├── do_stills_of_shooters_vary_by_channel.ipynb │ │ ├── do_stills_of_victims_vary_by_channel.ipynb │ │ ├── do_topics_differ_on_opinion_and_news_shows.ipynb │ │ ├── which_shooter_received_the_most_screentime.ipynb │ │ └── who_said_maga_on_tv.ipynb │ ├── race_knn_classifier.ipynb │ ├── race_labeler_validation.ipynb │ ├── rekall_3d │ │ ├── rekalling_entire_tvnews.ipynb │ │ └── tutorial.ipynb │ ├── rekall_examples.ipynb │ ├── rekall_queries.ipynb │ ├── report.ipynb │ ├── run_ingest_interview_annotations.ipynb │ ├── scratch.ipynb │ ├── senator_search.ipynb │ ├── set_frame_sampler.ipynb │ ├── shots.ipynb │ ├── simple_templates │ │ ├── identity_still_images.ipynb │ │ └── topic_by_show.ipynb │ ├── spark.ipynb │ ├── speaker-recognition.ipynb │ ├── stories.ipynb │ ├── subtitles.ipynb │ ├── supercuts.ipynb │ ├── tableau_csv.ipynb │ ├── test_captions.ipynb │ ├── topic_analysis_workflow.ipynb │ ├── topics │ │ ├── debug_abortion.ipynb │ │ ├── gender_abortion.ipynb │ │ ├── gender_autism.ipynb │ │ ├── gender_election.ipynb │ │ ├── gender_harrassment.ipynb │ │ ├── gender_many_topics.ipynb │ │ ├── gender_many_topics_mentions.ipynb │ │ ├── gender_sandy_hook.ipynb │ │ ├── gender_terrorism.ipynb │ │ ├── gender_trump.ipynb │ │ └── gender_vaccine.ipynb │ ├── validate_aws_identities.ipynb │ ├── validation.ipynb │ ├── vgrid_test.ipynb │ ├── video_timeline_plot_demo.ipynb │ └── widget_scripts │ │ ├── dump_face_data.ipynb │ │ └── dump_server_data.ipynb ├── package.json ├── query │ ├── 
__init__.py │ ├── admin.py │ ├── apps.py │ ├── base_models.py │ ├── management │ │ ├── __init__.py │ │ └── commands │ │ │ ├── __init__.py │ │ │ └── run.py │ ├── migrations │ │ ├── 0001_initial.py │ │ ├── 0002_auto_20180529_1049.py │ │ ├── 0003_show_hosts.py │ │ ├── 0004_auto_20180612_1255.py │ │ ├── 0005_auto_20180612_1316.py │ │ ├── 0006_auto_20180612_1317.py │ │ ├── 0007_canonicalshow_hosts.py │ │ ├── 0008_auto_20180625_1033.py │ │ ├── 0009_object.py │ │ ├── 0010_auto_20180625_1152.py │ │ ├── 0011_auto_20180625_1157.py │ │ ├── 0012_auto_20180712_1252.py │ │ ├── 0013_auto_20180825_2234.py │ │ ├── 0014_auto_20180926_1514.py │ │ ├── 0015_auto_20180926_1902.py │ │ ├── 0016_auto_20180926_2037.py │ │ ├── 0017_auto_20180926_2045.py │ │ ├── 0018_auto_20180926_2045.py │ │ ├── 0019_topic.py │ │ ├── 0020_segment_topics.py │ │ ├── 0021_auto_20180926_2104.py │ │ ├── 0022_auto_20181026_1002.py │ │ ├── 0022_frame_shot_boundary.py │ │ ├── 0023_auto_20181026_1010.py │ │ ├── 0023_auto_20181101_2104.py │ │ ├── 0024_auto_20181103_2019.py │ │ ├── 0024_labeledcommercial_labeledinterview_labeledpanel.py │ │ ├── 0025_merge_20181227_1812.py │ │ ├── 0026_auto_20181227_1812.py │ │ ├── 0027_auto_20181227_1829.py │ │ ├── 0028_auto_20181228_1555.py │ │ ├── 0028_auto_20190101_2032.py │ │ ├── 0029_video_duplicate.py │ │ ├── 0030_video_corrupted.py │ │ ├── 0031_merge_20190105_1016.py │ │ ├── 0032_auto_20190106_1451.py │ │ ├── 0033_auto_20190311_1730.py │ │ ├── 0034_auto_20190711_1442.py │ │ └── __init__.py │ ├── models.py │ ├── templates │ │ ├── base.html │ │ └── index.html │ ├── tests.py │ ├── urls.py │ └── views.py ├── requirements.app.txt ├── requirements.base.txt ├── scripts │ ├── build-tf.sh │ ├── clear-proxy.sh │ ├── create-sample.sh │ ├── export-table.sh │ ├── generate-protobuf.sh │ ├── google-setup.sh │ ├── jupyter-setup.sh │ ├── load-cloud-db.sh │ ├── set-jupyter-password.py │ ├── transfer-to-bigquery.sh │ └── upload-sample.sh ├── test │ ├── README.md │ └── __init__.py └── 
webpack.config.js ├── config ├── google.toml └── local.toml ├── configure.py ├── db ├── .dockerignore ├── Dockerfile └── postgresql.conf ├── guides ├── dev.md └── google.md ├── nginx ├── Dockerfile ├── google.conf.template ├── local.conf.template ├── nginx.conf.template └── subst.sh ├── requirements.txt ├── scripts ├── emergency-reset.py ├── gce-gpu-setup.sh ├── get-bucket-size.sh ├── jupyter-token.sh ├── run-app-tests.sh ├── serve-reset.sh ├── setup-db-csv-dir.sh ├── transfer-db.sh ├── transfer-storage-logs.sh └── travis-build.sh └── spark └── Dockerfile /.gitattributes: -------------------------------------------------------------------------------- 1 | *.ipynb filter=nbstrip -------------------------------------------------------------------------------- /.gitconfig: -------------------------------------------------------------------------------- 1 | [filter "nbstrip"] 2 | clean = "jq --indent 1 \ 3 | '(.cells[] | select(has(\"outputs\")) | .outputs) = [] \ 4 | | (.cells[] | select(has(\"execution_count\")) | .execution_count) = null \ 5 | '" 6 | smudge = cat 7 | required = true -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.sqlite3 2 | *.pyc 3 | *.swp 4 | docker-compose.yml 5 | nvidia-docker-compose.yml 6 | service-key.json 7 | db/data 8 | .scanner.toml 9 | **/__pycache__ 10 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "app/deps/vgrid"] 2 | path = app/deps/vgrid 3 | url = https://github.com/scanner-research/vgrid 4 | [submodule "app/deps/vgrid_jupyter"] 5 | path = app/deps/vgrid_jupyter 6 | url = https://github.com/scanner-research/vgrid_jupyter 7 | [submodule "app/deps/caption-index"] 8 | path = app/deps/caption-index 9 | url = https://github.com/scanner-research/caption-index.git 10 
| [submodule "app/deps/rekall"] 11 | path = app/deps/rekall 12 | url = https://github.com/scanner-research/rekall.git 13 | [submodule "app/deps/rs-embed"] 14 | path = app/deps/rs-embed 15 | url = https://github.com/scanner-research/rs-embed.git 16 | [submodule "app/deps/esper-model-server"] 17 | path = app/deps/esper-model-server 18 | url = https://github.com/scanner-research/esper-model-server.git 19 | -------------------------------------------------------------------------------- /.style.yapf: -------------------------------------------------------------------------------- 1 | [style] 2 | based_on_style = pep8 3 | column_limit = 100 -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - "3.5" 4 | 5 | sudo: required 6 | services: 7 | - docker 8 | 9 | # Use latest docker (need 17.05) 10 | addons: 11 | apt: 12 | packages: 13 | - docker-ce 14 | env: 15 | global: 16 | - DOCKER_REPO=scannerresearch/esper 17 | - DOCKER_EMAIL=wcrichto@cs.stanford.edu 18 | - DOCKER_USER=wcrichto 19 | - secure: "mPNnsLLZfEHbgH/R77wCYY4793Khdhg9Q9xE10hmTumvCAeWyO7oIJUSkONAX9AXlkVNzVMkPVMj3Un9uf5hwmc/HTUgQ8+lCTR+HPnV/CMsRSWGTf2kLSsqxF+wRLM90Vr0PVMXui3YeyanxEh0qbu3HsBjLVIMBiqBrybZx53E1vnNF1/lwQgi7ljpgQHNkkVLBc2yVsQ4izvLq7NlpcmtumEFeeWIft2WN7t4r7W7Q2c+QAADXrTFzrKxJkw4oz8Psn9WQeQlQf2HFdeVKoJJzwtce2QDKUxMpcRpVNIQ3drxBv33u4JMV7zhf/frGycuaKitiL8dLv0aX6rlW72tYh6Cc0SFQo7ZxbbZw18i6wRVLHf2OpzvFACvtbIx+5DCcojga/dbJGjRzn5fOCtF1nXuHz3z/OwQKjzYR/tQGD7Fa2PINQQpD4k+sOOhK3N+cHvvv9tCASUvYV4IXneSrlzlm6k/sTthx0oAZmFTLhTPgCdL2rt9Y1R3e/2p9MUza4uZk3Vh4KKN9NgjCUby42JyXbM9T48+hkiaRzb/NLk0fvU1q5bYFVi5vnsKijHJL3fqEt6Qp2nP+HwK9iXoq2yfayOpw5YHV2T+opr7GC7qVa7bEUmQ1OwCZWbJ3h2Fyfpa3lhkC59xHmAWayRWN9DX0jXsNaMj9y+p1pY=" 20 | 21 | before_install: 22 | - pip3 install -r requirements.txt 23 | 24 | script: 25 | - ./scripts/travis-build.sh 26 | - 
./scripts/run-app-tests.sh 27 | -------------------------------------------------------------------------------- /app/.deps/.dummy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scanner-research/esper-tv/179ef57d536ebd52f93697aab09bf5abec19ce93/app/.deps/.dummy -------------------------------------------------------------------------------- /app/.deps/00-load-django.py: -------------------------------------------------------------------------------- 1 | import os 2 | import django 3 | os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'app.settings') 4 | django.setup() 5 | -------------------------------------------------------------------------------- /app/.deps/beakerx.json: -------------------------------------------------------------------------------- 1 | { 2 | "beakerx": { 3 | "version": 2, 4 | "jvm_options": { 5 | "heap_GB": null, 6 | "other": [], 7 | "properties": {} 8 | }, 9 | "ui_options": { 10 | "auto_close": true, 11 | "improve_fonts": false, 12 | "wide_cells": false, 13 | "show_publication": false, 14 | "auto_save": true, 15 | "use_data_grid": true 16 | } 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /app/.deps/common.sh: -------------------------------------------------------------------------------- 1 | alias kc=kubectl 2 | alias kga='kc get all' 3 | alias py=python 4 | 5 | function kpod() { 6 | kc get pod --output=json | jq ".items[] | select(.metadata.labels.app == \"scanner-$1\") | .metadata.name" -r 7 | } 8 | 9 | function klog() { 10 | kc logs ${*:2} po/$(kpod $1) 11 | } 12 | 13 | source /root/.cargo/env 14 | -------------------------------------------------------------------------------- /app/.deps/esper-ipython: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | python3 /app/manage.py shell -------------------------------------------------------------------------------- 
/app/.deps/esper-run: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | python3 /app/manage.py run -- $* 4 | -------------------------------------------------------------------------------- /app/.deps/ipython_config.py: -------------------------------------------------------------------------------- 1 | c.InteractiveShellApp.extensions = ['autoreload'] 2 | c.InteractiveShellApp.exec_lines = ['%autoreload 2', '%matplotlib inline'] 3 | -------------------------------------------------------------------------------- /app/.deps/nbconfig/edit.json: -------------------------------------------------------------------------------- 1 | { 2 | "load_extensions": { 3 | "codefolding/edit": false 4 | } 5 | } -------------------------------------------------------------------------------- /app/.deps/nbconfig/notebook.json: -------------------------------------------------------------------------------- 1 | { 2 | "toc2": { 3 | "skip_h1_title": false, 4 | "toc_window_display": false, 5 | "markTocItemOnScroll": false, 6 | "widenNotebook": false, 7 | "navigate_menu": false, 8 | "moveMenuLeft": false, 9 | "sideBar": false, 10 | "collapse_to_match_collapsible_headings": false, 11 | "toc_cell": true 12 | }, 13 | "load_extensions": { 14 | "collapsible_headings/main": false, 15 | "toc2/main": true, 16 | "nbextensions_configurator/config_menu/main": true, 17 | "codefolding/main": false, 18 | "hide_input/main": true, 19 | "python-markdown/main": true, 20 | "code_prettify/autopep8": true, 21 | "notify/notify": true, 22 | "contrib_nbextensions_help_item/main": true 23 | }, 24 | "collapsible_headings": { 25 | "collapse_to_match_toc": true 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /app/.deps/nbconfig/tree.json: -------------------------------------------------------------------------------- 1 | { 2 | "load_extensions": { 3 | "nbextensions_configurator/tree_tab/main": true 4 | } 5 | } 
-------------------------------------------------------------------------------- /app/.deps/supervisord.conf: -------------------------------------------------------------------------------- 1 | [supervisord] 2 | nodaemon=true 3 | logfile=/tmp/supervisord.log 4 | pidfile=/tmp/supervisord.pid 5 | user=root 6 | 7 | [program:gunicorn] 8 | command=gunicorn --log-file=- -c gunicorn_conf.py app.wsgi:application --reload 9 | stdout_logfile=/dev/stdout 10 | stdout_logfile_maxbytes=0 11 | stderr_logfile=/dev/stderr 12 | stderr_logfile_maxbytes=0 13 | 14 | [program:notebook] 15 | command=python3 manage.py shell_plus --notebook 16 | stdout_logfile=/dev/stdout 17 | stdout_logfile_maxbytes=0 18 | stderr_logfile=/dev/stderr 19 | stderr_logfile_maxbytes=0 -------------------------------------------------------------------------------- /app/.dockerignore: -------------------------------------------------------------------------------- 1 | # Datbuildebug directories 2 | scanner_db 3 | node_modules 4 | data 5 | deps 6 | google_images 7 | nets 8 | .cache 9 | spark-data 10 | pg 11 | subs 12 | tmp 13 | assets/bundles 14 | assets/thumbnails 15 | assets/audio 16 | .ipynb_checkpoints 17 | rust-scripts/target 18 | rust-scripts/build 19 | esper_jupyter 20 | # Specific files 21 | service-key.json 22 | npm-debug.log 23 | supervisord.conf 24 | video-analysis 25 | .env 26 | 27 | # File kinds 28 | **/db.bk.* 29 | **/yapf*.py 30 | **/*.sql 31 | **/*.bin 32 | **/*.mkv 33 | **/*.mp4 34 | **/*.pkl 35 | **/*_pb2.py 36 | -------------------------------------------------------------------------------- /app/.gitignore: -------------------------------------------------------------------------------- 1 | # Datbuildebug directories 2 | scanner_db 3 | node_modules 4 | data 5 | deps/tmp 6 | google_images 7 | nets 8 | .cache 9 | spark-data 10 | pg 11 | subs 12 | tmp 13 | assets/bundles 14 | assets/thumbnails 15 | assets/audio 16 | .ipynb_checkpoints 17 | rust-scripts/target 18 | rust-scripts/build 19 | 
esper_jupyter 20 | # Specific files 21 | service-key.json 22 | npm-debug.log 23 | supervisord.conf 24 | video-analysis 25 | .env 26 | 27 | # File kinds 28 | **/db.bk.* 29 | **/yapf*.py 30 | **/*.sql 31 | **/*.bin 32 | **/*.mkv 33 | **/*.mp4 34 | **/*.pkl 35 | **/*_pb2.py 36 | -------------------------------------------------------------------------------- /app/Dockerfile.app: -------------------------------------------------------------------------------- 1 | ARG base_name 2 | ARG device=cpu 3 | FROM ${base_name}:${device} 4 | ARG cores=1 5 | ENV DJANGO_CONFIGURATION Docker 6 | ENV TERM=xterm 7 | 8 | # Misc apt dependencies 9 | RUN apt-get update && \ 10 | apt-get install -y cron npm nodejs curl unzip jq gdb psmisc zsh && \ 11 | ln -s /usr/bin/nodejs /usr/bin/node 12 | 13 | # Google Cloud SDK 14 | RUN echo "deb http://packages.cloud.google.com/apt cloud-sdk-xenial main" | \ 15 | tee -a /etc/apt/sources.list.d/google-cloud-sdk.list && \ 16 | curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add - && \ 17 | apt-get update && apt-get install -y google-cloud-sdk kubectl 18 | 19 | # Python setup 20 | COPY requirements.app.txt ./ 21 | RUN pip3 install -r requirements.app.txt 22 | 23 | # supervisor only works with python2, so have to specially download old pip to install it 24 | RUN curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py && python get-pip.py && pip install supervisor==3.3.3 25 | 26 | # IPython config 27 | COPY .deps/ipython_config.py /root/.ipython/profile_default/ipython_config.py 28 | RUN mkdir -p /root/.ipython/profile_local/startup 29 | COPY .deps/ipython_config.py /root/.ipython/profile_local/ipython_config.py 30 | COPY .deps/00-load-django.py /root/.ipython/profile_local/startup/00-load-django.py 31 | 32 | # Fix npm hanging on OS X 33 | # https://github.com/npm/npm/issues/7862#issuecomment-220798263 34 | RUN npm config set registry http://registry.npmjs.org && \ 35 | npm config set strict-ssl false 36 | 37 | RUN npm i -g npm n && 
n stable 38 | 39 | # Install npm packages in ~/.local by default so they persist across container restarts 40 | RUN npm config set prefix /root/.local 41 | 42 | # Setup bash helpers 43 | COPY .deps/esper-run .deps/esper-ipython /usr/bin/ 44 | COPY .deps/common.sh /tmp 45 | RUN cat /tmp/common.sh >> /root/.bashrc 46 | 47 | # Fix Google Cloud Storage URL library dependencies 48 | RUN unset PYTHONPATH && pip2 install cryptography 49 | 50 | ENV GLOG_minloglevel 1 51 | ENV GOOGLE_APPLICATION_CREDENTIALS ${APPDIR}/service-key.json 52 | ENV LD_LIBRARY_PATH $LD_LIBRARY_PATH:/usr/local/lib:/usr/local/lib/python3.5/dist-packages/hwang 53 | ENV PYTHONPATH $PYTHONPATH:/app 54 | ENV PYTHONPATH /opt/scannertools:$PYTHONPATH 55 | 56 | CMD cp .scanner.toml /root/ && \ 57 | ./scripts/google-setup.sh && \ 58 | ./scripts/jupyter-setup.sh && \ 59 | supervisord -c supervisord.conf 60 | -------------------------------------------------------------------------------- /app/Dockerfile.base: -------------------------------------------------------------------------------- 1 | ARG tag 2 | FROM scannerresearch/scannertools:${tag}-latest 3 | # ARGS before FROM aren't accessible after the FROM, so we need to replicate the device arg. 
4 | ARG build_tf=off 5 | ARG tf_version=1.12.0 6 | ARG device=cpu 7 | ARG cores=1 8 | 9 | ENV APPDIR=/app 10 | WORKDIR ${APPDIR} 11 | 12 | RUN apt-get update && apt-get install -y default-jre postgresql-9.5 libpq-dev gdb 13 | COPY ./scripts ./scripts 14 | COPY ./requirements.base.txt ./ 15 | RUN ./scripts/build-tf.sh 16 | RUN pip3 install -r requirements.base.txt 17 | 18 | COPY ./.deps/.dummy scannerpatc[h] ./ 19 | RUN if [ -f /app/scannerpatch ]; then \ 20 | cd /opt/scanner && \ 21 | git apply /app/scannerpatch && \ 22 | ./build.sh; \ 23 | fi 24 | 25 | COPY ./.scanner.toml /root/.scanner/config.toml 26 | -------------------------------------------------------------------------------- /app/app/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scanner-research/esper-tv/179ef57d536ebd52f93697aab09bf5abec19ce93/app/app/__init__.py -------------------------------------------------------------------------------- /app/app/urls.py: -------------------------------------------------------------------------------- 1 | """esper URL Configuration 2 | 3 | The `urlpatterns` list routes URLs to views. For more information please see: 4 | https://docs.djangoproject.com/en/1.10/topics/http/urls/ 5 | Examples: 6 | Function views 7 | 1. Add an import: from my_app import views 8 | 2. Add a URL to urlpatterns: url(r'^$', views.home, name='home') 9 | Class-based views 10 | 1. Add an import: from other_app.views import Home 11 | 2. Add a URL to urlpatterns: url(r'^$', Home.as_view(), name='home') 12 | Including another URLconf 13 | 1. Import the include() function: from django.conf.urls import url, include 14 | 2. 
Add a URL to urlpatterns: url(r'^blog/', include('blog.urls')) 15 | """ 16 | from django.conf import settings 17 | from django.conf.urls import url, include 18 | from django.contrib import admin 19 | from django.conf.urls.static import static 20 | 21 | urlpatterns = [ 22 | url(r'^', include('query.urls')), 23 | url(r'^admin/', admin.site.urls), 24 | ] 25 | -------------------------------------------------------------------------------- /app/app/wsgi.py: -------------------------------------------------------------------------------- 1 | """ 2 | WSGI config for esper project. 3 | 4 | It exposes the WSGI callable as a module-level variable named ``application``. 5 | 6 | For more information on this file, see 7 | https://docs.djangoproject.com/en/1.10/howto/deployment/wsgi/ 8 | """ 9 | 10 | import os 11 | 12 | from django.core.wsgi import get_wsgi_application 13 | 14 | os.environ.setdefault("DJANGO_SETTINGS_MODULE", "app.settings") 15 | 16 | application = get_wsgi_application() 17 | -------------------------------------------------------------------------------- /app/assets/css/bootstrap.scss: -------------------------------------------------------------------------------- 1 | @import '~bootstrap/dist/css/bootstrap.min.css'; 2 | -------------------------------------------------------------------------------- /app/assets/css/main.scss: -------------------------------------------------------------------------------- 1 | @import '~bootstrap/dist/css/bootstrap.min.css'; 2 | @import '~open-iconic/font/css/open-iconic-bootstrap.css'; 3 | @import '~vgrid/dist/vgrid.css'; 4 | 5 | @import 'search-input'; 6 | @import 'sidebar'; 7 | 8 | body { 9 | background: #fefefe; 10 | font-family: 'Lato', 'Helvetica Neue', Arial, sans-serif; 11 | } 12 | 13 | .login-error { 14 | text-align: center; 15 | font-size: 24px; 16 | margin-top: 30px; 17 | } 18 | 19 | .clearfix { 20 | clear: both; 21 | } 22 | 23 | .search-result { 24 | position: relative; 25 | } 
-------------------------------------------------------------------------------- /app/assets/css/search-input.scss: -------------------------------------------------------------------------------- 1 | .search-input { 2 | $spacing: 10px; 3 | 4 | margin-bottom: 2*$spacing; 5 | 6 | .spinner { 7 | height: 30px; 8 | margin-left: 15px; 9 | } 10 | 11 | .panel { 12 | margin: 0; 13 | } 14 | 15 | .control-label { 16 | font-weight: normal; 17 | margin-right: 5px; 18 | } 19 | 20 | button { 21 | outline: 0!important; 22 | } 23 | 24 | .schema { 25 | margin-top: $spacing; 26 | 27 | .schema-classes { 28 | overflow: auto; 29 | 30 | .schema-class-name { 31 | font-weight: bold; 32 | margin-bottom: 5px; 33 | } 34 | 35 | .schema-class { 36 | float: left; 37 | width: 120px; 38 | margin-right: $spacing; 39 | margin-bottom: $spacing; 40 | } 41 | 42 | .schema-class-fields { 43 | overflow: auto; 44 | height: 120px; 45 | } 46 | 47 | .schema-field { 48 | cursor: pointer; 49 | } 50 | 51 | .panel-body { 52 | padding: 5px 10px; 53 | } 54 | } 55 | 56 | img { 57 | margin-top: $spacing; 58 | } 59 | 60 | .schema-example { 61 | margin-top: $spacing; 62 | 63 | .schema-example-name { 64 | font-weight: bold; 65 | margin-bottom: 5px; 66 | } 67 | } 68 | } 69 | 70 | .example-queries { 71 | margin-top: $spacing; 72 | } 73 | 74 | .ace_editor { 75 | margin-bottom: $spacing; 76 | } 77 | 78 | button { 79 | margin-right: $spacing; 80 | } 81 | 82 | .alert { 83 | margin-top: 10px; 84 | 85 | pre { 86 | border: none; 87 | background: transparent; 88 | padding: 0; 89 | margin: 0; 90 | } 91 | } 92 | } 93 | -------------------------------------------------------------------------------- /app/assets/css/sidebar.scss: -------------------------------------------------------------------------------- 1 | .sidebar { 2 | $width: 150px; 3 | width: $width; 4 | position: absolute; 5 | top: 0; 6 | height: 100%; 7 | 8 | $sidebar-margin: -($width+20px); 9 | 10 | &.left { 11 | left: $sidebar-margin; 12 | } 13 | 14 | &.right { 15 | 
right: $sidebar-margin; 16 | } 17 | 18 | > div { 19 | position: sticky; 20 | top: 20px; 21 | left: 0; 22 | } 23 | 24 | .options { 25 | input[type=range] { 26 | display: inline-block; 27 | width: 40%; 28 | margin-right: 10px; 29 | vertical-align: middle; 30 | } 31 | 32 | input[type=number] { 33 | display: inline-block; 34 | padding: 2px 6px; 35 | width: 40%; 36 | } 37 | } 38 | 39 | .metadata { 40 | .meta-block { 41 | margin-bottom: 10px; 42 | } 43 | 44 | .meta-key { 45 | font-weight: bold; 46 | } 47 | 48 | .colors { 49 | .meta-val > div { 50 | display: inline-block; 51 | margin-right: 20px; 52 | 53 | > div { 54 | display: inline-block; 55 | } 56 | } 57 | } 58 | } 59 | 60 | .help-section { 61 | margin-top: 10px; 62 | } 63 | 64 | .btn { 65 | font-size: 14px; 66 | padding: 4px 6px; 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /app/assets/favicon-loading.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scanner-research/esper-tv/179ef57d536ebd52f93697aab09bf5abec19ce93/app/assets/favicon-loading.ico -------------------------------------------------------------------------------- /app/assets/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scanner-research/esper-tv/179ef57d536ebd52f93697aab09bf5abec19ce93/app/assets/favicon.ico -------------------------------------------------------------------------------- /app/assets/images/dan_spinning.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scanner-research/esper-tv/179ef57d536ebd52f93697aab09bf5abec19ce93/app/assets/images/dan_spinning.gif -------------------------------------------------------------------------------- /app/assets/images/haotian_spinning.gif: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/scanner-research/esper-tv/179ef57d536ebd52f93697aab09bf5abec19ce93/app/assets/images/haotian_spinning.gif -------------------------------------------------------------------------------- /app/assets/images/james_spinning.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scanner-research/esper-tv/179ef57d536ebd52f93697aab09bf5abec19ce93/app/assets/images/james_spinning.gif -------------------------------------------------------------------------------- /app/assets/images/kayvon_spinning.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scanner-research/esper-tv/179ef57d536ebd52f93697aab09bf5abec19ce93/app/assets/images/kayvon_spinning.gif -------------------------------------------------------------------------------- /app/assets/images/maneesh_spinning.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scanner-research/esper-tv/179ef57d536ebd52f93697aab09bf5abec19ce93/app/assets/images/maneesh_spinning.gif -------------------------------------------------------------------------------- /app/assets/images/spinner.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scanner-research/esper-tv/179ef57d536ebd52f93697aab09bf5abec19ce93/app/assets/images/spinner.gif -------------------------------------------------------------------------------- /app/assets/images/will_spinning.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scanner-research/esper-tv/179ef57d536ebd52f93697aab09bf5abec19ce93/app/assets/images/will_spinning.gif -------------------------------------------------------------------------------- /app/assets/js/Consumer.jsx: 
-------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | import {Observer} from 'mobx-react'; 3 | 4 | export default class Consumer extends React.Component { 5 | render() { 6 | let args = []; 7 | return this.props.contexts.reduce( 8 | (acc, ctx) => () => { x => { args.unshift(x); return acc() }}, 9 | () => {() => this.props.children(...args)})(); 10 | } 11 | } 12 | -------------------------------------------------------------------------------- /app/assets/js/Provider.jsx: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | 3 | export default class Provider extends React.Component { 4 | render() { 5 | return this.props.values.reduce( 6 | (inner, [context, value]) => 7 | {inner}, 8 | this.props.children); 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /app/assets/js/contexts.jsx: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | export let SettingsContext = React.createContext({}); 3 | export let DataContext = React.createContext({}); 4 | -------------------------------------------------------------------------------- /app/assets/js/web.jsx: -------------------------------------------------------------------------------- 1 | /* 2 | * web.jsx - Application entrypoint 3 | * 4 | * This file is called when the page is loaded. It initializes the App React view. 
5 | */ 6 | 7 | import axios from 'axios'; 8 | import {observer} from 'mobx-react'; 9 | import React from 'react'; 10 | import ReactDOM from 'react-dom'; 11 | import SearchInput from './SearchInput.jsx'; 12 | import Sidebar from './Sidebar.jsx'; 13 | import VGrid from 'vgrid'; 14 | import {SettingsContext, DataContext} from './contexts'; 15 | import Provider from './Provider.jsx'; 16 | import Consumer from './Consumer.jsx'; 17 | import {observable} from 'mobx'; 18 | 19 | // Make AJAX work with Django's CSRF protection 20 | // https://stackoverflow.com/questions/39254562/csrf-with-django-reactredux-using-axios 21 | axios.defaults.xsrfHeaderName = "X-CSRFToken"; 22 | 23 | @observer 24 | export default class App extends React.Component { 25 | state = { 26 | valid: true, 27 | clickedBox: null, 28 | dataContext: null, 29 | i: 0 30 | } 31 | 32 | constructor() { 33 | super(); 34 | 35 | // Hacky way for us to publicly expose a demo while reducing remote code execution risk. 36 | if (GLOBALS.bucket === 'esper') { 37 | let img = new Image(); 38 | img.onerror = (() => this.setState({valid: false})).bind(this); 39 | img.src = "https://storage.cloud.google.com/esper/do_not_delete.jpg"; 40 | } 41 | 42 | this._settings = observable.map({}); 43 | } 44 | 45 | _onSearch = (results) => { 46 | this.setState({dataContext: results, i: this.state.i + 1}); 47 | } 48 | 49 | _onBoxClick = (box) => { 50 | this.setState({clickedBox: box.id}); 51 | } 52 | 53 | _onSave = (toSave) => { 54 | return axios.post('/api/labeled', toSave); 55 | } 56 | 57 | render() { 58 | if (this.state.valid) { 59 | return ( 60 |
61 |

Esper

62 |
63 | 66 |
67 | 68 | {this.state.dataContext !== null 69 | ? (this.state.dataContext.groups.length > 0 70 | ?
71 | 73 | 74 |
75 | :
No results matching query.
) 76 | : null} 77 |
78 |
79 |
80 |
81 | ); 82 | } else { 83 | return
You must be logged into a validated Google account to access Esper.
84 | } 85 | } 86 | }; 87 | 88 | ReactDOM.render(, document.getElementById('app')); 89 | -------------------------------------------------------------------------------- /app/deps/README.md: -------------------------------------------------------------------------------- 1 | # Installing deps 2 | 3 | ## caption-index 4 | 5 | Probably want to start with step 4 since the files take some time to download. 6 | Also, there is no need to reconfigure or rebuild the containers. 7 | 1. pull and then do `git submodule init` and then `git submodule update` in 8 | the esper repo to get the caption index 9 | 2. in the app container, navigate into caption-index and run 10 | `pip3 install -r requirements.txt` 11 | 3. also in the app container and in the caption-index directory, run 12 | `get_models.sh` 13 | 4. in the `app/data directory`, do 14 | `gsutil cp -r gs://esper/tvnews/caption-index10/index10 .` 15 | 5. after this, there should be an `index10` directory with some files like 16 | `words.lex`, `docs.bin`, etc... 17 | 18 | Note: importing the caption module for the first time takes some time to load the lexicon. <-- may optimize this later to also mmap 19 | 20 | ## rekall 21 | 22 | 1. pull and then do `git submodule init` and then `git submodule update` in 23 | the esper repo to get rekall 24 | 2. 
in the app container, navigate into rekall and run 25 | `pip3 install -r requirements.txt` 26 | 27 | -------------------------------------------------------------------------------- /app/deps/install-rust.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | curl https://sh.rustup.rs -sSf | sh -s -- -y --default-toolchain nightly 3 | source /root/.cargo/env 4 | rustup default nightly 5 | -------------------------------------------------------------------------------- /app/deps/install.old.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | RUN_TESTS=${RUN_TESTS:=0} 4 | 5 | # Fail fast 6 | set -e 7 | 8 | DEPS_DIR=/app/deps 9 | 10 | pushd . 11 | 12 | # Rekall 13 | cd $DEPS_DIR 14 | echo "Installing Rekall" 15 | cd rekall 16 | pip3 install --upgrade --force-reinstall --user -e . 17 | if [ $RUN_TESTS == 1 ]; then 18 | python3 setup.py test 19 | fi 20 | 21 | # Model server 22 | cd $DEPS_DIR 23 | echo "Installing Model-Server" 24 | cd esper-model-server 25 | ./extract_data.sh 26 | pip3 install --user -r requirements.txt 27 | if [ $RUN_TESTS == 1 ]; then 28 | pytest -v tests 29 | fi 30 | 31 | # Caption-Index 32 | cd $DEPS_DIR 33 | echo "Installing Caption-Index" 34 | cd caption-index 35 | rustup update 36 | rustup override set nightly 37 | pip3 install --upgrade --force-reinstall --user . 38 | ./get_models.sh 39 | if [ $RUN_TESTS == 1 ]; then 40 | python3 setup.py test 41 | fi 42 | 43 | # Rs-Embed 44 | cd $DEPS_DIR 45 | echo "Installing Rs-Embed" 46 | cd rs-embed 47 | rustup update 48 | rustup override set nightly 49 | pip3 install --upgrade --force-reinstall --user . 
50 | if [ $RUN_TESTS == 1 ]; then 51 | python3 setup.py test 52 | fi 53 | 54 | cd $DEPS_DIR 55 | echo "Installing vgrid" 56 | #cd vgrid 57 | #npm install 58 | #npm link vgrid 59 | #npm run build 60 | 61 | cd $DEPS_DIR 62 | echo "Installing vgrid_jupyter" 63 | cd vgrid_jupyter/js 64 | npm link vgrid 65 | npm install 66 | npm run build 67 | cd .. 68 | pip3 install --upgrade --force-reinstall --user -e . 69 | 70 | jupyter nbextension enable --py --user widgetsnbextension 71 | jupyter contrib nbextension install --user --skip-running-check 72 | jupyter nbextensions_configurator enable --user 73 | jupyter nbextension enable --user hide_input/main 74 | jupyter nbextension enable --user toc2/main 75 | jupyter nbextension enable --user code_prettify/autopep8 76 | jupyter nbextension enable --user execute_time/ExecuteTime 77 | jupyter nbextension enable --py --user qgrid 78 | 79 | jupyter nbextension install vgrid_jupyter --py --symlink --user --overwrite 80 | jupyter nbextension enable vgrid_jupyter --py --user 81 | 82 | cd /app 83 | npm link vgrid 84 | npm install 85 | npm run build 86 | 87 | popd 88 | 89 | echo "SUCCESS! All dependencies installed" 90 | -------------------------------------------------------------------------------- /app/deps/install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | RUN_TESTS=${RUN_TESTS:=0} 4 | 5 | # Fail fast 6 | set -e 7 | 8 | DEPS_DIR=/app/deps 9 | 10 | pushd . 11 | 12 | # Rekall 13 | cd $DEPS_DIR 14 | echo "Installing Rekall" 15 | cd rekall 16 | pip3 install --upgrade --force-reinstall --user -e . 
17 | if [ $RUN_TESTS == 1 ]; then 18 | python3 setup.py test 19 | fi 20 | 21 | # Model server 22 | cd $DEPS_DIR 23 | echo "Installing Model-Server" 24 | cd esper-model-server 25 | ./extract_data.sh 26 | pip3 install --user -r requirements.txt 27 | if [ $RUN_TESTS == 1 ]; then 28 | pytest -v tests 29 | fi 30 | 31 | # Caption-Index 32 | cd $DEPS_DIR 33 | echo "Installing Caption-Index" 34 | cd caption-index 35 | rustup update 36 | rustup override set nightly 37 | pip3 install --upgrade --force-reinstall --user . 38 | ./get_models.sh 39 | if [ $RUN_TESTS == 1 ]; then 40 | python3 setup.py test 41 | fi 42 | 43 | # Rs-Embed 44 | cd $DEPS_DIR 45 | echo "Installing Rs-Embed" 46 | cd rs-embed 47 | rustup update 48 | rustup override set nightly 49 | pip3 install --upgrade --force-reinstall --user . 50 | if [ $RUN_TESTS == 1 ]; then 51 | python3 setup.py test 52 | fi 53 | 54 | cd $DEPS_DIR 55 | echo "Installing vgrid" 56 | cd vgrid 57 | npm install 58 | npm link 59 | npm run build 60 | 61 | cd $DEPS_DIR 62 | echo "Installing vgrid_jupyter" 63 | cd vgrid_jupyter/js 64 | npm link vgrid 65 | npm install 66 | npm run build 67 | cd .. 68 | pip3 install --upgrade --force-reinstall --user -e . 69 | 70 | jupyter nbextension enable --py --user widgetsnbextension 71 | jupyter contrib nbextension install --user --skip-running-check 72 | jupyter nbextensions_configurator enable --user 73 | jupyter nbextension enable --user hide_input/main 74 | jupyter nbextension enable --user toc2/main 75 | jupyter nbextension enable --user code_prettify/autopep8 76 | jupyter nbextension enable --user execute_time/ExecuteTime 77 | jupyter nbextension enable --py --user qgrid 78 | 79 | jupyter nbextension install vgrid_jupyter --py --symlink --user --overwrite 80 | jupyter nbextension enable vgrid_jupyter --py --user 81 | 82 | cd /app 83 | npm link vgrid 84 | npm install 85 | npm run build 86 | 87 | popd 88 | 89 | echo "SUCCESS! 
All dependencies installed" 90 | -------------------------------------------------------------------------------- /app/esper/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scanner-research/esper-tv/179ef57d536ebd52f93697aab09bf5abec19ce93/app/esper/__init__.py -------------------------------------------------------------------------------- /app/esper/blackframes.py: -------------------------------------------------------------------------------- 1 | from query.models import Video 2 | from scannertools import shot_detection, Pipeline 3 | from esper.scannerutil import ScannerWrapper 4 | from scannerpy import register_python_op 5 | from scannerpy.stdlib import readers 6 | import struct 7 | from typing import Sequence 8 | from esper.kube import make_cluster, cluster_config, worker_config 9 | from esper.prelude import pcache, par_for 10 | import numpy as np 11 | import os 12 | 13 | @register_python_op(name='IsBlack', batch=10000) 14 | def is_black(config, hists: Sequence[bytes]) -> Sequence[bytes]: 15 | output = [] 16 | for hists_batch in hists: 17 | h = readers.histograms(hists_batch, config.protobufs) 18 | threshold = 0.99 * sum(h[0]) 19 | is_black = h[0][0] > threshold and h[1][0] > threshold and h[2][0] > threshold 20 | output.append(struct.pack('B', 1 if is_black else 0)) 21 | return output 22 | 23 | class BlackFramesPipeline(Pipeline): 24 | job_suffix = 'blackframes' 25 | base_sources = ['videos', 'histograms'] 26 | parser_fn = lambda _: lambda buf, _: struct.unpack('B', buf) 27 | 28 | def build_pipeline(self): 29 | return { 30 | 'blackframes': self._db.ops.IsBlack(hists=self._sources['histograms'].op) 31 | } 32 | 33 | compute_black_frames = BlackFramesPipeline.make_runner() 34 | 35 | if __name__ == "__main__": 36 | videos = list(Video.objects.filter(threeyears_dataset=False).order_by('id')) 37 | 38 | cfg = cluster_config( 39 | num_workers=100, 40 | 
worker=worker_config('n1-standard-32')) 41 | # with make_cluster(cfg, no_start=True, no_delete=True) as db_wrapper: 42 | 43 | if True: 44 | db_wrapper = ScannerWrapper.create() 45 | 46 | db = db_wrapper.db 47 | hists = shot_detection.compute_histograms( 48 | db, 49 | videos=[v.for_scannertools() for v in videos], 50 | run_opts={ 51 | 'io_packet_size': 10000, 52 | 'work_packet_size': 1000 53 | }) 54 | 55 | bfs = compute_black_frames( 56 | db, 57 | videos=[v.for_scannertools() for v in videos], 58 | histograms=hists, 59 | run_opts={ 60 | 'io_packet_size': 100000, 61 | 'work_packet_size': 10000 62 | }) 63 | 64 | def load_bf(i): 65 | path = '/app/data/blackframes/{:07d}.bin'.format(i) 66 | if os.path.isfile(path): 67 | return 68 | 69 | try: 70 | with open(path, 'wb') as f: 71 | f.write(np.array(list(bfs[i].load()), dtype=np.uint8).tobytes()) 72 | except Exception: 73 | print(i) 74 | 75 | print('Loading...') 76 | par_for(load_bf, list(range(len(bfs))), workers=8) 77 | -------------------------------------------------------------------------------- /app/esper/blurriness.py: -------------------------------------------------------------------------------- 1 | from esper.prelude import * 2 | import socket 3 | import scannerpy 4 | import cv2 5 | 6 | @scannerpy.register_python_op(name='Blurriness') 7 | def blurriness(config, frame: scannerpy.FrameType, bboxes: bytes) -> bytes: 8 | bboxes = json.loads(bboxes.decode('utf-8')) 9 | results = [] 10 | for bbox in bboxes: 11 | img = frame[int(bbox['y1']):int(bbox['y2']), 12 | int(bbox['x1']):int(bbox['x2']), :] 13 | if img.shape[0] == 0 or img.shape[1] == 0: 14 | continue 15 | img = cv2.resize(img, (200, 200)) 16 | results.append({ 17 | 'id': bbox['id'], 18 | 'blurriness': cv2.Laplacian(img, cv2.CV_64F).var() 19 | }) 20 | 21 | return json.dumps(results).encode() 22 | 23 | 24 | db = make_scanner_db(multiworker=True) 25 | 26 | videos = Video.objects.all()[20000:] 27 | 28 | sql_config = db.protobufs.SQLConfig( 29 | adapter='postgres', 
30 | hostaddr=socket.gethostbyname('db'), 31 | port=5432, 32 | dbname='esper', 33 | user=os.environ['DJANGO_DB_USER'], 34 | password=os.environ['DJANGO_DB_PASSWORD']) 35 | sql_query = db.protobufs.SQLQuery( 36 | fields=''' 37 | query_tvnews_face.id as id, 38 | (query_tvnews_face.bbox_x1 * query_tvnews_video.width) as x1, 39 | (query_tvnews_face.bbox_y1 * query_tvnews_video.height) as y1, 40 | (query_tvnews_face.bbox_x2 * query_tvnews_video.width) as x2, 41 | (query_tvnews_face.bbox_y2 * query_tvnews_video.height) as y2''', 42 | table='query_tvnews_face', 43 | joins=''' 44 | INNER JOIN "query_tvnews_person" ON ("query_tvnews_face"."person_id" = "query_tvnews_person"."id") 45 | INNER JOIN "query_tvnews_frame" ON ("query_tvnews_person"."frame_id" = "query_tvnews_frame"."id") 46 | INNER JOIN "query_tvnews_video" ON ("query_tvnews_frame"."video_id" = "query_tvnews_video"."id") 47 | ''', 48 | id='query_tvnews_face.id', 49 | group='query_tvnews_frame.number', 50 | job_table='query_tvnews_scannerjob' 51 | ) 52 | 53 | frame = db.sources.FrameColumn() 54 | frame_sampled = frame.sample() 55 | bboxes = db.sources.SQL(config=sql_config, query=sql_query) 56 | blurriness = db.ops.Blurriness(frame=frame_sampled, bboxes=bboxes) 57 | output = db.sinks.SQL(config=sql_config, query=sql_query, input=blurriness) 58 | 59 | log.debug('Fetching indices') 60 | def fetch_indices(v): 61 | return [f['person__frame__number'] 62 | for f in Face.objects.filter(person__frame__video=v).distinct('person__frame__number') \ 63 | .order_by('person__frame__number').values('person__frame__number')] 64 | frame_indices = par_for(fetch_indices, videos, workers=8) 65 | 66 | log.debug('Making jobs') 67 | jobs = [ 68 | Job(op_args={ 69 | frame: db.table(v.path).column('frame'), 70 | frame_sampled: db.sampler.gather(f), 71 | bboxes: {'filter': 'query_tvnews_video.id = {}'.format(v.id)}, 72 | output: {'job_name': v.path + '_blurriness'} 73 | }) 74 | for v, f in zip(videos, frame_indices) 75 | ] 76 | 77 | 
log.debug('Running job') 78 | db.run(output, jobs, pipeline_instances_per_node=8) 79 | -------------------------------------------------------------------------------- /app/esper/download/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:16.04 2 | WORKDIR /app 3 | RUN apt update && apt install -y python3 python3-pip curl 4 | RUN pip3 install tqdm celery[redis] internetarchive 5 | # Google Cloud SDK 6 | COPY service-key.json . 7 | COPY ia.ini /root/.config/ia.ini 8 | ENV GOOGLE_APPLICATION_CREDENTIALS /app/service-key.json 9 | RUN echo "deb http://packages.cloud.google.com/apt cloud-sdk-xenial main" | \ 10 | tee -a /etc/apt/sources.list.d/google-cloud-sdk.list && \ 11 | curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add - && \ 12 | apt-get update && apt-get install -y google-cloud-sdk kubectl && \ 13 | gcloud config set project visualdb-1046 && \ 14 | gcloud auth activate-service-account --key-file=$GOOGLE_APPLICATION_CREDENTIALS 15 | COPY tasks.py . 
16 | CMD celery -A tasks worker -c 20 -------------------------------------------------------------------------------- /app/esper/download/count.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | redis-cli -h 10.0.0.3 -p 6379 -n 0 llen celery 3 | -------------------------------------------------------------------------------- /app/esper/download/deploy.yml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1beta1 2 | kind: Deployment 3 | metadata: 4 | name: archive-download 5 | spec: 6 | replicas: 50 7 | template: 8 | metadata: 9 | labels: 10 | app: archive-download 11 | spec: 12 | containers: 13 | - name: archive-download 14 | image: gcr.io/visualdb-1046/archive-download 15 | imagePullPolicy: Always 16 | resources: 17 | requests: 18 | cpu: 1.1 19 | -------------------------------------------------------------------------------- /app/esper/download/start-cluster.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | gcloud container clusters create archive-download \ 4 | --zone us-east1-b --machine-type "n1-standard-2" --disk-size "100" --num-nodes 50 \ 5 | --network redis --enable-autoscaling --min-nodes 1 --max-nodes 50 \ 6 | --enable-ip-alias 7 | -------------------------------------------------------------------------------- /app/esper/download/submit.py: -------------------------------------------------------------------------------- 1 | from tasks import download 2 | from tqdm import tqdm 3 | 4 | remaining = [s.strip() for s in open('remaining-videos.txt', 'r').readlines()] 5 | 6 | for video in tqdm(remaining): 7 | download.delay(video) 8 | -------------------------------------------------------------------------------- /app/esper/download/tasks.py: -------------------------------------------------------------------------------- 1 | from celery import Celery 2 | from tqdm import tqdm 3 | import os 4 | 
import subprocess as sp 5 | 6 | app = Celery('archive-download', broker='redis://10.0.0.3:6379/0') 7 | 8 | @app.task(acks_late=True) 9 | def download(video): 10 | print('Downloading {}'.format(video)) 11 | 12 | try: 13 | sp.check_call('ia download {} --glob "*.mp4"'.format(video), shell=True) 14 | sp.check_call('gsutil mv {id}/{id}.mp4 gs://esper/tvnews/videos/{id}.mp4'.format(id=video), shell=True) 15 | except Exception: 16 | print('Error: {}'.format(video)) 17 | -------------------------------------------------------------------------------- /app/esper/face_detection.py: -------------------------------------------------------------------------------- 1 | from esper.prelude import Timer, Notifier, pcache 2 | from query.base_models import model_defaults 3 | from query.models import Video, Frame, Face, Labeler 4 | from scannertools import kube, face_detection 5 | from esper.kube import make_cluster, cluster_config, worker_config 6 | from esper.scanner_bench import ScannerJobConfig, bench 7 | from esper.scannerutil import ScannerWrapper, ScannerSQLPipeline, ScannerSQLTable 8 | import scannerpy 9 | from scannerpy.stdlib import readers 10 | import json 11 | from django.db.models.fields import NOT_PROVIDED 12 | from tqdm import tqdm 13 | 14 | labeler_id = Labeler.objects.get(name='mtcnn').id 15 | face_defaults = model_defaults(Face) 16 | 17 | @scannerpy.register_python_op(name='FacesToJson') 18 | def faces_to_json(config, bboxes: bytes, frame_ids: bytes) -> bytes: 19 | faces = readers.bboxes(bboxes, config.protobufs) 20 | frame_id = json.loads(frame_ids.decode('utf-8'))[0]['id'] 21 | return json.dumps([ 22 | {'frame_id': frame_id, 23 | 'bbox_x1': f.x1, 24 | 'bbox_x2': f.x2, 25 | 'bbox_y1': f.y1, 26 | 'bbox_y2': f.y2, 27 | 'probability': f.score, 28 | 'labeler_id': labeler_id, 29 | **face_defaults} 30 | for f in faces 31 | ]) 32 | 33 | class FaceDetectionPipeline(ScannerSQLPipeline, face_detection.FaceDetectionPipeline): 34 | db_class = Face 35 | json_kernel = 
'FacesToJson' 36 | additional_sources = ['frame_ids'] 37 | 38 | def build_pipeline(self): 39 | output_ops = super(FaceDetectionPipeline, self).build_pipeline() 40 | output_ops['frame_ids'] = self._sources['frame_ids'].op 41 | return output_ops 42 | 43 | detect_faces = FaceDetectionPipeline.make_runner() 44 | 45 | videos = list(Video.objects.filter(threeyears_dataset=True).order_by('id')) 46 | 47 | if False: 48 | with Timer('benchmark'): 49 | videos = videos[:50] 50 | def run_pipeline(db, videos, frames, **kwargs): 51 | return face_detection.detect_faces(db, videos=[v.for_scannertools() for v in videos], frames=frames, cache=False, **kwargs) 52 | 53 | cfg = cluster_config(num_workers=5, worker=worker_config('n1-standard-32')) 54 | configs = [(cfg, [ScannerJobConfig(io_packet_size=1000, work_packet_size=20, batch=1)])] 55 | bench('face', {'videos': videos, 'frames': [[f['number'] for f in Frame.objects.filter(video=v).values('number').order_by('number')] for v in videos]}, 56 | run_pipeline, configs, no_delete=True, force=True) 57 | 58 | 59 | with Timer('run'): 60 | 61 | print('Getting frames') 62 | def load_frames(): 63 | return [[f['number'] for f in Frame.objects.filter(video=v, shot_boundary=False).values('number').order_by('number')] 64 | for v in tqdm(videos)] 65 | frames = pcache.get('face_frames', load_frames) 66 | 67 | cfg = cluster_config( 68 | num_workers=100, 69 | worker=worker_config('n1-standard-64'), 70 | num_load_workers=2, 71 | num_save_workers=2) 72 | with make_cluster(cfg, sql_pool=4, no_delete=True) as db_wrapper: 73 | 74 | # if True: 75 | # db_wrapper = ScannerWrapper.create(enable_watchdog=False) 76 | 77 | db = db_wrapper.db 78 | 79 | print('Starting detection') 80 | detect_faces( 81 | db, 82 | videos=[v.for_scannertools() for v in videos], 83 | db_videos=videos, 84 | frames=frames, 85 | frame_ids=[ScannerSQLTable(Frame, v, num_elements=len(f), 86 | filter='query_frame.shot_boundary = false') 87 | for v, f in zip(videos, frames)], 88 | 
run_opts={ 89 | 'io_packet_size': 1000, 90 | 'work_packet_size': 20, 91 | 'pipeline_instances_per_node': 16, 92 | 'checkpoint_frequency': 1000 93 | }) 94 | -------------------------------------------------------------------------------- /app/esper/face_embeddings.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | from rs_embed import EmbeddingData 4 | 5 | EMB_DIR = '/app/data/embs' 6 | EMB_PATH = os.path.join(EMB_DIR, 'face_embs.bin') 7 | ID_PATH = os.path.join(EMB_DIR, 'face_ids.bin') 8 | EMB_DIM = 128 9 | 10 | 11 | _EMB_DATA = EmbeddingData(ID_PATH, EMB_PATH, EMB_DIM) 12 | 13 | 14 | def count(): 15 | return _EMB_DATA.count() 16 | 17 | 18 | def ids(i, n): 19 | """Get n face ids starting at index i""" 20 | return _EMB_DATA.ids(i, n) 21 | 22 | 23 | def get(ids): 24 | """List of face ids -> List of pairs (id, embedding)""" 25 | return _EMB_DATA.get(ids) 26 | 27 | 28 | def mean(ids): 29 | """List of face ids -> mean embedding""" 30 | return _EMB_DATA.mean(ids) 31 | 32 | 33 | def features(ids): 34 | """List of face ids -> List of embeddings""" 35 | result = _EMB_DATA.get(ids) 36 | assert len(result) == len(ids) 37 | return [np.array(v) for _, v in result] 38 | 39 | 40 | def sample(k): 41 | """Returns list of face_ids, uniformly random with replacement""" 42 | return _EMB_DATA.sample(k) 43 | 44 | 45 | def exists(ids): 46 | """List of face ids -> List of bools""" 47 | return _EMB_DATA.exists(ids) 48 | 49 | 50 | def dist(ids, targets=None, target_ids=None): 51 | """ 52 | Computes the distance from each face in ids to the closest target 53 | 54 | Args: 55 | ids: List of faces to compute distances for 56 | targets: List of embeddings 57 | target_ids: List of face_ids 58 | 59 | Returns: 60 | List of distances in same order as as ids 61 | """ 62 | if targets is not None: 63 | targets = [ 64 | [float(z) for z in x.tolist()] 65 | if not isinstance(x, list) else x for x in targets 66 | ] 67 | return 
_EMB_DATA.dist(targets, ids) 68 | elif target_ids is not None: 69 | return _EMB_DATA.dist_by_id(target_ids, ids) 70 | else: 71 | raise ValueError('No targets given') 72 | 73 | 74 | def knn(targets=None, ids=None, k=2 ** 31, max_threshold=100., **kwargs): 75 | """ 76 | Computes distance of all faces to the targets 77 | (specified by targets or ids) 78 | 79 | Args: 80 | targets: List of embeddings (i.e., list of floats) 81 | ids: List of face ids (another way to specify targets) 82 | max_threshold: largest distance 83 | 84 | Returns: 85 | List of (face_id, distance) pairs by ascending distance 86 | """ 87 | if targets is not None: 88 | targets = [ 89 | [float(z) for z in x.tolist()] 90 | if not isinstance(x, list) else x for x in targets 91 | ] 92 | return _EMB_DATA.nn(targets, k, max_threshold, **kwargs) 93 | elif ids is not None: 94 | return _EMB_DATA.nn_by_id(ids, k, max_threshold, **kwargs) 95 | else: 96 | raise ValueError('No targets given') 97 | 98 | 99 | def kmeans(ids, k=25): 100 | """ 101 | Run kmeans on all face_ids in ids. 
102 | 103 | Args: 104 | ids: List of face_ids 105 | 106 | Returns: 107 | List of (face_id, cluster number) pairs 108 | """ 109 | return _EMB_DATA.kmeans(ids, k) 110 | 111 | 112 | def logreg(ids, labels, **kwargs): 113 | """ 114 | Args: 115 | ids: List of face_ids 116 | labels: List of 0, 1 labels 117 | Returns: 118 | weights 119 | """ 120 | return _EMB_DATA.logreg( 121 | ids, labels, **kwargs) 122 | 123 | 124 | def logreg_predict(weights, **kwargs): 125 | """Returns: List of (face_id, score) pairs by ascending score)""" 126 | return _EMB_DATA.logreg_predict(weights, **kwargs) 127 | 128 | 129 | def knn_predict(train_ids, train_labels, k, **kwargs): 130 | """Returns: List of (face_id, score) pairs by ascending score)""" 131 | return _EMB_DATA.knn_predict(train_ids, train_labels, k, **kwargs) -------------------------------------------------------------------------------- /app/esper/frame_export.py: -------------------------------------------------------------------------------- 1 | from query.scripts.script_util import * 2 | import requests 3 | import math 4 | 5 | videos = Video.objects.all() 6 | for video in videos: 7 | stride = int(math.ceil(video.fps)/2) 8 | print((video.path, video.fps, stride)) 9 | ids = [ 10 | str(f['id']) 11 | for f in Frame.objects.filter(video=video, number__in=list(range(0, video.num_frames, stride))) 12 | .order_by('number').values('id') 13 | ] 14 | requests.post('http://localhost:8000/batch_fallback', data={'frames': ','.join(ids)}) 15 | -------------------------------------------------------------------------------- /app/esper/identity_detect.py: -------------------------------------------------------------------------------- 1 | from esper.prelude import * 2 | from esper.tvnews.embed_kernel import EmbedFaceKernel 3 | from sklearn.neighbors import NearestNeighbors 4 | from scannerpy.stdlib import writers 5 | import cv2 6 | 7 | LABELER, _ = Labeler.objects.get_or_create(name='facenet') 8 | FEATURE_DISTANCE_THRESHOLD = 1.0 9 | 10 | 11 | # 
Simple K-NN based identity detector 12 | def identity_detect(videos, exemplar, features, force=False): 13 | log.debug('Loading features') 14 | ids, vectors = unzip([((i, j, k), f) 15 | for i, vid_features in tqdm(enumerate(features)) 16 | for j, frame in enumerate(vid_features) for k, f in enumerate(frame)]) 17 | 18 | log.debug('Building k-nn tree') 19 | feat_nn = NearestNeighbors().fit(np.vstack(vectors)) 20 | 21 | log.debug('Computing exemplar features') 22 | 23 | def compute_exemplar(): 24 | img = cv2.imread(exemplar) 25 | with Database() as db: 26 | bboxes = [db.protobufs.BoundingBox(x1=0, y1=0, x2=img.shape[1], y2=img.shape[0])] 27 | kernel = EmbedFaceKernel(None, db.protobufs) 28 | [emb] = kernel.execute( 29 | [cv2.cvtColor(img, cv2.COLOR_RGB2BGR), 30 | writers.bboxes([bboxes], db.protobufs)[0]]) 31 | return np.frombuffer(emb, dtype=np.float32) 32 | 33 | exemplar_vector = pcache.get('exemplar', compute_exemplar, method='pickle', force=force) 34 | 35 | log.debug('Doing lookup') 36 | dists, id_indices = feat_nn.kneighbors([exemplar_vector], len(vectors)) 37 | 38 | face_map = defaultdict(list) 39 | for q, (dist, id_idx) in enumerate(zip(dists[0], id_indices[0])): 40 | (i, j, k) = ids[id_idx] 41 | if dist > FEATURE_DISTANCE_THRESHOLD: 42 | break 43 | 44 | face_map[i].append((j, k)) 45 | 46 | return [face_map[i] for i in range(len(videos))] 47 | 48 | # return unzip([unzip(face_map[video.id]) for video in videos]) 49 | -------------------------------------------------------------------------------- /app/esper/ingest_test.py: -------------------------------------------------------------------------------- 1 | from query.scripts.script_util import * 2 | import json 3 | import math 4 | from django.db import transaction 5 | 6 | def parse(path): 7 | with open(path, 'r') as f: 8 | while True: 9 | path = f.next()[:-1] # this will raise StopIteration for us when we reach EOF 10 | num_rows = int(math.ceil(int(next(f)) / 24.0)) 11 | print((path, num_rows)) 12 | yield path, 
[f.next()[:-1] for _ in range(num_rows)] 13 | 14 | to_ingest = [ 15 | # ('assets/kcam_tiny_faces.txt', 'tinyfaces'), 16 | # ('assets/kcam_cpm_faces.txt', 'cpm'), 17 | ('assets/kcam_frcnn_people.txt', 'frcnn') 18 | ] # yapf: disable 19 | 20 | for fpath, labeler_name in to_ingest: 21 | print(fpath) 22 | 23 | labeler, _ = Labeler.objects.get_or_create(name=labeler_name) 24 | bar = progress_bar(len(list(parse(fpath)))) 25 | does_not_exist = [] 26 | 27 | for vi, (path, rows) in enumerate(parse(fpath)): 28 | try: 29 | video = Video.objects.get(path__contains=path) 30 | except Video.DoesNotExist: 31 | does_not_exist.append(path) 32 | continue 33 | 34 | video_boxes = { 35 | j: [ 36 | proto.BoundingBox(x1=r[0], y1=r[1], x2=r[2], y2=r[3]) 37 | for r in [[float(s) for s in box.split(' ')] for box in l.split(',')[:-1]] 38 | ] 39 | for j, l in enumerate(rows) if l != '' 40 | } 41 | 42 | Instance = PersonInstance if labeler_name == 'frcnn' else FaceInstance 43 | 44 | frames = list( 45 | Frame.objects.filter(video=video).order_by('number').extra( 46 | where=['number mod 24=0'])) 47 | faces = [ 48 | Instance(labeler=labeler, frame=frames[j], bbox=bbox) 49 | for j, frame_boxes in list(video_boxes.items()) for bbox in frame_boxes 50 | ] 51 | 52 | with transaction.atomic(): 53 | for face in faces: 54 | f = Face() 55 | f.save() 56 | face.concept = f 57 | 58 | Instance.objects.bulk_create(faces) 59 | 60 | bar.update(vi) 61 | 62 | print(('Failed to find: {}'.format(json.dumps(does_not_exist)))) 63 | -------------------------------------------------------------------------------- /app/esper/ingest_tvnews.py: -------------------------------------------------------------------------------- 1 | import subprocess as sp 2 | 3 | with open('/app/all-videos.txt', 'r') as f: 4 | all_videos = [s.strip() for s in f.readlines()] 5 | 6 | downloaded = [s.strip().split('/')[-1][:-4] for s in sp.check_output('gsutil ls "gs://esper/tvnews/videos/*.mp4"', shell=True).decode('utf-8').splitlines()] 7 | 8 | 
remaining = set(all_videos) - set(downloaded) 9 | 10 | with open('/app/remaining-videos.txt', 'w') as f: 11 | f.write('\n'.join(list(remaining))) 12 | -------------------------------------------------------------------------------- /app/esper/kube.py: -------------------------------------------------------------------------------- 1 | from scannertools import kube 2 | from esper.scannerutil import ScannerWrapper 3 | import os 4 | import shlex 5 | from contextlib import contextmanager 6 | 7 | def image_path(device): 8 | return 'gcr.io/{project}/{base_name}:{device}'.format( 9 | project=os.environ['GOOGLE_PROJECT'], 10 | base_name=os.environ['BASE_IMAGE_NAME'], 11 | device=device) 12 | 13 | cloud_config = kube.CloudConfig(project=os.environ['GOOGLE_PROJECT']) 14 | 15 | master_config = kube.MachineConfig( 16 | image=image_path('cpu'), 17 | type=kube.MachineTypeName(name='n1-highmem-32'), 18 | disk=250) 19 | 20 | def worker_config(machine_type, **kwargs): 21 | return kube.MachineConfig( 22 | image=image_path('gpu' if 'gpu' in kwargs else 'cpu'), 23 | type=kube.MachineTypeName(name=machine_type), 24 | disk=250, 25 | preemptible=True, 26 | **kwargs) 27 | 28 | def cluster_config(**kwargs): 29 | return kube.ClusterConfig( 30 | id='wc-test', 31 | autoscale=True, 32 | master=master_config, 33 | **kwargs) 34 | 35 | def cluster(cluster_config, sql_pool=None, **kwargs): 36 | containers = [] 37 | if sql_pool is not None: 38 | proxy_port = 5431 39 | containers.append({ 40 | 'name': 'dbpool', 41 | 'image': 'edoburu/pgbouncer', 42 | 'env': [ 43 | {'name': 'DATABASE_URL', 'value': 'postgres://{}:{}@0.0.0.0:5431/esper'.format( 44 | os.environ['DJANGO_DB_USER'], os.environ['DJANGO_DB_PASSWORD'] 45 | )}, 46 | {'name': 'DEFAULT_POOL_SIZE', 'value': str(sql_pool)}, 47 | {'name': 'MAX_CLIENT_CONN', 'value': '1000'}, # Allow any num. 
of client connections 48 | {'name': 'QUERY_WAIT_TIMEOUT', 'value': '0'}, # Don't timeout long queries 49 | ] 50 | }) 51 | else: 52 | proxy_port = 5432 53 | 54 | containers.append({ 55 | 'name': 'db', 56 | 'image': 'gcr.io/cloudsql-docker/gce-proxy:1.09', 57 | 'command': shlex.split( 58 | '/cloud_sql_proxy -instances={}:us-east1-d:esper-dev=tcp:0.0.0.0:{} -credential_file=/config/service-key.json' \ 59 | .format(os.environ['GOOGLE_PROJECT'], proxy_port)), 60 | 'volumeMounts': [{ 61 | 'name': 'service-key', 62 | 'mountPath': '/config' 63 | }] 64 | }) 65 | 66 | return kube.Cluster(cloud_config, cluster_config, containers=containers, **kwargs) 67 | 68 | @contextmanager 69 | def make_cluster(*args, **kwargs): 70 | with cluster(*args, **kwargs) as c: 71 | yield ScannerWrapper.create(cluster=c, enable_watchdog=False) 72 | 73 | if __name__ == '__main__': 74 | cluster(cluster_config(num_workers=1, worker=worker_config('n1-standard-16'))).cli() 75 | -------------------------------------------------------------------------------- /app/esper/load_aligned_transcript.py: -------------------------------------------------------------------------------- 1 | from scannertools.transcript_alignment import TranscriptAligner 2 | import pickle 3 | import os 4 | import sys 5 | 6 | def callback(name, outputs): 7 | video_name = name[:name.find('.mp4')].split('/')[2] 8 | path = name.replace('_align_transcript', '') 9 | 10 | align_dir = '/app/data/subs/aligned/' 11 | align_word_list = [] 12 | num_word_aligned = 0 13 | num_word_total = 0 14 | for seg_idx, seg_bytes in enumerate(outputs): 15 | seg = pickle.loads(seg_bytes) 16 | align_word_list += seg['align_word_list'] 17 | num_word_aligned += seg['num_word_aligned'] 18 | if 'num_word_total' in seg: 19 | num_word_total += seg['num_word_total'] 20 | else: 21 | num_word_total += len(seg['align_word_list']) 22 | 23 | output_path = os.path.join(align_dir, '{}.{}.srt'.format(video_name, 'word')) 24 | 
TranscriptAligner.dump_aligned_transcript_byword(align_word_list, output_path) 25 | 26 | if num_word_total == 0: 27 | print(path, 1) 28 | else: 29 | print(path, 1 - 1. * num_word_aligned / num_word_total) 30 | sys.stdout.flush() 31 | 32 | -------------------------------------------------------------------------------- /app/esper/major_canonical_shows.py: -------------------------------------------------------------------------------- 1 | from esper.prelude import * 2 | from esper.widget import * 3 | from query.models import * 4 | 5 | from datetime import timedelta 6 | 7 | 8 | NUM_MAJOR_CANONICAL_SHOWS = 150 9 | 10 | MAJOR_CANONICAL_SHOWS = [ 11 | x['show__canonical_show__name'] for x in 12 | Video.objects.values( 13 | 'show__canonical_show__name' 14 | ).annotate( 15 | total_duration=Sum( 16 | ExpressionWrapper( 17 | F('num_frames') / F('fps'), 18 | output_field=FloatField())) 19 | ).order_by( 20 | '-total_duration' 21 | ).values( 22 | 'show__canonical_show__name' 23 | )[:NUM_MAJOR_CANONICAL_SHOWS] 24 | ] 25 | 26 | # Cache this 27 | _TOTAL_SHOT_TIME_BY_CSHOW = None 28 | 29 | 30 | def get_total_shot_time_by_canonical_show(): 31 | global _TOTAL_SHOT_TIME_BY_CSHOW 32 | if _TOTAL_SHOT_TIME_BY_CSHOW is None: 33 | query_results = Shot.objects.filter( 34 | video__show__canonical_show__name__in=MAJOR_CANONICAL_SHOWS, 35 | in_commercial=False, 36 | ).values( 37 | 'video__show__canonical_show__name' 38 | ).annotate( 39 | screen_time=Sum((F('max_frame') - F('min_frame')) / F('video__fps'), 40 | output_field=FloatField()) 41 | ) 42 | _TOTAL_SHOT_TIME_BY_CSHOW = { 43 | x['video__show__canonical_show__name']: 44 | timedelta(seconds=x['screen_time']) for x in query_results 45 | } 46 | return _TOTAL_SHOT_TIME_BY_CSHOW 47 | -------------------------------------------------------------------------------- /app/esper/object_detection.py: -------------------------------------------------------------------------------- 1 | from scannertools import object_detection 2 | from esper.prelude 
import ScannerWrapper, Timer, ScannerSQLTable 3 | from query.models import Object, Video, Frame, ScannerJob 4 | import scannerpy 5 | from scannerpy.stdlib import readers 6 | import json 7 | 8 | 9 | # 1. Define pipeline + auxiliary kernels 10 | 11 | @scannerpy.register_python_op(name='BboxToJson') 12 | def bbox_to_json(config, bboxes: bytes, frame_id: bytes) -> bytes: 13 | bboxes = readers.bboxes(bboxes, config.protobufs) 14 | frame_id = json.loads(frame_id.decode('utf-8'))[0]['id'] 15 | return json.dumps([{ 16 | 'bbox_x1': bb.x1, 17 | 'bbox_x2': bb.x2, 18 | 'bbox_y1': bb.y1, 19 | 'bbox_y2': bb.y2, 20 | 'probability': bb.score, 21 | 'label': bb.label, 22 | 'frame_id': frame_id 23 | } for bb in bboxes]) 24 | 25 | class ObjectDetectionPipeline(object_detection.ObjectDetectionPipeline): 26 | additional_sources = ['frame_ids'] 27 | 28 | def build_sink(self, db_videos): 29 | jsonified = self._db.ops.BboxToJson( 30 | bboxes=self._output_ops['bboxes'], frame_id=self._sources['frame_ids'].op) 31 | return ScannerWrapper(self._db).sql_sink( 32 | cls=Object, input=jsonified, videos=db_videos, suffix='objdet', insert=True) 33 | 34 | def parse_output(self): 35 | pass 36 | 37 | detect_objects = ObjectDetectionPipeline.make_runner() 38 | 39 | 40 | # 2. Gather inputs 41 | 42 | db_wrapper = ScannerWrapper.create() 43 | db = db_wrapper.db 44 | 45 | videos = db_wrapper.filter_videos(Video.objects.all(), ObjectDetectionPipeline)[:1] 46 | print('Processing {} videos'.format(len(videos))) 47 | 48 | frames = [ 49 | [f['number'] for f in 50 | Frame.objects.filter(video=v).values('number').order_by('number')] 51 | for v in videos 52 | ] 53 | 54 | 55 | # 3. 
Run pipeline 56 | 57 | detect_objects( 58 | db, 59 | videos=[v.for_scannertools() for v in videos], 60 | frames=frames, 61 | frame_ids=[ScannerSQLTable(Frame, v) for v in videos], 62 | db_videos=videos) 63 | 64 | print('Done!') 65 | -------------------------------------------------------------------------------- /app/esper/queries/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scanner-research/esper-tv/179ef57d536ebd52f93697aab09bf5abec19ce93/app/esper/queries/__init__.py -------------------------------------------------------------------------------- /app/esper/queries/all_faces.py: -------------------------------------------------------------------------------- 1 | from esper.prelude import * 2 | from .queries import query 3 | 4 | @query("All faces") 5 | def all_faces(): 6 | from query.models import Face 7 | from esper.widget import qs_to_result 8 | return qs_to_result(Face.objects.all(), stride=1000) 9 | 10 | 11 | @query("All clothing") 12 | def all_clothing(): 13 | from query.models import Clothing 14 | from esper.widget import qs_to_result 15 | return qs_to_result(Clothing.objects.all(), stride=1000) 16 | 17 | 18 | @query("All haircolor") 19 | def all_haircolor(): 20 | from query.models import HairColor 21 | from esper.widget import qs_to_result 22 | return qs_to_result(HairColor.objects.all(), stride=1000) 23 | 24 | 25 | @query("All hairlength") 26 | def all_hairlength(): 27 | from query.models import HairLength 28 | from esper.widget import qs_to_result 29 | return qs_to_result(HairLength.objects.all(), stride=1000) 30 | 31 | 32 | @query("White hair, not bald") 33 | def white_hair_not_bald(): 34 | from query.models import HairColor, HairLength 35 | from esper.widget import qs_to_result 36 | white_hair_ids = { 37 | x['face__id'] for x in 38 | HairColor.objects.filter(color__name='white').values('face__id')[0:1000000:1000] 39 | } 40 | bald_ids = { 41 | x['face__id'] for x in 
HairLength.objects.filter( 42 | length__name='bald', face__id__in=white_hair_ids 43 | ).values('face__id') 44 | } 45 | return qs_to_result(Face.objects.filter(id__in=white_hair_ids - bald_ids)) 46 | 47 | 48 | @query("Face tags") 49 | def face_tags(): 50 | from query.models import FaceTag 51 | from esper.widget import qs_to_result 52 | return qs_to_result(FaceTag.objects.filter( 53 | labeler__name='race:black:labeler=james', 54 | score__gt=0.5)) -------------------------------------------------------------------------------- /app/esper/queries/all_videos.py: -------------------------------------------------------------------------------- 1 | from esper.prelude import * 2 | from .queries import query 3 | 4 | @query("All videos") 5 | def all_videos(): 6 | from query.models import Video 7 | from esper.widget import qs_to_result 8 | return qs_to_result(Video.objects.all()) 9 | -------------------------------------------------------------------------------- /app/esper/queries/interview_with_person_x.py: -------------------------------------------------------------------------------- 1 | from esper.prelude import * 2 | from .queries import query 3 | 4 | @query('Interview with person X (rekall)') 5 | def interview_with_person_x(): 6 | from query.models import LabeledCommercial, FaceIdentity 7 | from rekall.video_interval_collection import VideoIntervalCollection 8 | from rekall.temporal_predicates import before, after, overlaps 9 | from rekall.logical_predicates import or_pred 10 | from esper.rekall import intrvllists_to_result 11 | 12 | # Get list of sandbox video IDs 13 | sandbox_videos = [ 14 | row.video_id 15 | for row in LabeledCommercial.objects.distinct('video_id') 16 | ] 17 | 18 | TWENTY_SECONDS = 600 19 | FORTY_FIVE_SECONDS = 1350 20 | EPSILON = 10 21 | 22 | guest_name = "bernie sanders" 23 | 24 | # Load hosts and instances of guest from SQL 25 | identities = FaceIdentity.objects.filter(face__shot__video_id__in=sandbox_videos) 26 | hosts_qs = 
identities.filter(face__is_host=True) 27 | guest_qs = identities.filter(identity__name=guest_name).filter(probability__gt=0.7) 28 | 29 | # Put bounding boxes in SQL 30 | hosts = VideoIntervalCollection.from_django_qs( 31 | hosts_qs.annotate(video_id=F("face__shot__video_id"), 32 | min_frame=F("face__shot__min_frame"), 33 | max_frame=F("face__shot__max_frame")) 34 | ) 35 | guest = VideoIntervalCollection.from_django_qs( 36 | guest_qs.annotate(video_id=F("face__shot__video_id"), 37 | min_frame=F("face__shot__min_frame"), 38 | max_frame=F("face__shot__max_frame")) 39 | ) 40 | 41 | # Get all shots where the guest and a host are on screen together 42 | guest_with_host = guest.overlaps(hosts).coalesce() 43 | 44 | # This temporal predicate defines A overlaps with B, or A before by less than 10 frames, 45 | # or A after B by less than 10 frames 46 | overlaps_before_or_after_pred = or_pred( 47 | or_pred(overlaps(), before(max_dist=EPSILON), arity=2), 48 | after(max_dist=EPSILON), arity=2) 49 | 50 | # This code finds sequences of: 51 | # guest with host overlaps/before/after host OR 52 | # guest with host overlaps/before/after guest 53 | interview_candidates = guest_with_host \ 54 | .merge(hosts, predicate=overlaps_before_or_after_pred) \ 55 | .set_union(guest_with_host.merge( 56 | guest, predicate=overlaps_before_or_after_pred)) \ 57 | .coalesce() 58 | 59 | # Sequences may be interrupted by shots where the guest or host don't 60 | # appear, so dilate and coalesce to merge neighboring segments 61 | interviews = interview_candidates \ 62 | .dilate(TWENTY_SECONDS) \ 63 | .coalesce() \ 64 | .dilate(-1 * TWENTY_SECONDS) \ 65 | .filter_length(min_length=FORTY_FIVE_SECONDS) 66 | 67 | # Return intervals 68 | return intrvllists_to_result(interviews.get_allintervals()) 69 | -------------------------------------------------------------------------------- /app/esper/queries/panels_rekall.py: -------------------------------------------------------------------------------- 1 | from 
esper.prelude import * 2 | from .queries import query 3 | 4 | @query('Panels (rekall)') 5 | def panels_rekall(): 6 | from query.models import LabeledCommercial, Face 7 | from rekall.video_interval_collection import VideoIntervalCollection 8 | from rekall.parsers import in_array, bbox_payload_parser 9 | from rekall.merge_ops import payload_plus 10 | from rekall.bbox_predicates import height_at_least, same_value, left_of 11 | from rekall.spatial_predicates import scene_graph 12 | from rekall.payload_predicates import payload_satisfies 13 | from esper.rekall import intrvllists_to_result_bbox 14 | 15 | MIN_FACE_HEIGHT = 0.3 16 | EPSILON = 0.05 17 | 18 | # Get list of sandbox video IDs 19 | sandbox_videos = [ 20 | row.video_id 21 | for row in LabeledCommercial.objects.distinct('video_id') 22 | ] 23 | 24 | faces_qs = Face.objects.filter(shot__video_id__in=sandbox_videos).annotate( 25 | video_id=F("shot__video_id"), 26 | min_frame=F("shot__min_frame"), 27 | max_frame=F("shot__max_frame") 28 | ) 29 | 30 | # One interval for each face 31 | faces = VideoIntervalCollection.from_django_qs( 32 | faces_qs, 33 | with_payload=in_array( 34 | bbox_payload_parser( 35 | VideoIntervalCollection.django_accessor))) 36 | 37 | # Merge shots 38 | faces = faces.coalesce(payload_merge_op=payload_plus) 39 | 40 | # Define a scene graph for things that look like panels 41 | three_faces_scene_graph = { 42 | 'nodes': [ 43 | { 'name': 'face1', 'predicates': [ height_at_least(MIN_FACE_HEIGHT) ] }, 44 | { 'name': 'face2', 'predicates': [ height_at_least(MIN_FACE_HEIGHT) ] }, 45 | { 'name': 'face3', 'predicates': [ height_at_least(MIN_FACE_HEIGHT) ] } 46 | ], 47 | 'edges': [ 48 | { 'start': 'face1', 'end': 'face2', 49 | 'predicates': [ same_value('y1', epsilon=EPSILON), left_of() ] }, 50 | { 'start': 'face2', 'end': 'face3', 51 | 'predicates': [ same_value('y1', epsilon=EPSILON), left_of() ] }, 52 | ] 53 | } 54 | 55 | panels = faces.filter(payload_satisfies( 56 | scene_graph(three_faces_scene_graph, 
exact=True) 57 | )) 58 | 59 | return intrvllists_to_result_bbox(panels.get_allintervals()) 60 | 61 | -------------------------------------------------------------------------------- /app/esper/queries/panels_sql.py: -------------------------------------------------------------------------------- 1 | from esper.prelude import * 2 | from .queries import query 3 | 4 | def panels(): 5 | from query.base_models import BoundingBox 6 | from query.models import Labeler, Face, Frame 7 | from esper.widget import qs_to_result 8 | from django.db.models import OuterRef, Count, IntegerField 9 | 10 | mtcnn = Labeler.objects.get(name='mtcnn') 11 | face_qs = Face.objects.annotate(height=BoundingBox.height_expr()).filter( 12 | height__gte=0.25, labeler=mtcnn, shot__in_commercial=False) 13 | frames = Frame.objects.annotate(c=Subquery( 14 | face_qs.filter(frame=OuterRef('pk')) \ 15 | .values('frame') \ 16 | .annotate(c=Count('*')) \ 17 | .values('c'), IntegerField())) \ 18 | .filter(c__gte=3, c__lte=3).order_by('id') 19 | 20 | output_frames = [] 21 | for frame in frames[:10000:10]: 22 | faces = list(face_qs.filter(frame=frame)) 23 | y = faces[0].bbox_y1 24 | valid = True 25 | for i in range(1, len(faces)): 26 | if abs(faces[i].bbox_y1 - y) > 0.05: 27 | valid = False 28 | break 29 | if valid: 30 | output_frames.append((frame, faces)) 31 | 32 | return output_frames 33 | 34 | 35 | @query("Panels (SQL)") 36 | def panels_(): 37 | from esper.queries.panels_sql import panels 38 | return simple_result([{ 39 | 'video': frame.video.id, 40 | 'min_frame': frame.number, 41 | 'objects': [bbox_to_dict(f) for f in faces] 42 | } for (frame, faces) in panels()], 'Frame') 43 | -------------------------------------------------------------------------------- /app/esper/queries/queries.py: -------------------------------------------------------------------------------- 1 | from esper.prelude import * 2 | from collections import defaultdict 3 | from functools import reduce 4 | import inspect 5 | import os 6 
queries = []


def query(name):
    """Decorator that registers a function as a named demo query.

    The decorated function's source (from its `def` line onward, plus a
    trailing `FN = <name>` assignment) is appended to the module-level
    `queries` list as [name, source] so the web UI can display and
    re-execute it. The function itself is returned unchanged.
    """

    def wrapper(f):
        lines = inspect.getsource(f).split('\n')
        lines = lines[:-1]  # getsource includes a trailing newline

        # Skip decorator lines to find the start of the function itself.
        # (Previously searched for the literal "():", which breaks for any
        # query function that takes arguments; matching the `def` keyword
        # is robust and gives the same result for zero-arg functions.)
        i = 0
        while not lines[i].lstrip().startswith('def '):
            i = i + 1

        fn = lines[i:]
        fn += ['FN = ' + f.__name__]
        queries.append([name, '\n'.join(fn)])

        return f

    return wrapper

from .all_faces import *
from .all_videos import *
from .panels_sql import *
from .panels_rekall import *
from .interview_with_person_x import *
from .sandbox_labels import *
from .other_queries import *

# ---- app/esper/queries/sandbox_labels.py ----
from esper.prelude import *
from .queries import query

@query('Hand-labeled Interviews (Sandbox)')
def handlabeled_interviews():
    from query.models import LabeledInterview
    from esper.rekall import qs_to_intrvllists, intrvllists_to_result

    # start/end are multiplied by fps, i.e. converted to frame numbers.
    interviews = LabeledInterview.objects \
        .annotate(fps=F('video__fps')) \
        .annotate(min_frame=F('fps') * F('start')) \
        .annotate(max_frame=F('fps') * F('end'))

    return intrvllists_to_result(qs_to_intrvllists(interviews))

@query('Hand-labeled Panels (Sandbox)')
def handlabeled_panels():
    from query.models import LabeledPanel
    from esper.rekall import qs_to_intrvllists, intrvllists_to_result

    panels = LabeledPanel.objects \
        .annotate(fps=F('video__fps')) \
        .annotate(min_frame=F('fps') * F('start')) \
        .annotate(max_frame=F('fps') * F('end'))

    return intrvllists_to_result(qs_to_intrvllists(panels))

@query('Hand-labeled Commercials (Sandbox)') 29 | def handlabeled_commercials(): 30 | from query.models import LabeledCommercial 31 | from esper.rekall import qs_to_intrvllists, intrvllists_to_result 32 | 33 | commercials = LabeledCommercial.objects \ 34 | .annotate(fps=F('video__fps')) \ 35 | .annotate(min_frame=F('fps') * F('start')) \ 36 | .annotate(max_frame=F('fps') * F('end')) 37 | 38 | return intrvllists_to_result(qs_to_intrvllists(commercials)) 39 | 40 | @query('Multiple Timelines (Sandbox)') 41 | def multiple_timelines(): 42 | from query.models import LabeledInterview, LabeledPanel, LabeledCommercial 43 | from esper.rekall import qs_to_intrvllists, intrvllists_to_result, add_intrvllists_to_result 44 | 45 | interviews = LabeledInterview.objects \ 46 | .annotate(fps=F('video__fps')) \ 47 | .annotate(min_frame=F('fps') * F('start')) \ 48 | .annotate(max_frame=F('fps') * F('end')) 49 | panels = LabeledPanel.objects \ 50 | .annotate(fps=F('video__fps')) \ 51 | .annotate(min_frame=F('fps') * F('start')) \ 52 | .annotate(max_frame=F('fps') * F('end')) 53 | commercials = LabeledCommercial.objects \ 54 | .annotate(fps=F('video__fps')) \ 55 | .annotate(min_frame=F('fps') * F('start')) \ 56 | .annotate(max_frame=F('fps') * F('end')) 57 | 58 | result = intrvllists_to_result(qs_to_intrvllists(interviews)) 59 | add_intrvllists_to_result(result, qs_to_intrvllists(panels), color="blue") 60 | add_intrvllists_to_result(result, qs_to_intrvllists(commercials), color="purple") 61 | return result 62 | -------------------------------------------------------------------------------- /app/esper/rekall_vgrid.py: -------------------------------------------------------------------------------- 1 | """Extensions to rekall.vgrid_utils especialized to Esper Database Schema. 2 | 3 | Tracks: 4 | CaptionTrack: Add caption to the VBlocks. Works in either VideoVBlocks or 5 | IntervalVBlocks mode. 
6 | """ 7 | 8 | from rekall.interval_set_3d import IntervalSet3D, Interval3D 9 | from rekall.vgrid_utils.vblocks_builder import build_interval, DrawType_Caption 10 | import esper.captions 11 | 12 | class CaptionTrack: 13 | """Track for adding captions to vblocks. 14 | 15 | Works with either VideoVBlocksBuilder or IntervalVBlocksBuilder. 16 | 17 | Example of using with VideoVBlockBuilder: 18 | 19 | # face_collection is a DomainIntervalCollection of face intervals. 20 | 21 | # We want to see a list of VBlocks where each is a video in the collection 22 | # We want to see two tracks in each VBlock: one showing all faces in the 23 | # collection with bounding boxes drawn and a flag metadata set; another 24 | # showing all captions. 25 | 26 | json = VideoVBlocksBuilder()\\ 27 | .add_track( 28 | VideoTrackBuilder('faces', face_collection)\\ 29 | .set_draw_type(DrawType_Bbox())\\ 30 | .add_metadata('flag', Metadata_Flag()))\\ 31 | .add_track(CaptionTrack())\\ 32 | .build() 33 | """ 34 | def __init__(self): 35 | self.name = 'caption' 36 | self.video_ids = set([]) 37 | self._cache = {} 38 | 39 | def build_for_video(self, video_id): 40 | if video_id in self._cache: 41 | return self._cache[video_id] 42 | ret = self._get_captions_for_video(video_id) 43 | self._cache[video_id] = ret 44 | return ret 45 | 46 | def build_for_interval(self, video_id, interval): 47 | return self.build_for_video(video_id) 48 | 49 | def _get_captions_for_video(self, video_id): 50 | """Returns the JSON intervals with the captions 51 | 52 | Note: 53 | The returned intervals use seconds on temporal dimension instead 54 | of frame number 55 | """ 56 | subs = esper.captions.get_json(video_id) 57 | 58 | output = [] 59 | for sub in subs: 60 | interval = Interval3D( 61 | (sub['startTime'], sub['endTime']), 62 | payload=sub['text']) 63 | output.append(build_interval(video_id, interval, 64 | DrawType_Caption(), {})) 65 | return output 66 | 
-------------------------------------------------------------------------------- /app/esper/report.py: -------------------------------------------------------------------------------- 1 | from esper.prelude import * 2 | from matplotlib.patches import Patch 3 | from matplotlib.lines import Line2D 4 | from scipy.stats import linregress 5 | import statsmodels.api as sm 6 | 7 | MALE_COLOR = 'tab:blue' 8 | FEMALE_COLOR = 'tab:red' 9 | MARKER_SIZE = 50 10 | 11 | 12 | def align(col, all_dfs): 13 | all_cols = reduce(lambda x, y: x & y, [set(list(df[col])) for df in all_dfs]) 14 | 15 | main_df = all_dfs[0][all_dfs[0][col].isin(all_cols)].sort_values(by=['M%']).reset_index( 16 | drop=True).reset_index() 17 | 18 | def _align(df): 19 | return df[df[col].isin(all_cols)].set_index(col).reindex( 20 | main_df[col]).reset_index().reset_index() 21 | 22 | return [main_df] + [_align(df) for df in all_dfs[1:]] 23 | 24 | 25 | def screen_speak_scatter(screen_df, screen_nh_df, speak_df, speak_nh_df, col, title, plots): 26 | fig = plt.figure() 27 | ax = fig.add_subplot(111) 28 | 29 | ax.axhline(50, color='black', linestyle='--') 30 | 31 | if 'screen' in plots: 32 | screen_df.plot('index', 'M%', ax=ax, color=MALE_COLOR, kind='scatter', marker='s', s=MARKER_SIZE) 33 | screen_df.plot('index', 'F%', ax=ax, color=FEMALE_COLOR, kind='scatter', marker='s', s=MARKER_SIZE) 34 | 35 | if len(plots) == 1: 36 | pairs = list(zip(screen_df['M%'].tolist(), screen_df['F%'].tolist())) 37 | c = matplotlib.collections.LineCollection( 38 | [((i, a), (i, b)) for (i, (a, b)) in enumerate(pairs)], 39 | colors=[MALE_COLOR if a > b else FEMALE_COLOR for (a, b) in pairs], 40 | linewidths=[3 for _ in range(len(pairs))]) 41 | ax.add_collection(c) 42 | 43 | if 'screen_nh' in plots: 44 | screen_nh_df.plot('index', 'M%', ax=ax, color=MALE_COLOR, kind='scatter', marker='x', s=MARKER_SIZE) 45 | screen_nh_df.plot('index', 'F%', ax=ax, color=FEMALE_COLOR, kind='scatter', marker='x', s=MARKER_SIZE) 46 | 47 | # 
print(model.summary()) 48 | # n = len(screen_nh_df.index) 49 | # [intercept, slope] = model.params 50 | # X = screen_df['M%'].tolist() 51 | 52 | # ax.scatter(range(len(X)), [intercept + slope * x for x in X], color='green') 53 | # ax.axhline(np.mean(screen_nh_df['M%']), color='black', linestyle='--') 54 | 55 | # slope, intercept, r, p, _3 = linregress(screen_nh_df.index.tolist(), 56 | # screen_nh_df['M%'].tolist()) 57 | # ax.plot([0, n], [intercept, intercept + slope * n], color='black') 58 | # print(r, p) 59 | 60 | if 'speak' in plots: 61 | speak_df.plot('index', 'M%', ax=ax, color=MALE_COLOR, kind='scatter', marker='^') 62 | speak_df.plot('index', 'F%', ax=ax, color=FEMALE_COLOR, kind='scatter', marker='^') 63 | 64 | if 'speak_nh' in plots: 65 | # speak_nh_df.plot('index', 'M%', ax=ax, color='tab:orange', kind='scatter', marker='x') 66 | pass 67 | 68 | ax.set_ylim(0, 100) 69 | ax.set_ylabel('Percentage of time') 70 | ax.set_xlabel('') 71 | ax.set_xticks(range(len(screen_df[col]))) 72 | ax.set_xticklabels(screen_df[col], rotation=45, horizontalalignment='right') 73 | ax.tick_params(labelsize='large') 74 | 75 | legends = { 76 | 'screen': ['Screen time - male', 'Screen time - female'], 77 | 'screen_nh': ['Screen time (no host) - male', 'Screen time (no host) - female'], 78 | 'speak': ['Speaking time - male', 'Speaking time - female'], 79 | 'speak_nh': ['Speaking time (no host)'] 80 | } 81 | 82 | ax.legend(['50%'] + flatten([legends[p] for p in plots])) 83 | plt.title(title) 84 | plt.tight_layout() 85 | -------------------------------------------------------------------------------- /app/esper/spacy_tokenize.py: -------------------------------------------------------------------------------- 1 | from esper.prelude import * 2 | from esper.widget import * 3 | import spacy 4 | import pysrt 5 | import itertools 6 | from spacy.attrs import ORTH, LEMMA 7 | import esper.datatypes_pb2 as datatypes 8 | import multiprocessing as mp 9 | 10 | SUB_CACHE_DIR = '/app/data/subs' 11 
| 12 | nlp = spacy.load('en', disable=['parser', 'ner']) 13 | nlp.tokenizer.add_special_case( 14 | '>>>', 15 | [{ORTH: ">>>", LEMMA: ">>>"}]) 16 | nlp.tokenizer.add_special_case( 17 | '>>', 18 | [{ORTH: ">>", LEMMA: ">>"}]) 19 | 20 | videos = list(Video.objects.all()) 21 | 22 | def load_transcript(video): 23 | if video.srt_extension == '': 24 | return None 25 | 26 | path = '/app/data/subs/orig/{}.{}.srt'.format(video.item_name(), video.srt_extension) 27 | 28 | # TODO(wcrichto): small subset of documents are failing with utf8 decode errors 29 | try: 30 | subs = pysrt.from_string(open(path, 'rb').read().decode('utf-8')) 31 | except Exception: 32 | print(video.path) 33 | return None 34 | 35 | # In practice, seems like subs are usually about 5 seconds late, so this is a hand-tuned shift 36 | subs.shift(seconds=-5) 37 | 38 | return subs 39 | 40 | def time_to_float(t): 41 | return t.hours * 3600 + t.minutes * 60 + t.seconds 42 | 43 | def pos_from_str(s): 44 | exceptions = { 45 | '-LRB-': 'LRB', 46 | '-RRB-': 'RRB', 47 | ',': 'COMMA', 48 | ':': 'COLON', 49 | '.': 'PERIOD', 50 | '\'\'': 'SINGLEQUOTE', 51 | '""': 'DOUBLEQUOTE', 52 | '#': 'POUND', 53 | '``': 'BACKTICK', 54 | '$': 'DOLLAR', 55 | 'PRP$': 'PRPD', 56 | '_SP': 'SP', 57 | 'WP$': 'WPD' 58 | } 59 | 60 | try: 61 | return getattr(datatypes.Document, exceptions[s] if s in exceptions else s) 62 | except AttributeError: 63 | # Sometimes spacy returns '' for token.tag_, not sure why? 
XX is "unknown" so best guess here 64 | return datatypes.Document.XX 65 | 66 | def do_tokenize(video): 67 | flat_path = '{}/flat/{}.txt'.format(SUB_CACHE_DIR, video.item_name()) 68 | meta_path = '{}/meta/{}.bin'.format(SUB_CACHE_DIR, video.item_name()) 69 | if os.path.isfile(meta_path): 70 | return 71 | 72 | subs = load_transcript(video) 73 | if subs is None: 74 | return 75 | 76 | # Create/invoke a generator to tokenize the subtitle text 77 | texts = [sub.text.encode('ascii', 'ignore').decode('utf-8') for sub in subs] 78 | # NB: we have to remove unicode characters for now since Spacy tokens only track the word index, not 79 | # byte index of the token, so there's no easy way to figure out the byte offset of an arbitrary token w/ 80 | # unicode chars > 1 byte. 81 | all_tokens = list(nlp.pipe(texts, batch_size=10000, n_threads=mp.cpu_count())) 82 | 83 | # Convert tokens into Protobuf 84 | cursor = 0 85 | doc = datatypes.Document() 86 | full_text = '' 87 | for (sub, text, tokens) in zip(subs, texts, all_tokens): 88 | for tok in tokens: 89 | word = doc.words.add() 90 | word.char_start = cursor + tok.idx 91 | word.char_end = word.char_start + len(tok.text) 92 | word.time_start = time_to_float(sub.start) 93 | word.time_end = time_to_float(sub.end) 94 | word.pos = pos_from_str(tok.tag_) 95 | word.lemma = tok.lemma_ 96 | full_text += text + ' ' 97 | cursor += len(text) + 1 98 | 99 | # Write flattened transcript as text file 100 | with open(flat_path, 'w') as f: 101 | f.write(full_text) 102 | 103 | # Write proto metadata 104 | with open(meta_path, 'wb') as f: 105 | f.write(doc.SerializeToString()) 106 | 107 | par_for(do_tokenize, videos, workers=12) 108 | -------------------------------------------------------------------------------- /app/esper/sql_dump.py: -------------------------------------------------------------------------------- 1 | from esper.prelude import * 2 | from esper.spark import * 3 | import django.apps 4 | import os 5 | 6 | models = [m._meta.db_table for 
m in django.apps.apps.get_models(include_auto_created=True)] 7 | 8 | with Timer('Exporting models'): 9 | def export_model(model): 10 | try: 11 | sp.check_call("/app/scripts/export-table.sh {}".format(model), shell=True) 12 | except Exception: 13 | import traceback 14 | print(model) 15 | traceback.print_exc() 16 | par_for(export_model, models, workers=8) 17 | 18 | with Timer('Ingest into Spark'): 19 | def transfer_model_spark(model): 20 | if os.path.exists('/app/data/pg/{}.csv'.format(model)): 21 | df = spark.load_csv('/app/data/pg/{}.csv'.format(model)) 22 | spark.save(model, df) 23 | par_for(transfer_model_spark, models, workers=8) 24 | 25 | # with Timer('Ingest into BigQuery'): 26 | # sp.check_call('bq rm -r -f tvnews && bq mk tvnews', shell=True) 27 | # def transfer_model_bq(model): 28 | # try: 29 | # sp.check_call("/app/scripts/transfer-to-bigquery.sh {}".format(model), shell=True) 30 | # except Exception: 31 | # import traceback 32 | # print(model) 33 | # traceback.print_exc() 34 | # par_for(transfer_model_bq, models, workers=8) 35 | -------------------------------------------------------------------------------- /app/esper/update_show.py: -------------------------------------------------------------------------------- 1 | from esper.prelude import * 2 | 3 | for video in Video.objects.all(): 4 | parts = os.path.splitext(os.path.split(video.path)[1])[0].split('_') 5 | [channel, date, time] = parts[:3] 6 | dt = datetime.datetime.strptime('{} {}'.format(date, time), '%Y%m%d %H%M%S') 7 | if channel[-1] == 'W': 8 | channel = channel[:-1] 9 | 10 | show = ' '.join(parts[3:-1] if parts[-1] == 'segment' else parts[3:]) 11 | 12 | video.time = dt 13 | # video.channel = Channel.objects.get_or_create(name=channel)[0] 14 | # video.show = Show.objects.get_or_create(name=show)[0] 15 | video.save() 16 | -------------------------------------------------------------------------------- /app/esper/update_tracks.py: 
-------------------------------------------------------------------------------- 1 | from esper.prelude import * 2 | 3 | tracks = list(PersonTrack.objects.filter(video__videotag__tag__name='pose-test')) 4 | for i, track in enumerate(tracks): 5 | print(i) 6 | faces = list(Face.objects.filter(person__tracks=track).select_related('person__frame')) 7 | if len(faces) == 0: 8 | continue 9 | track.min_frame = min([f.person.frame.number for f in faces]) 10 | track.max_frame = max([f.person.frame.number for f in faces]) 11 | PersonTrack.objects.bulk_update(tracks) 12 | -------------------------------------------------------------------------------- /app/esper/write_video.py: -------------------------------------------------------------------------------- 1 | from query.scripts.script_util import * 2 | from scannerpy.stdlib import writers 3 | from collections import defaultdict 4 | 5 | with Database() as db: 6 | video = Video.objects.all()[0] 7 | faces = FaceInstance.objects.filter(frame__video=video).order_by('frame__number').values('bbox', 'frame__number') 8 | t = db.table(video.path) 9 | 10 | # frames = defaultdict(list) 11 | # for face in faces: 12 | # bbox = face['bbox'] 13 | # bbox.x1 *= video.width 14 | # bbox.x2 *= video.width 15 | # bbox.y1 *= video.height 16 | # bbox.y2 *= video.height 17 | # frames[face['frame__number']].append(bbox) 18 | 19 | # N = t.num_rows() 20 | # all_bboxes = [[] for _ in range(N)] 21 | 22 | # for frame, bboxes in frames.iteritems(): 23 | # for i in range(frame, min(frame+24, N)): 24 | # all_bboxes[i] = bboxes 25 | 26 | # bb_t = db.new_table('test', ['bboxes'], [[t] for t in all_bboxes], fn=writers.bboxes, force=True) 27 | # print bb_t.num_rows() 28 | 29 | bb_t = db.table('test') 30 | 31 | frame = t.as_op().all() 32 | bboxes = bb_t.as_op().all() 33 | out_frame = db.ops.DrawBox(frame=frame, bboxes=bboxes) 34 | job = Job(columns=[out_frame], name='test2') 35 | out_table = db.run(job, force=True) 36 | out_table.column('frame').save_mp4('faces') 
37 | -------------------------------------------------------------------------------- /app/gunicorn_conf.py: -------------------------------------------------------------------------------- 1 | bind = '0.0.0.0:8000' 2 | loglevel = 'debug' 3 | errorlog = '-' 4 | accesslog = '-' 5 | timeout = 0 6 | workers = 1 7 | -------------------------------------------------------------------------------- /app/manage.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import os 3 | import sys 4 | 5 | if __name__ == "__main__": 6 | os.environ.setdefault("DJANGO_SETTINGS_MODULE", "app.settings") 7 | try: 8 | from django.core.management import execute_from_command_line 9 | except ImportError: 10 | # The above import may fail for some other reason. Ensure that the 11 | # issue is really that Django is missing to avoid masking other 12 | # exceptions on Python 2. 13 | try: 14 | import django 15 | except ImportError: 16 | raise ImportError( 17 | "Couldn't import Django. Are you sure it's installed and " 18 | "available on your PYTHONPATH environment variable? Did you " 19 | "forget to activate a virtual environment?" 
20 | ) 21 | raise 22 | execute_from_command_line(sys.argv) 23 | -------------------------------------------------------------------------------- /app/notebooks/duplicate_videos.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "ExecuteTime": { 8 | "end_time": "2019-01-04T03:57:16.981067Z", 9 | "start_time": "2019-01-04T03:57:16.340575Z" 10 | } 11 | }, 12 | "outputs": [], 13 | "source": [ 14 | "from esper.prelude import *" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": null, 20 | "metadata": { 21 | "ExecuteTime": { 22 | "end_time": "2019-01-04T03:58:34.416540Z", 23 | "start_time": "2019-01-04T03:58:28.009468Z" 24 | } 25 | }, 26 | "outputs": [], 27 | "source": [] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "metadata": { 33 | "ExecuteTime": { 34 | "end_time": "2019-01-04T03:58:49.183148Z", 35 | "start_time": "2019-01-04T03:58:48.969612Z" 36 | } 37 | }, 38 | "outputs": [], 39 | "source": [] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": null, 44 | "metadata": { 45 | "ExecuteTime": { 46 | "end_time": "2019-01-04T04:05:25.137187Z", 47 | "start_time": "2019-01-04T04:05:25.043349Z" 48 | } 49 | }, 50 | "outputs": [], 51 | "source": [ 52 | "Video.objects.filter(duplicate=False).count()" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": null, 58 | "metadata": { 59 | "ExecuteTime": { 60 | "end_time": "2019-01-04T04:04:39.154696Z", 61 | "start_time": "2019-01-04T04:02:37.122592Z" 62 | } 63 | }, 64 | "outputs": [], 65 | "source": [ 66 | "for channel in Channel.objects.all():\n", 67 | " videos = list(Video.objects.filter(channel=channel).order_by('time'))\n", 68 | " by_time = collect(videos, lambda v: v.time)\n", 69 | " for k, vs in by_time.items():\n", 70 | " if len(vs) > 1:\n", 71 | " for v in vs[1:]:\n", 72 | " v.duplicate = True\n", 73 | " 
v.save()" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": null, 79 | "metadata": { 80 | "ExecuteTime": { 81 | "end_time": "2019-01-04T04:01:22.106396Z", 82 | "start_time": "2019-01-04T04:01:22.038049Z" 83 | } 84 | }, 85 | "outputs": [], 86 | "source": [ 87 | "pprint([v.item_name() for v in Video.objects.filter(channel__name='CNN', time__year=2012, time__month=3, time__day=11).order_by('time')])" 88 | ] 89 | } 90 | ], 91 | "metadata": { 92 | "kernelspec": { 93 | "display_name": "Django Shell-Plus", 94 | "language": "python", 95 | "name": "django_extensions" 96 | }, 97 | "language_info": { 98 | "codemirror_mode": { 99 | "name": "ipython", 100 | "version": 3 101 | }, 102 | "file_extension": ".py", 103 | "mimetype": "text/x-python", 104 | "name": "python", 105 | "nbconvert_exporter": "python", 106 | "pygments_lexer": "ipython3", 107 | "version": "3.5.2" 108 | }, 109 | "toc": { 110 | "nav_menu": {}, 111 | "number_sections": true, 112 | "sideBar": true, 113 | "skip_h1_title": false, 114 | "toc_cell": false, 115 | "toc_position": {}, 116 | "toc_section_display": "block", 117 | "toc_window_display": false 118 | } 119 | }, 120 | "nbformat": 4, 121 | "nbformat_minor": 2 122 | } 123 | -------------------------------------------------------------------------------- /app/notebooks/example.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from query.datasets.prelude import *" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "print('Number of frames: {}'.format(Frame.objects.all().count())) \n", 19 | "print('Number of faces: {}'.format(Face.objects.all().count())) \n", 20 | "print('Number of poses: {}'.format(Pose.objects.all().count()))" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | 
"execution_count": null, 26 | "metadata": {}, 27 | "outputs": [], 28 | "source": [ 29 | "# TODO(wcrichto): more examples here" 30 | ] 31 | } 32 | ], 33 | "metadata": { 34 | "kernelspec": { 35 | "display_name": "Django Shell-Plus", 36 | "language": "python", 37 | "name": "django_extensions" 38 | }, 39 | "language_info": { 40 | "codemirror_mode": { 41 | "name": "ipython", 42 | "version": 2 43 | }, 44 | "file_extension": ".py", 45 | "mimetype": "text/x-python", 46 | "name": "python", 47 | "nbconvert_exporter": "python", 48 | "pygments_lexer": "ipython2", 49 | "version": "2.7.14" 50 | } 51 | }, 52 | "nbformat": 4, 53 | "nbformat_minor": 2 54 | } 55 | -------------------------------------------------------------------------------- /app/notebooks/export_face_bboxes_for_aws.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 2, 6 | "metadata": { 7 | "ExecuteTime": { 8 | "end_time": "2019-08-08T23:32:43.002377Z", 9 | "start_time": "2019-08-08T23:32:42.973091Z" 10 | } 11 | }, 12 | "outputs": [], 13 | "source": [ 14 | "import os\n", 15 | "import json\n", 16 | "from collections import defaultdict\n", 17 | "from tqdm import tqdm\n", 18 | "\n", 19 | "OUT_DIR = '/app/data/aws_face_bboxes_new'\n", 20 | "\n", 21 | "def get_video_name(s):\n", 22 | " return os.path.splitext(s.split('/')[-1])[0]\n", 23 | "\n", 24 | "frame_3s_sampler = FrameSampler.objects.get(name='3s')\n", 25 | "face_labeler = Labeler.objects.get(name='mtcnn:july-25-2019')" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": null, 31 | "metadata": { 32 | "ExecuteTime": { 33 | "start_time": "2019-08-08T23:32:44.081Z" 34 | } 35 | }, 36 | "outputs": [ 37 | { 38 | "name": "stderr", 39 | "output_type": "stream", 40 | "text": [ 41 | " 48%|█████████████████████████▍ | 123581/257031 [49:38<19:34, 113.60it/s]" 42 | ] 43 | } 44 | ], 45 | "source": [ 46 | "if not os.path.exists(OUT_DIR):\n", 47 | " 
os.makedirs(OUT_DIR)\n", 48 | "\n", 49 | "for video in tqdm(Video.objects.all().order_by('id')):\n", 50 | " video_name = get_video_name(video.path)\n", 51 | " frame_to_faces = defaultdict(list)\n", 52 | " for f in Face.objects.filter(\n", 53 | " frame__video=video, frame__sampler=frame_3s_sampler, labeler=face_labeler\n", 54 | " ).values('id', 'frame__number', 'bbox_x1', 'bbox_x2', 'bbox_y1', 'bbox_y2'):\n", 55 | " frame_to_faces[f['frame__number']].append({\n", 56 | " 'id': f['id'], 'bbox': {\n", 57 | " 'x1': f['bbox_x1'], 'x2': f['bbox_x2'], 'y1': f['bbox_y1'], 'y2': f['bbox_y2']\n", 58 | " }\n", 59 | " })\n", 60 | " video_out_file = os.path.join(OUT_DIR, video_name + '.faces.json')\n", 61 | " with open(video_out_file, 'w') as f:\n", 62 | " json.dump(list(frame_to_faces.items()), f)" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": null, 68 | "metadata": { 69 | "ExecuteTime": { 70 | "start_time": "2019-08-09T00:22:32.815Z" 71 | } 72 | }, 73 | "outputs": [], 74 | "source": [ 75 | "video_and_fps = [(v.id, get_video_name(v.path), v.fps) for v in Video.objects.all()]" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": null, 81 | "metadata": { 82 | "ExecuteTime": { 83 | "start_time": "2019-08-09T00:22:33.167Z" 84 | } 85 | }, 86 | "outputs": [], 87 | "source": [ 88 | "with open('/app/data/videos_and_fps.json', 'w') as f:\n", 89 | " json.dump(video_and_fps, f)" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": null, 95 | "metadata": {}, 96 | "outputs": [], 97 | "source": [] 98 | } 99 | ], 100 | "metadata": { 101 | "kernelspec": { 102 | "display_name": "Django Shell-Plus", 103 | "language": "python", 104 | "name": "django_extensions" 105 | }, 106 | "language_info": { 107 | "codemirror_mode": { 108 | "name": "ipython", 109 | "version": 3 110 | }, 111 | "file_extension": ".py", 112 | "mimetype": "text/x-python", 113 | "name": "python", 114 | "nbconvert_exporter": "python", 115 | "pygments_lexer": "ipython3", 
116 | "version": "3.5.2" 117 | }, 118 | "toc": { 119 | "nav_menu": {}, 120 | "number_sections": true, 121 | "sideBar": true, 122 | "skip_h1_title": false, 123 | "toc_cell": false, 124 | "toc_position": {}, 125 | "toc_section_display": "block", 126 | "toc_window_display": false 127 | } 128 | }, 129 | "nbformat": 4, 130 | "nbformat_minor": 2 131 | } 132 | -------------------------------------------------------------------------------- /app/notebooks/face_attribute_visual_validation.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "ExecuteTime": { 8 | "end_time": "2019-01-10T21:29:06.390877Z", 9 | "start_time": "2019-01-10T21:29:06.369328Z" 10 | } 11 | }, 12 | "outputs": [], 13 | "source": [ 14 | "from IPython.display import display\n", 15 | "from esper.widget import *" 16 | ] 17 | }, 18 | { 19 | "cell_type": "markdown", 20 | "metadata": {}, 21 | "source": [ 22 | "# Strided sample of haircolors" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": null, 28 | "metadata": { 29 | "ExecuteTime": { 30 | "end_time": "2019-01-10T21:29:47.894560Z", 31 | "start_time": "2019-01-10T21:29:38.783980Z" 32 | }, 33 | "scrolled": false 34 | }, 35 | "outputs": [], 36 | "source": [ 37 | "for hc in HairColorName.objects.all():\n", 38 | " print(hc.name)\n", 39 | " widget = esper_widget(\n", 40 | " qs_to_resultf==,\n", 41 | " jupyter_keybindings=True, disable_playback=True, results_per_page=25, \n", 42 | " crop_bboxes=True\n", 43 | " )\n", 44 | " display(widget)" 45 | ] 46 | }, 47 | { 48 | "cell_type": "markdown", 49 | "metadata": {}, 50 | "source": [ 51 | "# Strided sample of hairlengths" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": null, 57 | "metadata": { 58 | "ExecuteTime": { 59 | "end_time": "2019-01-10T21:29:30.614884Z", 60 | "start_time": "2019-01-10T21:29:18.554504Z" 61 | }, 62 | "scrolled": false 
63 | }, 64 | "outputs": [], 65 | "source": [ 66 | "for hl in HairLengthName.objects.all():\n", 67 | " print(hl.name)\n", 68 | " widget = esper_widget(\n", 69 | " qs_to_result(HairLength.objects.filter(length=hl), stride=10000),\n", 70 | " jupyter_keybindings=True, disable_playback=True, results_per_page=25,\n", 71 | " crop_bboxes=False\n", 72 | " )\n", 73 | " display(widget)" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": null, 79 | "metadata": {}, 80 | "outputs": [], 81 | "source": [] 82 | } 83 | ], 84 | "metadata": { 85 | "kernelspec": { 86 | "display_name": "Django Shell-Plus", 87 | "language": "python", 88 | "name": "django_extensions" 89 | }, 90 | "language_info": { 91 | "codemirror_mode": { 92 | "name": "ipython", 93 | "version": 3 94 | }, 95 | "file_extension": ".py", 96 | "mimetype": "text/x-python", 97 | "name": "python", 98 | "nbconvert_exporter": "python", 99 | "pygments_lexer": "ipython3", 100 | "version": "3.5.2" 101 | }, 102 | "toc": { 103 | "nav_menu": {}, 104 | "number_sections": true, 105 | "sideBar": true, 106 | "skip_h1_title": false, 107 | "toc_cell": false, 108 | "toc_position": {}, 109 | "toc_section_display": "block", 110 | "toc_window_display": false 111 | } 112 | }, 113 | "nbformat": 4, 114 | "nbformat_minor": 2 115 | } 116 | -------------------------------------------------------------------------------- /app/notebooks/get_canonical_shows.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "ExecuteTime": { 8 | "end_time": "2018-12-18T23:51:48.501217Z", 9 | "start_time": "2018-12-18T23:51:39.741092Z" 10 | } 11 | }, 12 | "outputs": [], 13 | "source": [ 14 | "from esper.prelude import *\n", 15 | "from esper.identity import *" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": null, 21 | "metadata": { 22 | "ExecuteTime": { 23 | "end_time": 
"2018-12-19T00:56:45.311540Z", 24 | "start_time": "2018-12-19T00:56:45.030276Z" 25 | }, 26 | "scrolled": false 27 | }, 28 | "outputs": [], 29 | "source": [ 30 | "from django.db.models import Count, Sum, F, FloatField\n", 31 | "show_and_video_counts = { \n", 32 | " (x['show__name'], x['channel__name'], x['count'], x['total_duration'] / 3600.)\n", 33 | " for x in Video.objects.values(\n", 34 | " 'show__name', 'channel__name'\n", 35 | " ).annotate(\n", 36 | " count=Count('show__name'),\n", 37 | " total_duration=Sum(F('num_frames') / F('fps'), output_field=FloatField())\n", 38 | " )\n", 39 | "}\n", 40 | "\n", 41 | "print('Show Name,Channel,Video Count,Hours,Recurring,Canonical Show Name'.format(k, v))\n", 42 | "for name, channel, count, duration in sorted(show_and_video_counts):\n", 43 | " print('\"{}\",\"{}\",{},{},{},\"{}\"'.format(name, channel, count, duration, count > 30, name))" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": null, 49 | "metadata": {}, 50 | "outputs": [], 51 | "source": [] 52 | } 53 | ], 54 | "metadata": { 55 | "kernelspec": { 56 | "display_name": "Django Shell-Plus", 57 | "language": "python", 58 | "name": "django_extensions" 59 | }, 60 | "language_info": { 61 | "codemirror_mode": { 62 | "name": "ipython", 63 | "version": 3 64 | }, 65 | "file_extension": ".py", 66 | "mimetype": "text/x-python", 67 | "name": "python", 68 | "nbconvert_exporter": "python", 69 | "pygments_lexer": "ipython3", 70 | "version": "3.5.2" 71 | }, 72 | "toc": { 73 | "nav_menu": {}, 74 | "number_sections": true, 75 | "sideBar": false, 76 | "skip_h1_title": false, 77 | "toc_cell": false, 78 | "toc_position": {}, 79 | "toc_section_display": "block", 80 | "toc_window_display": false 81 | } 82 | }, 83 | "nbformat": 4, 84 | "nbformat_minor": 2 85 | } 86 | -------------------------------------------------------------------------------- /app/notebooks/michaela_notebooks/Analysis of Shooters and Victims/Chris Harper-Mercer.txt: 
-------------------------------------------------------------------------------- 1 | {"0": [289439165, 315101219, 260135011, 308988138, 314370344, 305932611, 283017590, 260135307, 283016608, 314371520, 291186128, 282895833, 305931811, 260135504, 327684878, 314372696, 283016805, 327627367, 308988534, 305932411, 307244791, 260135702, 314372893, 260135899, 308988731, 310765415, 282896026, 314369956, 283016020, 310765215, 260142048, 267831330, 267831133, 266475600, 309042293, 285205841, 248687001, 270940645, 327555565, 317921366, 267871773, 324849199, 267871674, 267871872, 266819217, 291155296, 248689346, 338689817, 317922150, 266475404, 267831231, 299585502, 344686599, 270840741, 344609815, 291145765, 267849766, 260071466, 329150508, 305934394, 293430334, 310752331, 267887714, 270844023, 284599420, 329151511, 344609964, 318423581, 269622985, 267887813, 298280144, 305935571, 288912611, 267849966, 263811311, 306316738, 310752529, 289382674, 342406427, 306326844, 272915815, 329147756, 267824495, 310810017, 310753521, 270921128, 288912809, 267865514, 267824593, 299997093, 275584516, 310756882, 267886111, 292246067, 291182143, 292258380, 267888211, 288913007, 269622888, 293431922, 275252860, 267886211, 305934982, 329151111, 329150111, 310752928, 305934004, 310752129, 309042886, 329148141, 328749809, 291183347, 292246264, 329413382, 270841637, 269623081, 267848492, 329150309, 266855228, 310757864, 293430136, 289382277, 342143909, 328750006, 263811011, 263935941, 329413580, 310753127, 270841835, 306415659, 314347383, 288823489, 276023556, 279981281, 327635496, 279986424, 306316544, 330612997, 250886275, 288813361, 314413506, 327641439, 306415458, 329037249, 267162054, 276023752, 314347977, 276033997, 217173467, 288813559, 276033800, 279893566, 327635896, 307206840, 279981680, 306316933, 327638666, 324909714, 270219887, 306415260, 307206640, 306326645, 275584713, 279891578, 270160083, 291824400, 307244591, 306317127, 310809418, 276023949, 342501939, 276024146, 330612601, 
265332606, 307207039, 275584910, 310757276, 298281908, 276120501, 327636297, 265319430, 344655884, 314414094, 305935375, 310809617, 293429940, 308726853, 306208472, 250886230, 283026869, 299997293, 292246659, 266444934, 344621192, 270839945, 267849866, 292477069, 279986035, 305934200, 327626965, 308727249, 283026675, 267871478, 293430531, 272915716, 275252494, 267887913, 326597683, 270834998, 329041207, 275252677, 267850066, 267871576, 275254145, 292258182, 288823688, 267888012, 279891182, 315101612, 267850164, 267886013, 329150911, 305934786, 260140961, 275106539, 329037052, 267888112, 318878204, 270219784, 267887120, 314413308, 267850263, 319496732, 267886112, 327685668, 291825194, 329413187, 291824200, 298280540, 267848293, 267850362, 263811711, 318878397, 299996894, 267831036, 291823399, 260140860, 305935178, 291145565, 308987744, 263935841, 344609666, 270834605, 318877626, 292476868, 327685074, 344621044, 266570753, 279885837, 291186718, 306356264, 279980093, 291185734, 279995463, 266570853, 279984248, 279990358, 279886037, 266570953, 279979895, 279886635, 291178750, 310753324, 291185931, 279990553, 266571053, 291178551, 279983457, 319436153, 279886237, 266570170, 291178949, 279990749, 279979496, 291185140, 330615566, 327630353, 279983655, 279886436, 266570366, 291179149, 291186320, 279990945, 279979695, 291185336, 266570463, 279983852, 279885438, 342502137, 270943008, 266570560, 314371912, 291179348, 291186519, 279991144, 291185536, 279995265, 270220187, 327630750, 279885637, 266570657, 279984050, 305932211, 266570269, 291186915, 279886834, 263811111, 270215223, 267827259, 267867199, 309043281, 260071566, 329413777, 335738394, 267867299, 329150708, 327638273, 267867400, 330612002, 263811411, 329413975, 267867501, 314414486, 330615175, 305935767, 270844315, 263811511, 342406623, 279985642, 305933806, 283028985, 329414170, 263811611, 267867701, 267826960, 327639651, 267867801, 267826861, 313207510, 267867601, 309041899, 342284029, 260141852, 263811211, 
267827059, 267865415, 267827159, 267867098], "2": [288705695, 338689216, 289417415, 299584727, 289417613, 275926515, 259669979, 299584921, 259669879, 259670179, 289418212, 338689019, 306208671]} -------------------------------------------------------------------------------- /app/notebooks/michaela_notebooks/Analysis of Shooters and Victims/Eric Garner.txt: -------------------------------------------------------------------------------- 1 | {"0": [216508454, 147071711, 215036859, 298486470], "1": [215971845, 215946506, 215936524, 215961880, 215946265, 215942942, 215934756, 215970087, 215946026, 215970292, 215969331, 215969858, 215961666, 215945800, 318887266, 215962638, 215934042, 215933788, 215970909, 215971423, 215962210, 215964263, 215945454, 215969652, 215945338, 215962431, 147072533, 215944320, 215935108, 215943980, 215943055, 215964053, 215970712, 215971737, 215943749, 215943623, 215934279, 215943394, 215970504, 215964363, 215972054, 215972573, 215969224, 215962856, 215972263, 75473904, 216137594, 216137004, 213625870, 336362002, 214576149, 216137243, 214577157, 336362795, 336363589, 214576613, 215445324, 214575697, 336364382, 216136763, 214575474, 215444647, 213131956, 214576833, 214576038, 213130969, 336360411, 213627871, 215651453, 214576370, 213628916, 215652137, 336361208, 216148989, 213629113], "2": [143039353, 74872605, 144399912, 143039020, 213113053, 144371511, 74938509, 144399192, 144370788, 74872934, 213112938, 144400268, 213112717, 74873230, 144399549, 213112825, 74872314], "3": [216238362, 345062702, 345064752, 217103670, 217039935, 311521376, 217103549, 216128918, 216129185, 223016797, 345064136, 250738104, 250738106, 250738108, 250738110, 217040068, 216238246, 345063420, 223016686, 345065468, 296458037, 275929954, 296457838, 275930748, 296456647, 296457442, 308692195, 296457045], "4": [285968605, 285975374, 76529506, 343714038, 218939196, 217498951, 216238476, 76532589, 76531065, 218139528, 219832033, 219786536]} 
-------------------------------------------------------------------------------- /app/notebooks/michaela_notebooks/Analysis of Shooters and Victims/Info.txt: -------------------------------------------------------------------------------- 1 | Accounted for people (more clusters per person, focus on controversial names): 2 | 3 | Shooters: 4 | 5 | Chris Harper-Mercer (Okay) 6 | Devin Patrick Kelley (Okay) 7 | Dylann Roof (Good) 8 | Esteban Santiago-Ruiz (Okay) 9 | Gavin Eugene-Long (Bad-not significant enough data) 10 | Micah Xavier Johnson (Good) 11 | Omar Mateen (Good) 12 | Robert Lewis Dear Jr (Good) 13 | Stephen Paddock (Good) 14 | Syed Rizwan Farook (Good) 15 | Tashfeen Malik (Good) 16 | Willie Corey Godbolt (Bad-not significant enough data) 17 | 18 | Victims: 19 | Eric Garner (Not in database) 20 | Laquan McDonald (Not in database) 21 | Michael Brown (Not in database) 22 | Philando Castille (Not in database) 23 | Tamir Rice (Not in database) 24 | Trayvon Martin (Redo) 25 | Walter Scott (IDK) 26 | 27 | Police Officers: 28 | 29 | Daniel Pantaleo (Eric Garner, Not in DB) 30 | Darren Wilson (Michaela Brown, Not in DB) 31 | Jason Van Dyke (Laquan McDonald, Not in DB) 32 | ??? 
(Tamir Rice, Unknown) 33 | Michael Slager (Walter Scott, Not in DB) 34 | Blane Salamoni (Alton Sterling, Not in DB) 35 | Howie Lake II (Alton Sterling, Not in DB) 36 | Jeronimo Yanez (Philando Castille, Not in DB) 37 | 38 | Celebrity: 39 | 40 | Robert Ailes (TBD) 41 | Miley Cyrus (TBD) 42 | Kanye West (Not in database) 43 | George Bush (Not in database) 44 | Michael Jackson (Not in database) 45 | Barack Obama (Not in database) 46 | -------------------------------------------------------------------------------- /app/notebooks/michaela_notebooks/Analysis of Shooters and Victims/Laquan McDonald (Victim).ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [] 9 | } 10 | ], 11 | "metadata": { 12 | "kernelspec": { 13 | "display_name": "Django Shell-Plus", 14 | "language": "python", 15 | "name": "django_extensions" 16 | }, 17 | "language_info": { 18 | "codemirror_mode": { 19 | "name": "ipython", 20 | "version": 3 21 | }, 22 | "file_extension": ".py", 23 | "mimetype": "text/x-python", 24 | "name": "python", 25 | "nbconvert_exporter": "python", 26 | "pygments_lexer": "ipython3", 27 | "version": "3.5.2" 28 | }, 29 | "toc": { 30 | "nav_menu": {}, 31 | "number_sections": true, 32 | "sideBar": true, 33 | "skip_h1_title": false, 34 | "toc_cell": false, 35 | "toc_position": {}, 36 | "toc_section_display": "block", 37 | "toc_window_display": false 38 | } 39 | }, 40 | "nbformat": 4, 41 | "nbformat_minor": 2 42 | } 43 | -------------------------------------------------------------------------------- /app/notebooks/michaela_notebooks/Analysis of Shooters and Victims/Michael Brown (Victim).ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Michael Brown" 8 | ] 9 | }, 10 | { 11 | 
"cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": { 14 | "ExecuteTime": { 15 | "end_time": "2019-08-07T17:22:52.450158Z", 16 | "start_time": "2019-08-07T17:22:45.358697Z" 17 | } 18 | }, 19 | "outputs": [], 20 | "source": [ 21 | "import ipywidgets as widgets\n", 22 | "from IPython.display import display\n", 23 | "import esper.identity_clusters\n", 24 | "from esper.identity_clusters import identity_clustering_workflow,_manual_recluster,visualization_workflow" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": 2, 30 | "metadata": { 31 | "ExecuteTime": { 32 | "end_time": "2019-08-07T17:22:52.496257Z", 33 | "start_time": "2019-08-07T17:22:52.453623Z" 34 | } 35 | }, 36 | "outputs": [], 37 | "source": [ 38 | "shootings = [\n", 39 | " ('Muhammad Youssef Abdulazeez', 'Chattanooga', 'Jul 16, 2015'),\n", 40 | " ('Chris Harper-Mercer', 'Umpqua Community College', 'Oct 1, 2015'),\n", 41 | " ('Robert Lewis Dear Jr', 'Colorado Springs - Planned Parenthood', 'Nov 27, 2015'),\n", 42 | " ('Syed Rizwan Farook', 'San Bernardino', 'Dec 2, 2015'), \n", 43 | " ('Tashfeen Malik', 'San Bernardino', 'Dec 2, 2015'),\n", 44 | " ('Dylann Roof', 'Charleston Shurch', 'Jun 17, 2015'),\n", 45 | " ('Omar Mateen', 'Orlando Nightclub', 'Jun 12, 2016'),\n", 46 | " ('Micah Xavier Johnson', 'Dallas Police', 'Jul 7-8, 2016'),\n", 47 | " ('Gavin Eugene Long', 'Baton Rouge Police', 'Jul 17, 2016'),\n", 48 | " ('Esteban Santiago-Ruiz', 'Ft. 
Lauderdale Airport', 'Jan 6, 2017'),\n", 49 | " ('Willie Corey Godbolt', 'Lincoln County', 'May 28, 2017'),\n", 50 | " ('Stephen Paddock', 'Las Vegas', 'Oct 1, 2017'),\n", 51 | " ('Devin Patrick Kelley', 'San Antonio Church', 'Nov 5, 2017'),\n", 52 | " ('Eric Garner', 'New York', 'Jul 17, 2014'),\n", 53 | " ('Michael Brown', 'Ferguson', 'Aug 09, 2014')\n", 54 | "]\n", 55 | "orm_set = { x.name for x in Identity.objects.filter(name__in=[s[0].lower() for s in shootings]) }\n", 56 | "for s in shootings:\n", 57 | " assert s[0].lower() in orm_set, '{} is not in the database'.format(s)" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": null, 63 | "metadata": { 64 | "ExecuteTime": { 65 | "start_time": "2019-08-07T17:22:14.990Z" 66 | } 67 | }, 68 | "outputs": [], 69 | "source": [ 70 | "identity_clustering_workflow('Michael Brown','Aug 09, 2014', True)" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": null, 76 | "metadata": {}, 77 | "outputs": [], 78 | "source": [] 79 | } 80 | ], 81 | "metadata": { 82 | "kernelspec": { 83 | "display_name": "Django Shell-Plus", 84 | "language": "python", 85 | "name": "django_extensions" 86 | }, 87 | "language_info": { 88 | "codemirror_mode": { 89 | "name": "ipython", 90 | "version": 3 91 | }, 92 | "file_extension": ".py", 93 | "mimetype": "text/x-python", 94 | "name": "python", 95 | "nbconvert_exporter": "python", 96 | "pygments_lexer": "ipython3", 97 | "version": "3.5.2" 98 | }, 99 | "toc": { 100 | "nav_menu": {}, 101 | "number_sections": true, 102 | "sideBar": true, 103 | "skip_h1_title": false, 104 | "toc_cell": false, 105 | "toc_position": {}, 106 | "toc_section_display": "block", 107 | "toc_window_display": false 108 | } 109 | }, 110 | "nbformat": 4, 111 | "nbformat_minor": 2 112 | } 113 | -------------------------------------------------------------------------------- /app/notebooks/michaela_notebooks/Analysis of Shooters and Victims/Philando Castile (Victim).ipynb: 
-------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [] 9 | } 10 | ], 11 | "metadata": { 12 | "kernelspec": { 13 | "display_name": "Django Shell-Plus", 14 | "language": "python", 15 | "name": "django_extensions" 16 | }, 17 | "language_info": { 18 | "codemirror_mode": { 19 | "name": "ipython", 20 | "version": 3 21 | }, 22 | "file_extension": ".py", 23 | "mimetype": "text/x-python", 24 | "name": "python", 25 | "nbconvert_exporter": "python", 26 | "pygments_lexer": "ipython3", 27 | "version": "3.5.2" 28 | }, 29 | "toc": { 30 | "nav_menu": {}, 31 | "number_sections": true, 32 | "sideBar": true, 33 | "skip_h1_title": false, 34 | "toc_cell": false, 35 | "toc_position": {}, 36 | "toc_section_display": "block", 37 | "toc_window_display": false 38 | } 39 | }, 40 | "nbformat": 4, 41 | "nbformat_minor": 2 42 | } 43 | -------------------------------------------------------------------------------- /app/notebooks/michaela_notebooks/Analysis of Shooters and Victims/Walter Scott (Victim).ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [] 9 | } 10 | ], 11 | "metadata": { 12 | "kernelspec": { 13 | "display_name": "Django Shell-Plus", 14 | "language": "python", 15 | "name": "django_extensions" 16 | }, 17 | "language_info": { 18 | "codemirror_mode": { 19 | "name": "ipython", 20 | "version": 3 21 | }, 22 | "file_extension": ".py", 23 | "mimetype": "text/x-python", 24 | "name": "python", 25 | "nbconvert_exporter": "python", 26 | "pygments_lexer": "ipython3", 27 | "version": "3.5.2" 28 | }, 29 | "toc": { 30 | "nav_menu": {}, 31 | "number_sections": true, 32 | "sideBar": true, 33 | "skip_h1_title": false, 34 | "toc_cell": false, 35 | "toc_position": {}, 36 | 
"toc_section_display": "block", 37 | "toc_window_display": false 38 | } 39 | }, 40 | "nbformat": 4, 41 | "nbformat_minor": 2 42 | } 43 | -------------------------------------------------------------------------------- /app/notebooks/michaela_notebooks/Analysis of Shooters and Victims/Willie Corey Godbolt.txt: -------------------------------------------------------------------------------- 1 | {"8": [48300033, 147300397, 257024107, 275603643, 340277298, 147665202, 303567166, 87942542, 148391520, 250380699, 303567365, 147796497, 256591299, 148378295, 147295022, 78029629, 147451364, 340653098, 300084063, 334263361, 148378792, 147657912, 147302633, 147789191, 340590468, 147666495, 147665859, 340276900, 147789515, 148379434, 147658542, 340277099, 147302303, 148391850, 147658860, 280660079, 92649587, 340584709, 78018826, 280660274, 147663942, 334309830, 147663309, 78027223, 147294695, 147667455, 76298810, 78019164, 334310030, 340560537, 78007571, 78019330, 147658226, 147450716, 78019503, 300084260, 78019672, 147295335, 147668090, 147451038, 215524142, 217312517, 334261588, 217312662, 78007746, 148807590, 78008417, 78007914, 147664575, 147300081, 78008081, 147301355, 147795854, 78008248, 275603443]} -------------------------------------------------------------------------------- /app/notebooks/michaela_notebooks/Analysis of Shooters and Victims/roger-ailes-photos/123703025.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scanner-research/esper-tv/179ef57d536ebd52f93697aab09bf5abec19ce93/app/notebooks/michaela_notebooks/Analysis of Shooters and Victims/roger-ailes-photos/123703025.jpg -------------------------------------------------------------------------------- /app/notebooks/michaela_notebooks/Analysis of Shooters and Victims/roger-ailes-photos/251405426.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/scanner-research/esper-tv/179ef57d536ebd52f93697aab09bf5abec19ce93/app/notebooks/michaela_notebooks/Analysis of Shooters and Victims/roger-ailes-photos/251405426.jpg -------------------------------------------------------------------------------- /app/notebooks/michaela_notebooks/Analysis of Shooters and Victims/roger-ailes-photos/251529606.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scanner-research/esper-tv/179ef57d536ebd52f93697aab09bf5abec19ce93/app/notebooks/michaela_notebooks/Analysis of Shooters and Victims/roger-ailes-photos/251529606.jpg -------------------------------------------------------------------------------- /app/notebooks/michaela_notebooks/Analysis of Shooters and Victims/roger-ailes-photos/251626619.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scanner-research/esper-tv/179ef57d536ebd52f93697aab09bf5abec19ce93/app/notebooks/michaela_notebooks/Analysis of Shooters and Victims/roger-ailes-photos/251626619.jpg -------------------------------------------------------------------------------- /app/notebooks/michaela_notebooks/Analysis of Shooters and Victims/roger-ailes-photos/261346406.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scanner-research/esper-tv/179ef57d536ebd52f93697aab09bf5abec19ce93/app/notebooks/michaela_notebooks/Analysis of Shooters and Victims/roger-ailes-photos/261346406.jpg -------------------------------------------------------------------------------- /app/notebooks/michaela_notebooks/Analysis of Shooters and Victims/roger-ailes-photos/262510755.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scanner-research/esper-tv/179ef57d536ebd52f93697aab09bf5abec19ce93/app/notebooks/michaela_notebooks/Analysis of 
Shooters and Victims/roger-ailes-photos/262510755.jpg -------------------------------------------------------------------------------- /app/notebooks/michaela_notebooks/Analysis of Shooters and Victims/roger-ailes-photos/268732111.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scanner-research/esper-tv/179ef57d536ebd52f93697aab09bf5abec19ce93/app/notebooks/michaela_notebooks/Analysis of Shooters and Victims/roger-ailes-photos/268732111.jpg -------------------------------------------------------------------------------- /app/notebooks/michaela_notebooks/Analysis of Shooters and Victims/roger-ailes-photos/314770412.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scanner-research/esper-tv/179ef57d536ebd52f93697aab09bf5abec19ce93/app/notebooks/michaela_notebooks/Analysis of Shooters and Victims/roger-ailes-photos/314770412.jpg -------------------------------------------------------------------------------- /app/notebooks/michaela_notebooks/Analysis of Shooters and Victims/roger-ailes-photos/7665011.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scanner-research/esper-tv/179ef57d536ebd52f93697aab09bf5abec19ce93/app/notebooks/michaela_notebooks/Analysis of Shooters and Victims/roger-ailes-photos/7665011.jpg -------------------------------------------------------------------------------- /app/notebooks/michaela_notebooks/Analysis of Shooters and Victims/test1.html: -------------------------------------------------------------------------------- 1 | Name: Syed Rizwan Farook -------------------------------------------------------------------------------- /app/notebooks/michaela_notebooks/ove_cat.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/scanner-research/esper-tv/179ef57d536ebd52f93697aab09bf5abec19ce93/app/notebooks/michaela_notebooks/ove_cat.jpg -------------------------------------------------------------------------------- /app/notebooks/michaela_notebooks/scratch.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 4, 6 | "metadata": { 7 | "ExecuteTime": { 8 | "end_time": "2019-06-27T20:29:03.643667Z", 9 | "start_time": "2019-06-27T20:29:03.620653Z" 10 | } 11 | }, 12 | "outputs": [ 13 | { 14 | "name": "stdout", 15 | "output_type": "stream", 16 | "text": [ 17 | "/app/data/black_face_ids.csv\n", 18 | "/app/data/hayes.png\n", 19 | "/app/data/aws-face-crops.tar.gz\n", 20 | "/app/data/dan_neurips\n", 21 | "/app/data/lemon.png\n", 22 | "/app/data/stories\n", 23 | "/app/data/blond_fox.png\n", 24 | "/app/data/video_10y.csv\n", 25 | "/app/data/google_images\n", 26 | "/app/data/azure-face-crops\n", 27 | "/app/data/race_pred\n", 28 | "/app/data/black.dan.250.random.json\n", 29 | "/app/data/embs\n", 30 | "/app/data/cnn_fox_msnbc.png\n", 31 | "/app/data/stories-data\n", 32 | "/app/data/video_ia_ids.json\n", 33 | "/app/data/baldwin.png\n", 34 | "/app/data/index10a\n", 35 | "/app/data/face_eval\n", 36 | "/app/data/widget-data\n", 37 | "/app/data/pg\n", 38 | "/app/data/gender_10y.csv\n", 39 | "/app/data/black.250.random.json\n", 40 | "/app/data/maddow.png\n", 41 | "/app/data/blond_cnn.png\n", 42 | "/app/data/face_identity_model_v2\n", 43 | "/app/data/index10\n", 44 | "/app/data/spark\n", 45 | "/app/data/interview_10y-all.pkl\n", 46 | "/app/data/.ipynb_checkpoints\n", 47 | "/app/data/black.1k.random.json\n", 48 | "/app/data/identity_10y.csv\n", 49 | "/app/data/aws-face-crops\n" 50 | ] 51 | } 52 | ], 53 | "source": [ 54 | "import os\n", 55 | "\n", 56 | "# The data directory in the container\n", 57 | "for x in os.listdir('/app/data'):\n", 58 | " print('/app/data/' + x)" 
59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": null, 64 | "metadata": {}, 65 | "outputs": [], 66 | "source": [] 67 | } 68 | ], 69 | "metadata": { 70 | "kernelspec": { 71 | "display_name": "Django Shell-Plus", 72 | "language": "python", 73 | "name": "django_extensions" 74 | }, 75 | "language_info": { 76 | "codemirror_mode": { 77 | "name": "ipython", 78 | "version": 3 79 | }, 80 | "file_extension": ".py", 81 | "mimetype": "text/x-python", 82 | "name": "python", 83 | "nbconvert_exporter": "python", 84 | "pygments_lexer": "ipython3", 85 | "version": "3.5.2" 86 | }, 87 | "toc": { 88 | "nav_menu": {}, 89 | "number_sections": true, 90 | "sideBar": true, 91 | "skip_h1_title": false, 92 | "toc_cell": false, 93 | "toc_position": {}, 94 | "toc_section_display": "block", 95 | "toc_window_display": false 96 | } 97 | }, 98 | "nbformat": 4, 99 | "nbformat_minor": 2 100 | } 101 | -------------------------------------------------------------------------------- /app/notebooks/model_cross_validation.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "%matplotlib inline\n", 12 | "from query.scripts.script_util import *\n", 13 | "from collections import defaultdict" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": null, 19 | "metadata": {}, 20 | "outputs": [], 21 | "source": [ 22 | "def bbox_area(f):\n", 23 | " return (f.bbox_x2 - f.bbox_x1) * (f.bbox_y2 - f.bbox_y1) \n", 24 | "\n", 25 | "def bbox_midpoint(f):\n", 26 | " return np.array([(f.bbox_x1 + f.bbox_x2) / 2, (f.bbox_y1 + f.bbox_y2) / 2])\n", 27 | "\n", 28 | "def bbox_dist(f1, f2):\n", 29 | " return np.linalg.norm(bbox_midpoint(f1) - bbox_midpoint(f2))\n", 30 | "\n", 31 | "def bbox_to_dict(f):\n", 32 | " return {\n", 33 | " 'bbox_x1': f.bbox_x1,\n", 34 | " 'bbox_x2': 
f.bbox_x2,\n", 35 | " 'bbox_y1': f.bbox_y1,\n", 36 | " 'bbox_y2': f.bbox_y2,\n", 37 | " 'bbox_score': f.bbox_score,\n", 38 | " 'labeler': f.labeler.id\n", 39 | " }\n", 40 | "\n", 41 | "labeler_names = [l['labeler__name'] for l in FaceInstance.objects.values('labeler__name').distinct()]\n", 42 | "print(labeler_names)\n", 43 | "\n", 44 | "videos = defaultdict(lambda: defaultdict(lambda: defaultdict(list)))\n", 45 | "for frame in Frame.objects.filter(Q(video__show='Situation Room With Wolf Blitzer') | Q(video__show='Special Report With Bret Baier')).select_related('video')[:10000:10]:\n", 46 | " faces = FaceInstance.objects.filter(frame=frame).select_related('labeler')\n", 47 | " for face in faces:\n", 48 | " videos[frame.video.id][frame.id][face.labeler.name].append(face)\n", 49 | "\n", 50 | "AREA_THRESHOLD = 0.02\n", 51 | "DIST_THRESHOLD = 0.10\n", 52 | "\n", 53 | "mistakes = defaultdict(lambda: defaultdict(tuple))\n", 54 | "for video, frames in videos.iteritems():\n", 55 | " for frame, labelers in frames.iteritems():\n", 56 | " for labeler, faces in labelers.iteritems():\n", 57 | " for face in faces:\n", 58 | " if bbox_area(face) < AREA_THRESHOLD:\n", 59 | " continue\n", 60 | " \n", 61 | " mistake = True\n", 62 | " for other_labeler in labeler_names:\n", 63 | " if labeler == other_labeler: continue\n", 64 | " other_faces = labelers[other_labeler] if other_labeler in labelers else []\n", 65 | " for other_face in other_faces:\n", 66 | " if bbox_dist(face, other_face) < DIST_THRESHOLD:\n", 67 | " mistake = False\n", 68 | " break\n", 69 | " \n", 70 | " if mistake and len(other_faces) > 0:\n", 71 | " mistakes[video][frame] = (faces, other_faces)\n", 72 | " break\n", 73 | " else:\n", 74 | " continue\n", 75 | " break\n", 76 | " \n", 77 | "result = []\n", 78 | "for video, frames in list(mistakes.iteritems())[:1000:10]:\n", 79 | " for frame, (faces, other_faces) in frames.iteritems():\n", 80 | " result.append({\n", 81 | " 'video': video,\n", 82 | " 'start_frame': frame,\n", 
83 | " 'bboxes': [bbox_to_dict(f) for f in faces + other_faces]\n", 84 | " })\n", 85 | " \n", 86 | "print(len(result))" 87 | ] 88 | } 89 | ], 90 | "metadata": { 91 | "kernelspec": { 92 | "display_name": "Django Shell-Plus", 93 | "language": "python", 94 | "name": "django_extensions" 95 | }, 96 | "language_info": { 97 | "codemirror_mode": { 98 | "name": "ipython", 99 | "version": 2 100 | }, 101 | "file_extension": ".py", 102 | "mimetype": "text/x-python", 103 | "name": "python", 104 | "nbconvert_exporter": "python", 105 | "pygments_lexer": "ipython2", 106 | "version": "2.7.12" 107 | } 108 | }, 109 | "nbformat": 4, 110 | "nbformat_minor": 2 111 | } 112 | -------------------------------------------------------------------------------- /app/notebooks/perf.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from query.datasets.prelude import *\n", 10 | "from timeit import default_timer as now\n", 11 | "\n", 12 | "q1 = FaceGender.objects.values('id')\n", 13 | "q2 = FaceGender.objects.values('id', 'face__person__frame__video__show') " 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": null, 19 | "metadata": {}, 20 | "outputs": [], 21 | "source": [ 22 | "def profile(s, f):\n", 23 | " start = now()\n", 24 | " f()\n", 25 | " print('{}: {:.3f}'.format(s, now() - start))\n", 26 | " \n", 27 | "profile('nojoin', lambda: list(q1))\n", 28 | "profile('withjoin', lambda: list(q2))" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": null, 34 | "metadata": {}, 35 | "outputs": [], 36 | "source": [ 37 | "q1.print_sql()\n", 38 | "print('')\n", 39 | "q1.explain()" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": null, 45 | "metadata": {}, 46 | "outputs": [], 47 | "source": [ 48 | "q2.print_sql()\n", 49 | "print('')\n", 50 | "q2.explain()" 51 | ] 52 | 
} 53 | ], 54 | "metadata": { 55 | "kernelspec": { 56 | "display_name": "Django Shell-Plus", 57 | "language": "python", 58 | "name": "django_extensions" 59 | }, 60 | "language_info": { 61 | "codemirror_mode": { 62 | "name": "ipython", 63 | "version": 2 64 | }, 65 | "file_extension": ".py", 66 | "mimetype": "text/x-python", 67 | "name": "python", 68 | "nbconvert_exporter": "python", 69 | "pygments_lexer": "ipython2", 70 | "version": "2.7.14" 71 | }, 72 | "toc": { 73 | "nav_menu": {}, 74 | "number_sections": true, 75 | "sideBar": false, 76 | "skip_h1_title": false, 77 | "toc_cell": false, 78 | "toc_position": {}, 79 | "toc_section_display": "block", 80 | "toc_window_display": true 81 | } 82 | }, 83 | "nbformat": 4, 84 | "nbformat_minor": 2 85 | } 86 | -------------------------------------------------------------------------------- /app/notebooks/problang/transcript_heuristic.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "toc": true 7 | }, 8 | "source": [ 9 | "

Table of Contents

\n", 10 | "
" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": null, 16 | "metadata": { 17 | "ExecuteTime": { 18 | "end_time": "2018-12-02T21:13:47.249717Z", 19 | "start_time": "2018-12-02T21:13:47.227026Z" 20 | } 21 | }, 22 | "outputs": [], 23 | "source": [ 24 | "from esper.prelude import *\n", 25 | "from transcript_utils import *\n", 26 | "from operator import itemgetter" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "metadata": { 33 | "ExecuteTime": { 34 | "end_time": "2018-12-02T21:12:24.800479Z", 35 | "start_time": "2018-12-02T21:12:24.770393Z" 36 | } 37 | }, 38 | "outputs": [], 39 | "source": [ 40 | "all_videos = video_list()" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": null, 46 | "metadata": { 47 | "ExecuteTime": { 48 | "end_time": "2018-12-02T21:13:49.466216Z", 49 | "start_time": "2018-12-02T21:13:47.896796Z" 50 | } 51 | }, 52 | "outputs": [], 53 | "source": [ 54 | "topic = 'immigration'\n", 55 | "lexicon = mutual_info(topic)" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": null, 61 | "metadata": { 62 | "ExecuteTime": { 63 | "end_time": "2018-12-02T21:13:53.091644Z", 64 | "start_time": "2018-12-02T21:13:50.493133Z" 65 | } 66 | }, 67 | "outputs": [], 68 | "source": [ 69 | "initial_segments = find_segments(all_videos, lexicon, threshold=20, window_size=SEGMENT_SIZE, stride=SEGMENT_STRIDE)" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": null, 75 | "metadata": { 76 | "ExecuteTime": { 77 | "end_time": "2018-12-02T21:13:53.119137Z", 78 | "start_time": "2018-12-02T21:13:53.094024Z" 79 | } 80 | }, 81 | "outputs": [], 82 | "source": [ 83 | "initial_segments_dict = [{'item_name': seg[0], 'segment': seg[2], 'score': seg[3]} for seg in initial_segments]\n", 84 | "initial_segments_dict.sort(key=itemgetter('score'), reverse=True)" 85 | ] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "execution_count": null, 90 | "metadata": { 91 | "ExecuteTime": { 
92 | "end_time": "2018-12-02T21:13:53.146327Z", 93 | "start_time": "2018-12-02T21:13:53.120848Z" 94 | } 95 | }, 96 | "outputs": [], 97 | "source": [ 98 | "pcache.set('initial_segments', initial_segments_dict)" 99 | ] 100 | } 101 | ], 102 | "metadata": { 103 | "kernelspec": { 104 | "display_name": "Django Shell-Plus", 105 | "language": "python", 106 | "name": "django_extensions" 107 | }, 108 | "language_info": { 109 | "codemirror_mode": { 110 | "name": "ipython", 111 | "version": 3 112 | }, 113 | "file_extension": ".py", 114 | "mimetype": "text/x-python", 115 | "name": "python", 116 | "nbconvert_exporter": "python", 117 | "pygments_lexer": "ipython3", 118 | "version": "3.5.2" 119 | }, 120 | "toc": { 121 | "base_numbering": 1, 122 | "nav_menu": {}, 123 | "number_sections": true, 124 | "sideBar": false, 125 | "skip_h1_title": false, 126 | "title_cell": "Table of Contents", 127 | "title_sidebar": "Contents", 128 | "toc_cell": true, 129 | "toc_position": {}, 130 | "toc_section_display": "block", 131 | "toc_window_display": false 132 | } 133 | }, 134 | "nbformat": 4, 135 | "nbformat_minor": 2 136 | } 137 | -------------------------------------------------------------------------------- /app/notebooks/simple_templates/identity_still_images.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "toc": true 7 | }, 8 | "source": [ 9 | "

Table of Contents

\n", 10 | "
" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": {}, 16 | "source": [ 17 | "# Instructions\n", 18 | "\n", 19 | "Run the cell below. You will be prompted to input a name." 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": null, 25 | "metadata": { 26 | "ExecuteTime": { 27 | "end_time": "2018-08-22T18:36:53.445405Z", 28 | "start_time": "2018-08-22T18:31:39.674087Z" 29 | }, 30 | "scrolled": false 31 | }, 32 | "outputs": [], 33 | "source": [ 34 | "from esper.identity_clusters import identity_clustering_workflow\n", 35 | "identity_clustering_workflow(input('Input a name: ').strip(), init_clusters=20)" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": null, 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [] 44 | } 45 | ], 46 | "metadata": { 47 | "kernelspec": { 48 | "display_name": "Django Shell-Plus", 49 | "language": "python", 50 | "name": "django_extensions" 51 | }, 52 | "language_info": { 53 | "codemirror_mode": { 54 | "name": "ipython", 55 | "version": 3 56 | }, 57 | "file_extension": ".py", 58 | "mimetype": "text/x-python", 59 | "name": "python", 60 | "nbconvert_exporter": "python", 61 | "pygments_lexer": "ipython3", 62 | "version": "3.5.2" 63 | }, 64 | "toc": { 65 | "base_numbering": 1, 66 | "nav_menu": {}, 67 | "number_sections": true, 68 | "sideBar": false, 69 | "skip_h1_title": false, 70 | "title_cell": "Table of Contents", 71 | "title_sidebar": "Contents", 72 | "toc_cell": true, 73 | "toc_position": {}, 74 | "toc_section_display": "block", 75 | "toc_window_display": false 76 | } 77 | }, 78 | "nbformat": 4, 79 | "nbformat_minor": 2 80 | } 81 | -------------------------------------------------------------------------------- /app/notebooks/spark.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from pyspark.sql 
import SparkSession" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "spark = SparkSession.builder.master(\"spark://spark:7077\").getOrCreate()" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": null, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "df = spark.createDataFrame([('Alice', 1)])\n", 28 | "df.show()" 29 | ] 30 | } 31 | ], 32 | "metadata": { 33 | "kernelspec": { 34 | "display_name": "Django Shell-Plus", 35 | "language": "python", 36 | "name": "django_extensions" 37 | }, 38 | "language_info": { 39 | "codemirror_mode": { 40 | "name": "ipython", 41 | "version": 2 42 | }, 43 | "file_extension": ".py", 44 | "mimetype": "text/x-python", 45 | "name": "python", 46 | "nbconvert_exporter": "python", 47 | "pygments_lexer": "ipython2", 48 | "version": "2.7.14" 49 | }, 50 | "toc": { 51 | "nav_menu": {}, 52 | "number_sections": true, 53 | "sideBar": false, 54 | "skip_h1_title": false, 55 | "toc_cell": false, 56 | "toc_position": { 57 | "height": "47px", 58 | "left": "1491px", 59 | "right": "20px", 60 | "top": "110.5px", 61 | "width": "185px" 62 | }, 63 | "toc_section_display": "block", 64 | "toc_window_display": true 65 | } 66 | }, 67 | "nbformat": 4, 68 | "nbformat_minor": 2 69 | } 70 | -------------------------------------------------------------------------------- /app/notebooks/test_captions.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "toc": true 7 | }, 8 | "source": [ 9 | "

Table of Contents

\n", 10 | "
" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": null, 16 | "metadata": { 17 | "ExecuteTime": { 18 | "end_time": "2018-12-21T00:03:32.011406Z", 19 | "start_time": "2018-12-21T00:00:59.470534Z" 20 | } 21 | }, 22 | "outputs": [], 23 | "source": [ 24 | "from esper.captions import topic_search, phrase_search, get_lowercase_segments" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": null, 30 | "metadata": { 31 | "ExecuteTime": { 32 | "end_time": "2018-12-21T00:08:55.818835Z", 33 | "start_time": "2018-12-21T00:08:03.211906Z" 34 | } 35 | }, 36 | "outputs": [], 37 | "source": [ 38 | "result = topic_search(['TACO BELL'])\n", 39 | "count = 0\n", 40 | "for d in result:\n", 41 | " count += 1\n", 42 | "print('Found in {} videos'.format(count))\n", 43 | "\n", 44 | "video_ids = [v.id for v in Video.objects.all()[:1000]]\n", 45 | "result = topic_search(['TACO BELL'], video_ids=video_ids)\n", 46 | "count = 0\n", 47 | "for d in result:\n", 48 | " count += 1\n", 49 | "print('Found in {} videos (of {} ids)'.format(count, len(video_ids)))" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": null, 55 | "metadata": { 56 | "ExecuteTime": { 57 | "end_time": "2018-12-21T00:08:55.903266Z", 58 | "start_time": "2018-12-21T00:08:55.822651Z" 59 | } 60 | }, 61 | "outputs": [], 62 | "source": [ 63 | "result = phrase_search('TACO BELL')\n", 64 | "count = 0\n", 65 | "for d in result:\n", 66 | " count += 1\n", 67 | "print('Found in {} videos'.format(count))\n", 68 | "\n", 69 | "video_ids = [v.id for v in Video.objects.all()[:1000]]\n", 70 | "result = phrase_search('TACO BELL', video_ids=video_ids)\n", 71 | "count = 0\n", 72 | "for d in result:\n", 73 | " count += 1\n", 74 | "print('Found in {} videos (of {} ids)'.format(count, len(video_ids)))" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": null, 80 | "metadata": { 81 | "ExecuteTime": { 82 | "end_time": "2018-12-17T19:27:55.291000Z", 83 | "start_time": 
"2018-12-17T19:27:13.735758Z" 84 | }, 85 | "scrolled": true 86 | }, 87 | "outputs": [], 88 | "source": [ 89 | "count = 0\n", 90 | "for document_id, lowercase_ranges in get_lowercase_segments():\n", 91 | " count += 1\n", 92 | " if len(lowercase_ranges) > 0:\n", 93 | " print('{} has {} lowercase ranges'.format(\n", 94 | " document_id, len(lowercase_ranges)))\n", 95 | "print('checked {} documents'.format(count))" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": null, 101 | "metadata": {}, 102 | "outputs": [], 103 | "source": [] 104 | } 105 | ], 106 | "metadata": { 107 | "kernelspec": { 108 | "display_name": "Django Shell-Plus", 109 | "language": "python", 110 | "name": "django_extensions" 111 | }, 112 | "language_info": { 113 | "codemirror_mode": { 114 | "name": "ipython", 115 | "version": 3 116 | }, 117 | "file_extension": ".py", 118 | "mimetype": "text/x-python", 119 | "name": "python", 120 | "nbconvert_exporter": "python", 121 | "pygments_lexer": "ipython3", 122 | "version": "3.5.2" 123 | }, 124 | "toc": { 125 | "nav_menu": {}, 126 | "number_sections": true, 127 | "sideBar": false, 128 | "skip_h1_title": false, 129 | "toc_cell": true, 130 | "toc_position": {}, 131 | "toc_section_display": "block", 132 | "toc_window_display": false 133 | } 134 | }, 135 | "nbformat": 4, 136 | "nbformat_minor": 2 137 | } 138 | -------------------------------------------------------------------------------- /app/notebooks/validation.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "Click **Kernel > Restart & Run All**." 
8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "from query.datasets.prelude import *\n", 17 | "from query.datasets.tvnews.validation import *" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": null, 23 | "metadata": {}, 24 | "outputs": [], 25 | "source": [ 26 | "# start: 3164, end: 4281 in 15 min\n", 27 | "\n", 28 | "\n", 29 | "# TODO: get distribution of shot length for M vs. F\n", 30 | "\n", 31 | "base_face = face_validation('All faces', lambda x: x)\n", 32 | "big_face = face_validation(\n", 33 | " 'Faces height > 0.2', lambda qs: qs.annotate(height=F('bbox_y2') - F('bbox_y1')).filter(height__gte=0.2))\n", 34 | "fg_face = face_validation(\n", 35 | " 'Faces in foreground', lambda qs: qs.filter(background=False))" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": null, 41 | "metadata": { 42 | "scrolled": true 43 | }, 44 | "outputs": [], 45 | "source": [ 46 | "all_gender_accuracy, all_gender_cmat = gender_validation('All gender', base_face)" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": null, 52 | "metadata": {}, 53 | "outputs": [], 54 | "source": [ 55 | "fg_gender_accuracy, fg_gender_cmat = gender_validation('Gender w/ faces in foreground', fg_face)" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": null, 61 | "metadata": {}, 62 | "outputs": [], 63 | "source": [ 64 | "big_gender_accuracy, big_gender_cmat = gender_validation('Gender w/ face height > 0.2', big_face)" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": null, 70 | "metadata": {}, 71 | "outputs": [], 72 | "source": [ 73 | "screentime_validation('All faces', lambda x: x, all_gender_cmat)" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": null, 79 | "metadata": {}, 80 | "outputs": [], 81 | "source": [ 82 | "screentime_validation(\n", 83 | " 'Face height > 0.2',\n", 84 | " lambda qs: 
qs.annotate(height=F('face__bbox_y2') - F('face__bbox_y1')).filter(height__gte=0.2),\n", 85 | " big_gender_cmat)" 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": null, 91 | "metadata": {}, 92 | "outputs": [], 93 | "source": [ 94 | "speaking_validation()" 95 | ] 96 | } 97 | ], 98 | "metadata": { 99 | "kernelspec": { 100 | "display_name": "Django Shell-Plus", 101 | "language": "python", 102 | "name": "django_extensions" 103 | }, 104 | "language_info": { 105 | "codemirror_mode": { 106 | "name": "ipython", 107 | "version": 2 108 | }, 109 | "file_extension": ".py", 110 | "mimetype": "text/x-python", 111 | "name": "python", 112 | "nbconvert_exporter": "python", 113 | "pygments_lexer": "ipython2", 114 | "version": "2.7.14" 115 | }, 116 | "toc": { 117 | "nav_menu": {}, 118 | "number_sections": true, 119 | "sideBar": false, 120 | "skip_h1_title": false, 121 | "toc_cell": false, 122 | "toc_position": {}, 123 | "toc_section_display": "none", 124 | "toc_window_display": false 125 | } 126 | }, 127 | "nbformat": 4, 128 | "nbformat_minor": 2 129 | } 130 | -------------------------------------------------------------------------------- /app/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "esper", 3 | "description": "Video search engine and interface", 4 | "version": "1.0.0", 5 | "repository": { 6 | "type": "git", 7 | "url": "git+https://github.com/scanner-research/esper.git" 8 | }, 9 | "author": "Will Crichton", 10 | "license": "Apache-2.0", 11 | "bugs": { 12 | "url": "https://github.com/scanner-research/esper/issues" 13 | }, 14 | "homepage": "https://github.com/scanner-research/esper#readme", 15 | "scripts": { 16 | "build": "webpack", 17 | "watch": "webpack -w", 18 | "prepublish": "webpack -p" 19 | }, 20 | "devDependencies": { 21 | "axios": "^0.16.1", 22 | "babel-core": "^6.24.1", 23 | "babel-loader": "^7.0.0", 24 | "babel-plugin-transform-decorators-legacy": "^1.3.4", 25 | 
"babel-preset-env": "^1.6.1", 26 | "babel-preset-react": "^6.24.1", 27 | "babel-preset-stage-0": "^6.24.1", 28 | "bootstrap": "^3.3.7", 29 | "create-react-class": "^15.6.3", 30 | "css-loader": "^0.28.1", 31 | "extract-text-webpack-plugin": "^2.1.0", 32 | "file-loader": "^0.11.1", 33 | "jquery": "^3.3.1", 34 | "left-pad": "^1.1.3", 35 | "lodash": "^4.17.4", 36 | "mobx": "^4.8.0", 37 | "mobx-react": "^5.4.3", 38 | "node-sass": "^4.5.2", 39 | "open-iconic": "^1.1.1", 40 | "react": "^16.3.2", 41 | "react-ace": "^5.4.0", 42 | "react-bootstrap": "0.31.3", 43 | "react-dom": "^16.3.2", 44 | "sass-loader": "^6.0.5", 45 | "source-map-loader": "^0.2.4", 46 | "url-loader": "^0.5.8", 47 | "webpack": "^2.5.1", 48 | "webpack-bundle-tracker": "^0.2.0" 49 | }, 50 | "config": { 51 | "unsafe-perm": true 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /app/query/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scanner-research/esper-tv/179ef57d536ebd52f93697aab09bf5abec19ce93/app/query/__init__.py -------------------------------------------------------------------------------- /app/query/admin.py: -------------------------------------------------------------------------------- 1 | from django.contrib import admin 2 | 3 | from .models import * 4 | -------------------------------------------------------------------------------- /app/query/apps.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | from django.apps import AppConfig 4 | 5 | 6 | class QueryConfig(AppConfig): 7 | name = 'query' 8 | -------------------------------------------------------------------------------- /app/query/management/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/scanner-research/esper-tv/179ef57d536ebd52f93697aab09bf5abec19ce93/app/query/management/__init__.py -------------------------------------------------------------------------------- /app/query/management/commands/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scanner-research/esper-tv/179ef57d536ebd52f93697aab09bf5abec19ce93/app/query/management/commands/__init__.py -------------------------------------------------------------------------------- /app/query/management/commands/run.py: -------------------------------------------------------------------------------- 1 | from django.core.management.base import BaseCommand, CommandError 2 | import imp 3 | import sys 4 | 5 | class Command(BaseCommand): 6 | help = 'Run a script' 7 | 8 | def add_arguments(self, parser): 9 | parser.add_argument('path') 10 | parser.add_argument('args', nargs='*') 11 | 12 | def handle(self, *args, **options): 13 | sys.argv = tuple([options['path']] + list(args)) 14 | imp.load_source('__main__', options['path']) 15 | -------------------------------------------------------------------------------- /app/query/migrations/0002_auto_20180529_1049.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by Django 1.11 on 2018-05-29 10:49 3 | from __future__ import unicode_literals 4 | 5 | from django.db import migrations, models 6 | 7 | 8 | class Migration(migrations.Migration): 9 | 10 | dependencies = [ 11 | ('query', '0001_initial'), 12 | ] 13 | 14 | operations = [ 15 | migrations.AddField( 16 | model_name='faceidentity', 17 | name='probability', 18 | field=models.FloatField(default=1.0), 19 | ), 20 | migrations.AddField( 21 | model_name='labeler', 22 | name='created', 23 | field=models.DateTimeField(auto_now_add=True, null=True), 24 | ), 25 | migrations.AddField( 26 | model_name='labeler', 27 | name='data_path', 
28 | field=models.CharField(blank=True, max_length=256, null=True), 29 | ), 30 | ] 31 | -------------------------------------------------------------------------------- /app/query/migrations/0003_show_hosts.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by Django 1.11 on 2018-06-04 19:50 3 | from __future__ import unicode_literals 4 | 5 | from django.db import migrations, models 6 | 7 | 8 | class Migration(migrations.Migration): 9 | 10 | dependencies = [ 11 | ('query', '0002_auto_20180529_1049'), 12 | ] 13 | 14 | operations = [ 15 | migrations.AddField( 16 | model_name='show', 17 | name='hosts', 18 | field=models.ManyToManyField(blank=True, to='query.Thing'), 19 | ), 20 | ] 21 | -------------------------------------------------------------------------------- /app/query/migrations/0004_auto_20180612_1255.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by Django 1.11 on 2018-06-12 12:55 3 | from __future__ import unicode_literals 4 | 5 | from django.db import migrations, models 6 | import django.db.models.deletion 7 | 8 | 9 | class Migration(migrations.Migration): 10 | 11 | dependencies = [ 12 | ('query', '0003_show_hosts'), 13 | ] 14 | 15 | operations = [ 16 | migrations.CreateModel( 17 | name='CanonicalShow', 18 | fields=[ 19 | ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), 20 | ('name', models.CharField(max_length=256)), 21 | ('is_prime_time', models.BooleanField(default=False)), 22 | ('is_weekend', models.BooleanField(default=False)), 23 | ('is_recurring', models.BooleanField(default=False)), 24 | ], 25 | ), 26 | migrations.AddField( 27 | model_name='show', 28 | name='canonical_show', 29 | field=models.ForeignKey(blank=True, default=None, null=True, on_delete=django.db.models.deletion.CASCADE, to='query.CanonicalShow'), 30 | ), 31 | ] 32 | 
-------------------------------------------------------------------------------- /app/query/migrations/0005_auto_20180612_1316.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by Django 1.11 on 2018-06-12 13:16 3 | from __future__ import unicode_literals 4 | 5 | from django.db import migrations 6 | 7 | 8 | class Migration(migrations.Migration): 9 | 10 | dependencies = [ 11 | ('query', '0004_auto_20180612_1255'), 12 | ] 13 | 14 | operations = [ 15 | migrations.RemoveField( 16 | model_name='canonicalshow', 17 | name='is_prime_time', 18 | ), 19 | migrations.RemoveField( 20 | model_name='canonicalshow', 21 | name='is_weekend', 22 | ), 23 | ] 24 | -------------------------------------------------------------------------------- /app/query/migrations/0006_auto_20180612_1317.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by Django 1.11 on 2018-06-12 13:17 3 | from __future__ import unicode_literals 4 | 5 | from django.db import migrations, models 6 | import django.db.models.deletion 7 | 8 | 9 | class Migration(migrations.Migration): 10 | 11 | dependencies = [ 12 | ('query', '0005_auto_20180612_1316'), 13 | ] 14 | 15 | operations = [ 16 | migrations.AlterField( 17 | model_name='show', 18 | name='canonical_show', 19 | field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='query.CanonicalShow'), 20 | ), 21 | ] 22 | -------------------------------------------------------------------------------- /app/query/migrations/0007_canonicalshow_hosts.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by Django 1.11 on 2018-06-15 00:40 3 | from __future__ import unicode_literals 4 | 5 | from django.db import migrations, models 6 | 7 | 8 | class Migration(migrations.Migration): 9 | 10 | dependencies = [ 11 | ('query', 
'0006_auto_20180612_1317'), 12 | ] 13 | 14 | operations = [ 15 | migrations.AddField( 16 | model_name='canonicalshow', 17 | name='hosts', 18 | field=models.ManyToManyField(blank=True, to='query.Thing'), 19 | ), 20 | ] 21 | -------------------------------------------------------------------------------- /app/query/migrations/0008_auto_20180625_1033.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by Django 1.11 on 2018-06-25 10:33 3 | from __future__ import unicode_literals 4 | 5 | from django.db import migrations, models 6 | 7 | 8 | class Migration(migrations.Migration): 9 | 10 | dependencies = [ 11 | ('query', '0007_canonicalshow_hosts'), 12 | ] 13 | 14 | operations = [ 15 | migrations.AddField( 16 | model_name='face', 17 | name='probability', 18 | field=models.FloatField(default=1.0), 19 | ), 20 | migrations.AddField( 21 | model_name='facegender', 22 | name='probability', 23 | field=models.FloatField(default=1.0), 24 | ), 25 | ] 26 | -------------------------------------------------------------------------------- /app/query/migrations/0009_object.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by Django 1.11 on 2018-06-25 11:47 3 | from __future__ import unicode_literals 4 | 5 | from django.db import migrations, models 6 | import django.db.models.deletion 7 | 8 | 9 | class Migration(migrations.Migration): 10 | 11 | dependencies = [ 12 | ('query', '0008_auto_20180625_1033'), 13 | ] 14 | 15 | operations = [ 16 | migrations.CreateModel( 17 | name='Object', 18 | fields=[ 19 | ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), 20 | ('bbox_x1', models.FloatField()), 21 | ('bbox_x2', models.FloatField()), 22 | ('bbox_y1', models.FloatField()), 23 | ('bbox_y2', models.FloatField()), 24 | ('bbox_score', models.FloatField()), 25 | ('cls', models.IntegerField()), 26 | 
('probability', models.FloatField()), 27 | ('frame', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='query.Frame')), 28 | ], 29 | options={ 30 | 'abstract': False, 31 | }, 32 | ), 33 | ] 34 | -------------------------------------------------------------------------------- /app/query/migrations/0010_auto_20180625_1152.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by Django 1.11 on 2018-06-25 11:52 3 | from __future__ import unicode_literals 4 | 5 | from django.db import migrations 6 | 7 | 8 | class Migration(migrations.Migration): 9 | 10 | dependencies = [ 11 | ('query', '0009_object'), 12 | ] 13 | 14 | operations = [ 15 | migrations.RenameField( 16 | model_name='object', 17 | old_name='cls', 18 | new_name='label', 19 | ), 20 | ] 21 | -------------------------------------------------------------------------------- /app/query/migrations/0011_auto_20180625_1157.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by Django 1.11 on 2018-06-25 11:57 3 | from __future__ import unicode_literals 4 | 5 | from django.db import migrations 6 | 7 | 8 | class Migration(migrations.Migration): 9 | 10 | dependencies = [ 11 | ('query', '0010_auto_20180625_1152'), 12 | ] 13 | 14 | operations = [ 15 | migrations.RemoveField( 16 | model_name='face', 17 | name='bbox_score', 18 | ), 19 | migrations.RemoveField( 20 | model_name='object', 21 | name='bbox_score', 22 | ), 23 | ] 24 | -------------------------------------------------------------------------------- /app/query/migrations/0012_auto_20180712_1252.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by Django 1.11 on 2018-07-12 12:52 3 | from __future__ import unicode_literals 4 | 5 | from django.db import migrations, models 6 | 7 | 8 | class Migration(migrations.Migration): 9 | 10 
| dependencies = [ 11 | ('query', '0011_auto_20180625_1157'), 12 | ] 13 | 14 | operations = [ 15 | migrations.AlterField( 16 | model_name='faceidentity', 17 | name='probability', 18 | field=models.FloatField(db_index=True, default=1.0), 19 | ), 20 | ] 21 | -------------------------------------------------------------------------------- /app/query/migrations/0013_auto_20180825_2234.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by Django 1.11 on 2018-08-25 22:34 3 | from __future__ import unicode_literals 4 | 5 | from django.db import migrations, models 6 | 7 | 8 | class Migration(migrations.Migration): 9 | 10 | dependencies = [ 11 | ('query', '0012_auto_20180712_1252'), 12 | ] 13 | 14 | operations = [ 15 | migrations.AddField( 16 | model_name='video', 17 | name='threeyears_dataset', 18 | field=models.BooleanField(default=False), 19 | ), 20 | migrations.AlterField( 21 | model_name='faceidentity', 22 | name='probability', 23 | field=models.FloatField(default=1.0), 24 | ), 25 | ] 26 | -------------------------------------------------------------------------------- /app/query/migrations/0014_auto_20180926_1514.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by Django 1.11 on 2018-09-26 15:14 3 | from __future__ import unicode_literals 4 | 5 | from django.db import migrations, models 6 | import django.db.models.deletion 7 | 8 | 9 | class Migration(migrations.Migration): 10 | 11 | dependencies = [ 12 | ('query', '0013_auto_20180825_2234'), 13 | ] 14 | 15 | operations = [ 16 | migrations.RemoveField( 17 | model_name='speaker', 18 | name='gender', 19 | ), 20 | migrations.RemoveField( 21 | model_name='speaker', 22 | name='identity', 23 | ), 24 | migrations.RemoveField( 25 | model_name='speaker', 26 | name='labeler', 27 | ), 28 | migrations.RemoveField( 29 | model_name='speaker', 30 | name='video', 31 | ), 32 | 
migrations.RemoveField( 33 | model_name='identity', 34 | name='thing', 35 | ), 36 | migrations.AddField( 37 | model_name='faceidentity', 38 | name='identity2', 39 | field=models.ForeignKey(null=True, on_delete=django.db.models.deletion.CASCADE, to='query.Identity'), 40 | ), 41 | migrations.AlterField( 42 | model_name='identity', 43 | name='name', 44 | field=models.CharField(default='', max_length=256), 45 | preserve_default=False, 46 | ), 47 | migrations.DeleteModel( 48 | name='Speaker', 49 | ), 50 | ] 51 | -------------------------------------------------------------------------------- /app/query/migrations/0015_auto_20180926_1902.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by Django 1.11 on 2018-09-26 19:02 3 | from __future__ import unicode_literals 4 | 5 | from django.db import migrations, models 6 | import django.db.models.deletion 7 | 8 | 9 | class Migration(migrations.Migration): 10 | 11 | dependencies = [ 12 | ('query', '0014_auto_20180926_1514'), 13 | ] 14 | 15 | operations = [ 16 | migrations.RemoveField( 17 | model_name='faceidentity', 18 | name='identity', 19 | ), 20 | migrations.AlterField( 21 | model_name='faceidentity', 22 | name='identity2', 23 | field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='query.Identity'), 24 | ), 25 | ] 26 | -------------------------------------------------------------------------------- /app/query/migrations/0016_auto_20180926_2037.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by Django 1.11 on 2018-09-26 20:37 3 | from __future__ import unicode_literals 4 | 5 | from django.db import migrations, models 6 | 7 | 8 | class Migration(migrations.Migration): 9 | 10 | dependencies = [ 11 | ('query', '0015_auto_20180926_1902'), 12 | ] 13 | 14 | operations = [ 15 | migrations.AddField( 16 | model_name='canonicalshow', 17 | name='hosts2', 18 | 
field=models.ManyToManyField(blank=True, to='query.Identity'), 19 | ), 20 | migrations.AddField( 21 | model_name='show', 22 | name='hosts2', 23 | field=models.ManyToManyField(blank=True, to='query.Identity'), 24 | ), 25 | ] 26 | -------------------------------------------------------------------------------- /app/query/migrations/0017_auto_20180926_2045.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by Django 1.11 on 2018-09-26 20:45 3 | from __future__ import unicode_literals 4 | 5 | from django.db import migrations 6 | 7 | 8 | class Migration(migrations.Migration): 9 | 10 | dependencies = [ 11 | ('query', '0016_auto_20180926_2037'), 12 | ] 13 | 14 | operations = [ 15 | migrations.RemoveField( 16 | model_name='canonicalshow', 17 | name='hosts', 18 | ), 19 | migrations.RemoveField( 20 | model_name='show', 21 | name='hosts', 22 | ), 23 | ] 24 | -------------------------------------------------------------------------------- /app/query/migrations/0018_auto_20180926_2045.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by Django 1.11 on 2018-09-26 20:45 3 | from __future__ import unicode_literals 4 | 5 | from django.db import migrations 6 | 7 | 8 | class Migration(migrations.Migration): 9 | 10 | dependencies = [ 11 | ('query', '0017_auto_20180926_2045'), 12 | ] 13 | 14 | operations = [ 15 | migrations.RenameField( 16 | model_name='canonicalshow', 17 | old_name='hosts2', 18 | new_name='hosts', 19 | ), 20 | migrations.RenameField( 21 | model_name='faceidentity', 22 | old_name='identity2', 23 | new_name='identity', 24 | ), 25 | migrations.RenameField( 26 | model_name='show', 27 | old_name='hosts2', 28 | new_name='hosts', 29 | ), 30 | ] 31 | -------------------------------------------------------------------------------- /app/query/migrations/0019_topic.py: 
-------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by Django 1.11 on 2018-09-26 20:47 3 | from __future__ import unicode_literals 4 | 5 | from django.db import migrations, models 6 | 7 | 8 | class Migration(migrations.Migration): 9 | 10 | dependencies = [ 11 | ('query', '0018_auto_20180926_2045'), 12 | ] 13 | 14 | operations = [ 15 | migrations.CreateModel( 16 | name='Topic', 17 | fields=[ 18 | ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), 19 | ('name', models.CharField(max_length=256)), 20 | ], 21 | ), 22 | ] 23 | -------------------------------------------------------------------------------- /app/query/migrations/0020_segment_topics.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by Django 1.11 on 2018-09-26 20:49 3 | from __future__ import unicode_literals 4 | 5 | from django.db import migrations, models 6 | 7 | 8 | class Migration(migrations.Migration): 9 | 10 | dependencies = [ 11 | ('query', '0019_topic'), 12 | ] 13 | 14 | operations = [ 15 | migrations.AddField( 16 | model_name='segment', 17 | name='topics', 18 | field=models.ManyToManyField(to='query.Topic'), 19 | ), 20 | ] 21 | -------------------------------------------------------------------------------- /app/query/migrations/0021_auto_20180926_2104.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by Django 1.11 on 2018-09-26 21:04 3 | from __future__ import unicode_literals 4 | 5 | from django.db import migrations 6 | 7 | 8 | class Migration(migrations.Migration): 9 | 10 | dependencies = [ 11 | ('query', '0020_segment_topics'), 12 | ] 13 | 14 | operations = [ 15 | migrations.RemoveField( 16 | model_name='thing', 17 | name='type', 18 | ), 19 | migrations.RemoveField( 20 | model_name='segment', 21 | name='things', 22 | 
), 23 | migrations.DeleteModel( 24 | name='Thing', 25 | ), 26 | migrations.DeleteModel( 27 | name='ThingType', 28 | ), 29 | ] 30 | -------------------------------------------------------------------------------- /app/query/migrations/0022_auto_20181026_1002.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by Django 1.11 on 2018-10-26 10:02 3 | from __future__ import unicode_literals 4 | 5 | from django.db import migrations, models 6 | import django.db.models.deletion 7 | 8 | 9 | class Migration(migrations.Migration): 10 | 11 | dependencies = [ 12 | ('query', '0021_auto_20180926_2104'), 13 | ] 14 | 15 | operations = [ 16 | migrations.CreateModel( 17 | name='TimeSegment', 18 | fields=[ 19 | ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), 20 | ('start', models.IntegerField()), 21 | ('end', models.IntegerField()), 22 | ], 23 | ), 24 | migrations.CreateModel( 25 | name='LabeledCommercial', 26 | fields=[ 27 | ('timesegment_ptr', models.OneToOneField(auto_created=True, on_delete=django.db.models.deletion.CASCADE, parent_link=True, primary_key=True, serialize=False, to='query.TimeSegment')), 28 | ], 29 | bases=('query.timesegment',), 30 | ), 31 | migrations.CreateModel( 32 | name='LabeledInterview', 33 | fields=[ 34 | ('timesegment_ptr', models.OneToOneField(auto_created=True, on_delete=django.db.models.deletion.CASCADE, parent_link=True, primary_key=True, serialize=False, to='query.TimeSegment')), 35 | ('original', models.BooleanField(default=True)), 36 | ('scattered_clips', models.BooleanField(default=False)), 37 | ('interviewer1', models.CharField(blank=True, default=None, max_length=256, null=True)), 38 | ('interviewer2', models.CharField(blank=True, default=None, max_length=256, null=True)), 39 | ('guest1', models.CharField(blank=True, default=None, max_length=256, null=True)), 40 | ('guest2', models.CharField(blank=True, default=None, 
max_length=256, null=True)), 41 | ], 42 | bases=('query.timesegment',), 43 | ), 44 | migrations.CreateModel( 45 | name='LabeledPanel', 46 | fields=[ 47 | ('timesegment_ptr', models.OneToOneField(auto_created=True, on_delete=django.db.models.deletion.CASCADE, parent_link=True, primary_key=True, serialize=False, to='query.TimeSegment')), 48 | ('num_panelists', models.IntegerField()), 49 | ], 50 | bases=('query.timesegment',), 51 | ), 52 | migrations.AddField( 53 | model_name='timesegment', 54 | name='video', 55 | field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='query.Video'), 56 | ), 57 | ] 58 | -------------------------------------------------------------------------------- /app/query/migrations/0022_frame_shot_boundary.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by Django 1.11 on 2018-10-28 18:46 3 | from __future__ import unicode_literals 4 | 5 | from django.db import migrations, models 6 | 7 | 8 | class Migration(migrations.Migration): 9 | 10 | dependencies = [ 11 | ('query', '0021_auto_20180926_2104'), 12 | ] 13 | 14 | operations = [ 15 | migrations.AddField( 16 | model_name='frame', 17 | name='shot_boundary', 18 | field=models.BooleanField(default=True), 19 | preserve_default=False, 20 | ), 21 | ] 22 | -------------------------------------------------------------------------------- /app/query/migrations/0023_auto_20181026_1010.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by Django 1.11 on 2018-10-26 10:10 3 | from __future__ import unicode_literals 4 | 5 | from django.db import migrations 6 | 7 | 8 | class Migration(migrations.Migration): 9 | 10 | dependencies = [ 11 | ('query', '0022_auto_20181026_1002'), 12 | ] 13 | 14 | operations = [ 15 | migrations.RemoveField( 16 | model_name='labeledcommercial', 17 | name='timesegment_ptr', 18 | ), 19 | migrations.RemoveField( 20 | 
model_name='labeledinterview', 21 | name='timesegment_ptr', 22 | ), 23 | migrations.RemoveField( 24 | model_name='labeledpanel', 25 | name='timesegment_ptr', 26 | ), 27 | migrations.RemoveField( 28 | model_name='timesegment', 29 | name='video', 30 | ), 31 | migrations.DeleteModel( 32 | name='LabeledCommercial', 33 | ), 34 | migrations.DeleteModel( 35 | name='LabeledInterview', 36 | ), 37 | migrations.DeleteModel( 38 | name='LabeledPanel', 39 | ), 40 | migrations.DeleteModel( 41 | name='TimeSegment', 42 | ), 43 | ] 44 | -------------------------------------------------------------------------------- /app/query/migrations/0023_auto_20181101_2104.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by Django 1.11 on 2018-11-01 21:04 3 | from __future__ import unicode_literals 4 | 5 | from django.db import migrations, models 6 | import django.db.models.deletion 7 | 8 | 9 | class Migration(migrations.Migration): 10 | 11 | dependencies = [ 12 | ('query', '0022_frame_shot_boundary'), 13 | ] 14 | 15 | operations = [ 16 | migrations.AddField( 17 | model_name='face', 18 | name='frame', 19 | field=models.ForeignKey(null=True, on_delete=django.db.models.deletion.CASCADE, to='query.Frame'), 20 | ), 21 | migrations.AddField( 22 | model_name='pose', 23 | name='frame', 24 | field=models.ForeignKey(null=True, on_delete=django.db.models.deletion.CASCADE, to='query.Frame'), 25 | ), 26 | migrations.AlterUniqueTogether( 27 | name='face', 28 | unique_together=set([]), 29 | ), 30 | migrations.AlterUniqueTogether( 31 | name='pose', 32 | unique_together=set([]), 33 | ), 34 | ] 35 | -------------------------------------------------------------------------------- /app/query/migrations/0024_auto_20181103_2019.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by Django 1.11 on 2018-11-03 20:19 3 | from __future__ import unicode_literals 4 | 
5 | from django.db import migrations, models 6 | import django.db.models.deletion 7 | 8 | 9 | class Migration(migrations.Migration): 10 | 11 | dependencies = [ 12 | ('query', '0023_auto_20181101_2104'), 13 | ] 14 | 15 | operations = [ 16 | migrations.RemoveField( 17 | model_name='person', 18 | name='frame', 19 | ), 20 | migrations.RemoveField( 21 | model_name='pose', 22 | name='person', 23 | ), 24 | migrations.AlterField( 25 | model_name='face', 26 | name='frame', 27 | field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='query.Frame'), 28 | ), 29 | migrations.AlterField( 30 | model_name='pose', 31 | name='frame', 32 | field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='query.Frame'), 33 | ), 34 | migrations.RemoveField( 35 | model_name='face', 36 | name='person', 37 | ), 38 | migrations.AlterUniqueTogether( 39 | name='face', 40 | unique_together=set([('labeler', 'frame', 'bbox_x1', 'bbox_x2', 'bbox_y1', 'bbox_y2')]), 41 | ), 42 | migrations.DeleteModel( 43 | name='Person', 44 | ), 45 | ] 46 | -------------------------------------------------------------------------------- /app/query/migrations/0024_labeledcommercial_labeledinterview_labeledpanel.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by Django 1.11 on 2018-10-26 10:12 3 | from __future__ import unicode_literals 4 | 5 | from django.db import migrations, models 6 | import django.db.models.deletion 7 | 8 | 9 | class Migration(migrations.Migration): 10 | 11 | dependencies = [ 12 | ('query', '0023_auto_20181026_1010'), 13 | ] 14 | 15 | operations = [ 16 | migrations.CreateModel( 17 | name='LabeledCommercial', 18 | fields=[ 19 | ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), 20 | ('start', models.FloatField()), 21 | ('end', models.FloatField()), 22 | ('video', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='query.Video')), 
23 | ], 24 | ), 25 | migrations.CreateModel( 26 | name='LabeledInterview', 27 | fields=[ 28 | ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), 29 | ('start', models.FloatField()), 30 | ('end', models.FloatField()), 31 | ('interviewer1', models.CharField(blank=True, default=None, max_length=256, null=True)), 32 | ('interviewer2', models.CharField(blank=True, default=None, max_length=256, null=True)), 33 | ('guest1', models.CharField(blank=True, default=None, max_length=256, null=True)), 34 | ('guest2', models.CharField(blank=True, default=None, max_length=256, null=True)), 35 | ('original', models.BooleanField(default=True)), 36 | ('scattered_clips', models.BooleanField(default=False)), 37 | ('video', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='query.Video')), 38 | ], 39 | ), 40 | migrations.CreateModel( 41 | name='LabeledPanel', 42 | fields=[ 43 | ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), 44 | ('start', models.FloatField()), 45 | ('end', models.FloatField()), 46 | ('num_panelists', models.IntegerField()), 47 | ('video', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='query.Video')), 48 | ], 49 | ), 50 | ] 51 | -------------------------------------------------------------------------------- /app/query/migrations/0025_merge_20181227_1812.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by Django 1.11 on 2018-12-27 18:12 3 | from __future__ import unicode_literals 4 | 5 | from django.db import migrations 6 | 7 | 8 | class Migration(migrations.Migration): 9 | 10 | dependencies = [ 11 | ('query', '0024_labeledcommercial_labeledinterview_labeledpanel'), 12 | ('query', '0024_auto_20181103_2019'), 13 | ] 14 | 15 | operations = [ 16 | ] 17 | -------------------------------------------------------------------------------- 
/app/query/migrations/0026_auto_20181227_1812.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by Django 1.11 on 2018-12-27 18:12 3 | from __future__ import unicode_literals 4 | 5 | from django.db import migrations, models 6 | import django.db.models.deletion 7 | 8 | 9 | class Migration(migrations.Migration): 10 | 11 | dependencies = [ 12 | ('query', '0025_merge_20181227_1812'), 13 | ] 14 | 15 | operations = [ 16 | migrations.CreateModel( 17 | name='HairColor', 18 | fields=[ 19 | ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), 20 | ], 21 | ), 22 | migrations.CreateModel( 23 | name='HairColorName', 24 | fields=[ 25 | ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), 26 | ('name', models.CharField(max_length=256)), 27 | ], 28 | ), 29 | migrations.AddField( 30 | model_name='haircolor', 31 | name='color', 32 | field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='query.HairColorName'), 33 | ), 34 | migrations.AddField( 35 | model_name='haircolor', 36 | name='face', 37 | field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='query.Face'), 38 | ), 39 | migrations.AddField( 40 | model_name='haircolor', 41 | name='labeler', 42 | field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='query.Labeler'), 43 | ), 44 | migrations.AlterUniqueTogether( 45 | name='haircolor', 46 | unique_together=set([('labeler', 'face')]), 47 | ), 48 | ] 49 | -------------------------------------------------------------------------------- /app/query/migrations/0027_auto_20181227_1829.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by Django 1.11 on 2018-12-27 18:29 3 | from __future__ import unicode_literals 4 | 5 | from django.db import migrations, models 6 | 7 | 8 | class 
Migration(migrations.Migration): 9 | 10 | dependencies = [ 11 | ('query', '0026_auto_20181227_1812'), 12 | ] 13 | 14 | operations = [ 15 | migrations.AlterField( 16 | model_name='haircolorname', 17 | name='name', 18 | field=models.CharField(max_length=256, unique=True), 19 | ), 20 | ] 21 | -------------------------------------------------------------------------------- /app/query/migrations/0028_auto_20181228_1555.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by Django 1.11 on 2018-12-28 15:55 3 | from __future__ import unicode_literals 4 | 5 | from django.db import migrations, models 6 | 7 | 8 | class Migration(migrations.Migration): 9 | 10 | dependencies = [ 11 | ('query', '0027_auto_20181227_1829'), 12 | ] 13 | 14 | operations = [ 15 | migrations.AlterField( 16 | model_name='canonicalshow', 17 | name='name', 18 | field=models.CharField(max_length=256, unique=True), 19 | ), 20 | migrations.AlterField( 21 | model_name='channel', 22 | name='name', 23 | field=models.CharField(max_length=256, unique=True), 24 | ), 25 | migrations.AlterField( 26 | model_name='identity', 27 | name='name', 28 | field=models.CharField(max_length=256, unique=True), 29 | ), 30 | migrations.AlterField( 31 | model_name='show', 32 | name='name', 33 | field=models.CharField(max_length=256, unique=True), 34 | ), 35 | migrations.AlterField( 36 | model_name='topic', 37 | name='name', 38 | field=models.CharField(max_length=256, unique=True), 39 | ), 40 | ] 41 | -------------------------------------------------------------------------------- /app/query/migrations/0028_auto_20190101_2032.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by Django 1.11 on 2019-01-01 20:32 3 | from __future__ import unicode_literals 4 | 5 | from django.db import migrations, models 6 | import django.db.models.deletion 7 | 8 | 9 | class 
Migration(migrations.Migration): 10 | 11 | dependencies = [ 12 | ('query', '0027_auto_20181227_1829'), 13 | ] 14 | 15 | operations = [ 16 | migrations.CreateModel( 17 | name='Clothing', 18 | fields=[ 19 | ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), 20 | ], 21 | ), 22 | migrations.CreateModel( 23 | name='ClothingName', 24 | fields=[ 25 | ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), 26 | ('name', models.CharField(max_length=256, unique=True)), 27 | ], 28 | ), 29 | migrations.AddField( 30 | model_name='clothing', 31 | name='clothing', 32 | field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='query.ClothingName'), 33 | ), 34 | migrations.AddField( 35 | model_name='clothing', 36 | name='face', 37 | field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='query.Face'), 38 | ), 39 | migrations.AddField( 40 | model_name='clothing', 41 | name='labeler', 42 | field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='query.Labeler'), 43 | ), 44 | migrations.AlterUniqueTogether( 45 | name='clothing', 46 | unique_together=set([('labeler', 'face')]), 47 | ), 48 | ] 49 | -------------------------------------------------------------------------------- /app/query/migrations/0029_video_duplicate.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by Django 1.11 on 2019-01-03 19:56 3 | from __future__ import unicode_literals 4 | 5 | from django.db import migrations, models 6 | 7 | 8 | class Migration(migrations.Migration): 9 | 10 | dependencies = [ 11 | ('query', '0028_auto_20181228_1555'), 12 | ] 13 | 14 | operations = [ 15 | migrations.AddField( 16 | model_name='video', 17 | name='duplicate', 18 | field=models.BooleanField(default=False), 19 | ), 20 | ] 21 | -------------------------------------------------------------------------------- 
/app/query/migrations/0030_video_corrupted.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by Django 1.11 on 2019-01-03 20:14 3 | from __future__ import unicode_literals 4 | 5 | from django.db import migrations, models 6 | 7 | 8 | class Migration(migrations.Migration): 9 | 10 | dependencies = [ 11 | ('query', '0029_video_duplicate'), 12 | ] 13 | 14 | operations = [ 15 | migrations.AddField( 16 | model_name='video', 17 | name='corrupted', 18 | field=models.BooleanField(default=False), 19 | ), 20 | ] 21 | -------------------------------------------------------------------------------- /app/query/migrations/0031_merge_20190105_1016.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by Django 1.11 on 2019-01-05 10:16 3 | from __future__ import unicode_literals 4 | 5 | from django.db import migrations 6 | 7 | 8 | class Migration(migrations.Migration): 9 | 10 | dependencies = [ 11 | ('query', '0028_auto_20190101_2032'), 12 | ('query', '0030_video_corrupted'), 13 | ] 14 | 15 | operations = [ 16 | ] 17 | -------------------------------------------------------------------------------- /app/query/migrations/0032_auto_20190106_1451.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by Django 1.11 on 2019-01-06 14:51 3 | from __future__ import unicode_literals 4 | 5 | from django.db import migrations, models 6 | import django.db.models.deletion 7 | 8 | 9 | class Migration(migrations.Migration): 10 | 11 | dependencies = [ 12 | ('query', '0031_merge_20190105_1016'), 13 | ] 14 | 15 | operations = [ 16 | migrations.CreateModel( 17 | name='HairLength', 18 | fields=[ 19 | ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), 20 | ('face', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, 
to='query.Face')), 21 | ('labeler', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='query.Labeler')), 22 | ], 23 | ), 24 | migrations.CreateModel( 25 | name='HairLengthName', 26 | fields=[ 27 | ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), 28 | ('name', models.CharField(max_length=256, unique=True)), 29 | ], 30 | ), 31 | migrations.AddField( 32 | model_name='hairlength', 33 | name='length', 34 | field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='query.HairLengthName'), 35 | ), 36 | migrations.AlterUniqueTogether( 37 | name='hairlength', 38 | unique_together=set([('labeler', 'face')]), 39 | ), 40 | ] 41 | -------------------------------------------------------------------------------- /app/query/migrations/0033_auto_20190311_1730.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by Django 1.11 on 2019-03-11 17:30 3 | from __future__ import unicode_literals 4 | 5 | from django.db import migrations, models 6 | import django.db.models.deletion 7 | 8 | 9 | class Migration(migrations.Migration): 10 | 11 | dependencies = [ 12 | ('query', '0032_auto_20190106_1451'), 13 | ] 14 | 15 | operations = [ 16 | migrations.CreateModel( 17 | name='FaceTag', 18 | fields=[ 19 | ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), 20 | ('score', models.FloatField(default=1.0)), 21 | ('face', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='query.Face')), 22 | ('labeler', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='query.Labeler')), 23 | ('tag', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='query.Tag')), 24 | ], 25 | ), 26 | migrations.CreateModel( 27 | name='IdentityTag', 28 | fields=[ 29 | ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), 30 | ('name', 
models.CharField(max_length=256, unique=True)), 31 | ], 32 | ), 33 | migrations.AddField( 34 | model_name='identity', 35 | name='tags', 36 | field=models.ManyToManyField(blank=True, to='query.IdentityTag'), 37 | ), 38 | migrations.AlterUniqueTogether( 39 | name='facetag', 40 | unique_together=set([('labeler', 'face')]), 41 | ), 42 | ] 43 | -------------------------------------------------------------------------------- /app/query/migrations/0034_auto_20190711_1442.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by Django 1.11 on 2019-07-11 14:42 3 | from __future__ import unicode_literals 4 | 5 | from django.db import migrations, models 6 | import django.db.models.deletion 7 | 8 | 9 | class Migration(migrations.Migration): 10 | 11 | dependencies = [ 12 | ('query', '0033_auto_20190311_1730'), 13 | ] 14 | 15 | operations = [ 16 | migrations.CreateModel( 17 | name='FrameSampler', 18 | fields=[ 19 | ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), 20 | ('name', models.CharField(max_length=256)), 21 | ], 22 | ), 23 | migrations.AddField( 24 | model_name='frame', 25 | name='sampler', 26 | field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.CASCADE, to='query.FrameSampler'), 27 | ), 28 | ] 29 | -------------------------------------------------------------------------------- /app/query/migrations/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scanner-research/esper-tv/179ef57d536ebd52f93697aab09bf5abec19ce93/app/query/migrations/__init__.py -------------------------------------------------------------------------------- /app/query/templates/base.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Esper 5 | 9 | 10 | 11 | 12 | 13 | 14 |
15 | {% block content %}{% endblock %} 16 |
17 | 18 | 19 | -------------------------------------------------------------------------------- /app/query/templates/index.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | {% load render_bundle from webpack_loader %} 3 | 4 | {% block content %} 5 | {% render_bundle 'web' %} 6 | {% endblock %} 7 | -------------------------------------------------------------------------------- /app/query/tests.py: -------------------------------------------------------------------------------- 1 | from django.test import TestCase 2 | 3 | # Create your tests here. 4 | -------------------------------------------------------------------------------- /app/query/urls.py: -------------------------------------------------------------------------------- 1 | from django.conf.urls import url 2 | from . import views 3 | 4 | urlpatterns = [ 5 | url(r'^api/search', views.search, name='search'), 6 | url(r'^api/schema', views.schema, name='schema'), 7 | url(r'^api/subtitles', views.subtitles, name='subtitles'), 8 | url(r'^api/labeled', views.labeled, name='labeled'), 9 | url(r'^api/newthings', views.newthings, name='newthings'), 10 | url(r'^', views.index, name='index') 11 | ] 12 | -------------------------------------------------------------------------------- /app/requirements.app.txt: -------------------------------------------------------------------------------- 1 | jupyter-console==5.2.0 2 | jupyter==1.0.0 3 | psycopg2==2.6.2 4 | django==1.11 5 | gunicorn==19.6.0 6 | django-webpack-loader==0.5.0 7 | progressbar2==3.34.2 8 | django-bulk-update==2.2.0 9 | beakerx 10 | matplotlib==2.1.0 11 | seaborn==0.8.1 12 | django-extensions==1.9.6 13 | jupyter_contrib_nbextensions==0.3.3 14 | jupyter_nbextensions_configurator==0.2.8 15 | autopep8 16 | pysrt==1.1.1 17 | sqlparse==0.2.4 18 | internetarchive 19 | pyspark==2.4.0 20 | pyopenssl 21 | statsmodels 22 | scikit-learn 23 | tqdm 24 | google_images_download 25 | psutil 26 | redis 
27 | msgpack 28 | pickle_cache 29 | iterextras 30 | qgrid 31 | ipyparallel 32 | 33 | #pyro-ppl==0.2.1 34 | #vega==0.5.0 35 | #textacy 36 | #gensim 37 | -------------------------------------------------------------------------------- /app/requirements.base.txt: -------------------------------------------------------------------------------- 1 | dill==0.2.7.1 2 | unionfind==0.0.9 3 | numpy==1.15.4 4 | pandas==0.23.4 5 | torch==0.3.1 6 | -------------------------------------------------------------------------------- /app/scripts/build-tf.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | if [ "$build_tf" = "on" ]; then 5 | # Install bazel 6 | apt-get update && apt-get install -y openjdk-8-jdk mlocate 7 | echo "deb [arch=amd64] http://storage.googleapis.com/bazel-apt stable jdk1.8" | \ 8 | tee /etc/apt/sources.list.d/bazel.list 9 | curl https://bazel.build/bazel-release.pub.gpg | apt-key add - 10 | apt-get update && apt-get install -y bazel 11 | 12 | # Install python deps 13 | # keras_*: https://github.com/bazelbuild/continuous-integration/issues/305 14 | pip3 install six numpy wheel keras_applications keras_preprocessing 15 | 16 | git clone -b v${tf_version} https://github.com/tensorflow/tensorflow/ 17 | cd tensorflow 18 | updatedb 19 | 20 | if [ "$device" = "cpu" ]; then 21 | # TODO(wcrichto): getting internal errors w/ MKL on GCE 22 | 23 | PYTHON_BIN_PATH=$(which python3) \ 24 | PYTHON_LIB_PATH=/usr/local/lib/python3.5/dist-packages \ 25 | TF_NEED_MKL=0 \ 26 | CC_OPT_FLAGS=-march=core-avx2 \ 27 | TF_NEED_GCP=0 \ 28 | TF_NEED_S3=0 \ 29 | TF_NEED_GDR=0 \ 30 | TF_NEED_MPI=0 \ 31 | TF_NEED_HDFS=0 \ 32 | TF_ENABLE_XLA=0 \ 33 | TF_NEED_VERBS=0 \ 34 | TF_NEED_OPENCL=0 \ 35 | TF_NEED_CUDA=0 \ 36 | TF_NEED_IGNITE=0 \ 37 | TF_NEED_OPENCL_SYCL=0 \ 38 | TF_NEED_ROCM=0 \ 39 | TF_DOWNLOAD_CLANG=0 \ 40 | TF_SET_ANDROID_WORKSPACE=0 \ 41 | ./configure 42 | 43 | # ares: 
https://github.com/tensorflow/tensorflow/issues/23402#issuecomment-436932197 44 | bazel build \ 45 | --config=opt \ 46 | --define=grpc_no_ares=true \ 47 | --incompatible_remove_native_http_archive=false \ 48 | //tensorflow/tools/pip_package:build_pip_package 49 | else 50 | echo "No GPU TF support yet" 51 | exit 1 52 | fi 53 | 54 | bazel-bin/tensorflow/tools/pip_package/build_pip_package /tmp/tensorflow_pkg 55 | pip3 install /tmp/tensorflow_pkg/* 56 | cd .. 57 | rm -rf tensorflow 58 | 59 | else 60 | if [ "$device" = "cpu" ]; then 61 | pip3 install tensorflow==${tf_version}; 62 | else 63 | pip3 install tensorflow-gpu==${tf_version}; 64 | fi 65 | fi 66 | -------------------------------------------------------------------------------- /app/scripts/clear-proxy.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # We pass https_proxy from host to container by default. However, if host doesn't have proxy set, then https_proxy 4 | # is set to an empty string in the container. This causes grpc to raise annoying (but benign) errors whenever it's 5 | # run, namely through Scanner. This script unsets those variables if they are set to a blank string to avoid this. 6 | 7 | # Also, note that this script MUST be run in the form: 8 | # $ . ./scripts/clear-proxy 9 | # i.e. with the dot in front, as this ensures that the unset clears variables in the caller's shell, not in just 10 | # this the context of this script. 11 | 12 | if [ -z "$https_proxy" ]; then 13 | unset https_proxy; 14 | unset http_proxy; 15 | fi 16 | -------------------------------------------------------------------------------- /app/scripts/create-sample.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | # This script is a reproducible way to create the sample DB provided for the demo. Requires a GPU. 
5 | 6 | rm -rf example-dataset.tar.gz example.mp4 paths db-dump.sql assets/thumbnails scanner_db 7 | youtube-dl "https://www.youtube.com/watch?v=dQw4w9WgXcQ" -f mp4 -o example.mp4 8 | echo "example.mp4" > paths 9 | python manage.py cleanup Video 10 | python manage.py cleanup PersonTrack 11 | esper-run query/datasets/default/ingest.py 12 | python manage.py face_detect paths 13 | python manage.py pose_detect paths 14 | python manage.py gender_scanner paths 15 | python manage.py embed_faces_scanner paths tinyfaces 16 | python manage.py track_face paths 17 | esper-run query/datasets/default/export.py 18 | tar -czf example-dataset.tar.gz paths assets/thumbnails db-dump.sql scanner_db 19 | -------------------------------------------------------------------------------- /app/scripts/export-table.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | TABLE=$1 3 | echo "\copy (SELECT * FROM $TABLE) TO '/app/data/pg/$TABLE.csv' WITH CSV HEADER;" | psql -h db esper will 4 | -------------------------------------------------------------------------------- /app/scripts/generate-protobuf.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | pushd /app 4 | cargo install protobuf-codegen -q || true 5 | protoc --python_out=esper --rust_out=subserver/src -I=.deps datatypes.proto 6 | popd 7 | -------------------------------------------------------------------------------- /app/scripts/google-setup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This is for Docker's use only. Don't run this yourself. 4 | 5 | if [[ ! 
-z $GOOGLE_PROJECT ]]; then 6 | gcloud config set project ${GOOGLE_PROJECT} 7 | gcloud auth activate-service-account --key-file=$GOOGLE_APPLICATION_CREDENTIALS 8 | fi 9 | -------------------------------------------------------------------------------- /app/scripts/jupyter-setup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | python3 /app/scripts/set-jupyter-password.py 4 | -------------------------------------------------------------------------------- /app/scripts/load-cloud-db.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | pg_dump -h db -U will esper > cloud_db.sql 4 | 5 | # read -p "This will delete everything in your local database. Are you sure? [y/N] " choice 6 | # case "$choice" in 7 | # y|Y ) 8 | # echo "Resetting database with cloud data" 9 | # FLAGS="-h db -U ${DJANGO_DB_USER}" 10 | # #echo "drop database esper; create database esper;" | psql ${FLAGS} -d postgres 11 | # mysqldump --set-gtid-purged=off -h db-cloud -u will ${DB_NAME} | ${MYSQL};; 12 | # * ) 13 | # echo "Not doing anything" 14 | # esac 15 | -------------------------------------------------------------------------------- /app/scripts/set-jupyter-password.py: -------------------------------------------------------------------------------- 1 | import pexpect 2 | import os 3 | 4 | # This script changes Jupyter from using a random hash by default to a custom password, set in configure.py. 
5 | 6 | if 'JUPYTER_PASSWORD' not in os.environ: 7 | exit() 8 | 9 | JUPYTER_DIR = '/root/.jupyter' 10 | if not os.path.isdir(JUPYTER_DIR): 11 | os.mkdir(JUPYTER_DIR) 12 | 13 | p = pexpect.spawn('jupyter notebook password') 14 | p.expect('Enter password: ') 15 | p.sendline(os.environ['JUPYTER_PASSWORD']) 16 | p.sendline(os.environ['JUPYTER_PASSWORD']) 17 | p.read() 18 | -------------------------------------------------------------------------------- /app/scripts/transfer-to-bigquery.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | TABLE=$1 3 | gsutil cp /app/data/pg/$TABLE.csv gs://esper/tmp/$TABLE.csv 4 | bq load --autodetect --source_format=CSV tvnews.$TABLE gs://esper/tmp/$TABLE.csv 5 | gsutil rm gs://esper/tmp/$TABLE.csv 6 | -------------------------------------------------------------------------------- /app/scripts/upload-sample.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | gsutil mv app/example-dataset.tar.gz gs://esper/ 5 | gsutil acl ch -u AllUsers:R gs://esper/example-dataset.tar.gz 6 | -------------------------------------------------------------------------------- /app/test/README.md: -------------------------------------------------------------------------------- 1 | This directory contains tests for `esper/app/esper` using the Python3 2 | [unittest](http://docs.python.org/3/library/unittest.html) framework. 3 | 4 | To add a test, add a new Python script and create classes that inherit from 5 | `unittest.TestCase`. These classes should have functions that start with 6 | `test`. 7 | 8 | To run the tests: 9 | 10 | From the `esper/app` directory, run `python3 -m unittest discover test`. 11 | 12 | This directory currently has no tests, but it's still around for people to add 13 | tests in the future. 
14 | -------------------------------------------------------------------------------- /app/test/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scanner-research/esper-tv/179ef57d536ebd52f93697aab09bf5abec19ce93/app/test/__init__.py -------------------------------------------------------------------------------- /app/webpack.config.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | const path = require('path'); 4 | const BundleTracker = require('webpack-bundle-tracker'); 5 | const ExtractTextPlugin = require('extract-text-webpack-plugin'); 6 | const webpack = require('webpack'); 7 | 8 | module.exports = { 9 | entry: { 10 | web: './assets/js/web', 11 | styles: './assets/css/main', 12 | }, 13 | 14 | context: __dirname, 15 | 16 | // Include source maps for all compiled files 17 | devtool: 'source-map', 18 | 19 | // Put all output files at assets/bundles 20 | output: { 21 | path: path.resolve('./assets/bundles/'), 22 | filename: "[name].js", 23 | }, 24 | 25 | plugins: [ 26 | // BundleTracker lets Django know about the webpack build status, displaying errors if 27 | // they occur 28 | new BundleTracker({filename: './assets/bundles/webpack-stats.json'}), 29 | 30 | // ExtractTextPlugin allows us to separate CSS output files from JS. 
31 | // See: https://github.com/webpack-contrib/extract-text-webpack-plugin 32 | new ExtractTextPlugin("[name].css"), 33 | ], 34 | 35 | module: { 36 | rules: [{ 37 | test: /\.scss$/, 38 | use: ExtractTextPlugin.extract({ 39 | use: [{ 40 | loader: "css-loader" 41 | }, { 42 | loader: "sass-loader" 43 | }] 44 | }) 45 | }, { 46 | test: /\.css$/, 47 | use: ExtractTextPlugin.extract({ 48 | use: [{ 49 | loader: "css-loader" 50 | }] 51 | }) 52 | }, { 53 | // Stops Bootstrap from complaining 54 | test: /\.(png|woff|woff2|eot|ttf|svg|otf)$/, 55 | loader: 'url-loader?limit=100000' 56 | }, { 57 | // Compile JSX files to JS 58 | test: /\.jsx?$/, 59 | exclude: /node_modules/, 60 | use: [{ 61 | loader: 'babel-loader', 62 | options: { 63 | plugins: ['transform-decorators-legacy'], 64 | presets: ['env', 'stage-0', 'react'] 65 | } 66 | }] 67 | }, { 68 | test: /\.js$/, 69 | use: ["source-map-loader"], 70 | enforce: "pre" 71 | }] 72 | }, 73 | 74 | // TODO: generic way to resolve aliases? 75 | resolve: { 76 | symlinks: false, // https://github.com/npm/npm/issues/5875 77 | modules: ['node_modules', 'assets'], 78 | extensions: ['.js', '.jsx', '.scss', '.css'] 79 | } 80 | }; 81 | -------------------------------------------------------------------------------- /config/google.toml: -------------------------------------------------------------------------------- 1 | [google] 2 | project = "visualdb-1046" 3 | zone = "us-east1-d" 4 | 5 | [storage] 6 | type = "google" 7 | bucket = "esper" 8 | path = "public" 9 | base_image_name = "esper-base" 10 | 11 | [database] 12 | # type = "local" 13 | type = "google" 14 | name = "esper-dev" 15 | user = "will" 16 | password = "foobar" 17 | 18 | [compute] 19 | gpu = false 20 | -------------------------------------------------------------------------------- /config/local.toml: -------------------------------------------------------------------------------- 1 | [storage] 2 | type = "local" # Video/image files stored on the local machine 3 | path = "/var/esper" 
# Currently unused, ignore 4 | base_image_name = "esper-base" # Docker base image name 5 | 6 | [database] 7 | type = "local" # Database is stored/run locally 8 | user = "will" # Username 9 | password = "foobar" # Password 10 | 11 | [compute] 12 | gpu = false # Whether or not you have a GPU on the local machine 13 | -------------------------------------------------------------------------------- /db/.dockerignore: -------------------------------------------------------------------------------- 1 | data -------------------------------------------------------------------------------- /db/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM postgres:9.5 2 | COPY postgresql.conf /var/lib/postgresql/data/postgresql.conf 3 | -------------------------------------------------------------------------------- /guides/dev.md: -------------------------------------------------------------------------------- 1 | # Developing Esper 2 | 3 | If you're developing features for Esper, first talk to [Will](mailto:wcrichto@cs.stanford.edu) and join our [Slack channel](https://cmugraphics.slack.com). 4 | 5 | 6 | ## Frontend 7 | 8 | While editing the SASS or JSX files, use the Webpack watcher: 9 | ``` 10 | dc exec app npm run watch 11 | ``` 12 | 13 | This will automatically rebuild all the frontend files into `assets/bundles` when you change a relevant file. 14 | 15 | > Note: the watching functionality appears to be broken on OS X, so you'll want to dev on a Linux box. I don't think this is fixable. [See this issue](https://github.com/rails/rails/issues/25186). 
16 | -------------------------------------------------------------------------------- /nginx/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nginx:latest 2 | -------------------------------------------------------------------------------- /nginx/google.conf.template: -------------------------------------------------------------------------------- 1 | # TODO(wcrichto): fallback doesn't work, 404 doesn't get caught because it's behind 2 | # a redirect now. 3 | 4 | location /system_media { 5 | rewrite ^/system_media/(.*)$ /${BUCKET}/$1 break; 6 | proxy_pass https://storage.cloud.google.com; 7 | proxy_pass_request_headers on; 8 | proxy_intercept_errors on; 9 | # error_page 301 302 307 = @handle_redirect; 10 | # error_page 403 404 = @fallback; 11 | } 12 | 13 | location @handle_redirect { 14 | set $orig_loc $upstream_http_location; 15 | proxy_pass $orig_loc; 16 | error_page 403 = @fallback; 17 | error_page 404 = @fallback; 18 | } 19 | -------------------------------------------------------------------------------- /nginx/local.conf.template: -------------------------------------------------------------------------------- 1 | # INSECURE - allows reads of any file in your esper directory 2 | location /system_media { 3 | alias /app; 4 | error_page 403 404 = @fallback; 5 | } 6 | -------------------------------------------------------------------------------- /nginx/nginx.conf.template: -------------------------------------------------------------------------------- 1 | worker_processes 32; 2 | 3 | events { 4 | worker_connections 1024; 5 | use epoll; 6 | multi_accept on; 7 | } 8 | 9 | http { 10 | include /etc/nginx/mime.types; 11 | 12 | server { 13 | listen ${PORT}; 14 | server_name server; 15 | 16 | access_log /dev/stdout; 17 | error_log /dev/stdout debug; 18 | 19 | client_max_body_size 100M; 20 | 21 | location /static { 22 | alias /app/assets; 23 | } 24 | 25 | location /frameserver { 26 | rewrite ^/frameserver/(.*)$ /$1 
break; 27 | proxy_pass http://frameserver:7500; 28 | } 29 | 30 | include /etc/nginx/${ESPER_ENV}.conf; 31 | 32 | location @fallback { 33 | rewrite ^(.*)$ /fallback$1; 34 | proxy_pass http://app:8000; 35 | } 36 | 37 | location / { 38 | uwsgi_read_timeout 300; 39 | proxy_connect_timeout 300; 40 | proxy_send_timeout 300; 41 | proxy_read_timeout 600; 42 | proxy_pass http://app:8000; 43 | } 44 | } 45 | 46 | server { 47 | listen 8888; 48 | server_name notebook; 49 | 50 | client_max_body_size 100M; 51 | 52 | include /etc/nginx/${ESPER_ENV}.conf; 53 | 54 | location /django { 55 | rewrite ^/django/(.*) /$1 break; 56 | proxy_pass http://localhost:80; 57 | } 58 | 59 | location / { 60 | proxy_pass http://app:8888; 61 | 62 | # https://stackoverflow.com/questions/22665809/how-to-configure-ipython-behind-nginx-in-a-subpath/23912400#23912400 63 | proxy_http_version 1.1; 64 | proxy_set_header Upgrade $http_upgrade; 65 | proxy_set_header Connection "upgrade"; 66 | proxy_set_header Origin ""; 67 | } 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /nginx/subst.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | for f in /tmp/*.template 4 | do 5 | full=$(basename $f) 6 | name="${full%.*}" 7 | envsubst '$ESPER_ENV $BUCKET $PORT $DATA_PATH $HOSTNAME' < $f > /etc/nginx/$name 8 | done 9 | 10 | nginx -g "daemon off;" 11 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | docker-compose 2 | pyyaml 3 | toml 4 | dotmap 5 | -------------------------------------------------------------------------------- /scripts/emergency-reset.py: -------------------------------------------------------------------------------- 1 | from flask import Flask 2 | import subprocess as sp 3 | 4 | app = Flask(__name__) 5 | 6 | page = """ 7 | 8 | 9 | 17 | 18 | 19 | 20 | """ 21 | 
22 | 23 | @app.route("/") 24 | def index(): 25 | return page 26 | 27 | 28 | @app.route('/reset') 29 | def reset(): 30 | sp.check_call( 31 | 'docker-compose stop -t 0 && docker-compose down && docker-compose up -d', shell=True) 32 | return "" 33 | -------------------------------------------------------------------------------- /scripts/gce-gpu-setup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Install NVIDIA GPU driver 4 | # https://cloud.google.com/compute/docs/gpus/add-gpus#install-driver-script 5 | 6 | echo "Checking for CUDA and installing." 7 | # Check for CUDA and try to install. 8 | if ! dpkg-query -W cuda-9-0; then 9 | # The 16.04 installer works with 16.10. 10 | curl -O http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/x86_64/cuda-repo-ubuntu1604_9.0.176-1_amd64.deb 11 | dpkg -i ./cuda-repo-ubuntu1604_9.0.176-1_amd64.deb 12 | apt-key adv --fetch-keys http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/x86_64/7fa2af80.pub 13 | apt-get update 14 | apt-get install cuda-9-0 -y 15 | fi 16 | # Enable persistence mode 17 | nvidia-smi -pm 1 18 | 19 | 20 | # Install nvidia-docker 21 | # https://github.com/NVIDIA/nvidia-docker#ubuntu-140416041804-debian-jessiestretch 22 | 23 | # Add the package repositories 24 | curl -s -L https://nvidia.github.io/nvidia-docker/gpgkey | \ 25 | sudo apt-key add - 26 | distribution=$(. 
/etc/os-release;echo $ID$VERSION_ID) 27 | curl -s -L https://nvidia.github.io/nvidia-docker/$distribution/nvidia-docker.list | \ 28 | sudo tee /etc/apt/sources.list.d/nvidia-docker.list 29 | sudo apt-get update 30 | 31 | # Install nvidia-docker2 and reload the Docker daemon configuration 32 | sudo apt-get install -y nvidia-docker2 33 | sudo pkill -SIGHUP dockerd 34 | -------------------------------------------------------------------------------- /scripts/get-bucket-size.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | bq query -q --format=json "select last(storage_byte_hours) from storageanalysis.storage" | jq '(.[0].f0_ | tonumber) / (1024 * 1024 * 1024 * 1024 * 24)' 3 | echo "terabytes" 4 | -------------------------------------------------------------------------------- /scripts/jupyter-token.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | docker-compose logs app | grep "?token" | sed 's/.*\?token=\(.*\)/\1/p' | tail -n 1 3 | -------------------------------------------------------------------------------- /scripts/run-app-tests.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Run tests in app/tests (need to be in the app directory) 4 | cd app 5 | python3 -m unittest discover test 6 | -------------------------------------------------------------------------------- /scripts/serve-reset.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | pip3 install flask 4 | FLASK_APP=scripts/emergency-reset.py flask run --host=0.0.0.0 --port=9999 5 | -------------------------------------------------------------------------------- /scripts/setup-db-csv-dir.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | docker-compose exec db bash -c "mkdir -p /app/pg && chown postgres /app/pg" 4 | 
-------------------------------------------------------------------------------- /scripts/transfer-db.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | SERVICE_ACCOUNT=$(gcloud sql instances describe esper-shared --format=json | jq -r '.serviceAccountEmailAddress') 4 | 5 | gcloud sql instances export esper-public-dev gs://esper/tmp-dump.sql --database=esper 6 | gsutil acl ch -u $SERVICE_ACCOUNT:W gs://esper 7 | gsutil acl ch -u $SERVICE_ACCOUNT:R gs://esper/tmp-dump.sql 8 | cat <